In [1]:
import pandas as pd
from textblob import TextBlob
import texthero

In [2]:
# Load Joe Biden's 2020 tweets; each row carries the tweet text plus its
# engagement counts (replies, retweets, quotes, likes).
tweets_csv = 'data/joe_biden_tweets_2020.csv'
data = pd.read_csv(tweets_csv)
data.head()

Unnamed: 0,timestamp,url,tweet,replies,retweets,quotes,likes
0,2020-01-01 01:15,https://twitter.com/JoeBiden/status/1212180387...,Our final fundraising deadline of 2019 is just...,411,269,32,948
1,2020-01-01 18:35,https://twitter.com/JoeBiden/status/1212442112...,Every single human being deserves to be treate...,1136,2423,182,11574
2,2020-01-02 00:01,https://twitter.com/JoeBiden/status/1212524152...,With just over one month until the Iowa Caucus...,332,368,29,1457
3,2020-01-02 01:05,https://twitter.com/JoeBiden/status/1212540258...,This election is about the soul of our nation ...,5199,10192,1153,44886
4,2020-01-02 02:07,https://twitter.com/JoeBiden/status/1212556035...,Every day that Donald Trump remains in the Whi...,1070,2005,128,9581


- Using TextBlob, evaluate the sentiment (polarity and subjectivity) of each tweet
- show the five most positive tweets (based on polarity)
- show the five most negative tweets (based on polarity)
- show the five most subjective tweets
- Using spaCy, perform named entity extraction
- What are the five most frequent `PERSON` entities?
- What are the five most frequent `EVENT` entities?

In [3]:
def _textblob_sentiment(text):
    """Return TextBlob's sentiment result (polarity, subjectivity) for one tweet."""
    return TextBlob(text).sentiment


# Score every tweet once, then split the result into two numeric columns.
# Note: the 'sentiment' column holds the *polarity* component.
sentiment = data['tweet'].apply(_textblob_sentiment)
data['sentiment'] = sentiment.apply(lambda score: score.polarity)
data['subjectivity'] = sentiment.apply(lambda score: score.subjectivity)
data.head()

Unnamed: 0,timestamp,url,tweet,replies,retweets,quotes,likes,sentiment,subjectivity
0,2020-01-01 01:15,https://twitter.com/JoeBiden/status/1212180387...,Our final fundraising deadline of 2019 is just...,411,269,32,948,-0.075,0.475
1,2020-01-01 18:35,https://twitter.com/JoeBiden/status/1212442112...,Every single human being deserves to be treate...,1136,2423,182,11574,-0.295238,0.452381
2,2020-01-02 00:01,https://twitter.com/JoeBiden/status/1212524152...,With just over one month until the Iowa Caucus...,332,368,29,1457,0.0,0.0
3,2020-01-02 01:05,https://twitter.com/JoeBiden/status/1212540258...,This election is about the soul of our nation ...,5199,10192,1153,44886,0.0,0.0
4,2020-01-02 02:07,https://twitter.com/JoeBiden/status/1212556035...,Every day that Donald Trump remains in the Whi...,1070,2005,128,9581,0.033333,0.075


In [4]:
# Five most positive tweets: highest polarity first.
data.sort_values(by='sentiment', ascending=False)['tweet'].head(5)

2024    When they say “the best is yet to come,” that’...
1267    Make no mistake: I believe our best days still...
1891    I know a thing or two about being Vice Preside...
607     It's been a great night — thank you for all of...
1471    It’s in some of our darkest moments of despair...
Name: tweet, dtype: object

In [5]:
# Full, untruncated text of the five most positive tweets (highest polarity first).
# The selection is recomputed explicitly rather than read from IPython's `_`
# output cache, so this cell gives the same result regardless of which cell
# happened to run immediately before it.
list(data.sort_values('sentiment', ascending=False).tweet.head())

['When they say “the best is yet to come,” that’s a threat. https://t.co/SoKKF9KQYO',
 'Make no mistake: I believe our best days still lie ahead. We can and will transform this nation — together.',
 'I know a thing or two about being Vice President — and I know @KamalaHarris is the best partner I could have for the fight ahead.',
 "It's been a great night — thank you for all of your support! We’re bringing this party together and it's going to take all of us to keep it going. Chip in now to take us across the finish line and defeat Donald Trump: https://t.co/wn91EhVZ5V https://t.co/vncQYgE6Eg",
 "It’s in some of our darkest moments of despair that we’ve made some of our greatest progress. If we stand together, as one America, we'll rise stronger than before. https://t.co/buzkmkM1Gf"]

In [6]:
# Five most negative tweets: the tail of the descending polarity ordering,
# so the most negative tweet is listed last.
data.sort_values(by='sentiment', ascending=False)['tweet'].tail(5)

615     Donald Trump's "Remain in Mexico" policy is da...
627     We’re building a campaign powered by the backb...
153     34 U.S. troops have been diagnosed with trauma...
2372    Donald Trump is the worst president we've ever...
825     Families across the country are impacted by CO...
Name: tweet, dtype: object

In [7]:
# Full, untruncated text of the five most negative tweets (most negative last).
# The selection is recomputed explicitly rather than read from IPython's `_`
# output cache, so this cell gives the same result regardless of which cell
# happened to run immediately before it.
list(data.sort_values('sentiment', ascending=False).tweet.iloc[-5:])

['Donald Trump\'s "Remain in Mexico" policy is dangerous, inhumane, and goes against everything we stand for as a nation of immigrants. My administration will end it. https://t.co/toYzMaPP1Y',
 'We’re building a campaign powered by the backbone and the base of the Democratic party. A campaign that will defeat Donald Trump and restore the soul of this nation. Join us: https://t.co/gnaFCACYrW',
 '34 U.S. troops have been diagnosed with traumatic brain injury after Iranian missiles struck their base in Iraq. Donald Trump brought us to the brink of war—then dismissed the injuries as “headaches.”\n\nThe idea that the commander in chief would take this lightly is disgusting. https://t.co/Cn8Bb7KLy4',
 "Donald Trump is the worst president we've ever had.",
 'Families across the country are impacted by COVID-19—and it can be especially difficult to explain to children. \n\nJill and I spoke with ItsJudysLife, JHouseVlogs and The Curly Coopers to answer their questions about the crisis. Tune in 

In [8]:
# Five most subjective tweets: highest subjectivity score first.
data.sort_values(by='subjectivity', ascending=False)['tweet'].head(5)

2372    Donald Trump is the worst president we've ever...
1906    There are only 78 days until Election Day — an...
627     We’re building a campaign powered by the backb...
2558    Donald Trump is running TV ads taking Dr. Fauc...
360     Lots of people on stage talked about health ca...
Name: tweet, dtype: object

In [9]:
# Full, untruncated text of the five most subjective tweets (highest score first).
# The selection is recomputed explicitly rather than read from IPython's `_`
# output cache, so this cell gives the same result regardless of which cell
# happened to run immediately before it.
list(data.sort_values('subjectivity', ascending=False).tweet.head())

["Donald Trump is the worst president we've ever had.",
 'There are only 78 days until Election Day — and @KamalaHarris and I need your help to make Donald Trump a one-term president.\n\nSign up today and let’s get to work: https://t.co/lyhRyWPhvS',
 'We’re building a campaign powered by the backbone and the base of the Democratic party. A campaign that will defeat Donald Trump and restore the soul of this nation. Join us: https://t.co/gnaFCACYrW',
 'Donald Trump is running TV ads taking Dr. Fauci out of context and without his permission.\n\nSo, here’s a message from the President in his own words. https://t.co/WCYbIfrQLR',
 'Lots of people on stage talked about health care tonight. I’m the only one who’s gotten anything done. #DemDebate']

In [10]:
# Preview entity extraction on the first five tweets before running it on
# the whole corpus.
sample_tweets = data['tweet'].head(5)
texthero.named_entities(sample_tweets)

0    [(2019, DATE, 34, 38), (just hours, TIME, 42, ...
1                                                   []
2    [(one month, DATE, 15, 24), (the Iowa Caucus, ...
3    [(Donald Trump, PERSON, 52, 64), (https://t.co...
4    [(Every day, DATE, 0, 9), (Donald Trump, PERSO...
dtype: object

In [11]:
# Run named entity extraction over every tweet and keep the per-tweet
# entity lists alongside the original data.
data['entities'] = texthero.named_entities(data['tweet'])

In [12]:
# Flatten the per-tweet entity lists into one row per entity mention.
# A single-pass comprehension is used instead of `Series.sum()`: summing lists
# re-concatenates the growing list for every tweet (quadratic work), and on an
# empty Series it yields the integer 0, which the DataFrame constructor rejects.
entity_rows = [
    mention
    for tweet_entities in data['entities']
    for mention in tweet_entities
]
entities = pd.DataFrame(entity_rows, columns=['text', 'entity', 'start', 'end'])
# Constant helper column that the later groupby counts per (entity, text) pair.
entities['count'] = 1
entities.head()

Unnamed: 0,text,entity,start,end,count
0,2019,DATE,34,38,1
1,just hours,TIME,42,52,1
2,Donald Trump,PERSON,148,160,1
3,one,CARDINAL,163,166,1
4,midnight,TIME,199,207,1


In [13]:
# Number of mentions for every (entity label, entity text) pair.
mentions_by_label_and_text = entities.groupby(['entity', 'text'])
counts = mentions_by_label_and_text['count'].count()

In [14]:
# Five most frequent PERSON entities. Counts are keyed by the raw entity text,
# so surface variants of the same name are tallied as separate entries.
person_counts = counts.loc['PERSON']
person_counts.sort_values(ascending=False).iloc[:5]

text
Donald Trump      512
Trump             159
Jill               38
Donald Trump's     34
Obama              16
Name: count, dtype: int64

In [15]:
# Five most frequent EVENT entities, keyed by the raw entity text.
event_counts = counts.loc['EVENT']
event_counts.sort_values(ascending=False).iloc[:5]

text
Super Tuesday            4
7PM ET                   2
New Year                 2
the Great Depression     2
the Armenian Genocide    2
Name: count, dtype: int64