# Imports

In [1]:
import numpy as np
import pandas as pd

import twint

# Fixes runtime errors with twint
import nest_asyncio
nest_asyncio.apply()

from textblob import TextBlob

import matplotlib
%matplotlib inline

# Scrape Tweets

In [2]:
def twint_search(search, username=None, since=None, until=None, limit=None):
    '''
    Function to run one twint search and hand back the scraped tweets as a pandas dataframe.
    Required parameter: search term.
    Optional parameters: username, start date (since) and end date (until) to search, maximum number of tweets (limit).
    All optional values are forwarded to the twint configuration unchanged, including None.
    Returns: the object found in twint.storage.panda.Tweets_df once the search has run. This is
    module-level state owned by twint, not a private copy (presumably replaced by the next
    search -- verify against the twint version in use).
    '''
    config = twint.Config()

    # What to search for; language is fixed to English ('en')
    config.Search = search
    config.Lang = 'en'
    config.Username = username

    # Search window and size
    config.Since = since
    config.Until = until
    config.Limit = limit

    # Hide the printing of every tweet during scrape and keep results in twint's pandas storage
    config.Hide_output = True
    config.Pandas = True

    twint.run.Search(config)
    return twint.storage.panda.Tweets_df

In [3]:
def search_loop(start_date, end_date, search, username=None, limit=None, search_fn=None):
    '''
    Function to loop over date range and perform one search for each day, returning one combined dataframe.

    The range is walked as consecutive pairs of days: each search receives since=<day> and
    until=<following day>, both formatted as 'YYYY-MM-DD'. end_date is therefore only ever used
    as the "until" value of the last search, and a range shorter than one day runs no search.

    Required parameters: start date, end date (anything pd.date_range accepts), search term.
    Optional parameters: username, maximum number of tweets per day (limit),
    search_fn (callable used for each daily search; defaults to twint_search and must accept the
    keyword arguments search, username, since, until and limit).

    Returns: a dataframe with a fresh 0..n-1 index holding every daily result in date order, or
    an empty dataframe when no search ran or every search returned None.
    '''
    if search_fn is None:
        # Resolved at call time so the default scraper is only required when it is actually used
        search_fn = twint_search

    days = pd.date_range(start_date, end_date)

    # Collect the daily frames and concatenate once at the end: concatenating inside the loop
    # re-copies all previously collected rows on every iteration.
    daily_frames = []
    for day, next_day in zip(days[:-1], days[1:]):
        day_df = search_fn(
            search=search,
            username=username,
            since=day.strftime('%Y-%m-%d'),
            until=next_day.strftime('%Y-%m-%d'),
            limit=limit,
        )
        # A search may hand back None instead of a dataframe; pd.concat refuses a list that
        # contains nothing but None, so leave those days out.
        if day_df is not None:
            daily_frames.append(day_df)

    if not daily_frames:
        return pd.DataFrame()
    return pd.concat(daily_frames, ignore_index=True)

In [4]:
# Scrape tweets matching any of the mask-related terms, one search per day across the date range.
df = search_loop(
    start_date='2020-01-01',
    end_date='2020-02-01',
    search='(mask OR masks OR facemask OR facemasks)',
    limit=250,
)

In [5]:
# Number of scraped rows
df.shape[0]

8060

In [6]:
# Preview the first five rows of the scrape
df.head(n=5)

Unnamed: 0,id,conversation_id,created_at,date,timezone,place,tweet,hashtags,cashtags,user_id,...,geo,source,user_rt_id,user_rt,retweet_id,reply_to,retweet_date,translate,trans_src,trans_dest
0,1212523886329380865,1212523886329380865,1577923196000,2020-01-01 18:59:56,EDT,,SpongeBob: slides down Sand Mountain\nThe tree...,[],[],1099209894295678977,...,,,,,,"[{'user_id': '1099209894295678977', 'username'...",,,,
1,1212523880558026753,1212523880558026753,1577923195000,2020-01-01 18:59:55,EDT,,"Started 2020 with freshly washed bed sheets, a...",[],[],57777541,...,,,,,,"[{'user_id': '57777541', 'username': 'standtal...",,,,
2,1212523828888449024,1212421340281614338,1577923183000,2020-01-01 18:59:43,EDT,,Using the coverage of his yellow card to mask ...,"[#classicjose, #coverup]",[],346609834,...,,,,,,"[{'user_id': '346609834', 'username': '5_Times...",,,,
3,1212523807128186882,1212402852011937792,1577923177000,2020-01-01 18:59:37,EDT,,The mask pic.twitter.com/vHi9pC9S62,[],[],1070005555777945600,...,,,,,,"[{'user_id': '1070005555777945600', 'username'...",,,,
4,1212523752925229056,1212512506838519811,1577923164000,2020-01-01 18:59:24,EDT,,"I know what you mean, I've been depressed too ...",[#hugs],[],48659042,...,,,,,,"[{'user_id': '48659042', 'username': 'IBdaSwee...",,,,


# Clean Tweets

In [7]:
# TODO: cleaning step not implemented yet -- nothing in this cell runs.
# The disabled list below names sports-related words; presumably tweets containing
# them were meant to be filtered out as off-topic uses of "mask". Confirm the intent
# before enabling, or delete this cell.
# filter_words = [
#     'boxing',
#     'fencing',
#     #'football',
#     'hockey',
#     'ski',
#     'skiing']

# Analysis

In [8]:
# Score every tweet with TextBlob once and reuse the result for both columns; building a
# separate TextBlob per column analysed each tweet twice.
sentiments = [TextBlob(text).sentiment for text in df['tweet']]

# The two lists are in row order and the same length as df, so they are assigned positionally.
df['polarity'] = [sentiment.polarity for sentiment in sentiments]
df['subjectivity'] = [sentiment.subjectivity for sentiment in sentiments]

In [9]:
# Summary statistics of the polarity scores
df.loc[:, 'polarity'].describe()

count    8060.000000
mean        0.064562
std         0.282592
min        -1.000000
25%         0.000000
50%         0.000000
75%         0.200000
max         1.000000
Name: polarity, dtype: float64

In [10]:
# Summary statistics of the subjectivity scores
df.loc[:, 'subjectivity'].describe()

count    8060.000000
mean        0.380963
std         0.313029
min         0.000000
25%         0.000000
50%         0.402778
75%         0.600000
max         1.000000
Name: subjectivity, dtype: float64

In [11]:
# List every column label of the tweet dataframe
df.columns

Index(['id', 'conversation_id', 'created_at', 'date', 'timezone', 'place',
       'tweet', 'hashtags', 'cashtags', 'user_id', 'user_id_str', 'username',
       'name', 'day', 'hour', 'link', 'retweet', 'nlikes', 'nreplies',
       'nretweets', 'quote_url', 'search', 'near', 'geo', 'source',
       'user_rt_id', 'user_rt', 'retweet_id', 'reply_to', 'retweet_date',
       'translate', 'trans_src', 'trans_dest', 'polarity', 'subjectivity'],
      dtype='object')

In [12]:
# Count distinct tweet texts; dropna=False keeps a missing value (if any) in the count,
# matching what unique() would include.
df['tweet'].nunique(dropna=False)

8011

In [13]:
# How often each exact tweet text occurs, most repeated first
df.loc[:, 'tweet'].value_counts()

Mask                                                                                                                                                                                                                                                                                      6
Mask off  https://twitter.com/_TshegoX/status/984378190499057664 …                                                                                                                                                                                                                        6
The mask slips, She does want your power,. Remember, she`s the boss,  and we`re just screaming from the cheap seats. She has no consideration for either party. She wants to impose her rules without giving you freedom of choice. It is wrong to take something from others. Period.    4
Mask off                                                                                                                                            

In [15]:
# Number of tweets whose text contains '#mask'. The match is case-sensitive and is a plain
# substring test, so longer tags such as '#masks' are counted too.
df.loc[df['tweet'].str.contains('#mask')].shape[0]

38

In [17]:
# Show the rows whose text contains '#mask'. The 38 is hard-coded: it is the number of
# matches counted for this particular scrape and will not follow a re-run.
df.loc[df['tweet'].str.contains('#mask')].iloc[:38]

Unnamed: 0,id,conversation_id,created_at,date,timezone,place,tweet,hashtags,cashtags,user_id,...,user_rt_id,user_rt,retweet_id,reply_to,retweet_date,translate,trans_src,trans_dest,polarity,subjectivity
246,1212516935876927490,1212516935876927490,1577921539000,2020-01-01 18:32:19,EDT,,Come check out our podcast! I’m the sound guy ...,"[#podcast, #pbta, #masks, #listenforthesignal]",[],1136620077745414144,...,,,,"[{'user_id': '1136620077745414144', 'username'...",,,,,0.5,0.4
624,1213245966855364609,1213245966855364609,1578095354000,2020-01-03 18:49:14,EDT,,Wuh-oh. Looks like this ghost's gone feral.\n\...,"[#ghost, #art, #illustration, #monster, #mask,...",[],1571077800,...,,,,"[{'user_id': '1571077800', 'username': 'lobste...",,,,,0.0,0.0
625,1213245924249604098,1213245924249604098,1578095344000,2020-01-03 18:49:04,EDT,,Halloween ghosts are sad they're two months la...,"[#ghosts, #spectral, #incorporeal, #masks, #gh...",[],1571077800,...,,,,"[{'user_id': '1571077800', 'username': 'lobste...",,,,,-0.4,0.8
714,1213243521852788737,1213243521852788737,1578094771000,2020-01-03 18:39:31,EDT,,This is beyond comprehension. Too much emphasi...,"[#mask, #style]",[],1124074814585524224,...,,,,"[{'user_id': '1124074814585524224', 'username'...",,,,,0.1,0.35
862,1213609093018992640,1213609093018992640,1578181930000,2020-01-04 18:52:10,EDT,,Are you looking for the manufacturer focuses o...,"[#easybreathsnorkelingmask, #maskandsnorkelset...",[],905519087815708672,...,,,,"[{'user_id': '905519087815708672', 'username':...",,,,,0.175,0.275
1126,1213971795541278721,1213971795541278721,1578268405000,2020-01-05 18:53:25,EDT,,This art piece caught my attention. \n#art #ar...,"[#art, #artpiece, #torn, #mask, #hands, #frame...",[],41771046,...,,,,"[{'user_id': '41771046', 'username': 'GeorgeGe...",,,,,0.0,0.0
1262,1213968917200195585,1213968917200195585,1578267718000,2020-01-05 18:41:58,EDT,,Are you ready for RKD universal size full face...,"[#headsnorkelmask, #maskandsnorkelset]",[],905519087815708672,...,,,,"[{'user_id': '905519087815708672', 'username':...",,,,,0.183333,0.35
1509,1214330657960341508,1214330657960341508,1578353964000,2020-01-06 18:39:24,EDT,,Thank you @ganea_marius perfect picture. Love...,"[#teddybear, #bear, #fashion, #maskoff, #mask,...",[],878992878516785152,...,,,,"[{'user_id': '878992878516785152', 'username':...",,,,,0.75,0.8
1588,1214697682314416129,1214697682314416129,1578441470000,2020-01-07 18:57:50,EDT,,It's happening! Lucy's Noelle sculpt is starti...,"[#deltarune, #animegao, #kigurumi, #mask, #cos...",[],1165422560080740352,...,,,,"[{'user_id': '1165422560080740352', 'username'...",,,,,0.25,0.43125
1624,1214696852135796737,1214696852135796737,1578441272000,2020-01-07 18:54:32,EDT,,Terremoto atento a las acciones\n.\n.\n.\n.\n....,"[#zona23, #deathmatch, #luchalibre, #luchalibr...",[],103664390,...,,,,"[{'user_id': '103664390', 'username': 'ElJorge...",,,,,0.0,0.0


In [16]:
# Print the full, untruncated text of every tweet containing '#mask'
for t in df.loc[df['tweet'].str.contains('#mask'), 'tweet']:
    print(t)

Come check out our podcast! I’m the sound guy / producer, and play the Nova in our campaign of Masks! #podcast #pbta #masks #ListenForTheSignal https://twitter.com/signalcity/status/1212504864766464002 …
Wuh-oh. Looks like this ghost's gone feral.

#ghost #art #illustration #monster #mask #ghosts #ghostly #creepycute #spectral  https://www.instagram.com/p/B61BgMQHf9N/?igshid=e3b10wfc2xns …
Halloween ghosts are sad they're two months late.

#ghosts #spectral #incorporeal #masks #ghost #mask #illustration #art  https://www.instagram.com/p/B6tVKyEn1an/?igshid=1q30rdrktmmag …
This is beyond comprehension. Too much emphasis on flu protection. What about flu prevention? Contain millions of tiny flu-particles during cough and sneeze. Wear your #mask? Spin it into a #style? https://apple.news/AednQObCgSwmX-gA44QxhiQ …
Are you looking for the manufacturer focuses on producing RKD full-dry clip joint universal size full face snorkel mask R20S? Contact us: #easybreathsnorkelingmask #maskandsnorke