# Scraping tweets

## snscrape (Python wrapper)
snscrape is well suited to scraping historical tweets. It queries Twitter's advanced search, so any advanced-search operator can be used in the query string. The available search operators are listed here: https://github.com/igorbrigadir/twitter-advanced-search

In [14]:
import snscrape.modules.twitter as sntwitter
import pandas as pd
# Remove the column-width limit so long text columns are displayed in full
# rather than being truncated with an ellipsis.
pd.set_option("display.max_colwidth", None)

In [15]:
from tqdm import tqdm
from datetime import datetime
import pickle

In [16]:
# Example: look up a single account's profile entity and display its
# description (bio) text as the cell output.
example_profile = sntwitter.TwitterUserScraper('karthik_es').entity
example_profile.description

'Descent of Lord Murga |\nBeliever of Aseevagam philosophy |\nKing Ravana & Raja Raja Cholan |\nThis world is for all & everyone are kin |'

In [17]:
#this is the US flag emoji
# The flag is a pair of regional indicator symbols: U+1F1FA ("U") followed by
# U+1F1F8 ("S"). It is written with \U escapes so the literal stays correct even
# if the file is re-saved in the wrong encoding; a mis-decoded literal would
# never match a real profile description and the flag check would always be False.
USflagemoji = '\U0001F1FA\U0001F1F8'

In [18]:
# Search terms for the takeover-related scrapes. The scraping cells join these
# with ' OR ' to build a single search query.
keywords = [
    '#TwitterTakeOver',
    '#ElonMusk',
    '#ElonMuskTwitter',
    '#ElonMuskBuysTwitter',
    '@elonmusk',
    'Elon Musk',
    'Elon',
    'Musk',
]

In [None]:
#tweets from when Elon made the offer on April 14th 2022
# Searches for tweets matching any of `keywords` (using the since:/until: search
# operators), looks up each author's profile description, records whether it
# contains the US flag emoji, and collects everything into a DataFrame.
start = datetime.now()

maxTweets = 20000  # cap on search results examined; skipped tweets count towards it
startdate='2022-04-14'
enddate='2022-04-16'

tweets_list = []
keyword = ' OR '.join(keywords)

for i,tweet in enumerate(sntwitter.TwitterSearchScraper('{} since:{} until:{}'.format(keyword,startdate,enddate)).get_items()):
    # `i` is zero-based, so `>=` stops after exactly maxTweets results
    # (`>` would let one extra result through).
    if i>=maxTweets:
        break
    try:
        # one additional profile request per tweet
        description = sntwitter.TwitterUserScraper(tweet.user.username).entity.description
        US_Flag = USflagemoji in description #check if american flag emoji is in the description
    except Exception:
        # Best effort: skip tweets whose author profile cannot be read.
        # `Exception` (not a bare except) lets KeyboardInterrupt/SystemExit
        # propagate so this long-running loop can still be stopped by hand.
        print(f'error at {tweet.user.username}')
        continue
    tweets_list.append([tweet.date, tweet.id, tweet.content, tweet.user.username, description, US_Flag, keyword])

tweets20220414 = pd.DataFrame(tweets_list, columns=['Datetime', 'Tweet Id', 'Text', 'Username', 'Description', 'US Flag', 'Keyword'])

# report wall-clock duration of the scrape
end = datetime.now()
time_taken = end - start
print('Time: ',time_taken)

In [None]:
# Serialize the scraped DataFrame to a pickle file in the working directory,
# using the newest pickle protocol this Python supports.
with open('tweets20220414.pkl', 'wb') as pkl_file:
    pickle.dump(
        tweets20220414,
        pkl_file,
        protocol=pickle.HIGHEST_PROTOCOL,
    )

In [None]:
#tweets from when Twitter accepted Elon's offer
# Searches for tweets matching any of `keywords` (using the since:/until: search
# operators), looks up each author's profile description, records whether it
# contains the US flag emoji, and collects everything into a DataFrame.
start = datetime.now()

maxTweets = 20000  # cap on search results examined; skipped tweets count towards it
startdate='2022-04-25'
enddate='2022-04-27'

tweets_list = []
keyword = ' OR '.join(keywords)

for i,tweet in enumerate(sntwitter.TwitterSearchScraper('{} since:{} until:{}'.format(keyword,startdate,enddate)).get_items()):
    # `i` is zero-based, so `>=` stops after exactly maxTweets results
    # (`>` would let one extra result through).
    if i>=maxTweets:
        break
    try:
        # one additional profile request per tweet
        description = sntwitter.TwitterUserScraper(tweet.user.username).entity.description
        US_Flag = USflagemoji in description #check if american flag emoji is in the description
    except Exception:
        # Best effort: skip tweets whose author profile cannot be read.
        # `Exception` (not a bare except) lets KeyboardInterrupt/SystemExit
        # propagate so this long-running loop can still be stopped by hand.
        print(f'error at {tweet.user.username}')
        continue
    tweets_list.append([tweet.date, tweet.id, tweet.content, tweet.user.username, description, US_Flag, keyword])

tweets20220425 = pd.DataFrame(tweets_list, columns=['Datetime', 'Tweet Id', 'Text', 'Username', 'Description', 'US Flag', 'Keyword'])

# report wall-clock duration of the scrape
end = datetime.now()
time_taken = end - start
print('Time: ',time_taken)
    

In [None]:
# Serialize the scraped DataFrame to a pickle file in the working directory,
# using the newest pickle protocol this Python supports.
with open('tweets20220425.pkl', 'wb') as pkl_file:
    pickle.dump(
        tweets20220425,
        pkl_file,
        protocol=pickle.HIGHEST_PROTOCOL,
    )

In [None]:
#new set of keywords for control
# Same Elon-related terms as `keywords`, without the takeover-specific hashtags.
keywords2 = ['#ElonMusk', '@elonmusk', 'Elon Musk', 'Elon', 'Musk']

In [None]:
#tweets about Elon from a year ago as control
# Searches for tweets matching any of `keywords2` (using the since:/until: search
# operators), looks up each author's profile description, records whether it
# contains the US flag emoji, and collects everything into a DataFrame.
start = datetime.now()

maxTweets = 20000  # cap on search results examined; skipped tweets count towards it
startdate='2021-04-05'
enddate='2021-04-07'

tweets_list = []
keyword = ' OR '.join(keywords2)

for i,tweet in enumerate(sntwitter.TwitterSearchScraper('{} since:{} until:{}'.format(keyword,startdate,enddate)).get_items()):
    # `i` is zero-based, so `>=` stops after exactly maxTweets results
    # (`>` would let one extra result through).
    if i>=maxTweets:
        break
    try:
        # one additional profile request per tweet
        description = sntwitter.TwitterUserScraper(tweet.user.username).entity.description
        US_Flag = USflagemoji in description #check if american flag emoji is in the description
    except Exception:
        # Best effort: skip tweets whose author profile cannot be read.
        # `Exception` (not a bare except) lets KeyboardInterrupt/SystemExit
        # propagate so this long-running loop can still be stopped by hand.
        print(f'error at {tweet.user.username}')
        continue
    tweets_list.append([tweet.date, tweet.id, tweet.content, tweet.user.username, description, US_Flag, keyword])

tweets20210405 = pd.DataFrame(tweets_list, columns=['Datetime', 'Tweet Id', 'Text', 'Username', 'Description', 'US Flag', 'Keyword'])

# report wall-clock duration of the scrape
end = datetime.now()
time_taken = end - start
print('Time: ',time_taken)
    

In [None]:
# Serialize the scraped DataFrame to a pickle file in the working directory,
# using the newest pickle protocol this Python supports.
with open('tweets20210405.pkl', 'wb') as pkl_file:
    pickle.dump(
        tweets20210405,
        pkl_file,
        protocol=pickle.HIGHEST_PROTOCOL,
    )

In [None]:
#tweets from when Elon said he would lift Trump ban
# Searches for tweets matching any of `keywords` (using the since:/until: search
# operators), looks up each author's profile description, records whether it
# contains the US flag emoji, and collects everything into a DataFrame.
start = datetime.now()

maxTweets = 20000  # cap on search results examined; skipped tweets count towards it
startdate='2022-05-10'
enddate='2022-05-12'

tweets_list = []
keyword = ' OR '.join(keywords)

for i,tweet in enumerate(sntwitter.TwitterSearchScraper('{} since:{} until:{}'.format(keyword,startdate,enddate)).get_items()):
    # `i` is zero-based, so `>=` stops after exactly maxTweets results
    # (`>` would let one extra result through).
    if i>=maxTweets:
        break
    try:
        # one additional profile request per tweet
        description = sntwitter.TwitterUserScraper(tweet.user.username).entity.description
        US_Flag = USflagemoji in description #check if american flag emoji is in the description
    except Exception:
        # Best effort: skip tweets whose author profile cannot be read.
        # `Exception` (not a bare except) lets KeyboardInterrupt/SystemExit
        # propagate so this long-running loop can still be stopped by hand.
        print(f'error at {tweet.user.username}')
        continue
    tweets_list.append([tweet.date, tweet.id, tweet.content, tweet.user.username, description, US_Flag, keyword])

tweets20220510 = pd.DataFrame(tweets_list, columns=['Datetime', 'Tweet Id', 'Text', 'Username', 'Description', 'US Flag', 'Keyword'])

# report wall-clock duration of the scrape
end = datetime.now()
time_taken = end - start
print('Time: ',time_taken)
    

In [None]:
# Serialize the scraped DataFrame to a pickle file in the working directory,
# using the newest pickle protocol this Python supports.
with open('tweets20220510.pkl', 'wb') as pkl_file:
    pickle.dump(
        tweets20220510,
        pkl_file,
        protocol=pickle.HIGHEST_PROTOCOL,
    )