# Get Tweets

This script extracts all the tweets with hashtag #covid-19 related to the day before today (yesterday) and saves them into a .csv file.
We use the `tweepy` library, which can be installed with the command `pip install tweepy`.

Firstly, we import the configuration file, called `config.py`, which is located in the same directory of this script.

In [35]:
from config import *
import tweepy
import datetime

import sys
import logging

logger = logging.getLogger('tweets_search')

In [13]:
import pandas as pd
# import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer

In [36]:
print(f"logger.root.level = {logger.root.level}, logger.root.name = {logger.root.name}")
print(f"logger.name = {logger.name}")

logger.root.level = 20, logger.root.name = root
logger.name = tweets_search


In [30]:
format = "%(asctime)s - %(levelname)s - %(message)s"
# logging.basicConfig(format=format, stream=sys.stdout, level = logging.DEBUG)
logging.basicConfig(format=format, stream=sys.stdout, level = logging.INFO)

In [31]:
print(logger.root.level)

10


In [32]:
logger.root.level = 20

In [37]:
print(logger.root.level)

20


We setup the connection to our Twitter App by using the `OAuthHandler()` class and its `access_token()` function. Then we call the Twitter API through the `API()` function.

In [5]:
auth = tweepy.OAuthHandler(TWITTER_CONSUMER_KEY, TWITTER_CONSUMER_SECRET)
auth.set_access_token(TWITTER_ACCESS_TOKEN, TWITTER_ACCESS_TOKEN_SECRET)
api = tweepy.API(auth,wait_on_rate_limit=True, wait_on_rate_limit_notify = True)

In [31]:
api.me()

2021-06-20 22:48:36,158 - DEBUG - PARAMS: {'screen_name': b'vasiange'}
2021-06-20 22:48:36,162 - DEBUG - Signing request <PreparedRequest [GET]> using client <Client client_key=pw0ihLFxH3nwDrd4HBd7pqUrc, client_secret=****, resource_owner_key=1360011857969479682-iLrxBUlqdtExwkqiN9iZsHYDXIFTZz, resource_owner_secret=****, signature_method=HMAC-SHA1, signature_type=AUTH_HEADER, callback_uri=None, rsa_key=None, verifier=None, realm=None, encoding=utf-8, decoding=None, nonce=None, timestamp=None>
2021-06-20 22:48:36,165 - DEBUG - Including body in call to sign: False
2021-06-20 22:48:36,176 - DEBUG - Collected params: [('screen_name', 'vasiange'), ('oauth_nonce', '174152913811685627841624218516'), ('oauth_timestamp', '1624218516'), ('oauth_version', '1.0'), ('oauth_signature_method', 'HMAC-SHA1'), ('oauth_consumer_key', 'pw0ihLFxH3nwDrd4HBd7pqUrc'), ('oauth_token', '1360011857969479682-iLrxBUlqdtExwkqiN9iZsHYDXIFTZz')]
2021-06-20 22:48:36,181 - DEBUG - Normalized params: oauth_consumer_key

User(_api=<tweepy.api.API object at 0x7f7714101e10>, _json={'id': 1360011857969479682, 'id_str': '1360011857969479682', 'name': 'vasiange', 'screen_name': 'vasiange', 'location': '', 'profile_location': None, 'description': '', 'url': None, 'entities': {'description': {'urls': []}}, 'protected': False, 'followers_count': 0, 'friends_count': 15, 'listed_count': 0, 'created_at': 'Thu Feb 11 23:45:06 +0000 2021', 'favourites_count': 0, 'utc_offset': None, 'time_zone': None, 'geo_enabled': False, 'verified': False, 'statuses_count': 0, 'lang': None, 'contributors_enabled': False, 'is_translator': False, 'is_translation_enabled': False, 'profile_background_color': 'F5F8FA', 'profile_background_image_url': None, 'profile_background_image_url_https': None, 'profile_background_tile': False, 'profile_image_url': 'http://abs.twimg.com/sticky/default_profile_images/default_profile_normal.png', 'profile_image_url_https': 'https://abs.twimg.com/sticky/default_profile_images/default_profile_normal.p

In [7]:
api.rate_limit_status()

2021-06-21 13:45:23,452 - DEBUG - PARAMS: {}
2021-06-21 13:45:23,456 - DEBUG - Signing request <PreparedRequest [GET]> using client <Client client_key=pw0ihLFxH3nwDrd4HBd7pqUrc, client_secret=****, resource_owner_key=1360011857969479682-iLrxBUlqdtExwkqiN9iZsHYDXIFTZz, resource_owner_secret=****, signature_method=HMAC-SHA1, signature_type=AUTH_HEADER, callback_uri=None, rsa_key=None, verifier=None, realm=None, encoding=utf-8, decoding=None, nonce=None, timestamp=None>
2021-06-21 13:45:23,457 - DEBUG - Including body in call to sign: False
2021-06-21 13:45:23,458 - DEBUG - Collected params: [('oauth_nonce', '75468200399434852471624272323'), ('oauth_timestamp', '1624272323'), ('oauth_version', '1.0'), ('oauth_signature_method', 'HMAC-SHA1'), ('oauth_consumer_key', 'pw0ihLFxH3nwDrd4HBd7pqUrc'), ('oauth_token', '1360011857969479682-iLrxBUlqdtExwkqiN9iZsHYDXIFTZz')]
2021-06-21 13:45:23,459 - DEBUG - Normalized params: oauth_consumer_key=pw0ihLFxH3nwDrd4HBd7pqUrc&oauth_nonce=75468200399434852

eset': 1624273224},
   '/friendships/show': {'limit': 180, 'remaining': 180, 'reset': 1624273224}},
  'guide': {'/guide': {'limit': 180, 'remaining': 180, 'reset': 1624273224},
   '/guide/get_explore_locations': {'limit': 100,
    'remaining': 100,
    'reset': 1624273224},
   '/guide/explore_locations_with_autocomplete': {'limit': 200,
    'remaining': 200,
    'reset': 1624273224}},
  'auth': {'/auth/csrf_token': {'limit': 15,
    'remaining': 15,
    'reset': 1624273224}},
  'blocks': {'/blocks/list': {'limit': 15,
    'remaining': 15,
    'reset': 1624273224},
   '/blocks/ids': {'limit': 15, 'remaining': 15, 'reset': 1624273224}},
  'geo': {'/geo/similar_places': {'limit': 15,
    'remaining': 15,
    'reset': 1624273224},
   '/geo/place_page': {'limit': 75, 'remaining': 75, 'reset': 1624273224},
   '/geo/id/:place_id': {'limit': 75, 'remaining': 75, 'reset': 1624273224},
   '/geo/reverse_geocode': {'limit': 15, 'remaining': 15, 'reset': 1624273224},
   '/geo/search': {'limit': 15,

Now we setup dates. We need to setup today and yesterday.

In [6]:
today = datetime.date.today()
since= today - datetime.timedelta(days=2)
until= today
until, since
# (datetime.date(2021, 6, 7), datetime.date(2021, 6, 6))

(datetime.date(2021, 6, 21), datetime.date(2021, 6, 19))

In [39]:
    logger.debug(f"full_text: '{until, since}'")

2021-06-22 02:34:50,212 - DEBUG - full_text: '(datetime.date(2021, 6, 21), datetime.date(2021, 6, 19))'


We search for tweets on Twitter by using the `Cursor()` function. 
We pass the `api.search` parameter to the cursor, as well as the query string, which is specified through the `q` parameter of the cursor.
The query string can receive many parameters, such as the following (not mandatory) ones:
* `from:` - to specify a specific Twitter user profile
* `since:` - to specify the beginning date of search
* `until:` - to specify the ending date of search
The cursor can also receive other parameters, such as the language and the `tweet_mode`. If `tweet_mode='extended'`, all the text of the tweet is returned, otherwise only the first 140 characters.

In [None]:
# # example 
# code tweets = tweepy.Cursor(api.search, tweet_mode=’extended’) 
# for tweet in tweets:
#     content = tweet.full_text

In [None]:
# tweets_list = tweepy.Cursor(api.search, q="#Covid-19 since:" + str(yesterday)+ " until:" + str(today),tweet_mode='extended', lang='en').items()

In [None]:
# tweets_list = tweepy.Cursor(api.search, q=f"#Covid-19 since:{str(yesterday)} until:{str(today)}",tweet_mode='extended', lang='en').items()

In [21]:
tweets_list = tweepy.Cursor(api.search, q=['pfizer','astrazeneca'],since= str(since), until=str(until),tweet_mode='extended', lang='en').items()

Now we loop across the `tweets_list`, and, for each tweet, we extract the text, the creation date, the number of retweets and the favourite count. We store every tweet into a list, called `output`.

In [8]:
import time
seconds = 5
start = time.time()
time.sleep(seconds)
end = time.time()
logger.info(f"elapsed_time: '{end - start}'")

2021-06-21 22:38:34,326 - INFO - elapsed_time: '5.005592107772827'


---
# TEST

---

In [34]:
tweets_list2 = tweepy.Cursor(api.search, q=['pfizer','astrazeneca'],since= str(since), until=str(until),tweet_mode='extended', lang='en').items(2)

import time
start = time.time()
output = []
for tweet in tweets_list2:
    # text = tweet._json["full_text"]
    #print(text) 
    # https://developer.twitter.com/en/docs/twitter-api/v1/tweets/search/api-reference/get-search-tweets           
    # "geo": null,"coordinates": null,"place": null,"contributors": null,
    # "is_quote_status": false,"retweet_count": 988,"favorite_count": 3875,
    # "favorited": false,"retweeted": false,"possibly_sensitive": false,"lang": "en"
    logger.info(f"full_text: '{tweet.id}'")
    # favourite_count = tweet.favorite_count
    # retweet_count = tweet.retweet_count
    # created_at = tweet.created_at
    
#     line = {'text' : text, 'favourite_count' : favourite_count, 'retweet_count' : retweet_count, 'created_at' : created_at}
#     output.append(line)
#     logger.info(f"Append list length : { len(output)}")
# end = time.time()
# logger.info(f"elapsed_time: '{end - start}'")

2021-06-22 02:18:48,749 - DEBUG - PARAMS: {'q': b"['pfizer', 'astrazeneca']", 'since': b'2021-06-19', 'until': b'2021-06-21', 'tweet_mode': b'extended', 'lang': b'en'}
2021-06-22 02:18:48,756 - DEBUG - Signing request <PreparedRequest [GET]> using client <Client client_key=pw0ihLFxH3nwDrd4HBd7pqUrc, client_secret=****, resource_owner_key=1360011857969479682-iLrxBUlqdtExwkqiN9iZsHYDXIFTZz, resource_owner_secret=****, signature_method=HMAC-SHA1, signature_type=AUTH_HEADER, callback_uri=None, rsa_key=None, verifier=None, realm=None, encoding=utf-8, decoding=None, nonce=None, timestamp=None>
2021-06-22 02:18:48,762 - DEBUG - Including body in call to sign: False
2021-06-22 02:18:48,767 - DEBUG - Collected params: [('q', "['pfizer', 'astrazeneca']"), ('since', '2021-06-19'), ('until', '2021-06-21'), ('tweet_mode', 'extended'), ('lang', 'en'), ('oauth_nonce', '80562232051496740461624317528'), ('oauth_timestamp', '1624317528'), ('oauth_version', '1.0'), ('oauth_signature_method', 'HMAC-SHA1')

---

In [24]:
import time
start = time.time()
output = []
for tweet in tweets_list:
    text = tweet._json["full_text"]
    #print(text) 
    # https://developer.twitter.com/en/docs/twitter-api/v1/tweets/search/api-reference/get-search-tweets           
    # "geo": null,"coordinates": null,"place": null,"contributors": null,
    # "is_quote_status": false,"retweet_count": 988,"favorite_count": 3875,
    # "favorited": false,"retweeted": false,"possibly_sensitive": false,"lang": "en"
    logger.debug(f"full_text: '{text}'")
    favourite_count = tweet.favorite_count
    retweet_count = tweet.retweet_count
    created_at = tweet.created_at
    
    line = {'text' : text, 'favourite_count' : favourite_count, 'retweet_count' : retweet_count, 'created_at' : created_at}
    output.append(line)
    logger.info(f"Append list length : { len(output)}")
end = time.time()
logger.info(f"elapsed_time: '{end - start}'")

 Scott Morrison and Greg Hunt said no and put all their…'
2021-06-21 16:01:13,058 - INFO - Append list length : 6471
2021-06-21 16:01:13,066 - DEBUG - full_text: 'RT @JohnRHewson: For the record, how many of our political leaders had Pfizer rather than AstraZeneca?'
2021-06-21 16:01:13,071 - INFO - Append list length : 6472
2021-06-21 16:01:13,072 - DEBUG - full_text: 'RT @JohnRHewson: Talk about making the best of a bad call - claiming that those aged 50-60 who were hesitant about getting the AstraZeneca…'
2021-06-21 16:01:13,073 - INFO - Append list length : 6473
2021-06-21 16:01:13,077 - DEBUG - full_text: 'RT @Fiona_M_Russell: I am over 50 &amp; had my first AstraZeneca vaccine dose &amp; plan on getting my second dose next month. Two doses provide &gt;9…'
2021-06-21 16:01:13,084 - INFO - Append list length : 6474
2021-06-21 16:01:13,091 - DEBUG - full_text: 'RT @JohnRHewson: For the record, how many of our political leaders had Pfizer rather than AstraZeneca?'
2021-06-21 16:01:13,

In [25]:
output

19, 43)},
 {'text': 'Good morning #YXE - the walk-in clinic at Prairieland will have AstraZeneca (2nd dose) and Moderna (1st dose) today. The drive-thru has Pfizer (1st or 2nd dose). For the latest info visit https://t.co/Mh44eNmrrF.',
  'favourite_count': 43,
  'retweet_count': 27,
  'created_at': datetime.datetime(2021, 6, 20, 14, 19, 36)},
 {'text': 'RT @NorthLincsCNews: Walk-in Covid-19 jabs are available at the Crosby ONE Centre in a pop-up vaccination clinic on Tuesday 22 June.\nNo app…',
  'favourite_count': 0,
  'retweet_count': 4,
  'created_at': datetime.datetime(2021, 6, 20, 14, 19, 26)},
 {'text': 'RT @DrEricDing: 13) Vaccine effectiveness against #DeltaVariant according to @PHE_uk — averaging AstraZeneca &amp; Pfizer together (even if dif…',
  'favourite_count': 0,
  'retweet_count': 94,
  'created_at': datetime.datetime(2021, 6, 20, 14, 19, 4)},
 {'text': 'RT @JoannaBlythman: Official data, obtained by the Sunday Times, show that the MHRA received 2,734 reports of period 

Finally, we convert the `output` list to a `pandas DataFrame` and we store results.

In [26]:
df = pd.DataFrame(output)


In [31]:
df.to_csv('output2.csv', mode='a', header = False, index = False )
#df.to_csv('output.csv')

In [32]:
df.shape

(6510, 4)

In [33]:
df.head(10)

Unnamed: 0,text,favourite_count,retweet_count,created_at
0,@breakfasttv I received the Astrazeneca vaccin...,0,0,2021-06-20 23:59:37
1,#Pfizer\n#AstraZeneca\n#Moderna\n#JohnsonAndJo...,3,5,2021-06-20 23:59:33
2,RT @TimWattsMP: Can you believe this?\n\nThe M...,0,212,2021-06-20 23:58:46
3,RT @DrEricDing: 5) One dose of the vaccine is ...,0,270,2021-06-20 23:57:54
4,RT @DrEricDing: 5) One dose of the vaccine is ...,0,270,2021-06-20 23:56:29
5,RT @DrEricDing: 13) Vaccine effectiveness agai...,0,94,2021-06-20 23:54:35
6,RT @james00000001: WHO TO BELIEVE?!\nSQott or ...,0,32,2021-06-20 23:54:18
7,RT @ADHDadultlife: Has anyone in the UK had a ...,0,1,2021-06-20 23:53:18
8,RT @TimWattsMP: Can you believe this?\n\nThe M...,0,212,2021-06-20 23:51:49
9,Not close to what is really happening with the...,0,0,2021-06-20 23:51:45


In [10]:
df2 = pd.read_csv('output2.csv', names=['text',	'favourite_count',	'retweet_count','created_at'])
df2.head(5)

Unnamed: 0,text,favourite_count,retweet_count,created_at
0,@breakfasttv I received the Astrazeneca vaccin...,0,0,2021-06-20 23:59:37
1,#Pfizer\n#AstraZeneca\n#Moderna\n#JohnsonAndJo...,3,5,2021-06-20 23:59:33
2,RT @TimWattsMP: Can you believe this?\n\nThe M...,0,212,2021-06-20 23:58:46
3,RT @DrEricDing: 5) One dose of the vaccine is ...,0,270,2021-06-20 23:57:54
4,RT @DrEricDing: 5) One dose of the vaccine is ...,0,270,2021-06-20 23:56:29


In [37]:
#df = pd.DataFrame(output)
df.to_csv('output3.csv', mode='a', header=True, index = False)

In [11]:
df3 = pd.read_csv('output3.csv')
df3.head(5)

Unnamed: 0,text,favourite_count,retweet_count,created_at
0,@breakfasttv I received the Astrazeneca vaccin...,0,0,2021-06-20 23:59:37
1,#Pfizer\n#AstraZeneca\n#Moderna\n#JohnsonAndJo...,3,5,2021-06-20 23:59:33
2,RT @TimWattsMP: Can you believe this?\n\nThe M...,0,212,2021-06-20 23:58:46
3,RT @DrEricDing: 5) One dose of the vaccine is ...,0,270,2021-06-20 23:57:54
4,RT @DrEricDing: 5) One dose of the vaccine is ...,0,270,2021-06-20 23:56:29


In [12]:
df3.shape

(6510, 4)

In [14]:
#nltk.download('vader_lexicon')
sid = SentimentIntensityAnalyzer()

In [17]:
df3['rating'] = df3['text'].apply(sid.polarity_scores)

In [18]:
df3

Unnamed: 0,text,favourite_count,retweet_count,created_at,rating
0,@breakfasttv I received the Astrazeneca vaccin...,0,0,2021-06-20 23:59:37,"{'neg': 0.049, 'neu': 0.752, 'pos': 0.199, 'co..."
1,#Pfizer\n#AstraZeneca\n#Moderna\n#JohnsonAndJo...,3,5,2021-06-20 23:59:33,"{'neg': 0.0, 'neu': 0.603, 'pos': 0.397, 'comp..."
2,RT @TimWattsMP: Can you believe this?\n\nThe M...,0,212,2021-06-20 23:58:46,"{'neg': 0.124, 'neu': 0.876, 'pos': 0.0, 'comp..."
3,RT @DrEricDing: 5) One dose of the vaccine is ...,0,270,2021-06-20 23:57:54,"{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound..."
4,RT @DrEricDing: 5) One dose of the vaccine is ...,0,270,2021-06-20 23:56:29,"{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound..."
...,...,...,...,...,...
6505,@Fab485617452 @JohnRHewson I'm 73 and my wife ...,6,0,2021-06-19 00:02:33,"{'neg': 0.0, 'neu': 0.927, 'pos': 0.073, 'comp..."
6506,RT @kyle_minogue: @bjornradstrom Pfizer - 12-1...,0,2,2021-06-19 00:02:21,"{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound..."
6507,"RT @JohnRHewson: For the record, how many of o...",0,890,2021-06-19 00:01:32,"{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound..."
6508,"RT @JohnRHewson: For the record, how many of o...",0,890,2021-06-19 00:00:27,"{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound..."


In [23]:
df3['negative_nltk'] = df3['rating'].apply(lambda x : x['neg'])
df3['positive_nltk'] = df3['rating'].apply(lambda x : x['pos'])
df3['neutral_nltk'] = df3['rating'].apply(lambda x : x['neu'])
df3['ncompound_nltk'] = df3['rating'].apply(lambda x : x['compound'])

df3 = df3.drop('rating', axis=1)
df3

Unnamed: 0,text,favourite_count,retweet_count,created_at,negative_nltk,positive_nltk,neutral_nltk,ncompound_nltk
0,@breakfasttv I received the Astrazeneca vaccin...,0,0,2021-06-20 23:59:37,0.049,0.199,0.752,0.7793
1,#Pfizer\n#AstraZeneca\n#Moderna\n#JohnsonAndJo...,3,5,2021-06-20 23:59:33,0.000,0.397,0.603,0.7639
2,RT @TimWattsMP: Can you believe this?\n\nThe M...,0,212,2021-06-20 23:58:46,0.124,0.000,0.876,-0.4019
3,RT @DrEricDing: 5) One dose of the vaccine is ...,0,270,2021-06-20 23:57:54,0.000,0.000,1.000,0.0000
4,RT @DrEricDing: 5) One dose of the vaccine is ...,0,270,2021-06-20 23:56:29,0.000,0.000,1.000,0.0000
...,...,...,...,...,...,...,...,...
6505,@Fab485617452 @JohnRHewson I'm 73 and my wife ...,6,0,2021-06-19 00:02:33,0.000,0.073,0.927,0.3818
6506,RT @kyle_minogue: @bjornradstrom Pfizer - 12-1...,0,2,2021-06-19 00:02:21,0.000,0.000,1.000,0.0000
6507,"RT @JohnRHewson: For the record, how many of o...",0,890,2021-06-19 00:01:32,0.000,0.000,1.000,0.0000
6508,"RT @JohnRHewson: For the record, how many of o...",0,890,2021-06-19 00:00:27,0.000,0.000,1.000,0.0000
