**NB**: Twitter API access tokens are required to run this notebook; they are read from `./keys.json`.

In [1]:
import json
import time
from datetime import datetime

import numpy as np
import pandas as pd
import requests
import tweepy

In [2]:
## Lift pandas' display limits so full tweet text and every column are shown.
for option_name in ('display.max_colwidth', 'display.max_columns'):
  pd.set_option(option_name, None)

In [3]:
## Load the API credentials from the local JSON key file.
with open('./keys.json', 'r') as key_file:
  tokens = json.load(key_file)

## Pull each credential out of the parsed mapping under its own name.
api_key = tokens['api_key']
access_token = tokens['access_token']
bearer_token = tokens['bearer_token']

access_token_secret = tokens['access_token_secret']
api_key_secret = tokens['api_key_secret']

In [4]:
## Build the tweepy client from the loaded credentials.
## Responses are returned as raw `requests.Response` objects, and the client
## is configured not to wait on rate limits by itself.
client_options = {
  'bearer_token': bearer_token,
  'consumer_key': api_key,
  'consumer_secret': api_key_secret,
  'access_token': access_token,
  'access_token_secret': access_token_secret,
  'return_type': requests.Response,
  'wait_on_rate_limit': False,
}
client = tweepy.Client(**client_options)

In [5]:
## Search terms passed one by one to the recent-search query.
hashtags = [
  'GetYourPVC',
  'election2023',
  'NigeriaElection',
  'ShowYourPVC',
  '2023Elections',
  'RevolutionNow',
  'WeCantContinueLikeThis',
  'Nigeria Presidential',
]

In [6]:
## Seconds to wait after a rate-limit error before retrying the same page.
RATE_LIMIT_WAIT_SECONDS = 16 * 60

## Collect every page of recent-search results for each hashtag.
tweets = []

for hashtag in hashtags:
  ## `None` requests the first page; an empty string marks that no further page exists.
  next_token = None
  while next_token != "":
    try:
      response = client.search_recent_tweets(
        hashtag,
        end_time=None,
        expansions='author_id',
        max_results=100,
        media_fields=None,
        next_token=next_token,
        place_fields=None,
        poll_fields=None,
        since_id=None,
        start_time=None,
        tweet_fields=['id','author_id','text','created_at','public_metrics'],
        until_id=None,
        user_fields=['username', 'location'],
        user_auth=True,
      )
    except tweepy.errors.TooManyRequests:
      ## Sleep instead of spinning so the kernel does not burn CPU while waiting.
      time.sleep(RATE_LIMIT_WAIT_SECONDS)

      print('continue')

      ## Retry the same page: `next_token` has not been advanced.
      continue

    data = response.json()
    ## The API may omit the 'data' key when a page holds no tweets; treat that as empty.
    tweets.extend(data.get('data', []))
    next_token = data['meta'].get('next_token', "")


In [7]:
## Flatten each tweet: lift the nested 'public_metrics' counters to top-level keys.
## New dicts are built instead of deleting keys in place, so re-running this cell
## after the metrics were already flattened is a no-op rather than a KeyError.
tweets = [
  {
    **{key: value for key, value in tweet.items() if key != 'public_metrics'},
    **tweet.get('public_metrics', {}),
  }
  for tweet in tweets
]

In [8]:
## Number of tweet records gathered by the search loop.
len (tweets)

40131

In [9]:
## Load the tweet records into a frame, then count the rows whose text
## repeats that of an earlier row.
df = pd.DataFrame(data=tweets)
df.duplicated(subset=['text']).sum()

29241

In [10]:
## Preview the first rows of the collected tweets.
df.head ()

Unnamed: 0,id,text,author_id,created_at,retweet_count,reply_count,like_count,quote_count,withheld
0,1525622531327242242,"RT @Flawlex2: If you came out for EndSars protests and marching, then this is the right time to march again and make you march count. Let's…",1573327596,2022-05-14T23:42:10.000Z,22,0,0,0,
1,1525620603805392897,"RT @Flawlex2: If you came out for EndSars protests and marching, then this is the right time to march again and make you march count. Let's…",1607860832,2022-05-14T23:34:31.000Z,22,0,0,0,
2,1525616708744105989,"RT @Flawlex2: If you came out for EndSars protests and marching, then this is the right time to march again and make you march count. Let's…",1254880591511719936,2022-05-14T23:19:02.000Z,22,0,0,0,
3,1525615669265543168,RT @obilo1: Hear what late Dora Akunyeli said about Peter Obi.. \n\n#GetYourPVC\n#peterObi2023 https://t.co/fDOP2QvfzX,270844470,2022-05-14T23:14:54.000Z,50,0,0,0,
4,1525615294483468290,"RT @Flawlex2: If you came out for EndSars protests and marching, then this is the right time to march again and make you march count. Let's…",1256238250441793536,2022-05-14T23:13:25.000Z,22,0,0,0,


In [11]:
# df_2.drop_duplicates ('text').to_csv ('../Data/data_process_2.csv')

In [12]:
# df.drop_duplicates ('text').to_csv ('../../Temp/150522_TWEETS.csv', index=False)

In [21]:
## Dimensions of the collected-tweets frame.
## NOTE(review): the recorded output (10890 rows) equals 40131 - 29241, i.e. `df`
## after duplicate texts were dropped, so this cell appears to have been executed
## after the de-duplication cell further down. Confirm the intended cell order.
df.shape

(10890, 9)

In [13]:
## Load the tweet dataset already stored on disk (the same file this notebook writes back to).
final_df = pd.read_csv ('../../Data/NG_ELECTION_TWEETS.csv')

In [25]:
## Preview the first rows of the dataset read from disk.
final_df.head()

Unnamed: 0,id,text,created_at,author_id,retweet_count,reply_count,like_count,quote_count
0,1507404536511565825,"RT @AtedoPeterside: Nobody ""says"" it better than our GoAmbassador @mrmacaronii. Take matters into your own hands by acquiring your PVC now.…",2022-03-25T17:10:22.000Z,1220591726731124736,24,0,0,0
1,1507403883924103170,RT @1dernet: Getting Our PVC And Registering To Vote Come 2023 Is The Nonviolent Part To Take.\n#GetYourPVC \n@DeleFarotimi \n@Tsngcampaign ht…,2022-03-25T17:07:46.000Z,1169272207014543360,19,0,0,0
2,1507403359040544775,RT @Chude__: Hear what late Dora Akunyeli said about Peter Obi.. \n\n#GetYourPVC\n#Competense2023\n#peterObi2023 https://t.co/L0u1FqY4dB,2022-03-25T17:05:41.000Z,1387165423083433985,72,0,0,0
3,1507402890121465859,"RT @obi_Nwosu: Do you want a better Nigeria? Who is your dream president? Please note, You will be needing a voters card to vote for him or…",2022-03-25T17:03:49.000Z,1119585791146168327,29,0,0,0
4,1507400790285111299,#GoNigeria #GetYourPVC https://t.co/Oq5ZqTE8fg,2022-03-25T16:55:29.000Z,1313043015917801472,0,0,0,0


In [15]:
## Keep only the first occurrence of each tweet text; `df` is modified in place.
df.drop_duplicates(subset=['text'], keep='first', inplace=True)

In [16]:
## Stack the stored dataset and the newly collected tweets row-wise.
final = pd.concat(objs=[final_df, df], axis=0)

In [26]:
## Dimensions of the combined frame.
final.shape

(23437, 9)

In [24]:
## Preview the first rows of the combined frame.
final.head()

Unnamed: 0,id,text,created_at,author_id,retweet_count,reply_count,like_count,quote_count,withheld
0,1507404536511565825,"RT @AtedoPeterside: Nobody ""says"" it better than our GoAmbassador @mrmacaronii. Take matters into your own hands by acquiring your PVC now.…",2022-03-25T17:10:22.000Z,1220591726731124736,24,0,0,0,
1,1507403883924103170,RT @1dernet: Getting Our PVC And Registering To Vote Come 2023 Is The Nonviolent Part To Take.\n#GetYourPVC \n@DeleFarotimi \n@Tsngcampaign ht…,2022-03-25T17:07:46.000Z,1169272207014543360,19,0,0,0,
2,1507403359040544775,RT @Chude__: Hear what late Dora Akunyeli said about Peter Obi.. \n\n#GetYourPVC\n#Competense2023\n#peterObi2023 https://t.co/L0u1FqY4dB,2022-03-25T17:05:41.000Z,1387165423083433985,72,0,0,0,
3,1507402890121465859,"RT @obi_Nwosu: Do you want a better Nigeria? Who is your dream president? Please note, You will be needing a voters card to vote for him or…",2022-03-25T17:03:49.000Z,1119585791146168327,29,0,0,0,
4,1507400790285111299,#GoNigeria #GetYourPVC https://t.co/Oq5ZqTE8fg,2022-03-25T16:55:29.000Z,1313043015917801472,0,0,0,0,


In [17]:

## Write the combined, de-duplicated tweets back to the main dataset file.
## `index=False` keeps the row index out of the CSV; otherwise it would come back
## as an extra 'Unnamed: 0' column the next time the file is read.
final = final.drop_duplicates('text')
final.to_csv('../../Data/NG_ELECTION_TWEETS.csv', index=False)

In [19]:
# import os


# file_ls = os.listdir ('../../Temp')

# ## Check if excel file containing all tweets exist, if not create a new data frame else read the existing dataframe
# tweets_df = pd.DataFrame () if 'NG_ELECTION_TWEETS.csv' not in file_ls else pd.read_csv ('../Data/NG_ELECTION_TWEETS.csv')

# for file in file_ls:
#   if file.endswith ('.csv') and 'CLEANED' not in file and not file.startswith ('NG_ELECTION'):
#     tweets_df = pd.concat ([tweets_df, pd.read_csv (f'../Data/{file}')])


# # tweets_df.drop ('Unnamed: 0', axis=1, inplace=True)
# tweets_df.drop_duplicates ('text', inplace=True)
# tweets_df.to_csv ('../Data/NG_ELECTION_TWEETS.csv', index=False)
