## Importing the necessary libraries

In [None]:
import snscrape.modules.twitter as sntwitter
import pandas as pd
import itertools
import plotly.graph_objects as go
from datetime import datetime

## Creating a data frame called "df" for storing the data to be scraped. Here, "2019 elections" is the search keyword.

In [None]:
# Search Twitter for the exact phrase "2019 elections" (no place or date filter),
# lazily cap the result stream at 5000000 items, and load it into 'df'.
# NOTE(review): in this notebook this frame is only inspected via df.columns before
# 'df' is reassigned further down — confirm the full-size scrape is really needed here.
scraper = sntwitter.TwitterSearchScraper('"2019 elections"')
tweet_stream = itertools.islice(scraper.get_items(), 5000000)
df = pd.DataFrame(tweet_stream)

## Reading the column names from the dataframe to check the attributes

In [None]:
# Bare last expression of the cell: shows the column labels of df so the
# attributes available on each scraped record can be checked.
df.columns

## Calculate the time for scraping the 5000000 tweets

Here our search parameters are modified to search for tweets around Abuja between __2017-01-01 and 2021-10-23__ using the keyword __2019 elections__.

__NB:__ we set the number of results to be returned to __5000000__ so that we can get as many results (tweets) as possible.

In [None]:
# Search query: the exact phrase "2019 elections", narrowed with the near:/since:/until:
# search operators. Only the phrase itself is quoted — wrapping the whole string in
# double quotes would make the operators part of the literal phrase being searched
# for, so the location and date filters would never be applied.
query = '"2019 elections" near:Abuja since:2017-01-01 until:2021-10-23'
# Set start time
start_time = datetime.now()
# Creating dataframe called 'data' and storing the tweets (at most 5000000 of them;
# islice stops pulling from the scraper once that many items have been yielded)
data = pd.DataFrame(itertools.islice(
    sntwitter.TwitterSearchScraper(query).get_items(), 5000000))
# Set end time
end_time = datetime.now()
# Printing the time duration for scraping these tweets
print('Duration: {}'.format(end_time - start_time))

In [None]:
# Narrow the scraped frame down to the columns used for the analysis and keep the
# result under the name 'df'.
# NOTE(review): 'username' must exist as a column of 'data' — confirm against the
# installed snscrape version.
columns_to_keep = ['date', 'id', 'content', 'username', 'url']
df = data.loc[:, columns_to_keep]

In [None]:
# If you don't already have the transformers library installed, install it using the command:
# !pip install transformers

# PS: Remember to remove the leading # in front of "!pip install transformers" before running it

In [None]:
# Bring in the pipeline factory from Transformers and build the sentiment classifier
# that the later cells apply to each tweet. No model name is passed here, so the
# library chooses which model backs the 'sentiment-analysis' task.
from transformers import pipeline
sentiment_classifier = pipeline(task='sentiment-analysis')

In [None]:
# Taking only the first 1000000 records (20% of the 5000000 requested) and creating a new dataframe called df1
df1 = df.head(1000000)
# Passing each tweet through the sentiment pipeline and extracting the sentiment label and score.
# truncation=True clips any text longer than the model's maximum input length; without it
# a single over-long tweet raises inside the pipeline and aborts the whole .apply run.
# The raw pipeline result is kept in 'sentiment'; its first element carries the
# 'label' and 'score' values that are unpacked into their own columns.
df1 = (
    df1
    .assign(sentiment=lambda x: x['content'].apply(
        lambda s: sentiment_classifier(s, truncation=True)))
    .assign(
        label=lambda x: x['sentiment'].apply(lambda s: s[0]['label']),
        score=lambda x: x['sentiment'].apply(lambda s: s[0]['score']),
    )
)
df1.head()

In [None]:
# Show the text of the tweet stored under index label 1000 in df1, so its
# "positive"/"negative" sentiment label can be sanity-checked by reading the tweet.
df1.loc[1000, 'content']

In [None]:
# Visualizing the sentiments of df1
# One horizontal bar trace: scores run along the x-axis, labels along the y-axis
# (orientation "h" is what flips the usual bar layout).
sentiment_bars = go.Bar(
    x=df1["score"],
    y=df1["label"],
    orientation="h",
)
fig = go.Figure(data=[sentiment_bars])
fig.update_layout(plot_bgcolor="white")
fig.show()

In [None]:
# Taking the entire 5000000 (100%) records and creating a new dataframe called df2
df2 = df
# Passing each tweet through the sentiment pipeline and extracting the sentiment label and score.
# truncation=True clips any text longer than the model's maximum input length; without it
# a single over-long tweet raises inside the pipeline and aborts the whole .apply run.
# The raw pipeline result is kept in 'sentiment'; its first element carries the
# 'label' and 'score' values that are unpacked into their own columns.
df2 = (
    df2
    .assign(sentiment=lambda x: x['content'].apply(
        lambda s: sentiment_classifier(s, truncation=True)))
    .assign(
        label=lambda x: x['sentiment'].apply(lambda s: s[0]['label']),
        score=lambda x: x['sentiment'].apply(lambda s: s[0]['score']),
    )
)
df2.head()

In [None]:
# Visualizing the sentiments of the full dataframe df2
fig1 = go.Figure()
# The sentiment columns created on df2 are named "score" and "label"; the previous
# "Sentiment score" / "Sentiment label" keys do not exist on df2 and raised KeyError.
fig1.add_trace(go.Bar(x = df2["score"],
y = df2["label"],
orientation = "h")) #set orientation to horizontal because we want to flip the x and y-axis
fig1.update_layout(plot_bgcolor = "white")
fig1.show()

In [None]:
# Write the scored full dataframe df2 to CSV, keeping the row index as the first column
df2.to_csv(path_or_buf='Abj-Elect-Tweets-Sentiment.csv', index=True)

In [None]:
# Write the scored subset df1 to its own CSV, keeping the row index as the first column
df1.to_csv(path_or_buf='Abj-Elect-Tweets-Sentiment1.csv', index=True)