<a href="https://colab.research.google.com/github/Tee-A/Twitter_Scraping_Using_SnScrape_and_Sentiment-Analysis_Demo/blob/main/Twitter_Scraping_and_Sentiment_Analysis_Demo.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Installing the Snscrape Library

In [1]:
!pip install snscrape



# Importing the necessary Libraries

In [2]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import itertools
import snscrape.modules.twitter as sntwitter
import plotly.graph_objects as go
from datetime import datetime

In [3]:
# Scrape tweets matching the search query into the `tweets` DataFrame and
# report how long the scrape took.

# Upper bound on the number of tweets taken from the scraper.
MAX_TWEETS = 50000

# Search for '#vaccine', bounded by the since:/until: operators
# (2021-05-01 to 2021-07-30).
# NOTE(review): the query text is itself wrapped in double quotes inside the
# Python string; confirm that this is intended and not an exact-phrase search.
SEARCH_QUERY = '"#vaccine since:2021-05-01 until:2021-07-30"'

start_time = datetime.now()

# islice stops after the first MAX_TWEETS items the scraper yields.
# NOTE(review): the previews recorded below only show timestamps from
# 2021-07-29, so the capped result may not span the whole date range - verify
# before drawing conclusions about the full period.
scraped_items = sntwitter.TwitterSearchScraper(SEARCH_QUERY).get_items()
tweets = pd.DataFrame(itertools.islice(scraped_items, MAX_TWEETS))

end_time = datetime.now()

# Print the elapsed wall-clock time of the scrape.
print('Duration: {}'.format(end_time - start_time))

Duuration: 0:14:20.827540 


In [4]:
# Preview the first five scraped rows.
tweets.head(n=5)

Unnamed: 0,url,date,content,id,username,outlinks,outlinksss,tcooutlinks,tcooutlinksss
0,https://twitter.com/datrufwilfreeu/status/1420...,2021-07-29 23:59:59+00:00,Act Now America: Say NO To Vaccine Passports h...,1420896922278240256,datrufwilfreeu,[https://standforhealthfreedom.com/action/say-...,https://standforhealthfreedom.com/action/say-n...,[https://t.co/elL62YMo7J],https://t.co/elL62YMo7J
1,https://twitter.com/ignacio1981sala/status/142...,2021-07-29 23:59:59+00:00,So they are forcing my sister-in-law to take t...,1420896922181804032,ignacio1981sala,[],,[],
2,https://twitter.com/Bdj68Brad/status/142089692...,2021-07-29 23:59:59+00:00,@DAVEWILLISON1 Medical autonomy is a right Dav...,1420896920453599234,Bdj68Brad,[],,[],
3,https://twitter.com/DrSallan_/status/142089691...,2021-07-29 23:59:58+00:00,@_AlexHirsch The fact people are arguing under...,1420896917958057984,DrSallan_,[],,[],
4,https://twitter.com/einerdrake/status/14208969...,2021-07-29 23:59:58+00:00,Find out which vaccine is seeing the most brea...,1420896917316272130,einerdrake,[https://www.foxla.com/news/over-25-of-new-cov...,https://www.foxla.com/news/over-25-of-new-covi...,[https://t.co/prEPEBH9gV],https://t.co/prEPEBH9gV


In [5]:
# Show the frame's dimensions together with its column labels.
(tweets.shape, tweets.columns)

((50000, 9),
 Index(['url', 'date', 'content', 'id', 'username', 'outlinks', 'outlinksss',
        'tcooutlinks', 'tcooutlinksss'],
       dtype='object'))

In [6]:
# Keep the date, id, content and username columns and draw a seeded 20% random
# sample of the rows, renumbering the sampled rows from 0.
tweets_df = (
    tweets.loc[:, ['date', 'id', 'content', 'username']]
    .sample(frac=0.2, random_state=4097)
    .reset_index(drop=True)
)

In [7]:
# Check the dimensions (rows, columns) of the sampled frame.
tweets_df.shape

(10000, 4)

In [8]:
# Preview the first five rows of the sampled frame.
tweets_df.head(n=5)

Unnamed: 0,date,id,content,username
0,2021-07-29 20:19:36+00:00,1420841461155729410,@tezzaminds @GregHuntMP My impression of NSW i...,DarkMatterzine
1,2021-07-29 23:15:27+00:00,1420885713009135625,@POTUS But didn’t you say people with the vacc...,Terry_Roades
2,2021-07-29 23:00:17+00:00,1420881897572511747,@WaynesWhirled I've had covid and it was not a...,AmyBeePhoenix
3,2021-07-29 23:55:36+00:00,1420895817385263106,Joe and his minions used a false report coming...,SCWOFTX
4,2021-07-29 22:06:55+00:00,1420868466672902146,This ‘personal responsibility’ rhetoric is giv...,DisabilityStor1


# Next, we determine the public sentiment towards the vaccine

In [9]:
# We use a pre trained model from the Hugging Face Transformers Library to perform sentiment analysis
# Installing the Library

!pip install transformers



## Sentiment Analysis

In [10]:
# Build the Hugging Face sentiment-analysis pipeline used to score the tweets.

from transformers import pipeline

# Name the checkpoint explicitly instead of relying on the task's implicit
# default, so that re-running the notebook with a newer transformers release
# keeps using the same model (and avoids the "no model was supplied" warning).
SENTIMENT_MODEL = 'distilbert-base-uncased-finetuned-sst-2-english'

sentiment_classifier = pipeline('sentiment-analysis', model=SENTIMENT_MODEL)

In [11]:
# Run every tweet through the sentiment pipeline and add three columns:
#   sentiment - the raw pipeline output for the tweet (a list holding one dict)
#   label     - the predicted label taken from that dict
#   score     - the score the pipeline reports for that label

# truncation=True makes the tokenizer cut texts that exceed the model's maximum
# input length, so a single over-long tweet cannot abort the whole pass.
predictions = [
    sentiment_classifier(text, truncation=True)
    for text in tweets_df['content']
]

# The lists are in row order, so they line up positionally with tweets_df.
tweets_df = tweets_df.assign(
    sentiment=predictions,
    label=[prediction[0]['label'] for prediction in predictions],
    score=[prediction[0]['score'] for prediction in predictions],
)

In [12]:
# Inspect the first 20 rows, now including the sentiment, label and score columns.
tweets_df.head(n=20)

Unnamed: 0,date,id,content,username,sentiment,label,score
0,2021-07-29 20:19:36+00:00,1420841461155729410,@tezzaminds @GregHuntMP My impression of NSW i...,DarkMatterzine,"[{'label': 'NEGATIVE', 'score': 0.997952520847...",NEGATIVE,0.997953
1,2021-07-29 23:15:27+00:00,1420885713009135625,@POTUS But didn’t you say people with the vacc...,Terry_Roades,"[{'label': 'NEGATIVE', 'score': 0.999254345893...",NEGATIVE,0.999254
2,2021-07-29 23:00:17+00:00,1420881897572511747,@WaynesWhirled I've had covid and it was not a...,AmyBeePhoenix,"[{'label': 'NEGATIVE', 'score': 0.997293174266...",NEGATIVE,0.997293
3,2021-07-29 23:55:36+00:00,1420895817385263106,Joe and his minions used a false report coming...,SCWOFTX,"[{'label': 'NEGATIVE', 'score': 0.999765217304...",NEGATIVE,0.999765
4,2021-07-29 22:06:55+00:00,1420868466672902146,This ‘personal responsibility’ rhetoric is giv...,DisabilityStor1,"[{'label': 'NEGATIVE', 'score': 0.995846033096...",NEGATIVE,0.995846
5,2021-07-29 22:29:05+00:00,1420874044442820613,@MikeTheTiger You should let the university le...,JsonCulverhouse,"[{'label': 'NEGATIVE', 'score': 0.778448998928...",NEGATIVE,0.778449
6,2021-07-29 21:22:25+00:00,1420857265699966976,Me &amp; @l0ve_live_TY really gonna have to ge...,_Katrionaaa,"[{'label': 'NEGATIVE', 'score': 0.997768938541...",NEGATIVE,0.997769
7,2021-07-29 23:44:27+00:00,1420893010590535680,"Vaccinated people. Even worse, the vaxxed are ...",johnflipside,"[{'label': 'NEGATIVE', 'score': 0.998426556587...",NEGATIVE,0.998427
8,2021-07-29 23:44:11+00:00,1420892945440509954,These @GOP governors prohibiting masking and p...,sophiesmother95,"[{'label': 'NEGATIVE', 'score': 0.997048377990...",NEGATIVE,0.997048
9,2021-07-29 22:02:34+00:00,1420867370386657280,@ChipMulligan930 @cnnbrk That's fine - I was s...,TaintedSaint2,"[{'label': 'POSITIVE', 'score': 0.928253114223...",POSITIVE,0.928253


# Spot-checking individual tweets against their predicted sentiment

In [13]:
# Read the full text of the tweet at row label 16 to compare it with its predicted sentiment.
tweets_df.loc[16, 'content']

'@TheAnuhart @JamesSurowiecki Again, I understand that, but when we are facing such a significant level of vaccine hesitancy, being prepared to address things like this that appear incongruent and misleading to those who are on the fence about the vaccine is more helpful than just handwaving it away.'

In [14]:
# Read the full text of the tweet at row label 9.
tweets_df.loc[9, 'content']

'@ChipMulligan930 @cnnbrk That\'s fine - I was specifically told by my doctor NOT to get the vaccine at this point due to medical issues. My youngest had a severe reaction to the first one and can\'t get the 2nd. You\'re not going to "force" us to do a damn thing (and I probably cook better at home anyway).'

In [15]:
# Read the full text of the tweet at row label 13.
tweets_df.loc[13, 'content']

'This your future?\nVaccine mandates are unacceptable. Contact your elected representatives now. Tweet. Tell your employer no. Do something!\n#NoVaccineForMe #NoMoreMasks #NoMoreLockdowns #masksdontwork #FireFauci  #NoVaccinePassports #EndTheLockdowns #Trump #FoxNews #NoVaccineForMe https://t.co/12vWkiSd8s'

## Visualizing the sentiments

In [16]:
# Plot how many tweets received each sentiment label.
# Passing the raw per-tweet scores straight to go.Bar would draw one bar per
# tweet on top of the same two categories, which does not summarise anything,
# so the labels are aggregated into counts first.
label_counts = tweets_df["label"].value_counts()

fig = go.Figure()

fig.add_trace(go.Bar(x = label_counts.tolist(),
                     y = label_counts.index.tolist(),
                     orientation = "h")) # horizontal bars: counts on x, labels on y

fig.update_layout(plot_bgcolor = "black",
                  title = "Number of tweets per predicted sentiment",
                  xaxis_title = "Number of tweets",
                  yaxis_title = "Sentiment label")

fig.show()