# Twitter Pulse Checker

![preview](https://cdn.pixabay.com/photo/2013/06/07/09/53/twitter-117595_960_720.png)

In [None]:
# Connect to the dataset: mount Google Drive inside the Colab runtime
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

In [None]:
# import lots of stuff
import sys
import os
import re
import tweepy
# import BERT
from tweepy import OAuthHandler
from textblob import TextBlob

import numpy as np
import pandas as pd
from datetime import datetime, timedelta
from IPython.display import clear_output
from tqdm import tqdm

import matplotlib.pyplot as plt
import seaborn as sns
% matplotlib inline

from os import path
from PIL import Image
from wordcloud import WordCloud, STOPWORDS

In [None]:
# install Flair
!pip install --upgrade git+https://github.com/flairNLP/flair.git

clear_output()

In [None]:
# Flair pieces needed for tagging: the Sentence container and the sequence tagger
from flair.data import Sentence
from flair.models import SequenceTagger

# Load the tagger registered under the model name 'ner'
NER_MODEL_NAME = 'ner'
tagger = SequenceTagger.load(NER_MODEL_NAME)

# Drop whatever the load step printed into the cell
clear_output()

In [None]:
# Flair text classifier, used further down for sentiment prediction
from flair.models import TextClassifier

# Load the classifier registered under the model name 'en-sentiment'
SENTIMENT_MODEL_NAME = 'en-sentiment'
classifier = TextClassifier.load(SENTIMENT_MODEL_NAME)

# Drop whatever the load step printed into the cell
clear_output()

### Authenticate with Twitter API

In [None]:
#@title Enter Twitter Credentials
# SECURITY: API credentials must never be stored in the notebook itself. The key pair that
# used to be hardcoded in this cell should be treated as compromised and revoked/regenerated.
#
# Each credential is taken from an environment variable when set; otherwise the user is
# asked for it through a hidden prompt, so the value is not echoed or saved in the notebook.
import os
from getpass import getpass

TWITTER_KEY = os.environ.get('TWITTER_KEY') or getpass('Twitter API key: ')
TWITTER_SECRET_KEY = os.environ.get('TWITTER_SECRET_KEY') or getpass('Twitter API secret key: ')

In [None]:
# Authenticate (application-only auth) and build the API client.
#
# The previous check `if (not api)` could never fire: a constructed tweepy.API object is
# always truthy. With the tweepy 3.x API used in this notebook, a failed authentication
# surfaces as a TweepError raised while the AppAuthHandler is created, so that is what
# is handled here.
try:
    auth = tweepy.AppAuthHandler(TWITTER_KEY, TWITTER_SECRET_KEY)
except tweepy.TweepError as e:
    print("Can't Authenticate")
    print("some error : " + str(e))
    sys.exit(-1)

# Let tweepy sleep (and report it) whenever the rate limit is reached
api = tweepy.API(auth, wait_on_rate_limit=True,
                 wait_on_rate_limit_notify=True)


### Let's start scraping!

The Twitter scrape code here was taken from: https://bhaskarvk.github.io/2015/01/how-to-use-twitters-search-rest-api-most-effectively.

My thanks to the author.

We need to provide a search term and a maximum tweet count. Twitter lets you request 45,000 tweets every 15 minutes, so setting something below that works.

In [None]:
#@title Twitter Search API Inputs
#@markdown ### Enter Search Query:
searchQuery = 'Petrol' #@param {type:"string"}
#@markdown ### Enter Max Tweets To Scrape:
#@markdown #### The Twitter API Rate Limit (currently) is 45,000 tweets every 15 minutes.
maxTweets = 21800 #@param {type:"slider", min:0, max:45000, step:100}
Filter_Retweets = False #@param {type:"boolean"}

tweetsPerQry = 100  # this is the max the API permits
tweet_lst = []      # one row (list) per downloaded tweet; reset on every run of this cell

if Filter_Retweets:
    searchQuery = searchQuery + ' -filter:retweets'  # to exclude retweets

# fixup search query to get topic: keep the text before the first '-' (i.e. before any
# appended operator such as '-filter:retweets').
# partition() returns the whole query when it contains no '-'. The earlier
# searchQuery[:searchQuery.find('-')] sliced with -1 in that case and silently dropped
# the last character ('Petrol' -> 'Petro').
# The topic does not depend on the individual tweet, so it is computed once here.
topic = searchQuery.partition('-')[0].capitalize().strip()

# If results from a specific ID onwards are reqd, set since_id to that ID.
# else default to no lower limit, go as far back as API allows
sinceId = None

# If results only below a specific ID are reqd, set max_id to that ID.
# else default to no upper limit (any value <= 0), start from the most recent tweet
# matching the search query.
max_id = -1

tweetCount = 0
print("Downloading max {0} tweets".format(maxTweets))
while tweetCount < maxTweets:
    try:
        # Build the request once instead of repeating the call for every combination of
        # max_id / since_id. The page size is capped by what is still missing so the
        # total does not overshoot maxTweets.
        search_args = {
            'q': searchQuery,
            'count': min(tweetsPerQry, maxTweets - tweetCount),
            'lang': "en",
        }
        if max_id > 0:
            # continue strictly below the oldest tweet seen so far
            search_args['max_id'] = str(max_id - 1)
        if sinceId:
            search_args['since_id'] = sinceId

        new_tweets = api.search(**search_args)
        if not new_tweets:
            print("No more tweets found")
            break

        for tweet in new_tweets:
            # These attributes are not guaranteed to be present on every result
            reply_count = getattr(tweet, 'reply_count', 0)
            retweeted = getattr(tweet, 'retweeted', "NA")

            # fixup date: keep only the calendar date of the timestamp
            tweetDate = tweet.created_at.date()

            tweet_lst.append([tweetDate, topic,
                              tweet.id, tweet.user.screen_name, tweet.user.name,
                              tweet.text, tweet.favorite_count,
                              reply_count, tweet.retweet_count, retweeted])

        tweetCount += len(new_tweets)
        print("Downloaded {0} tweets".format(tweetCount))
        # the last tweet of the page becomes the upper bound for the next request
        max_id = new_tweets[-1].id
    except tweepy.TweepError as e:
        # Just exit if any error
        print("some error : " + str(e))
        break

# Replace the per-page progress lines with a single summary
clear_output()
print("Downloaded {0} tweets".format(tweetCount))

Downloaded 1300 tweets


## Data Sciencing

Let's load the tweet data into a Pandas Dataframe so we can do Data Science to it. 

The data is also saved down in a tweets.csv file in case you want to download it.

In [None]:
# Do not truncate long text columns (the tweet body) when frames are displayed.
# None means "no limit"; the previously used -1 is deprecated by pandas and emits a warning.
pd.set_option('display.max_colwidth', None)

# load it into a pandas dataframe (one row per scraped tweet)
tweet_df = pd.DataFrame(tweet_lst, columns=['tweet_dt', 'topic', 'id', 'username', 'name', 'tweet', 'like_count', 'reply_count', 'retweet_count', 'retweeted'])
# keep a copy on disk so the scrape can be downloaded
tweet_df.to_csv('tweets.csv')
tweet_df.head()

  """Entry point for launching an IPython kernel.


Unnamed: 0,tweet_dt,topic,id,username,name,tweet,like_count,reply_count,retweet_count,retweeted
0,2021-03-08,Petrol,1368803101378048001,YootDigmeMusic,Yoot Digme 🌈,@Ben_Geman @JigarShahDC 6/ you must say 25% is a win:\n\nbattery production times costly than digging for oil.\n\nfor 5… https://t.co/rz0WyOcBk9,0,0,0,False
1,2021-03-08,Petrol,1368803070742790146,YootDigmeMusic,Yoot Digme 🌈,"@Ben_Geman @JigarShahDC 5/ (!) 128g CO2 EV &gt; 120g CO2 petrol car\n\nBattery for EV is from lithium, nickel, cobalt, i… https://t.co/iO1uwYZRC0",0,0,0,False
2,2021-03-08,Petrol,1368803043026886664,YootDigmeMusic,Yoot Digme 🌈,@Ben_Geman @JigarShahDC 4/ The numbers of CO2 for electric cars (EVs)\n\nSummary:\n\nPetrol car = 120g CO2/km\n\nEV = 0.2… https://t.co/HjL3fegULR,0,0,0,False
3,2021-03-08,Petrol,1368802947413475331,YootDigmeMusic,Yoot Digme 🌈,"@Ben_Geman @JigarShahDC 1/ There’s an illusion that you decarbonize with EV, why?\n\nCalifornia - 225g CO2 /kWh avera… https://t.co/piWYAIleQW",0,0,0,False
4,2021-03-08,Petrol,1368802863359664132,alaudin_osman,Alaudin Osman,Answers sought for The Conversation @CapitalFMMw\nWhy are convoys of our president and Vice President driven so fast… https://t.co/GiNAQAdXsb,0,0,0,False


Unfortunately, Twitter does not let you filter by date when you request tweets. However, we can do this at this stage. I have set it up to pull yesterday's and today's tweets by default.

In [None]:
#@title Filter By Date Range
today = datetime.now().date()
yesterday = today - timedelta(1)

start_dt = '2021-03-29' #@param {type:"date"}
end_dt = '2021-03-11' #@param {type:"date"}

# An empty form field falls back to the default window (yesterday .. today);
# otherwise the 'YYYY-MM-DD' string is parsed into a date.
start_dt = datetime.strptime(start_dt, '%Y-%m-%d').date() if start_dt else yesterday
end_dt = datetime.strptime(end_dt, '%Y-%m-%d').date() if end_dt else today

# A start date after the end date would filter out every row, so treat the two values
# as an unordered pair and put them in chronological order.
if start_dt > end_dt:
    print("Start date {0} is after end date {1}; swapping them".format(start_dt, end_dt))
    start_dt, end_dt = end_dt, start_dt

# Keep only tweets whose date lies inside the (inclusive) range
tweet_df = tweet_df[(tweet_df['tweet_dt'] >= start_dt)
                    & (tweet_df['tweet_dt'] <= end_dt)]
tweet_df.shape

(1300, 10)

## NER and Sentiment Analysis

Now let's do some NER / Sentiment Analysis. We will use the Flair library: https://github.com/zalandoresearch/flair

### NER

Previously, we extracted the Tags and then appended them as separate rows in our dataframe. This helps us later on to Group by Tags.

We also create a new 'Hashtag' Tag as Flair does not recognize it and it's a big one in this context.

### Sentiment Analysis

We use the Flair Classifier to get Polarity and Result and add those fields to our dataframe.

**Warning:** This can be slow if you have lots of tweets.

In [None]:
# predict NER and sentiment for every tweet.
# The result is one row per (tweet, tag): a tweet with several tags appears several times,
# and a tweet with no entity and no hashtag produces no row.
nerlst = []

for index, row in tqdm(tweet_df.iterrows(), total=tweet_df.shape[0]):
  # the '#' characters are removed before the text is handed to Flair
  cleanedTweet = row['tweet'].replace("#", "")
  sentence = Sentence(cleanedTweet, use_tokenizer=True)

  # predict NER tags
  tagger.predict(sentence)

  # get ner
  ners = sentence.to_dict(tag_type='ner')['entities']

  # predict sentiment
  classifier.predict(sentence)

  label = sentence.labels[0]
  response = {'result': label.value, 'polarity': label.score}

  # Signed polarity: negative results get a negative score. It is the same for every
  # tag of the tweet, so it is computed once per tweet rather than once per tag.
  adj_polarity = response['polarity']
  if response['result'] == 'NEGATIVE':
    adj_polarity = response['polarity'] * -1

  # get hashtags from the original text and add them as tags of type 'Hashtag'
  hashtags = re.findall(r'#\w+', row['tweet'])
  ners.extend({ 'type': 'Hashtag', 'text': hashtag } for hashtag in hashtags)

  for ner in ners:
    # Entities without a 'type' key get an empty tag type. A plain dict lookup with a
    # default replaces the former bare `except:`, which would have hidden any error.
    tag_type = ner.get('type', '')
    nerlst.append([ row['tweet_dt'], row['topic'], row['id'], row['username'],
                   row['name'], row['tweet'], tag_type, ner['text'], response['result'],
                   response['polarity'], adj_polarity, row['like_count'], row['reply_count'],
                   row['retweet_count'] ])

clear_output()

In [None]:
# Column names, in the same order as the values of each row collected in nerlst
ner_columns = [
    'tweet_dt', 'topic', 'id', 'username', 'name', 'tweet',
    'tag_type', 'tag', 'sentiment', 'polarity', 'adj_polarity',
    'like_count', 'reply_count', 'retweet_count',
]

# One row per (tweet, tag) pair
df_ner = pd.DataFrame(nerlst, columns=ner_columns)
df_ner

Unnamed: 0,tweet_dt,topic,id,username,name,tweet,tag_type,tag,sentiment,polarity,adj_polarity,like_count,reply_count,retweet_count
0,2021-03-08,Petrol,1368803101378048001,YootDigmeMusic,Yoot Digme 🌈,@Ben_Geman @JigarShahDC 6/ you must say 25% is a win:\n\nbattery production times costly than digging for oil.\n\nfor 5… https://t.co/rz0WyOcBk9,,Geman,POSITIVE,0.926951,0.926951,0,0,0
1,2021-03-08,Petrol,1368803101378048001,YootDigmeMusic,Yoot Digme 🌈,@Ben_Geman @JigarShahDC 6/ you must say 25% is a win:\n\nbattery production times costly than digging for oil.\n\nfor 5… https://t.co/rz0WyOcBk9,,JigarShahDC,POSITIVE,0.926951,0.926951,0,0,0
2,2021-03-08,Petrol,1368803070742790146,YootDigmeMusic,Yoot Digme 🌈,"@Ben_Geman @JigarShahDC 5/ (!) 128g CO2 EV &gt; 120g CO2 petrol car\n\nBattery for EV is from lithium, nickel, cobalt, i… https://t.co/iO1uwYZRC0",,Geman,POSITIVE,0.829397,0.829397,0,0,0
3,2021-03-08,Petrol,1368803070742790146,YootDigmeMusic,Yoot Digme 🌈,"@Ben_Geman @JigarShahDC 5/ (!) 128g CO2 EV &gt; 120g CO2 petrol car\n\nBattery for EV is from lithium, nickel, cobalt, i… https://t.co/iO1uwYZRC0",,JigarShahDC,POSITIVE,0.829397,0.829397,0,0,0
4,2021-03-08,Petrol,1368803043026886664,YootDigmeMusic,Yoot Digme 🌈,@Ben_Geman @JigarShahDC 4/ The numbers of CO2 for electric cars (EVs)\n\nSummary:\n\nPetrol car = 120g CO2/km\n\nEV = 0.2… https://t.co/HjL3fegULR,,Geman,NEGATIVE,0.982261,-0.982261,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1123,2021-03-07,Petrol,1368614059071913988,AtifAbKarim1,AtifAbKarim1,@Purupurea_u 3rd Feb 2020.\n\nMuhammad Qasim saw that Imran Khan asks the people of Pakistan to adopt simplicity amon… https://t.co/zfnASepzmx,,Imran Khan,NEGATIVE,0.591955,-0.591955,0,0,0
1124,2021-03-07,Petrol,1368614059071913988,AtifAbKarim1,AtifAbKarim1,@Purupurea_u 3rd Feb 2020.\n\nMuhammad Qasim saw that Imran Khan asks the people of Pakistan to adopt simplicity amon… https://t.co/zfnASepzmx,,Pakistan,NEGATIVE,0.591955,-0.591955,0,0,0
1125,2021-03-07,Petrol,1368613968323866626,sultankhan55786,Sultan khan,"Corona, unemployment, economic recession, price of petrol, price of gas and #FarmersProtests .... In the eyes of Mo… https://t.co/quaXDI3waJ",,Corona,NEGATIVE,0.856710,-0.856710,5,0,1
1126,2021-03-07,Petrol,1368613968323866626,sultankhan55786,Sultan khan,"Corona, unemployment, economic recession, price of petrol, price of gas and #FarmersProtests .... In the eyes of Mo… https://t.co/quaXDI3waJ",,FarmersProtests,NEGATIVE,0.856710,-0.856710,5,0,1


Let's filter out obvious tags like #Seattle that would show up for this search. You can comment this portion out or use different Tags for your list.