In [2]:
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt 
import seaborn as sns
import re
import time
import string
import warnings

# for all NLP related operations on text
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import sent_tokenize, word_tokenize
from nltk.sentiment.vader import SentimentIntensityAnalyzer
from nltk.stem import WordNetLemmatizer
from nltk.stem.porter import *
from nltk.classify import NaiveBayesClassifier
from wordcloud import WordCloud

from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score, confusion_matrix, accuracy_score
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB

# To consume Twitter's API
import tweepy
from tweepy import OAuthHandler 

# To identify the sentiment of text
from textblob import TextBlob
from textblob.sentiments import NaiveBayesAnalyzer
from textblob.np_extractors import ConllExtractor

# ignoring all the warnings
warnings.filterwarnings("ignore", category=DeprecationWarning)

# downloading stopwords corpus
nltk.download('stopwords')
nltk.download('wordnet')
nltk.download('vader_lexicon')
nltk.download('averaged_perceptron_tagger')
nltk.download('movie_reviews')
nltk.download('punkt')
nltk.download('conll2000')
nltk.download('brown')
stopwords = set(stopwords.words("english"))

# for showing all the plots inline
%matplotlib inline

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\khura\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\khura\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package vader_lexicon to
[nltk_data]     C:\Users\khura\AppData\Roaming\nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     C:\Users\khura\AppData\Roaming\nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package movie_reviews to
[nltk_data]     C:\Users\khura\AppData\Roaming\nltk_data...
[nltk_data]   Package movie_reviews is already up-to-date!
[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\khura\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is alr

In [18]:
class TwitterClient(object): 
    def __init__(self): 
        #Initialization method. 
        try: 
            # create OAuthHandler object 
            auth = OAuthHandler('MWagHIMZgBCE8ExJw4ZEhBeqd', 'pHoXxG9mXVDlLInne7xajFJ8WdyPKhuXdAwJOnYu9qUOT0Go8U') 
            # set access token and secret 
            auth.set_access_token('18837312-7UAgD8xarB9RgUKv263ftqGKnYkSIHAgjzVvf7K1f', 'pjc3MOSDmCXz3mAFt28gfjWdQqNVqcSUzA4BhVChNFQqP') 
            # create tweepy API object to fetch tweets 
            # add hyper parameter 'proxy' if executing from behind proxy "proxy='http://172.22.218.218:8085'"
            self.api = tweepy.API(auth, wait_on_rate_limit=True, wait_on_rate_limit_notify=True)
            
        except tweepy.TweepError as e:
            print(f"Error: Twitter Authentication Failed - \n{str(e)}")

    def get_tweets(self, query, maxTweets = 1000):
        #Function to fetch tweets. 
        # empty list to store parsed tweets 
        tweets = [] 
        sinceId = None
        max_id = -1
        tweetCount = 0
        tweetsPerQry = 100

        while tweetCount < maxTweets:
            try:
                if (max_id <= 0):
                    if (not sinceId):
                        new_tweets = self.api.search(q=query, geo = '43.651070,-79.347015,250km', lang='en', count=tweetsPerQry)
                    else:
                        new_tweets = self.api.search(q=query, count=tweetsPerQry,geo = '43.651070,-79.347015,250km', lang='en',
                                                since_id=sinceId)
                else:
                    if (not sinceId):
                        new_tweets = self.api.search(q=query, count=tweetsPerQry,geo = '43.651070,-79.347015,250km', lang='en',
                                                max_id=str(max_id - 1))
                    else:
                        new_tweets = self.api.search(q=query, count=tweetsPerQry,geo = '43.651070,-79.347015,250km', lang='en',
                                                max_id=str(max_id - 1),
                                                since_id=sinceId)
                if not new_tweets:
                    print("No more tweets found")
                    break

                for tweet in new_tweets:
                    parsed_tweet = {} 
                    parsed_tweet['tweets'] = tweet.text 

                    # appending parsed tweet to tweets list 
                    if tweet.retweet_count > 0: 
                        # if tweet has retweets, ensure that it is appended only once 
                        if parsed_tweet not in tweets: 
                            tweets.append(parsed_tweet) 
                    else: 
                        tweets.append(parsed_tweet) 
                        
                tweetCount += len(new_tweets)
                print("Downloaded {0} tweets".format(tweetCount))
                max_id = new_tweets[-1].id

            except tweepy.TweepError as e:
                # Just exit if any error
                print("Tweepy error : " + str(e))
                break
        
        return pd.DataFrame(tweets)

In [19]:
twitter_client = TwitterClient()

# calling function to get tweets
tweets_df = twitter_client.get_tweets('HSBC', maxTweets=7000)
print(f'tweets_df Shape - {tweets_df.shape}')
# tweets_df.head(10)
# len(tweets_df)

Downloaded 74 tweets
Downloaded 173 tweets
Downloaded 267 tweets
Downloaded 367 tweets
Downloaded 462 tweets
Downloaded 541 tweets
Downloaded 641 tweets
Downloaded 732 tweets
Downloaded 821 tweets
Downloaded 907 tweets
Downloaded 999 tweets
Downloaded 1090 tweets
Downloaded 1164 tweets
Downloaded 1249 tweets
Downloaded 1328 tweets
Downloaded 1401 tweets
Downloaded 1472 tweets
Downloaded 1544 tweets
Downloaded 1621 tweets
Downloaded 1688 tweets
Downloaded 1782 tweets
Downloaded 1872 tweets
Downloaded 1946 tweets
Downloaded 2034 tweets
Downloaded 2119 tweets
Downloaded 2199 tweets
Downloaded 2289 tweets
Downloaded 2370 tweets
Downloaded 2451 tweets
Downloaded 2543 tweets
Downloaded 2618 tweets
Downloaded 2687 tweets
Downloaded 2764 tweets
Downloaded 2827 tweets
Downloaded 2894 tweets
Downloaded 2963 tweets
Downloaded 3022 tweets
Downloaded 3087 tweets
Downloaded 3146 tweets
Downloaded 3205 tweets
Downloaded 3279 tweets
Tweepy error : Failed to send request: HTTPSConnectionPool(host='api.

In [20]:
tweets_df

Unnamed: 0,tweets
0,RT @IvanPentchoukov: Leaked files expose mass ...
1,@stuart_onyeche @andy4wm Many companies &amp; ...
2,RT @PM_Thornton: The Telegraph: Senior staff a...
3,RT @benedictrogers: This may be part of the ex...
4,RT @JPMediaBoss: Dominion Voting Systems paten...
...,...
639,#xrpcommunity \nNow that changes things.... 🤔\...
640,@RobTheDadBod @waltshaub @Scotus I advocated a...
641,RT @MissMaga2016: Boeing\nAirbus\nRolls Royce\...
642,@HSBC_UK I’m closing my HSBC bank account afte...


In [21]:
# Sentiment classification for above tweets

# 1 way
def fetch_sentiment_using_SIA(text):
    sid = SentimentIntensityAnalyzer()
    polarity_scores = sid.polarity_scores(text)
    return 'neg' if polarity_scores['neg'] > polarity_scores['pos'] else 'pos'

# 2 way
def fetch_sentiment_using_textblob(text):
    analysis = TextBlob(text)
    return 'pos' if analysis.sentiment.polarity >= 0 else 'neg'

In [13]:
! git fetch
! git add Sentiment_Analysis_updated.ipynb
! git commit -m "New code push" Sentiment_Analysis_updated.ipynb
! git push origin

The file will have its original line endings in your working directory


[main b56c6a4] New code push
 1 file changed, 4 insertions(+), 5 deletions(-)


The file will have its original line endings in your working directory
To https://github.com/Ashishkhurana01/NLP.git
   abbe4f1..b56c6a4  main -> main
