In [2]:
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt 
import seaborn as sns
import re
import time
import string
import warnings

# for all NLP related operations on text
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import sent_tokenize, word_tokenize
from nltk.sentiment.vader import SentimentIntensityAnalyzer
from nltk.stem import WordNetLemmatizer
from nltk.stem.porter import *
from nltk.classify import NaiveBayesClassifier
from wordcloud import WordCloud

from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score, confusion_matrix, accuracy_score
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB

# To consume Twitter's API
import tweepy
from tweepy import OAuthHandler 

# To identify the sentiment of text
from textblob import TextBlob
from textblob.sentiments import NaiveBayesAnalyzer
from textblob.np_extractors import ConllExtractor

# ignoring all the warnings
warnings.filterwarnings("ignore", category=DeprecationWarning)

# downloading stopwords corpus
nltk.download('stopwords')
nltk.download('wordnet')
nltk.download('vader_lexicon')
nltk.download('averaged_perceptron_tagger')
nltk.download('movie_reviews')
nltk.download('punkt')
nltk.download('conll2000')
nltk.download('brown')
stopwords = set(stopwords.words("english"))

# for showing all the plots inline
%matplotlib inline

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\khura\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\khura\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package vader_lexicon to
[nltk_data]     C:\Users\khura\AppData\Roaming\nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     C:\Users\khura\AppData\Roaming\nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package movie_reviews to
[nltk_data]     C:\Users\khura\AppData\Roaming\nltk_data...
[nltk_data]   Package movie_reviews is already up-to-date!
[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\khura\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is alr

In [8]:
class TwitterClient(object):
    """Small wrapper around a tweepy API handle for keyword tweet searches.

    Credentials are never embedded in the notebook. Each one is taken from the
    matching constructor argument or, when that is omitted, from the
    environment variable listed in ``_CREDENTIAL_ENV_VARS``.

    NOTE(review): the calls used here (``tweepy.TweepError``, ``API.search``,
    ``wait_on_rate_limit_notify``) match the tweepy 3.x API - confirm the
    installed version before upgrading tweepy.
    """

    # Environment variable consulted for every credential not passed explicitly.
    _CREDENTIAL_ENV_VARS = {
        'consumer_key': 'TWITTER_CONSUMER_KEY',
        'consumer_secret': 'TWITTER_CONSUMER_SECRET',
        'access_token': 'TWITTER_ACCESS_TOKEN',
        'access_token_secret': 'TWITTER_ACCESS_TOKEN_SECRET',
    }

    def __init__(self, consumer_key=None, consumer_secret=None,
                 access_token=None, access_token_secret=None):
        """Authenticate against Twitter and store the API handle in ``self.api``.

        Args:
            consumer_key: OAuth consumer key; defaults to ``$TWITTER_CONSUMER_KEY``.
            consumer_secret: OAuth consumer secret; defaults to ``$TWITTER_CONSUMER_SECRET``.
            access_token: OAuth access token; defaults to ``$TWITTER_ACCESS_TOKEN``.
            access_token_secret: OAuth access token secret; defaults to
                ``$TWITTER_ACCESS_TOKEN_SECRET``.

        Raises:
            ValueError: if a credential is neither passed nor present in the environment.

        A ``tweepy.TweepError`` during set-up is reported on stdout and leaves
        ``self.api`` as ``None`` (``get_tweets`` then refuses to run).
        """
        import os  # local import: keeps this block independent of the notebook's import cell

        # Always define the attribute so later code can test it instead of hitting AttributeError.
        self.api = None

        supplied = {
            'consumer_key': consumer_key,
            'consumer_secret': consumer_secret,
            'access_token': access_token,
            'access_token_secret': access_token_secret,
        }
        credentials = {
            name: supplied[name] or os.environ.get(env_var)
            for name, env_var in self._CREDENTIAL_ENV_VARS.items()
        }
        missing = [env_var for name, env_var in self._CREDENTIAL_ENV_VARS.items()
                   if not credentials[name]]
        if missing:
            raise ValueError(
                "Missing Twitter credentials; pass them to TwitterClient(...) or set: "
                + ", ".join(missing)
            )

        try:
            # create OAuthHandler object
            auth = OAuthHandler(credentials['consumer_key'], credentials['consumer_secret'])
            # set access token and secret
            auth.set_access_token(credentials['access_token'],
                                  credentials['access_token_secret'])
            # create tweepy API object to fetch tweets
            # add hyper parameter 'proxy' if executing from behind proxy "proxy='http://172.22.218.218:8085'"
            self.api = tweepy.API(auth, wait_on_rate_limit=True, wait_on_rate_limit_notify=True)

        except tweepy.TweepError as e:
            print(f"Error: Twitter Authentication Failed - \n{str(e)}")

    def get_tweets(self, query, maxTweets = 1000):
        """Download up to ``maxTweets`` tweets matching ``query``.

        Results are paged backwards: every request after the first asks for
        tweets with an id strictly below the last id of the previous page.

        Args:
            query: search string handed to ``self.api.search`` as ``q``.
            maxTweets: upper bound on the number of tweets downloaded. Tweets
                dropped as duplicates still count towards it, so the returned
                frame can hold fewer rows.

        Returns:
            pandas.DataFrame with a single ``tweets`` column holding the tweet
            texts (the column exists even when nothing was found).

        Raises:
            RuntimeError: if no API handle is available (authentication failed).

        A ``tweepy.TweepError`` raised by a search request is printed and ends
        the download; whatever was collected so far is returned.
        """
        api = getattr(self, 'api', None)
        if api is None:
            raise RuntimeError(
                "Twitter API client is not initialised; authentication must succeed "
                "before tweets can be fetched."
            )

        tweets = []           # one {'tweets': text} record per kept tweet
        seen_texts = set()    # texts already kept, for O(1) duplicate checks
        max_id = -1           # last id of the previous page; <= 0 means "first request"
        tweetCount = 0        # tweets downloaded so far (duplicates included)
        tweetsPerQry = 100    # page size requested per search call

        while tweetCount < maxTweets:
            # Never request more than the remaining budget, so maxTweets is a true ceiling.
            search_kwargs = {'q': query,
                             'count': min(tweetsPerQry, maxTweets - tweetCount)}
            if max_id > 0:
                search_kwargs['max_id'] = str(max_id - 1)

            try:
                new_tweets = api.search(**search_kwargs)
            except tweepy.TweepError as e:
                # Just exit if any error
                print("Tweepy error : " + str(e))
                break

            if not new_tweets:
                print("No more tweets found")
                break

            for tweet in new_tweets:
                text = tweet.text
                # Only tweets that have retweets are de-duplicated; a tweet with no
                # retweets is always kept, even when its text was seen before.
                if tweet.retweet_count > 0 and text in seen_texts:
                    continue
                seen_texts.add(text)
                tweets.append({'tweets': text})

            tweetCount += len(new_tweets)
            print("Downloaded {0} tweets".format(tweetCount))
            max_id = new_tweets[-1].id

        return pd.DataFrame(tweets, columns=['tweets'])

In [10]:
# Search term and download ceiling for this run.
SEARCH_QUERY = 'HSBC'
MAX_TWEETS = 7000

twitter_client = TwitterClient()

# calling function to get tweets
tweets_df = twitter_client.get_tweets(SEARCH_QUERY, maxTweets=MAX_TWEETS)
print(f'tweets_df Shape - {tweets_df.shape}')

# A bare expression is rendered only when it is the last statement of a cell,
# so the row count is the single value this cell displays.
len(tweets_df)

Downloaded 100 tweets
Downloaded 198 tweets
Downloaded 298 tweets
Downloaded 383 tweets
Downloaded 483 tweets
Downloaded 580 tweets
Downloaded 674 tweets
Downloaded 766 tweets
Downloaded 862 tweets
Downloaded 954 tweets
Downloaded 1047 tweets
Downloaded 1138 tweets
Downloaded 1235 tweets
Downloaded 1329 tweets
Downloaded 1420 tweets
Downloaded 1497 tweets
Downloaded 1590 tweets
Downloaded 1676 tweets
Downloaded 1757 tweets
Downloaded 1836 tweets
Downloaded 1913 tweets
Downloaded 1973 tweets
Downloaded 2046 tweets
Downloaded 2123 tweets
Downloaded 2201 tweets
Downloaded 2271 tweets
Downloaded 2346 tweets
Downloaded 2437 tweets
Downloaded 2531 tweets
Downloaded 2618 tweets
Downloaded 2701 tweets
Downloaded 2782 tweets
Downloaded 2853 tweets
Downloaded 2937 tweets
Downloaded 3029 tweets
Downloaded 3113 tweets
Downloaded 3190 tweets
Downloaded 3275 tweets
Downloaded 3350 tweets
Downloaded 3425 tweets
Downloaded 3503 tweets
Downloaded 3581 tweets
Downloaded 3668 tweets
Downloaded 3768 tweet

1108

In [11]:
# Show the first rows of the fetched tweets as the cell's rich output.
PREVIEW_ROWS = 10
tweets_df.head(PREVIEW_ROWS)

Unnamed: 0,tweets
0,RT @yukou_takahashi: かなり濃厚な情報で満載なので、さらに抜粋・翻訳して...
1,RT @Video_Forensics: Leaked files expose mass ...
2,RT @silvano_trotta: Retour sur les 2 millions ...
3,RT @JohnHemmings2: “This is no surprise but a ...
4,"@whereisindia Hello, apologize for any inconve..."
5,@Merkez_Bankasi @facc_ag @invest @SocieteGener...
6,@HSBC_UK customer for over 25 years and this b...
7,Last British governor of HK Patten asked \n@HS...
8,RT @Stand_with_HK: Patten🇬🇧 asked @HSBC to spe...
9,RT @naoyafujiwara: HSBC、ドイツ銀行の上級スタッフも中国共産党員だった...


In [12]:
# Shell escapes that publish the notebook file: fetch remote refs, stage the
# file, commit it with a fixed message, then push to the `origin` remote.
# NOTE(review): presumably the committed .ipynb carries its cell outputs and
# every literal typed into code cells - confirm nothing sensitive is embedded
# before pushing.
! git fetch
! git add Sentiment_Analysis_updated.ipynb
! git commit -m "New code push" Sentiment_Analysis_updated.ipynb
! git push origin

The file will have its original line endings in your working directory


[main abbe4f1] New code push
 1 file changed, 504 insertions(+), 2 deletions(-)


The file will have its original line endings in your working directory
To https://github.com/Ashishkhurana01/NLP.git
   454d9d6..abbe4f1  main -> main
