In [1]:
import GetOldTweets3 as got
import pandas as pd
import numpy as np
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer

import pickle
import plotly.graph_objects as go


In [2]:
%%time

text_query = 'USA Election 2020'
count = 100
# Creation of query object
tweetCriteria = got.manager.TweetCriteria().setQuerySearch(text_query).setMaxTweets(count)
# Creation of list that contains all tweets
tweets = got.manager.TweetManager.getTweets(tweetCriteria)

Wall time: 1min 40s


In [3]:
# Creating list of chosen tweet data
text_tweets = [[tweet.date, tweet.text] for tweet in tweets]

In [4]:
text_tweets[0]

[datetime.datetime(2020, 6, 8, 6, 45, 31, tzinfo=datetime.timezone.utc),
 '@realDonaldTrump would "Sign An US Presidentual Humane Executive Order, in June-2020, (Before The 2020-POTUS-Election), To Bring Back 4+Millions of Poor Foreign Born US Immigration Seeking Workers of USA, if they (Worked+Served+Contributed) in to USA, (1998-2016), Then Deported".']

In [5]:
df = pd.DataFrame(text_tweets, columns =['Date', 'Tweets']) 

In [6]:
def getTweets(query,count):
    text_query = query
    # Creation of query object
    tweetCriteria = got.manager.TweetCriteria().setQuerySearch(text_query).setMaxTweets(count)
    # Creation of list that contains all tweets
    tweets = got.manager.TweetManager.getTweets(tweetCriteria)
    text_tweets = [[tweet.date, tweet.text] for tweet in tweets]
    df = pd.DataFrame(text_tweets, columns =['Date', 'Tweets']) 
    return df


In [2]:
def preprocess(tweet):
    #LowerCase 
    tweet=tweet.lower()

    #Replacing URL
    tweet=tweet.replace(r'https?://[^\s<>"]+|www\.[^\s<>"]+', "URL")

    #Removing Username
    tweet=tweet.replace(r'@[^\s]+',"")

    #Removing Non-Alpha Numeric Chars
    tweet=tweet.replace(r'[^A-Za-z0-9 ]+',"")


    stop_words = stopwords.words('english') 
    text_tokens = word_tokenize(tweet)
    tokens_without_sw = [word for word in text_tokens if not word in stop_words]
    

    #Lementize
    wordlem = WordNetLemmatizer()
    tokens_without_sw=[wordlem.lemmatize(word) for word in tokens_without_sw]
    filtered_sentence = (" ").join(tokens_without_sw)



    return filtered_sentence


def load_models():
    
    # Load the vectoriser.
    file = open('../Models/tfidf-ngram-(1,3).pickle', 'rb')
    # file = open('C:/Users/mavin/Desktop/TwitterSentiment/Models/tfidf-ngram-(1,3).pickle', 'rb')
    vectorizer = pickle.load(file)
    file.close()
    
    # Load the LR Model.
    
    file = open('../Models/svc.pickle', 'rb')
    # file = open('C:/Users/mavin/Desktop/TwitterSentiment/Models/svc.pickle', 'rb')
    
    lr = pickle.load(file)
    file.close()
    
    return vectorizer, lr



def predict(vectorizer,model,tweets):

    print ("----------------PreProcessing--------------------------")
    preproc=[]
    for tweet in tweets:
        preproc.append(preprocess(tweet))

    print ("----------------Vectorising--------------------------")
    vect=vectorizer.transform(preproc)
    
    print ("----------------Predicting--------------------------")
    sent=model.predict(vect)


    data = []
    for text, pred in zip(tweets, sent):
        data.append((text,pred))

    df=pd.DataFrame(data,columns=["Tweets","Sentiment"])
    df = df.replace([0,1], ["Negative","Positive"])

    return df



In [8]:
%%time
tw1=getTweets('USA Election 2020',100)
tw2=getTweets('USA Election 2016',100)

Wall time: 3min 7s


In [14]:
vect,model=load_models()
tw1_pred=predict(vect,model,tw1["Tweets"].tolist())
tw2_pred=predict(vect,model,tw2["Tweets"].tolist())

----------------PreProcessing--------------------------
----------------Vectorising--------------------------
----------------Predicting--------------------------
----------------PreProcessing--------------------------
----------------Vectorising--------------------------
----------------Predicting--------------------------


In [15]:
tw1_pred["Date"]=tw1["Date"]
tw2_pred["Date"]=tw2["Date"]
# tdf["Date"]=df["Date"]

In [16]:
tw1_pred

Unnamed: 0,Tweets,Sentiment,Date
0,"@realDonaldTrump would ""Sign An US Presidentua...",Negative,2020-06-08 06:45:31+00:00
1,Rightfully so ... I am beyond ready 2 move OUT...,Negative,2020-06-08 04:50:16+00:00
2,•November 2020-things will get better •Next ye...,Negative,2020-06-08 04:25:25+00:00
3,What we need next Sir is to abolish them/demRA...,Negative,2020-06-08 03:34:17+00:00
4,YES! PEACE FOR THE WORLD Mr President Donald J...,Positive,2020-06-08 03:30:24+00:00
...,...,...,...
95,USA need to ride this out..... An EVIL RELIGIO...,Negative,2020-06-06 19:51:25+00:00
96,USA need to ride this out..... An EVIL RELIGIO...,Negative,2020-06-06 19:50:03+00:00
97,https://www.google.com/amp/s/amp.theguardian.c...,Negative,2020-06-06 19:42:55+00:00
98,Corbyn is a commie.Antifa carry Soviet flags.A...,Negative,2020-06-06 19:32:42+00:00


In [96]:
fig=go.Figure()
fig.add_trace(go.Scatter(x=tw1_pred["Date"],y=tw1_pred["Sentiment"],name='USA Election 2020'))
fig.show()

In [98]:
val=tw1_pred["Sentiment"].value_counts().values
val2=tw2_pred["Sentiment"].value_counts().values

In [99]:
fig=go.Figure()
fig.add_trace(go.Pie(labels=['Negative','Positive'],values=val,name='Election 2020'))
fig.show()

In [100]:
fig=go.Figure()
fig.add_trace(go.Bar(x=['Negative','Positive'],y=val,name='Election 2020'))
fig.add_trace(go.Bar(x=['Negative','Positive'],y=val2,name='Election 2016'))

fig.update_layout(title='Election 2016 v Election 2020',title_x=0.5,
                   xaxis_title='Sentiment',
                   yaxis_title='Number of Tweets')

fig.show()

In [95]:
val=tw1_pred["Sentiment"].value_counts().values
val2=tw2_pred["Sentiment"].value_counts().values

array([53, 47], dtype=int64)

In [7]:
%%time
vect,model=load_models()

CPU times: user 3.22 s, sys: 416 ms, total: 3.64 s
Wall time: 3.65 s


In [5]:
from joblib import dump, load
dump(vect, '../Models/tfidf.joblib')
dump(model,'../Models/svc.joblib') 

['../Models/svc.joblib']

In [6]:
%%time
vect2=load('../Models/tfidf.joblib')
model2=load('../Models/svc.joblib')

CPU times: user 15.6 s, sys: 683 ms, total: 16.3 s
Wall time: 16.6 s
