In [1]:
!pip install textblob
!pip install plotly



In [2]:
import numpy as np
import pandas as pd
from datetime import datetime, timedelta
from textblob import TextBlob
import re
import plotly.express as px
import plotly.graph_objects as go
import pandas as pd

In [3]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [4]:
dataset = pd.read_csv("drive/MyDrive/Colab Notebooks/tweets_11-06-2020.csv", 
                encoding='latin-1', error_bad_lines=False)

In [5]:
dataset.head()

Unnamed: 0,id,text,isRetweet,isDeleted,device,favorites,retweets,date
0,98454970654916608,Republicans and Democrats have both created ou...,f,f,TweetDeck,49,255,2011-08-02 18:07:48
1,1234653427789070336,I was thrilled to be back in the Great city of...,f,f,Twitter for iPhone,73748,17404,2020-03-03 01:34:50
2,1218010753434820614,RT @CBS_Herridge: READ: Letter to surveillance...,t,f,Twitter for iPhone,0,7396,2020-01-17 03:22:47
3,1304875170860015617,The Unsolicited Mail In Ballot Scam is a major...,f,f,Twitter for iPhone,80527,23502,2020-09-12 20:10:58
4,1218159531554897920,RT @MZHemingway: Very friendly telling of even...,t,f,Twitter for iPhone,0,9081,2020-01-17 13:13:59


In [8]:
df = pd.DataFrame(data=[tweet for tweet in dataset.text], columns=['Tweet'])

In [11]:
df['Date'] = np.array([dt for dt in dataset.date])

In [12]:
df.head()

Unnamed: 0,Tweet,Date
0,Republicans and Democrats have both created ou...,2011-08-02 18:07:48
1,I was thrilled to be back in the Great city of...,2020-03-03 01:34:50
2,RT @CBS_Herridge: READ: Letter to surveillance...,2020-01-17 03:22:47
3,The Unsolicited Mail In Ballot Scam is a major...,2020-09-12 20:10:58
4,RT @MZHemingway: Very friendly telling of even...,2020-01-17 13:13:59


In [14]:
df['Date'] = pd.to_datetime(df['Date'])

In [15]:
df.dtypes

Tweet            object
Date     datetime64[ns]
dtype: object

In [16]:
def cleanUpTweet(txt):
    # Remove mentions
    txt = re.sub(r'@[A-Za-z0-9_]+', '', txt)
    # Remove hashtags
    txt = re.sub(r'#', '', txt)
    # Remove retweets:
    txt = re.sub(r'RT : ', '', txt)
    # Remove urls
    txt = re.sub(r'https?:\/\/[A-Za-z0-9\.\/]+', '', txt)
    return txt

In [17]:
df['Tweet'] = df['Tweet'].apply(cleanUpTweet)


In [18]:
def getTextSubjectivity(txt):
    return TextBlob(txt).sentiment.subjectivity

def getTextPolarity(txt):
    return TextBlob(txt).sentiment.polarity

In [19]:
df['Subjectivity'] = df['Tweet'].apply(getTextSubjectivity)
df['Polarity'] = df['Tweet'].apply(getTextPolarity)

In [20]:
df.head(10)

Unnamed: 0,Tweet,Date,Subjectivity,Polarity
0,Republicans and Democrats have both created ou...,2011-08-02 18:07:48,0.2,0.2
1,I was thrilled to be back in the Great city of...,2020-03-03 01:34:50,0.483333,0.458929
2,READ: Letter to surveillance court obtained by...,2020-01-17 03:22:47,0.3,0.05
3,The Unsolicited Mail In Ballot Scam is a major...,2020-09-12 20:10:58,0.454762,0.021131
4,Very friendly telling of events here about Com...,2020-01-17 13:13:59,0.5,0.26875
5,President announced historic steps to protect...,2020-01-17 00:11:56,0.200794,0.095238
6,"âIâm running as a proud Democrat, for the ...",2020-10-12 22:22:39,0.9,0.35
7,Getting a little exercise this morning!,2020-02-01 16:14:02,0.5,-0.234375
8,,2020-10-23 04:52:14,0.0,0.0
9,,2020-10-23 04:46:53,0.0,0.0


In [21]:
df = df.drop(df[df['Tweet'] == ''].index)

In [22]:
def getTextAnalysis(a):
    if a < 0:
        return "Negative"
    elif a == 0:
        return "Neutral"
    else:
        return "Positive"

In [23]:
df['Sentiment'] = df['Polarity'].apply(getTextAnalysis)

In [24]:
positive = df[df['Sentiment'] == 'Positive']

print(str(positive.shape[0]/(df.shape[0])*100) + " % of positive tweets")

53.709761356033006 % of positive tweets


In [26]:
start_date = '2020-01-01'
end_date = '2020-10-01'

In [27]:
mask = (df['Date'] > start_date) & (df['Date'] <= end_date)

In [28]:
newdf = df.loc[mask]


In [29]:
newdf.head()

Unnamed: 0,Tweet,Date,Subjectivity,Polarity,Sentiment
1,I was thrilled to be back in the Great city of...,2020-03-03 01:34:50,0.483333,0.458929,Positive
2,READ: Letter to surveillance court obtained by...,2020-01-17 03:22:47,0.3,0.05,Positive
3,The Unsolicited Mail In Ballot Scam is a major...,2020-09-12 20:10:58,0.454762,0.021131,Positive
4,Very friendly telling of events here about Com...,2020-01-17 13:13:59,0.5,0.26875,Positive
5,President announced historic steps to protect...,2020-01-17 00:11:56,0.200794,0.095238,Positive


In [30]:
newdf.to_csv('trump_tweets_v1.csv', index=False)