In [None]:
# Install Libraries
pip install -U textblob
pip install pandas
pip install numpy
pip install plotly
pip install seaborn
pip install matplotlib
pip install wordcloud

# Import Libraries
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
from textblob import TextBlob
from wordcloud import WordCloud
import plotly.graph_objects as go
import plotly.express as px

# Read the CSV files for Trump and Biden into DataFrames.
trumpdata = pd.read_csv(
    filepath_or_buffer=r"C:\Users\Surya\Desktop\twitter\data\Trumpall2.csv"
)
bidendata = pd.read_csv(
    filepath_or_buffer=r"C:\Users\Surya\Desktop\twitter\data\Bidenall2.csv"
)

# Peek at the first rows of each dataset.
trumpdata.head(n=5)
bidendata.head(n=5)

# Look at one arbitrary 'text' field from each dataset (row labels 23 and 696).
trumpdata.loc[23, 'text']
bidendata.loc[696, 'text']

# TextBlob sentiment of a single trumpdata field (row label 23).
tbo1 = TextBlob(trumpdata.loc[23, 'text'])
print(tbo1.sentiment)

# The same check on a single bidendata field (row label 696).
tbo2 = TextBlob(bidendata.loc[696, 'text'])
print(tbo2.sentiment)

# Sentiment polarity for every row of trumpdata
def find_pol(review):
    """Return the TextBlob sentiment polarity score of ``review``."""
    blob = TextBlob(review)
    return blob.sentiment.polarity


# Score each 'text' entry and store the result in a new column.
trumpdata['Sentiment_Polarity'] = trumpdata['text'].map(find_pol)
trumpdata.head()

# Sentiment polarity for every row of bidendata.
# find_pol is already defined earlier in this file with an identical body, so
# it is reused here rather than being declared a second time.
bidendata['Sentiment_Polarity'] = bidendata['text'].apply(find_pol)
bidendata.head()

# Add an expression label to trumpdata based on the polarity score:
# polarity > 0 is 'Positive', polarity == 0 is 'Neutral', everything else is
# 'Negative'.
# All three labels are assigned in one step. The earlier two-step form ended
# with a chained assignment (frame[col][mask] = value), which writes through a
# temporary object: pandas warns about it and, with Copy-on-Write enabled, the
# 'Neutral' label is never stored.
trumpdata['Expression label'] = np.select(
    [trumpdata['Sentiment_Polarity'] > 0, trumpdata['Sentiment_Polarity'] == 0],
    ['Positive', 'Neutral'],
    default='Negative',
)
trumpdata.tail()

# Add an expression label to bidendata based on the polarity score:
# polarity > 0 is 'Positive', polarity == 0 is 'Neutral', everything else is
# 'Negative'.
# All three labels are assigned in one step. The earlier two-step form ended
# with a chained assignment (frame[col][mask] = value), which writes through a
# temporary object: pandas warns about it and, with Copy-on-Write enabled, the
# 'Neutral' label is never stored.
bidendata['Expression label'] = np.select(
    [bidendata['Sentiment_Polarity'] > 0, bidendata['Sentiment_Polarity'] == 0],
    ['Positive', 'Neutral'],
    default='Negative',
)
bidendata.head()

# Count the positive, negative and neutral replies in trumpdata and show the
# counts as a horizontal bar chart.
new1 = trumpdata.groupby('Expression label').count()
x = list(new1['Sentiment_Polarity'])  # number of rows per label
y = list(new1.index)                  # the label names
df = pd.DataFrame({'x': x, 'y': y})

# Pick each bar's colour from its label instead of from its row position.
# Positional chained assignment (df['color'][1] = ...) writes to a temporary
# object, and it also shifts the colours whenever one of the labels is missing.
# Any unexpected label keeps the previous default colour, blue.
label_colors = {'Negative': 'blue', 'Neutral': 'red', 'Positive': 'green'}
df['color'] = df['y'].map(label_colors).fillna('blue')

fig = go.Figure(go.Bar(x=df['x'],
                       y=df['y'],
                       orientation='h',
                       marker={'color': df['color']}))
fig.show()

# Count the positive, negative and neutral replies in bidendata and show the
# counts as a horizontal bar chart.
new2 = bidendata.groupby('Expression label').count()
x = list(new2['Sentiment_Polarity'])  # number of rows per label
y = list(new2.index)                  # the label names
df = pd.DataFrame({'x': x, 'y': y})

# Pick each bar's colour from its label instead of from its row position.
# Positional chained assignment (df['color'][1] = ...) writes to a temporary
# object, and it also shifts the colours whenever one of the labels is missing.
# Any unexpected label keeps the previous default colour, blue.
label_colors = {'Negative': 'blue', 'Neutral': 'red', 'Positive': 'green'}
df['color'] = df['y'].map(label_colors).fillna('blue')

fig = go.Figure(go.Bar(x=df['x'],
                       y=df['y'],
                       orientation='h',
                       marker={'color': df['color']}))
fig.show()

# Remove every trumpdata row whose statement is neutral (polarity == 0).
# cond1 flags the neutral rows; review1 keeps a copy of them before they are
# dropped from trumpdata in place.
cond1 = trumpdata['Sentiment_Polarity'].eq(0.0)
review1 = trumpdata[cond1]

trumpdata.drop(index=review1.index, inplace=True)
trumpdata.shape

# Remove every bidendata row whose statement is neutral (polarity == 0).
# cond2 flags the neutral rows; review2 keeps a copy of them before they are
# dropped from bidendata in place.
cond2 = bidendata['Sentiment_Polarity'].eq(0.0)
review2 = bidendata[cond2]

bidendata.drop(index=review2.index, inplace=True)
bidendata.shape

# Balancing both datasets for a fair prediction.
# trumpdata: drop a fixed number of randomly chosen rows. The seed is fixed so
# the random draw is reproducible.
np.random.seed(10)
remove_n = 324
drop_indices = np.random.choice(a=trumpdata.index, size=remove_n, replace=False)
df_subset_trump = trumpdata.drop(index=drop_indices)
df_subset_trump.shape


# bidendata: drop a fixed number of randomly chosen rows. The seed is fixed so
# the random draw is reproducible.
np.random.seed(10)
remove_n = 31
drop_indices = np.random.choice(a=bidendata.index, size=remove_n, replace=False)
df_subset_biden = bidendata.drop(index=drop_indices)
df_subset_biden.shape

# Data visualization

# Donald Trump
# Distribution plot. sns.distplot is deprecated in seaborn; a histogram on a
# density scale with a KDE curve is its documented replacement.
sns.histplot(df_subset_trump['Sentiment_Polarity'], kde=True, stat='density')
plt.show()

# Box plot, drawn on its own figure so it does not land on top of the
# distribution plot. The column is passed explicitly as x so the call does not
# depend on how seaborn interprets a positional list argument.
sns.boxplot(x=df_subset_trump['Sentiment_Polarity'])
plt.show()


# Joe Biden
# Distribution plot. sns.distplot is deprecated in seaborn; a histogram on a
# density scale with a KDE curve is its documented replacement.
sns.histplot(df_subset_biden['Sentiment_Polarity'], kde=True, stat='density')
plt.show()

# Box plot, drawn on its own figure so it does not land on top of the
# distribution plot. The column is passed explicitly as x so the call does not
# depend on how seaborn interprets a positional list argument.
sns.boxplot(x=df_subset_biden['Sentiment_Polarity'])
plt.show()


# Analyzing both the datasets together to get a clear picture

# Trump: number of rows per expression label, then the negative and positive
# shares as percentages.
count1 = df_subset_trump.groupby('Expression label').count()
print(count1)
# Look the counts up by label rather than by integer position (positional
# access on a label-indexed Series is deprecated in pandas), and divide by the
# actual number of counted rows instead of assuming the subset has exactly
# 1000 rows. A label that does not occur counts as 0.
polarity_counts1 = count1['Sentiment_Polarity']
total1 = polarity_counts1.sum()
negative_per1 = (polarity_counts1.get('Negative', 0) / total1) * 100
positive_per1 = (polarity_counts1.get('Positive', 0) / total1) * 100

# Biden: number of rows per expression label, then the negative and positive
# shares as percentages.
count2 = df_subset_biden.groupby('Expression label').count()
print(count2)
# Look the counts up by label rather than by integer position (positional
# access on a label-indexed Series is deprecated in pandas), and divide by the
# actual number of counted rows instead of assuming the subset has exactly
# 1000 rows. A label that does not occur counts as 0.
polarity_counts2 = count2['Sentiment_Polarity']
total2 = polarity_counts2.sum()
negative_per2 = (polarity_counts2.get('Negative', 0) / total2) * 100
positive_per2 = (polarity_counts2.get('Positive', 0) / total2) * 100

# Positive and negative percentages of both datasets, side by side.
politicians = ['Donald Trump', 'Joe Biden']
list_pos = [positive_per1, positive_per2]
list_neg = [negative_per1, negative_per2]

# One bar trace per sentiment; each trace has one bar per politician.
fig = go.Figure()
fig.add_trace(go.Bar(name='Positive', x=politicians, y=list_pos))
fig.add_trace(go.Bar(name='Negative', x=politicians, y=list_neg))
# Grouped mode places the two bars of each politician next to each other.
fig.update_layout(barmode='group')
fig.show()


# Analyzing most positive and most negative reviews on both datasets

# Most positive reviews for Donald Trump: the first rows (head) whose
# polarity equals 1, shown as a two-column table.
most_positive1 = df_subset_trump.loc[
    df_subset_trump['Sentiment_Polarity'].eq(1), 'text'
].head()
pos_txt1 = list(most_positive1)
pos1 = df_subset_trump.loc[
    df_subset_trump['Sentiment_Polarity'].eq(1), 'Sentiment_Polarity'
].head()
pos_pol1 = list(pos1)

fig = go.Figure(
    data=[
        go.Table(
            columnorder=[1, 2],
            columnwidth=[50, 400],
            header={
                'values': ['Polarity', "Most positive replies in Trump's handle"],
                'fill_color': 'paleturquoise',
                'align': 'left',
            },
            cells={
                'values': [pos_pol1, pos_txt1],
                'fill_color': 'lavender',
                'align': 'left',
            },
        )
    ]
)
fig.show()

# Most positive reviews for Joe Biden: the last rows (tail) whose polarity
# equals 1, shown as a two-column table.
most_positive2 = df_subset_biden.loc[
    df_subset_biden['Sentiment_Polarity'].eq(1), 'text'
].tail()
pos_txt2 = list(most_positive2)
pos2 = df_subset_biden.loc[
    df_subset_biden['Sentiment_Polarity'].eq(1), 'Sentiment_Polarity'
].tail()
pos_pol2 = list(pos2)

fig = go.Figure(
    data=[
        go.Table(
            columnorder=[1, 2],
            columnwidth=[50, 400],
            header={
                'values': ['Polarity', "Most positive replies on Biden's handle"],
                'fill_color': 'paleturquoise',
                'align': 'left',
            },
            cells={
                'values': [pos_pol2, pos_txt2],
                'fill_color': 'lavender',
                'align': 'left',
            },
        )
    ]
)
fig.show()

# Most negative reviews for Donald Trump: the first rows (head) whose
# polarity equals -1, shown as a two-column table.
most_negative1 = df_subset_trump.loc[
    df_subset_trump['Sentiment_Polarity'].eq(-1), 'text'
].head()
neg_txt1 = list(most_negative1)
neg1 = df_subset_trump.loc[
    df_subset_trump['Sentiment_Polarity'].eq(-1), 'Sentiment_Polarity'
].head()
neg_pol1 = list(neg1)

fig = go.Figure(
    data=[
        go.Table(
            columnorder=[1, 2],
            columnwidth=[50, 400],
            header={
                'values': ['Polarity', "Most negative replies on Trump's handle"],
                'fill_color': 'paleturquoise',
                'align': 'left',
            },
            cells={
                'values': [neg_pol1, neg_txt1],
                'fill_color': 'lavender',
                'align': 'left',
            },
        )
    ]
)
fig.show()

# Most negative reviews for Joe Biden: the first rows (head) whose polarity
# equals -1, shown as a two-column table.
most_negative2 = df_subset_biden.loc[
    df_subset_biden['Sentiment_Polarity'].eq(-1), 'text'
].head()
neg_txt2 = list(most_negative2)
neg2 = df_subset_biden.loc[
    df_subset_biden['Sentiment_Polarity'].eq(-1), 'Sentiment_Polarity'
].head()
neg_pol2 = list(neg2)

fig = go.Figure(
    data=[
        go.Table(
            columnorder=[1, 2],
            columnwidth=[50, 400],
            header={
                'values': ['Polarity', "Most negative replies on Biden's handle"],
                'fill_color': 'paleturquoise',
                'align': 'left',
            },
            cells={
                'values': [neg_pol2, neg_txt2],
                'fill_color': 'lavender',
                'align': 'left',
            },
        )
    ]
)
fig.show()

# Generating wordclouds for both the politicians' handles

# Trump
# Join the actual reply texts into one string. str(Series) only yields the
# printed summary of the column (index labels, a truncated middle and a
# name/dtype footer), so the cloud was previously built from that summary
# instead of from the replies themselves.
text = ' '.join(df_subset_trump['text'].astype(str))
wordcloud = WordCloud(
    max_font_size=100,
    max_words=500,
    scale=10,
    relative_scaling=.6,
    background_color="black",
    colormap="rainbow",
).generate(text)
# Display the generated wordcloud without axis ticks.
plt.figure(figsize=(20, 30))
plt.imshow(wordcloud, interpolation='bilinear')
plt.axis("off")
plt.show()

# Biden
# Join the actual reply texts into one string. str(Series) only yields the
# printed summary of the column (index labels, a truncated middle and a
# name/dtype footer), so the cloud was previously built from that summary
# instead of from the replies themselves.
text = ' '.join(df_subset_biden['text'].astype(str))
wordcloud = WordCloud(
    max_font_size=100,
    max_words=500,
    scale=10,
    relative_scaling=.6,
    background_color="black",
    colormap="rainbow",
).generate(text)
# Display the generated wordcloud without axis ticks.
plt.figure(figsize=(20, 30))
plt.imshow(wordcloud, interpolation='bilinear')
plt.axis("off")
plt.show()

# Comparison between negative replies on both the handles, as a pie chart
# of the two negative percentages.
labels = ['Trump_Negative', 'Biden_Negative']
sizes = list_neg
explode = (0.1, 0.1)  # pull both wedges slightly away from the centre
fig1, ax1 = plt.subplots(nrows=1, ncols=1)
ax1.pie(
    x=sizes,
    explode=explode,
    labels=labels,
    autopct='%1.1f%%',
    shadow=True,
    startangle=90,
)
ax1.set(title='Negative tweets on both handles')
plt.show()

# Comparison between positive replies on both the handles, as a pie chart
# of the two positive percentages.
labels = ['Trump_Positive', 'Biden_Positive']
sizes = list_pos
explode = (0.1, 0.1)  # pull both wedges slightly away from the centre
fig2, ax2 = plt.subplots(nrows=1, ncols=1)
ax2.pie(
    x=sizes,
    explode=explode,
    labels=labels,
    autopct='%1.1f%%',
    shadow=True,
    startangle=90,
)
ax2.set(title='Positive tweets on both handles')
plt.show()