In [1]:
import pandas as pd
import numpy as np
from textblob import TextBlob
import matplotlib.pyplot as plt 
import plotly.graph_objects as go
import plotly.express as xp

import seaborn as sns

In [2]:
modi_df = pd.read_csv("data/ModiRelatedTweetsWithSentiment.csv")
rahul_df = pd.read_csv("data/RahulRelatedTweetsWithSentiment.csv")

In [3]:
modi_df.drop(['Unnamed: 0', 'Date', 'Emotion'], axis=1, inplace=True)
modi_df.head()

Unnamed: 0,User,Tweet
0,advosushildixit,@anjanaomkashyap I am seeing you as future #bj...
1,jiaeur,#LokSabhaElections2019 \n23rd May 2019 will re...
2,PVenkatGandhi,#LokSabhaElections2019 \n23rd May 2019 will re...
3,TheNirbhay1,PM Modi creates a new record of being the only...
4,ShakeChilli,@abhijitmajumder Appointment of Successor! \n\...


In [4]:
rahul_df.drop(['Unnamed: 0', 'Date', 'Emotion'], axis=1, inplace=True)
rahul_df.head()

Unnamed: 0,User,Tweet
0,Sunnysweet16,Wonder why no academic or journalist asks INC ...
1,drnitinchaube,Congrats for the change #australiavotes2019 an...
2,mrvivek07,Peopel Say “Govt Ne 70 Years Kya kiya”.\nUnse ...
3,JosephPravinP,"@ajaymaken @RahulGandhi And as a final touch, ..."
4,VandanaMegastar,#LokSabhaElections2019 Anyone not having mass ...


In [5]:
modi_df["Tweet"] = modi_df["Tweet"].astype(str)
rahul_df["Tweet"] = rahul_df["Tweet"].astype(str)

In [6]:
TextBlob(modi_df['Tweet'][4]).sentiment

Sentiment(polarity=0.0987878787878788, subjectivity=0.43151515151515146)

In [7]:
def polarity(text):
    return TextBlob(text).sentiment.polarity

In [8]:
modi_df["Polarity"] = modi_df["Tweet"].apply(polarity)

In [9]:
rahul_df["Polarity"] = rahul_df["Tweet"].apply(polarity)

In [10]:
modi_df.head()

Unnamed: 0,User,Tweet,Polarity
0,advosushildixit,@anjanaomkashyap I am seeing you as future #bj...,0.35
1,jiaeur,#LokSabhaElections2019 \n23rd May 2019 will re...,0.8
2,PVenkatGandhi,#LokSabhaElections2019 \n23rd May 2019 will re...,0.8
3,TheNirbhay1,PM Modi creates a new record of being the only...,0.312121
4,ShakeChilli,@abhijitmajumder Appointment of Successor! \n\...,0.098788


In [11]:
def classify(polarity):
    if polarity >0:
        return "Posative"
    elif polarity == 0.0000:
        return "Neutral"
    else:
        return "Negative"
    
modi_df['Label']=modi_df['Polarity'].apply(classify)
rahul_df['Label']=rahul_df['Polarity'].apply(classify)

In [12]:
modi_df.shape

(25688, 4)

In [13]:
rahul_df.shape

(14261, 4)

In [14]:
#modi
np.random.seed(10)
remove_n = 11688
drop_indices = np.random.choice(modi_df.index,remove_n,replace=False)
modi_df = modi_df.drop(drop_indices)

#rahul
np.random.seed(10)
remove_n = 261
drop_indices = np.random.choice(rahul_df.index,remove_n,replace=False)
rahul_df = rahul_df.drop(drop_indices)



In [15]:
modi_df.shape

(14000, 4)

In [16]:
rahul_df.shape

(14000, 4)

In [17]:
modi_df_count = modi_df.groupby('Label').count()
modi_df_count

Unnamed: 0_level_0,User,Tweet,Polarity
Label,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Negative,2746,2746,2746
Neutral,4435,4438,4438
Posative,6816,6816,6816


In [18]:
modi_pos_rate = modi_df_count['Polarity'][2]
modi_neg_rate = modi_df_count['Polarity'][0]
modi_neut_rate = modi_df_count['Polarity'][1]
print("Posative: {}".format(modi_pos_rate))
print("Neutral: {}".format(modi_neut_rate))
print("Negative: {}".format(modi_neg_rate))

Posative: 6816
Neutral: 4438
Negative: 2746



Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]`


Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]`


Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]`



In [19]:
rahul_df_count = rahul_df.groupby('Label').count()
rahul_df_count

Unnamed: 0_level_0,User,Tweet,Polarity
Label,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Negative,3116,3116,3116
Neutral,4793,4793,4793
Posative,6091,6091,6091


In [20]:
rahul_pos_rate = rahul_df_count['Polarity'][2]
rahul_neg_rate = rahul_df_count['Polarity'][0]
rahul_neut_rate = rahul_df_count['Polarity'][1]
print("Posative: {}".format(rahul_pos_rate))
print("Neutral: {}".format(rahul_neut_rate))
print("Negative: {}".format(rahul_neg_rate))

Posative: 6091
Neutral: 4793
Negative: 3116



Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]`


Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]`


Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]`



In [21]:
politicians = ['Modi','Rahul']

neg_list = [modi_neg_rate, rahul_neg_rate]
pos_list = [modi_pos_rate, rahul_pos_rate]
neut_list = [modi_neut_rate, rahul_neut_rate]


fig = go.Figure(
data = [
    go.Bar(name='Negative',x=politicians,y=neg_list),
    go.Bar(name='Positive',x=politicians,y=pos_list),
    go.Bar(name='Neutral',x=politicians,y=neut_list)
]
)
fig.update_layout(barmode='group')
fig.show()