# Part 1

## Imports


In [77]:
%config Completer.use_jedi = False

In [None]:
import numpy as np
import pandas as pd
import nltk
nltk.download('vader_lexicon')
from nltk.sentiment.vader import SentimentIntensityAnalyzer
import nltk
nltk.download('punkt')
from nltk.tokenize import word_tokenize
from nltk import pos_tag
nltk.download('stopwords')
from nltk.corpus import stopwords
nltk.download('wordnet')
from nltk.corpus import wordnet
import re
nltk.download('averaged_perceptron_tagger')
nltk.download('sentiwordnet')
  

## Handling Dataset

Reading the dataset and adding new features

In [34]:
df= pd.read_csv("dataset.csv")

In [4]:
df.head()

Unnamed: 0.1,Unnamed: 0,Data,Date,Time,tweetcaption
0,0,tuesdayvibes,7/14/2020,7:00:21,Love it here vacation vibes amazing beautiful ...
1,1,realmeC11,7/14/2020,7:00:21,Best Camera Smartphone under 20k Please vote a...
2,2,KPSharmaOli,7/14/2020,7:00:21,shree Why should we have a problem with the pe...
3,3,RheaChakraborty,7/14/2020,7:00:21,Rhea Chakraborty s Heartbreaking Post On Susha...
4,4,Stop_Transfer_Sunita_Yadav,7/14/2020,7:00:21,We stand for Sunita Yadav Stop the Transfer Wh...


In [36]:
df = df.drop('Unnamed: 0', axis=1)
df.head()

Unnamed: 0,Data,Date,Time,tweetcaption
0,tuesdayvibes,7/14/2020,7:00:21,Love it here vacation vibes amazing beautiful ...
1,realmeC11,7/14/2020,7:00:21,Best Camera Smartphone under 20k Please vote a...
2,KPSharmaOli,7/14/2020,7:00:21,shree Why should we have a problem with the pe...
3,RheaChakraborty,7/14/2020,7:00:21,Rhea Chakraborty s Heartbreaking Post On Susha...
4,Stop_Transfer_Sunita_Yadav,7/14/2020,7:00:21,We stand for Sunita Yadav Stop the Transfer Wh...


Cleaning the tweet captions


In [39]:
# Define a function to clean the text
def clean(text):
# Removes all special characters and numericals leaving the alphabets
    text = re.sub('[^A-Za-z]+', ' ', text)
    return text

# Cleaning the text in the review column
df['Cleaned Tweetcaptions'] = df['tweetcaption'].apply(clean)
df.head()

Unnamed: 0,Data,Date,Time,tweetcaption,Cleaned Tweetcaptions
0,tuesdayvibes,7/14/2020,7:00:21,Love it here vacation vibes amazing beautiful ...,Love it here vacation vibes amazing beautiful ...
1,realmeC11,7/14/2020,7:00:21,Best Camera Smartphone under 20k Please vote a...,Best Camera Smartphone under k Please vote and...
2,KPSharmaOli,7/14/2020,7:00:21,shree Why should we have a problem with the pe...,shree Why should we have a problem with the pe...
3,RheaChakraborty,7/14/2020,7:00:21,Rhea Chakraborty s Heartbreaking Post On Susha...,Rhea Chakraborty s Heartbreaking Post On Susha...
4,Stop_Transfer_Sunita_Yadav,7/14/2020,7:00:21,We stand for Sunita Yadav Stop the Transfer Wh...,We stand for Sunita Yadav Stop the Transfer Wh...


Adding Parts of Speech (POS) for Vader analysis

In [44]:
# POS tagger dictionary
pos_dict = {'J':wordnet.ADJ, 'V':wordnet.VERB, 'N':wordnet.NOUN, 'R':wordnet.ADV}
def token_stop_pos(text):
    tags = pos_tag(word_tokenize(text))
    newlist = []
    for word, tag in tags:
        if word.lower() not in set(stopwords.words('english')):
            newlist.append(tuple([word, pos_dict.get(tag[0])]))
    return newlist

df['POS tagged'] = df['Cleaned Tweetcaptions'].apply(token_stop_pos)
df.head()

Unnamed: 0,Data,Date,Time,tweetcaption,Cleaned Tweetcaptions,POS tagged
0,tuesdayvibes,7/14/2020,7:00:21,Love it here vacation vibes amazing beautiful ...,Love it here vacation vibes amazing beautiful ...,"[(Love, v), (vacation, n), (vibes, n), (amazin..."
1,realmeC11,7/14/2020,7:00:21,Best Camera Smartphone under 20k Please vote a...,Best Camera Smartphone under k Please vote and...,"[(Best, n), (Camera, n), (Smartphone, n), (k, ..."
2,KPSharmaOli,7/14/2020,7:00:21,shree Why should we have a problem with the pe...,shree Why should we have a problem with the pe...,"[(shree, a), (problem, n), (people, n), (probl..."
3,RheaChakraborty,7/14/2020,7:00:21,Rhea Chakraborty s Heartbreaking Post On Susha...,Rhea Chakraborty s Heartbreaking Post On Susha...,"[(Rhea, n), (Chakraborty, n), (Heartbreaking, ..."
4,Stop_Transfer_Sunita_Yadav,7/14/2020,7:00:21,We stand for Sunita Yadav Stop the Transfer Wh...,We stand for Sunita Yadav Stop the Transfer Wh...,"[(stand, v), (Sunita, n), (Yadav, n), (Stop, n..."


Adding Lemma column

In [47]:
from nltk.stem import WordNetLemmatizer
wordnet_lemmatizer = WordNetLemmatizer()
def lemmatize(pos_data):
    lemma_rew = " "
    for word, pos in pos_data:
        if not pos:
            lemma = word
            lemma_rew = lemma_rew + " " + lemma
        else:
            lemma = wordnet_lemmatizer.lemmatize(word, pos=pos)
            lemma_rew = lemma_rew + " " + lemma
    return lemma_rew

df['Lemma'] = df['POS tagged'].apply(lemmatize)
df.head()

Unnamed: 0,Data,Date,Time,tweetcaption,Cleaned Tweetcaptions,POS tagged,Lemma
0,tuesdayvibes,7/14/2020,7:00:21,Love it here vacation vibes amazing beautiful ...,Love it here vacation vibes amazing beautiful ...,"[(Love, v), (vacation, n), (vibes, n), (amazin...",Love vacation vibe amaze beautiful cabo mexi...
1,realmeC11,7/14/2020,7:00:21,Best Camera Smartphone under 20k Please vote a...,Best Camera Smartphone under k Please vote and...,"[(Best, n), (Camera, n), (Smartphone, n), (k, ...",Best Camera Smartphone k Please vote help re...
2,KPSharmaOli,7/14/2020,7:00:21,shree Why should we have a problem with the pe...,shree Why should we have a problem with the pe...,"[(shree, a), (problem, n), (people, n), (probl...",shree problem people problem Stupid Communis...
3,RheaChakraborty,7/14/2020,7:00:21,Rhea Chakraborty s Heartbreaking Post On Susha...,Rhea Chakraborty s Heartbreaking Post On Susha...,"[(Rhea, n), (Chakraborty, n), (Heartbreaking, ...",Rhea Chakraborty Heartbreaking Post Sushant ...
4,Stop_Transfer_Sunita_Yadav,7/14/2020,7:00:21,We stand for Sunita Yadav Stop the Transfer Wh...,We stand for Sunita Yadav Stop the Transfer Wh...,"[(stand, v), (Sunita, n), (Yadav, n), (Stop, n...",stand Sunita Yadav Stop Transfer woman empow...


In [19]:
df.Data.nunique()
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 35266 entries, 0 to 35265
Data columns (total 5 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   Unnamed: 0    35266 non-null  int64 
 1   Data          35266 non-null  object
 2   Date          35266 non-null  object
 3   Time          35266 non-null  object
 4   tweetcaption  35266 non-null  object
dtypes: int64(1), object(4)
memory usage: 1.6+ MB


As we can see there are no null values

## Building Functions

I have used three rule based analysis models:
1. <strong>VADER</strong> (Valence Aware Dictionary for Sentiment Reasoning): It is a model used for text sentiment analysis that is sensitive to both polarity (positive/negative) and intensity (strength) of emotion.
2. <strong>TextBlob</strong>: TextBlob returns polarity and subjectivity of a sentence. Polarity lies between [-1,1], -1 defines a negative sentiment and 1 defines a positive sentiment.
3. <strong>SentiWordNet</strong> :SentiWordNet is an opinion lexicon derived from the WordNet database where each term is
associated with numerical scores indicating positive and negative sentiment information. 

##### VADER

In [32]:
sid = SentimentIntensityAnalyzer() #Vader Sentiment analyser

In [49]:
df['scores']=df['Lemma'].apply(lambda lemma: sid.polarity_scores(lemma)) #applying Vader analysis on Lemma
df.head()

Unnamed: 0,Data,Date,Time,tweetcaption,Cleaned Tweetcaptions,POS tagged,Lemma,NegativeScore,positiveScore,neutralScore,scores
0,tuesdayvibes,7/14/2020,7:00:21,Love it here vacation vibes amazing beautiful ...,Love it here vacation vibes amazing beautiful ...,"[(Love, v), (vacation, n), (vibes, n), (amazin...",Love vacation vibe amaze beautiful cabo mexi...,0.041,0.159,0.8,"{'neg': 0.059, 'neu': 0.742, 'pos': 0.199, 'co..."
1,realmeC11,7/14/2020,7:00:21,Best Camera Smartphone under 20k Please vote a...,Best Camera Smartphone under k Please vote and...,"[(Best, n), (Camera, n), (Smartphone, n), (k, ...",Best Camera Smartphone k Please vote help re...,0.028,0.093,0.878,"{'neg': 0.031, 'neu': 0.833, 'pos': 0.136, 'co..."
2,KPSharmaOli,7/14/2020,7:00:21,shree Why should we have a problem with the pe...,shree Why should we have a problem with the pe...,"[(shree, a), (problem, n), (people, n), (probl...",shree problem people problem Stupid Communis...,0.072,0.131,0.797,"{'neg': 0.123, 'neu': 0.689, 'pos': 0.188, 'co..."
3,RheaChakraborty,7/14/2020,7:00:21,Rhea Chakraborty s Heartbreaking Post On Susha...,Rhea Chakraborty s Heartbreaking Post On Susha...,"[(Rhea, n), (Chakraborty, n), (Heartbreaking, ...",Rhea Chakraborty Heartbreaking Post Sushant ...,0.114,0.173,0.713,"{'neg': 0.126, 'neu': 0.661, 'pos': 0.214, 'co..."
4,Stop_Transfer_Sunita_Yadav,7/14/2020,7:00:21,We stand for Sunita Yadav Stop the Transfer Wh...,We stand for Sunita Yadav Stop the Transfer Wh...,"[(stand, v), (Sunita, n), (Yadav, n), (Stop, n...",stand Sunita Yadav Stop Transfer woman empow...,0.152,0.138,0.71,"{'neg': 0.213, 'neu': 0.586, 'pos': 0.201, 'co..."


In [53]:
df['compound'] = df['scores'].apply(lambda score_dict: score_dict['compound'])
df['sentiment_type']=''
df.loc[df.compound>0,'sentiment_type']='POSITIVE'
df.loc[df.compound==0,'sentiment_type']='NEUTRAL'
df.loc[df.compound<0,'sentiment_type']='NEGATIVE'

In [54]:
df.head()

Unnamed: 0,Data,Date,Time,tweetcaption,Cleaned Tweetcaptions,POS tagged,Lemma,NegativeScore,positiveScore,neutralScore,scores,compound,sentiment_type
0,tuesdayvibes,7/14/2020,7:00:21,Love it here vacation vibes amazing beautiful ...,Love it here vacation vibes amazing beautiful ...,"[(Love, v), (vacation, n), (vibes, n), (amazin...",Love vacation vibe amaze beautiful cabo mexi...,0.041,0.159,0.8,"{'neg': 0.059, 'neu': 0.742, 'pos': 0.199, 'co...",0.9747,POSITIVE
1,realmeC11,7/14/2020,7:00:21,Best Camera Smartphone under 20k Please vote a...,Best Camera Smartphone under k Please vote and...,"[(Best, n), (Camera, n), (Smartphone, n), (k, ...",Best Camera Smartphone k Please vote help re...,0.028,0.093,0.878,"{'neg': 0.031, 'neu': 0.833, 'pos': 0.136, 'co...",0.9349,POSITIVE
2,KPSharmaOli,7/14/2020,7:00:21,shree Why should we have a problem with the pe...,shree Why should we have a problem with the pe...,"[(shree, a), (problem, n), (people, n), (probl...",shree problem people problem Stupid Communis...,0.072,0.131,0.797,"{'neg': 0.123, 'neu': 0.689, 'pos': 0.188, 'co...",0.8522,POSITIVE
3,RheaChakraborty,7/14/2020,7:00:21,Rhea Chakraborty s Heartbreaking Post On Susha...,Rhea Chakraborty s Heartbreaking Post On Susha...,"[(Rhea, n), (Chakraborty, n), (Heartbreaking, ...",Rhea Chakraborty Heartbreaking Post Sushant ...,0.114,0.173,0.713,"{'neg': 0.126, 'neu': 0.661, 'pos': 0.214, 'co...",0.9517,POSITIVE
4,Stop_Transfer_Sunita_Yadav,7/14/2020,7:00:21,We stand for Sunita Yadav Stop the Transfer Wh...,We stand for Sunita Yadav Stop the Transfer Wh...,"[(stand, v), (Sunita, n), (Yadav, n), (Stop, n...",stand Sunita Yadav Stop Transfer woman empow...,0.152,0.138,0.71,"{'neg': 0.213, 'neu': 0.586, 'pos': 0.201, 'co...",0.2023,POSITIVE


In [56]:
df['sentiment_type'].value_counts()

POSITIVE    27652
NEGATIVE     7468
NEUTRAL       146
Name: sentiment_type, dtype: int64

#### TextBlob

In [57]:
from textblob import TextBlob


In [60]:
def getSubjectivity(text):
    return TextBlob(text).sentiment.subjectivity
  
 #Create a function to get the polarity
def getPolarity(text):
   return TextBlob(text).sentiment.polarity
  
 #Create two new columns ‘Subjectivity’ & ‘Polarity’
# df[‘TextBlob_Subjectivity’] =    df[‘tweet’].apply(getSubjectivity)
df ['TextBlob_Polarity'] = df["Lemma"].apply(getPolarity)
def getAnalysis(score):
    if score < 0:
        return "Negative"
    elif score == 0:
        return "Neutral"
    else:
        return "Positive"
df["TextBlob_Analysis"] = df["TextBlob_Polarity"].apply(getAnalysis )


In [61]:
df.head()

Unnamed: 0,Data,Date,Time,tweetcaption,Cleaned Tweetcaptions,POS tagged,Lemma,NegativeScore,positiveScore,neutralScore,scores,compound,sentiment_type,TextBlob_Polarity,TextBlob_Analysis
0,tuesdayvibes,7/14/2020,7:00:21,Love it here vacation vibes amazing beautiful ...,Love it here vacation vibes amazing beautiful ...,"[(Love, v), (vacation, n), (vibes, n), (amazin...",Love vacation vibe amaze beautiful cabo mexi...,0.041,0.159,0.8,"{'neg': 0.059, 'neu': 0.742, 'pos': 0.199, 'co...",0.9747,POSITIVE,0.142695,Positive
1,realmeC11,7/14/2020,7:00:21,Best Camera Smartphone under 20k Please vote a...,Best Camera Smartphone under k Please vote and...,"[(Best, n), (Camera, n), (Smartphone, n), (k, ...",Best Camera Smartphone k Please vote help re...,0.028,0.093,0.878,"{'neg': 0.031, 'neu': 0.833, 'pos': 0.136, 'co...",0.9349,POSITIVE,0.200033,Positive
2,KPSharmaOli,7/14/2020,7:00:21,shree Why should we have a problem with the pe...,shree Why should we have a problem with the pe...,"[(shree, a), (problem, n), (people, n), (probl...",shree problem people problem Stupid Communis...,0.072,0.131,0.797,"{'neg': 0.123, 'neu': 0.689, 'pos': 0.188, 'co...",0.8522,POSITIVE,0.11875,Positive
3,RheaChakraborty,7/14/2020,7:00:21,Rhea Chakraborty s Heartbreaking Post On Susha...,Rhea Chakraborty s Heartbreaking Post On Susha...,"[(Rhea, n), (Chakraborty, n), (Heartbreaking, ...",Rhea Chakraborty Heartbreaking Post Sushant ...,0.114,0.173,0.713,"{'neg': 0.126, 'neu': 0.661, 'pos': 0.214, 'co...",0.9517,POSITIVE,0.142857,Positive
4,Stop_Transfer_Sunita_Yadav,7/14/2020,7:00:21,We stand for Sunita Yadav Stop the Transfer Wh...,We stand for Sunita Yadav Stop the Transfer Wh...,"[(stand, v), (Sunita, n), (Yadav, n), (Stop, n...",stand Sunita Yadav Stop Transfer woman empow...,0.152,0.138,0.71,"{'neg': 0.213, 'neu': 0.586, 'pos': 0.201, 'co...",0.2023,POSITIVE,0.213228,Positive


comparing analysis of vader and TextBlob

In [63]:
print(df['sentiment_type'].value_counts())
print(df['TextBlob_Analysis'].value_counts())

POSITIVE    27652
NEGATIVE     7468
NEUTRAL       146
Name: sentiment_type, dtype: int64
Positive    29343
Negative     5675
Neutral       248
Name: TextBlob_Analysis, dtype: int64


Here I tried to normalize the scores to come up with a new scoring system, however it turned out that it is not very useful so I dropped this idea

In [67]:
df.describe()

Unnamed: 0,NegativeScore,positiveScore,neutralScore,compound,TextBlob_Polarity
count,35266.0,35266.0,35266.0,35266.0,35266.0
mean,0.067057,0.149762,0.783177,0.544859,0.186714
std,0.057859,0.082912,0.080023,0.712728,0.198521
min,0.0,0.0,0.305,-0.9994,-0.7
25%,0.021,0.091,0.735,0.4567,0.046178
50%,0.054,0.134,0.788,0.9531,0.172222
75%,0.101,0.192,0.837,0.9883,0.309599
max,0.455,0.695,1.0,0.9999,1.0


In [68]:
normalized_df=(df-df.mean())/df.std()
normalized_df.head()

Unnamed: 0,Cleaned Tweetcaptions,Data,Date,Lemma,NegativeScore,POS tagged,TextBlob_Analysis,TextBlob_Polarity,Time,compound,neutralScore,positiveScore,scores,sentiment_type,tweetcaption
0,,,,,-0.450346,,,-0.221735,,0.603092,0.210229,0.11142,,,
1,,,,,-0.675029,,,0.067092,,0.547251,1.184946,-0.684608,,,
2,,,,,0.085436,,,-0.342351,,0.431218,0.17274,-0.226289,,,
3,,,,,0.811335,,,-0.220917,,0.570822,-0.876956,0.280275,,,
4,,,,,1.4681,,,0.133556,,-0.48063,-0.914445,-0.141861,,,


In [84]:
df['nNegativeScore']=normalized_df['NegativeScore']
df['nneutralScore']=normalized_df['neutralScore']
df['npostiveScore']=normalized_df['positiveScore']
df['ncompound']=normalized_df['compound']
df['nTextBlob_Polarity']=normalized_df['TextBlob_Polarity']

In [70]:
df.head()

Unnamed: 0,Data,Date,Time,tweetcaption,Cleaned Tweetcaptions,POS tagged,Lemma,NegativeScore,positiveScore,neutralScore,scores,compound,sentiment_type,TextBlob_Polarity,TextBlob_Analysis,nNegativeScore,nneutralScore,npostiveScore,ncompound,nTextBlob_Polarity
0,tuesdayvibes,7/14/2020,7:00:21,Love it here vacation vibes amazing beautiful ...,Love it here vacation vibes amazing beautiful ...,"[(Love, v), (vacation, n), (vibes, n), (amazin...",Love vacation vibe amaze beautiful cabo mexi...,0.041,0.159,0.8,"{'neg': 0.059, 'neu': 0.742, 'pos': 0.199, 'co...",0.9747,POSITIVE,0.142695,Positive,-0.450346,-0.450346,-0.450346,-0.450346,-0.221735
1,realmeC11,7/14/2020,7:00:21,Best Camera Smartphone under 20k Please vote a...,Best Camera Smartphone under k Please vote and...,"[(Best, n), (Camera, n), (Smartphone, n), (k, ...",Best Camera Smartphone k Please vote help re...,0.028,0.093,0.878,"{'neg': 0.031, 'neu': 0.833, 'pos': 0.136, 'co...",0.9349,POSITIVE,0.200033,Positive,-0.675029,-0.675029,-0.675029,-0.675029,0.067092
2,KPSharmaOli,7/14/2020,7:00:21,shree Why should we have a problem with the pe...,shree Why should we have a problem with the pe...,"[(shree, a), (problem, n), (people, n), (probl...",shree problem people problem Stupid Communis...,0.072,0.131,0.797,"{'neg': 0.123, 'neu': 0.689, 'pos': 0.188, 'co...",0.8522,POSITIVE,0.11875,Positive,0.085436,0.085436,0.085436,0.085436,-0.342351
3,RheaChakraborty,7/14/2020,7:00:21,Rhea Chakraborty s Heartbreaking Post On Susha...,Rhea Chakraborty s Heartbreaking Post On Susha...,"[(Rhea, n), (Chakraborty, n), (Heartbreaking, ...",Rhea Chakraborty Heartbreaking Post Sushant ...,0.114,0.173,0.713,"{'neg': 0.126, 'neu': 0.661, 'pos': 0.214, 'co...",0.9517,POSITIVE,0.142857,Positive,0.811335,0.811335,0.811335,0.811335,-0.220917
4,Stop_Transfer_Sunita_Yadav,7/14/2020,7:00:21,We stand for Sunita Yadav Stop the Transfer Wh...,We stand for Sunita Yadav Stop the Transfer Wh...,"[(stand, v), (Sunita, n), (Yadav, n), (Stop, n...",stand Sunita Yadav Stop Transfer woman empow...,0.152,0.138,0.71,"{'neg': 0.213, 'neu': 0.586, 'pos': 0.201, 'co...",0.2023,POSITIVE,0.213228,Positive,1.4681,1.4681,1.4681,1.4681,0.133556


In [72]:
df=df.drop(["POS tagged",'scores'],axis=1)

In [73]:
df.head()

Unnamed: 0,Data,Date,Time,tweetcaption,Cleaned Tweetcaptions,Lemma,NegativeScore,positiveScore,neutralScore,compound,sentiment_type,TextBlob_Polarity,TextBlob_Analysis,nNegativeScore,nneutralScore,npostiveScore,ncompound,nTextBlob_Polarity
0,tuesdayvibes,7/14/2020,7:00:21,Love it here vacation vibes amazing beautiful ...,Love it here vacation vibes amazing beautiful ...,Love vacation vibe amaze beautiful cabo mexi...,0.041,0.159,0.8,0.9747,POSITIVE,0.142695,Positive,-0.450346,-0.450346,-0.450346,-0.450346,-0.221735
1,realmeC11,7/14/2020,7:00:21,Best Camera Smartphone under 20k Please vote a...,Best Camera Smartphone under k Please vote and...,Best Camera Smartphone k Please vote help re...,0.028,0.093,0.878,0.9349,POSITIVE,0.200033,Positive,-0.675029,-0.675029,-0.675029,-0.675029,0.067092
2,KPSharmaOli,7/14/2020,7:00:21,shree Why should we have a problem with the pe...,shree Why should we have a problem with the pe...,shree problem people problem Stupid Communis...,0.072,0.131,0.797,0.8522,POSITIVE,0.11875,Positive,0.085436,0.085436,0.085436,0.085436,-0.342351
3,RheaChakraborty,7/14/2020,7:00:21,Rhea Chakraborty s Heartbreaking Post On Susha...,Rhea Chakraborty s Heartbreaking Post On Susha...,Rhea Chakraborty Heartbreaking Post Sushant ...,0.114,0.173,0.713,0.9517,POSITIVE,0.142857,Positive,0.811335,0.811335,0.811335,0.811335,-0.220917
4,Stop_Transfer_Sunita_Yadav,7/14/2020,7:00:21,We stand for Sunita Yadav Stop the Transfer Wh...,We stand for Sunita Yadav Stop the Transfer Wh...,stand Sunita Yadav Stop Transfer woman empow...,0.152,0.138,0.71,0.2023,POSITIVE,0.213228,Positive,1.4681,1.4681,1.4681,1.4681,0.133556


In [74]:
df[df['sentiment_type'] =="NEUTRAL"].head() # checking neutral values

Unnamed: 0,Data,Date,Time,tweetcaption,Cleaned Tweetcaptions,Lemma,NegativeScore,positiveScore,neutralScore,compound,sentiment_type,TextBlob_Polarity,TextBlob_Analysis,nNegativeScore,nneutralScore,npostiveScore,ncompound,nTextBlob_Polarity
116,DMKAgainstHindusCulture,7/14/2020,10:59:15,Get ready dmk going to loss in 2021 DMKAgainst...,Get ready dmk going to loss in DMKAgainstHindu...,Get ready dmk go loss DMKAgainstHindusCultur...,0.087,0.085,0.829,-0.0,NEUTRAL,0.215476,Positive,0.344686,0.344686,0.344686,0.344686,0.144883
131,DMKAgainstHindusCulture,7/14/2020,11:59:15,Get ready dmk going to loss in 2021 DMKAgainst...,Get ready dmk going to loss in DMKAgainstHindu...,Get ready dmk go loss DMKAgainstHindusCultur...,0.087,0.085,0.829,-0.0,NEUTRAL,0.215476,Positive,0.344686,0.344686,0.344686,0.344686,0.144883
901,ZTStreamParty,7/15/2020,12:59:18,"Stream no 79 ZaraThehroBingo ZTStreamParty,Str...",Stream no ZaraThehroBingo ZTStreamParty Stream...,Stream ZaraThehroBingo ZTStreamParty Stream ...,0.415,0.0,0.585,0.0,NEUTRAL,0.0,Neutral,6.013606,6.013606,6.013606,6.013606,-0.940525
970,ZTStreamParty,7/15/2020,14:59:17,"Stream no 79 ZaraThehroBingo ZTStreamParty,Str...",Stream no ZaraThehroBingo ZTStreamParty Stream...,Stream ZaraThehroBingo ZTStreamParty Stream ...,0.415,0.0,0.585,0.0,NEUTRAL,0.0,Neutral,6.013606,6.013606,6.013606,6.013606,-0.940525
1639,DronePrathap,7/16/2020,16:59:20,The book Our Moon has blood clots has been wri...,The book Our Moon has blood clots has been wri...,book Moon blood clot write SKOOLPEDIA Online...,0.029,0.066,0.905,0.0,NEUTRAL,-0.016667,Negative,-0.657745,-0.657745,-0.657745,-0.657745,-1.024479


In [None]:
cmax= df['compound'].max()
cmin= df['compound'].min()
df['nncompound'] = df.apply(lambda x:(x['compound']-cmin)/(cmax-cmin), axis=1) # normalizing 
df.head()

In [None]:
cmax= df['TextBlob_Polarity'].mean()
cmin= df['TextBlob_Polarity'].std()
df['nn1TextBlob_Polarity'] = df.apply(lambda x:(x['TextBlob_Polarity']-cmax)/(cmin), axis=1)
df.head()

In [None]:
cmax= df['compound'].std()
cmin= df['compound'].mean()
df['nn1compound'] = df.apply(lambda x:(x['nncompound']-cmin)/(cmax), axis=1) #standardizing
df.head()

I found out that normalizing the scores did not help so I dropped these columns

In [82]:
df=df.drop(["nncompound","nnTextBlob_Polarity","nn1TextBlob_Polarity","nn1compound"],axis=1) # dropping these as I found out that these were not useful

Creating a new score by taking the average of scores of VADER and TextBlob

In [86]:
df['average0'] = df.apply(lambda x:(x['compound']+x['TextBlob_Polarity'])/(2), axis=1) # finding average
df.head()

Unnamed: 0,Data,Date,Time,tweetcaption,Cleaned Tweetcaptions,Lemma,NegativeScore,positiveScore,neutralScore,compound,sentiment_type,TextBlob_Polarity,TextBlob_Analysis,nNegativeScore,nneutralScore,npostiveScore,ncompound,nTextBlob_Polarity,average0
0,tuesdayvibes,7/14/2020,7:00:21,Love it here vacation vibes amazing beautiful ...,Love it here vacation vibes amazing beautiful ...,Love vacation vibe amaze beautiful cabo mexi...,0.041,0.159,0.8,0.9747,POSITIVE,0.142695,Positive,-0.450346,0.210229,0.11142,0.603092,-0.221735,0.558697
1,realmeC11,7/14/2020,7:00:21,Best Camera Smartphone under 20k Please vote a...,Best Camera Smartphone under k Please vote and...,Best Camera Smartphone k Please vote help re...,0.028,0.093,0.878,0.9349,POSITIVE,0.200033,Positive,-0.675029,1.184946,-0.684608,0.547251,0.067092,0.567466
2,KPSharmaOli,7/14/2020,7:00:21,shree Why should we have a problem with the pe...,shree Why should we have a problem with the pe...,shree problem people problem Stupid Communis...,0.072,0.131,0.797,0.8522,POSITIVE,0.11875,Positive,0.085436,0.17274,-0.226289,0.431218,-0.342351,0.485475
3,RheaChakraborty,7/14/2020,7:00:21,Rhea Chakraborty s Heartbreaking Post On Susha...,Rhea Chakraborty s Heartbreaking Post On Susha...,Rhea Chakraborty Heartbreaking Post Sushant ...,0.114,0.173,0.713,0.9517,POSITIVE,0.142857,Positive,0.811335,-0.876956,0.280275,0.570822,-0.220917,0.547279
4,Stop_Transfer_Sunita_Yadav,7/14/2020,7:00:21,We stand for Sunita Yadav Stop the Transfer Wh...,We stand for Sunita Yadav Stop the Transfer Wh...,stand Sunita Yadav Stop Transfer woman empow...,0.152,0.138,0.71,0.2023,POSITIVE,0.213228,Positive,1.4681,-0.914445,-0.141861,-0.48063,0.133556,0.207764


In [87]:
def getAnalysis(score):
    if score < 0:
        return "Negative"
    elif score == 0:
        return "Neutral"
    else:
        return "Positive"
df["new_score"] = df["average0"].apply(getAnalysis ) #Analysing the new score
df.head()

Unnamed: 0,Data,Date,Time,tweetcaption,Cleaned Tweetcaptions,Lemma,NegativeScore,positiveScore,neutralScore,compound,sentiment_type,TextBlob_Polarity,TextBlob_Analysis,nNegativeScore,nneutralScore,npostiveScore,ncompound,nTextBlob_Polarity,average0,new_score
0,tuesdayvibes,7/14/2020,7:00:21,Love it here vacation vibes amazing beautiful ...,Love it here vacation vibes amazing beautiful ...,Love vacation vibe amaze beautiful cabo mexi...,0.041,0.159,0.8,0.9747,POSITIVE,0.142695,Positive,-0.450346,0.210229,0.11142,0.603092,-0.221735,0.558697,Positive
1,realmeC11,7/14/2020,7:00:21,Best Camera Smartphone under 20k Please vote a...,Best Camera Smartphone under k Please vote and...,Best Camera Smartphone k Please vote help re...,0.028,0.093,0.878,0.9349,POSITIVE,0.200033,Positive,-0.675029,1.184946,-0.684608,0.547251,0.067092,0.567466,Positive
2,KPSharmaOli,7/14/2020,7:00:21,shree Why should we have a problem with the pe...,shree Why should we have a problem with the pe...,shree problem people problem Stupid Communis...,0.072,0.131,0.797,0.8522,POSITIVE,0.11875,Positive,0.085436,0.17274,-0.226289,0.431218,-0.342351,0.485475,Positive
3,RheaChakraborty,7/14/2020,7:00:21,Rhea Chakraborty s Heartbreaking Post On Susha...,Rhea Chakraborty s Heartbreaking Post On Susha...,Rhea Chakraborty Heartbreaking Post Sushant ...,0.114,0.173,0.713,0.9517,POSITIVE,0.142857,Positive,0.811335,-0.876956,0.280275,0.570822,-0.220917,0.547279,Positive
4,Stop_Transfer_Sunita_Yadav,7/14/2020,7:00:21,We stand for Sunita Yadav Stop the Transfer Wh...,We stand for Sunita Yadav Stop the Transfer Wh...,stand Sunita Yadav Stop Transfer woman empow...,0.152,0.138,0.71,0.2023,POSITIVE,0.213228,Positive,1.4681,-0.914445,-0.141861,-0.48063,0.133556,0.207764,Positive


Comparing the analysis of new score with the two models

In [91]:
print(df["new_score"].value_counts())
print(df['TextBlob_Analysis'].value_counts())
print(df['sentiment_type'].value_counts())


Positive    27858
Negative     7330
Neutral        78
Name: new_score, dtype: int64
Positive    29343
Negative     5675
Neutral       248
Name: TextBlob_Analysis, dtype: int64
POSITIVE    27652
NEGATIVE     7468
NEUTRAL       146
Name: sentiment_type, dtype: int64


#### SentiWordNet

Adding Parts of Speech Column(POS) 

In [93]:
# POS tagger dictionary
pos_dict = {'J':wordnet.ADJ, 'V':wordnet.VERB, 'N':wordnet.NOUN, 'R':wordnet.ADV} 
def token_stop_pos(text):
    tags = pos_tag(word_tokenize(text))
    newlist = []
    for word, tag in tags:
        if word.lower() not in set(stopwords.words('english')):
            newlist.append(tuple([word, pos_dict.get(tag[0])]))
    return newlist

df['POS tagged'] = df['Cleaned Tweetcaptions'].apply(token_stop_pos)
df.head()

Unnamed: 0,Data,Date,Time,tweetcaption,Cleaned Tweetcaptions,Lemma,NegativeScore,positiveScore,neutralScore,compound,...,TextBlob_Polarity,TextBlob_Analysis,nNegativeScore,nneutralScore,npostiveScore,ncompound,nTextBlob_Polarity,average0,new_score,POS tagged
0,tuesdayvibes,7/14/2020,7:00:21,Love it here vacation vibes amazing beautiful ...,Love it here vacation vibes amazing beautiful ...,Love vacation vibe amaze beautiful cabo mexi...,0.041,0.159,0.8,0.9747,...,0.142695,Positive,-0.450346,0.210229,0.11142,0.603092,-0.221735,0.558697,Positive,"[(Love, v), (vacation, n), (vibes, n), (amazin..."
1,realmeC11,7/14/2020,7:00:21,Best Camera Smartphone under 20k Please vote a...,Best Camera Smartphone under k Please vote and...,Best Camera Smartphone k Please vote help re...,0.028,0.093,0.878,0.9349,...,0.200033,Positive,-0.675029,1.184946,-0.684608,0.547251,0.067092,0.567466,Positive,"[(Best, n), (Camera, n), (Smartphone, n), (k, ..."
2,KPSharmaOli,7/14/2020,7:00:21,shree Why should we have a problem with the pe...,shree Why should we have a problem with the pe...,shree problem people problem Stupid Communis...,0.072,0.131,0.797,0.8522,...,0.11875,Positive,0.085436,0.17274,-0.226289,0.431218,-0.342351,0.485475,Positive,"[(shree, a), (problem, n), (people, n), (probl..."
3,RheaChakraborty,7/14/2020,7:00:21,Rhea Chakraborty s Heartbreaking Post On Susha...,Rhea Chakraborty s Heartbreaking Post On Susha...,Rhea Chakraborty Heartbreaking Post Sushant ...,0.114,0.173,0.713,0.9517,...,0.142857,Positive,0.811335,-0.876956,0.280275,0.570822,-0.220917,0.547279,Positive,"[(Rhea, n), (Chakraborty, n), (Heartbreaking, ..."
4,Stop_Transfer_Sunita_Yadav,7/14/2020,7:00:21,We stand for Sunita Yadav Stop the Transfer Wh...,We stand for Sunita Yadav Stop the Transfer Wh...,stand Sunita Yadav Stop Transfer woman empow...,0.152,0.138,0.71,0.2023,...,0.213228,Positive,1.4681,-0.914445,-0.141861,-0.48063,0.133556,0.207764,Positive,"[(stand, v), (Sunita, n), (Yadav, n), (Stop, n..."


Applying SentiWordNet and modifying it to get the score as well

In [98]:
from nltk.corpus import wordnet as wn
from nltk.corpus import sentiwordnet as swn
def sentiwordnetanalysis(pos_data):
    sentiment = 0
    tokens_count = 0
    for word, pos in pos_data:
        if not pos:
            continue
        lemma = wordnet_lemmatizer.lemmatize(word, pos=pos)
        if not lemma:
            continue
        synsets = wordnet.synsets(lemma, pos=pos)
        if not synsets:
            continue
        # Take the first sense, the most common
        synset = synsets[0]
        swn_synset = swn.senti_synset(synset.name())
        sentiment += swn_synset.pos_score() - swn_synset.neg_score()
        tokens_count += 1
        # print(swn_synset.pos_score(),swn_synset.neg_score(),swn_synset.obj_score())
        if not tokens_count:
            return 0
        if sentiment>0:
            return "Positive"
        if sentiment==0:
            return "Neutral"
        else:
            return "Negative"

df['SWN analysis'] = df['POS tagged'].apply(sentiwordnetanalysis)
df.head()

Unnamed: 0,Data,Date,Time,tweetcaption,Cleaned Tweetcaptions,Lemma,NegativeScore,positiveScore,neutralScore,compound,...,TextBlob_Analysis,nNegativeScore,nneutralScore,npostiveScore,ncompound,nTextBlob_Polarity,average0,new_score,POS tagged,SWN analysis
0,tuesdayvibes,7/14/2020,7:00:21,Love it here vacation vibes amazing beautiful ...,Love it here vacation vibes amazing beautiful ...,Love vacation vibe amaze beautiful cabo mexi...,0.041,0.159,0.8,0.9747,...,Positive,-0.450346,0.210229,0.11142,0.603092,-0.221735,0.558697,Positive,"[(Love, v), (vacation, n), (vibes, n), (amazin...",Positive
1,realmeC11,7/14/2020,7:00:21,Best Camera Smartphone under 20k Please vote a...,Best Camera Smartphone under k Please vote and...,Best Camera Smartphone k Please vote help re...,0.028,0.093,0.878,0.9349,...,Positive,-0.675029,1.184946,-0.684608,0.547251,0.067092,0.567466,Positive,"[(Best, n), (Camera, n), (Smartphone, n), (k, ...",Positive
2,KPSharmaOli,7/14/2020,7:00:21,shree Why should we have a problem with the pe...,shree Why should we have a problem with the pe...,shree problem people problem Stupid Communis...,0.072,0.131,0.797,0.8522,...,Positive,0.085436,0.17274,-0.226289,0.431218,-0.342351,0.485475,Positive,"[(shree, a), (problem, n), (people, n), (probl...",Negative
3,RheaChakraborty,7/14/2020,7:00:21,Rhea Chakraborty s Heartbreaking Post On Susha...,Rhea Chakraborty s Heartbreaking Post On Susha...,Rhea Chakraborty Heartbreaking Post Sushant ...,0.114,0.173,0.713,0.9517,...,Positive,0.811335,-0.876956,0.280275,0.570822,-0.220917,0.547279,Positive,"[(Rhea, n), (Chakraborty, n), (Heartbreaking, ...",Neutral
4,Stop_Transfer_Sunita_Yadav,7/14/2020,7:00:21,We stand for Sunita Yadav Stop the Transfer Wh...,We stand for Sunita Yadav Stop the Transfer Wh...,stand Sunita Yadav Stop Transfer woman empow...,0.152,0.138,0.71,0.2023,...,Positive,1.4681,-0.914445,-0.141861,-0.48063,0.133556,0.207764,Positive,"[(stand, v), (Sunita, n), (Yadav, n), (Stop, n...",Neutral


In [100]:
df['SWN analysis'].value_counts()

Neutral     23928
Positive     8337
Negative     2991
Name: SWN analysis, dtype: int64

Changing the function to output sentiment score

In [101]:
def sentiwordnetscore(pos_data):
    sentiment = 0
    tokens_count = 0
    for word, pos in pos_data:
        if not pos:
            continue
        lemma = wordnet_lemmatizer.lemmatize(word, pos=pos)
        if not lemma:
            continue
        synsets = wordnet.synsets(lemma, pos=pos)
        if not synsets:
            continue
        # Take the first sense, the most common
        synset = synsets[0]
        swn_synset = swn.senti_synset(synset.name())
        sentiment += swn_synset.pos_score() - swn_synset.neg_score()
        tokens_count += 1
        return sentiment;
df['SWN_score'] = df['POS tagged'].apply(sentiwordnetscore)
df.head()

Unnamed: 0,Data,Date,Time,tweetcaption,Cleaned Tweetcaptions,Lemma,NegativeScore,positiveScore,neutralScore,compound,...,nNegativeScore,nneutralScore,npostiveScore,ncompound,nTextBlob_Polarity,average0,new_score,POS tagged,SWN analysis,SWN_score
0,tuesdayvibes,7/14/2020,7:00:21,Love it here vacation vibes amazing beautiful ...,Love it here vacation vibes amazing beautiful ...,Love vacation vibe amaze beautiful cabo mexi...,0.041,0.159,0.8,0.9747,...,-0.450346,0.210229,0.11142,0.603092,-0.221735,0.558697,Positive,"[(Love, v), (vacation, n), (vibes, n), (amazin...",Positive,0.5
1,realmeC11,7/14/2020,7:00:21,Best Camera Smartphone under 20k Please vote a...,Best Camera Smartphone under k Please vote and...,Best Camera Smartphone k Please vote help re...,0.028,0.093,0.878,0.9349,...,-0.675029,1.184946,-0.684608,0.547251,0.067092,0.567466,Positive,"[(Best, n), (Camera, n), (Smartphone, n), (k, ...",Positive,0.25
2,KPSharmaOli,7/14/2020,7:00:21,shree Why should we have a problem with the pe...,shree Why should we have a problem with the pe...,shree problem people problem Stupid Communis...,0.072,0.131,0.797,0.8522,...,0.085436,0.17274,-0.226289,0.431218,-0.342351,0.485475,Positive,"[(shree, a), (problem, n), (people, n), (probl...",Negative,-0.625
3,RheaChakraborty,7/14/2020,7:00:21,Rhea Chakraborty s Heartbreaking Post On Susha...,Rhea Chakraborty s Heartbreaking Post On Susha...,Rhea Chakraborty Heartbreaking Post Sushant ...,0.114,0.173,0.713,0.9517,...,0.811335,-0.876956,0.280275,0.570822,-0.220917,0.547279,Positive,"[(Rhea, n), (Chakraborty, n), (Heartbreaking, ...",Neutral,0.0
4,Stop_Transfer_Sunita_Yadav,7/14/2020,7:00:21,We stand for Sunita Yadav Stop the Transfer Wh...,We stand for Sunita Yadav Stop the Transfer Wh...,stand Sunita Yadav Stop Transfer woman empow...,0.152,0.138,0.71,0.2023,...,1.4681,-0.914445,-0.141861,-0.48063,0.133556,0.207764,Positive,"[(stand, v), (Sunita, n), (Yadav, n), (Stop, n...",Neutral,0.0


#### Final score

Finding the final score by taking the average of the scores of VADER, TextBlob and SentiWordNet as it will give more a generalized prediction giving it a representation of all the three models.

In [103]:

df['average1'] = df.apply(lambda x:(x['compound']+x['TextBlob_Polarity']+x['SWN_score'])/(3), axis=1) #Finding the average score
df.head()

Unnamed: 0,Data,Date,Time,tweetcaption,Cleaned Tweetcaptions,Lemma,NegativeScore,positiveScore,neutralScore,compound,...,nneutralScore,npostiveScore,ncompound,nTextBlob_Polarity,average0,new_score,POS tagged,SWN analysis,SWN_score,average1
0,tuesdayvibes,7/14/2020,7:00:21,Love it here vacation vibes amazing beautiful ...,Love it here vacation vibes amazing beautiful ...,Love vacation vibe amaze beautiful cabo mexi...,0.041,0.159,0.8,0.9747,...,0.210229,0.11142,0.603092,-0.221735,0.558697,Positive,"[(Love, v), (vacation, n), (vibes, n), (amazin...",Positive,0.5,0.539132
1,realmeC11,7/14/2020,7:00:21,Best Camera Smartphone under 20k Please vote a...,Best Camera Smartphone under k Please vote and...,Best Camera Smartphone k Please vote help re...,0.028,0.093,0.878,0.9349,...,1.184946,-0.684608,0.547251,0.067092,0.567466,Positive,"[(Best, n), (Camera, n), (Smartphone, n), (k, ...",Positive,0.25,0.461644
2,KPSharmaOli,7/14/2020,7:00:21,shree Why should we have a problem with the pe...,shree Why should we have a problem with the pe...,shree problem people problem Stupid Communis...,0.072,0.131,0.797,0.8522,...,0.17274,-0.226289,0.431218,-0.342351,0.485475,Positive,"[(shree, a), (problem, n), (people, n), (probl...",Negative,-0.625,0.115317
3,RheaChakraborty,7/14/2020,7:00:21,Rhea Chakraborty s Heartbreaking Post On Susha...,Rhea Chakraborty s Heartbreaking Post On Susha...,Rhea Chakraborty Heartbreaking Post Sushant ...,0.114,0.173,0.713,0.9517,...,-0.876956,0.280275,0.570822,-0.220917,0.547279,Positive,"[(Rhea, n), (Chakraborty, n), (Heartbreaking, ...",Neutral,0.0,0.364852
4,Stop_Transfer_Sunita_Yadav,7/14/2020,7:00:21,We stand for Sunita Yadav Stop the Transfer Wh...,We stand for Sunita Yadav Stop the Transfer Wh...,stand Sunita Yadav Stop Transfer woman empow...,0.152,0.138,0.71,0.2023,...,-0.914445,-0.141861,-0.48063,0.133556,0.207764,Positive,"[(stand, v), (Sunita, n), (Yadav, n), (Stop, n...",Neutral,0.0,0.138509


In [104]:
df["newest_score"] = df["average1"].apply(getAnalysis ) # Adding the analysis
df.head()

Unnamed: 0,Data,Date,Time,tweetcaption,Cleaned Tweetcaptions,Lemma,NegativeScore,positiveScore,neutralScore,compound,...,npostiveScore,ncompound,nTextBlob_Polarity,average0,new_score,POS tagged,SWN analysis,SWN_score,average1,newest_score
0,tuesdayvibes,7/14/2020,7:00:21,Love it here vacation vibes amazing beautiful ...,Love it here vacation vibes amazing beautiful ...,Love vacation vibe amaze beautiful cabo mexi...,0.041,0.159,0.8,0.9747,...,0.11142,0.603092,-0.221735,0.558697,Positive,"[(Love, v), (vacation, n), (vibes, n), (amazin...",Positive,0.5,0.539132,Positive
1,realmeC11,7/14/2020,7:00:21,Best Camera Smartphone under 20k Please vote a...,Best Camera Smartphone under k Please vote and...,Best Camera Smartphone k Please vote help re...,0.028,0.093,0.878,0.9349,...,-0.684608,0.547251,0.067092,0.567466,Positive,"[(Best, n), (Camera, n), (Smartphone, n), (k, ...",Positive,0.25,0.461644,Positive
2,KPSharmaOli,7/14/2020,7:00:21,shree Why should we have a problem with the pe...,shree Why should we have a problem with the pe...,shree problem people problem Stupid Communis...,0.072,0.131,0.797,0.8522,...,-0.226289,0.431218,-0.342351,0.485475,Positive,"[(shree, a), (problem, n), (people, n), (probl...",Negative,-0.625,0.115317,Positive
3,RheaChakraborty,7/14/2020,7:00:21,Rhea Chakraborty s Heartbreaking Post On Susha...,Rhea Chakraborty s Heartbreaking Post On Susha...,Rhea Chakraborty Heartbreaking Post Sushant ...,0.114,0.173,0.713,0.9517,...,0.280275,0.570822,-0.220917,0.547279,Positive,"[(Rhea, n), (Chakraborty, n), (Heartbreaking, ...",Neutral,0.0,0.364852,Positive
4,Stop_Transfer_Sunita_Yadav,7/14/2020,7:00:21,We stand for Sunita Yadav Stop the Transfer Wh...,We stand for Sunita Yadav Stop the Transfer Wh...,stand Sunita Yadav Stop Transfer woman empow...,0.152,0.138,0.71,0.2023,...,-0.141861,-0.48063,0.133556,0.207764,Positive,"[(stand, v), (Sunita, n), (Yadav, n), (Stop, n...",Neutral,0.0,0.138509,Positive


Comparing the analysis of all the models

In [105]:
print(df["new_score"].value_counts())
print(df['TextBlob_Analysis'].value_counts())
print(df['sentiment_type'].value_counts())
print(df['SWN analysis'].value_counts())
print(df['newest_score'].value_counts())

Positive    27858
Negative     7330
Neutral        78
Name: new_score, dtype: int64
Positive    29343
Negative     5675
Neutral       248
Name: TextBlob_Analysis, dtype: int64
POSITIVE    27652
NEGATIVE     7468
NEUTRAL       146
Name: sentiment_type, dtype: int64
Neutral     23928
Positive     8337
Negative     2991
Name: SWN analysis, dtype: int64
Positive    27851
Negative     7347
Neutral        68
Name: newest_score, dtype: int64


In [106]:
df.head()

Unnamed: 0,Data,Date,Time,tweetcaption,Cleaned Tweetcaptions,Lemma,NegativeScore,positiveScore,neutralScore,compound,...,npostiveScore,ncompound,nTextBlob_Polarity,average0,new_score,POS tagged,SWN analysis,SWN_score,average1,newest_score
0,tuesdayvibes,7/14/2020,7:00:21,Love it here vacation vibes amazing beautiful ...,Love it here vacation vibes amazing beautiful ...,Love vacation vibe amaze beautiful cabo mexi...,0.041,0.159,0.8,0.9747,...,0.11142,0.603092,-0.221735,0.558697,Positive,"[(Love, v), (vacation, n), (vibes, n), (amazin...",Positive,0.5,0.539132,Positive
1,realmeC11,7/14/2020,7:00:21,Best Camera Smartphone under 20k Please vote a...,Best Camera Smartphone under k Please vote and...,Best Camera Smartphone k Please vote help re...,0.028,0.093,0.878,0.9349,...,-0.684608,0.547251,0.067092,0.567466,Positive,"[(Best, n), (Camera, n), (Smartphone, n), (k, ...",Positive,0.25,0.461644,Positive
2,KPSharmaOli,7/14/2020,7:00:21,shree Why should we have a problem with the pe...,shree Why should we have a problem with the pe...,shree problem people problem Stupid Communis...,0.072,0.131,0.797,0.8522,...,-0.226289,0.431218,-0.342351,0.485475,Positive,"[(shree, a), (problem, n), (people, n), (probl...",Negative,-0.625,0.115317,Positive
3,RheaChakraborty,7/14/2020,7:00:21,Rhea Chakraborty s Heartbreaking Post On Susha...,Rhea Chakraborty s Heartbreaking Post On Susha...,Rhea Chakraborty Heartbreaking Post Sushant ...,0.114,0.173,0.713,0.9517,...,0.280275,0.570822,-0.220917,0.547279,Positive,"[(Rhea, n), (Chakraborty, n), (Heartbreaking, ...",Neutral,0.0,0.364852,Positive
4,Stop_Transfer_Sunita_Yadav,7/14/2020,7:00:21,We stand for Sunita Yadav Stop the Transfer Wh...,We stand for Sunita Yadav Stop the Transfer Wh...,stand Sunita Yadav Stop Transfer woman empow...,0.152,0.138,0.71,0.2023,...,-0.141861,-0.48063,0.133556,0.207764,Positive,"[(stand, v), (Sunita, n), (Yadav, n), (Stop, n...",Neutral,0.0,0.138509,Positive


Cleaning the dataset by removing unwanted columns and rearranging and renaming the useful ones

In [109]:
df_final=df.drop(["Cleaned Tweetcaptions","nNegativeScore","npostiveScore","ncompound","nTextBlob_Polarity","nneutralScore","POS tagged","new_score","average0"],axis=1)
df_final.head()


Unnamed: 0,Data,Date,Time,tweetcaption,Lemma,NegativeScore,positiveScore,neutralScore,compound,sentiment_type,TextBlob_Polarity,TextBlob_Analysis,SWN analysis,SWN_score,average1,newest_score
0,tuesdayvibes,7/14/2020,7:00:21,Love it here vacation vibes amazing beautiful ...,Love vacation vibe amaze beautiful cabo mexi...,0.041,0.159,0.8,0.9747,POSITIVE,0.142695,Positive,Positive,0.5,0.539132,Positive
1,realmeC11,7/14/2020,7:00:21,Best Camera Smartphone under 20k Please vote a...,Best Camera Smartphone k Please vote help re...,0.028,0.093,0.878,0.9349,POSITIVE,0.200033,Positive,Positive,0.25,0.461644,Positive
2,KPSharmaOli,7/14/2020,7:00:21,shree Why should we have a problem with the pe...,shree problem people problem Stupid Communis...,0.072,0.131,0.797,0.8522,POSITIVE,0.11875,Positive,Negative,-0.625,0.115317,Positive
3,RheaChakraborty,7/14/2020,7:00:21,Rhea Chakraborty s Heartbreaking Post On Susha...,Rhea Chakraborty Heartbreaking Post Sushant ...,0.114,0.173,0.713,0.9517,POSITIVE,0.142857,Positive,Neutral,0.0,0.364852,Positive
4,Stop_Transfer_Sunita_Yadav,7/14/2020,7:00:21,We stand for Sunita Yadav Stop the Transfer Wh...,stand Sunita Yadav Stop Transfer woman empow...,0.152,0.138,0.71,0.2023,POSITIVE,0.213228,Positive,Neutral,0.0,0.138509,Positive


In [113]:
df_final.rename(columns={'NegativeScore': 'Vader_negScore', 'positiveScore': 'Vader_posScore', 'neutralScore': 'Vader_neuScore', 'compound': 'Vader_compoundScore', 'sentiment_type': 'Vader_analysis', 'average1': 'newScore', 'newest_score': 'combinedAnalysis'}, inplace=True)
df_final.head()

Unnamed: 0,Data,Date,Time,tweetcaption,Lemma,Vader_negScore,Vader_posScore,Vader_neuScore,Vader_compoundScore,Vader_analysis,TextBlob_Polarity,TextBlob_Analysis,SWN analysis,SWN_score,newScore,combinedAnalysis
0,tuesdayvibes,7/14/2020,7:00:21,Love it here vacation vibes amazing beautiful ...,Love vacation vibe amaze beautiful cabo mexi...,0.041,0.159,0.8,0.9747,POSITIVE,0.142695,Positive,Positive,0.5,0.539132,Positive
1,realmeC11,7/14/2020,7:00:21,Best Camera Smartphone under 20k Please vote a...,Best Camera Smartphone k Please vote help re...,0.028,0.093,0.878,0.9349,POSITIVE,0.200033,Positive,Positive,0.25,0.461644,Positive
2,KPSharmaOli,7/14/2020,7:00:21,shree Why should we have a problem with the pe...,shree problem people problem Stupid Communis...,0.072,0.131,0.797,0.8522,POSITIVE,0.11875,Positive,Negative,-0.625,0.115317,Positive
3,RheaChakraborty,7/14/2020,7:00:21,Rhea Chakraborty s Heartbreaking Post On Susha...,Rhea Chakraborty Heartbreaking Post Sushant ...,0.114,0.173,0.713,0.9517,POSITIVE,0.142857,Positive,Neutral,0.0,0.364852,Positive
4,Stop_Transfer_Sunita_Yadav,7/14/2020,7:00:21,We stand for Sunita Yadav Stop the Transfer Wh...,stand Sunita Yadav Stop Transfer woman empow...,0.152,0.138,0.71,0.2023,POSITIVE,0.213228,Positive,Neutral,0.0,0.138509,Positive


In [115]:
oldcols=df_final.columns
oldcols

Index(['Data', 'Date', 'Time', 'tweetcaption', 'Lemma', 'Vader_negScore',
       'Vader_posScore', 'Vader_neuScore', 'Vader_compoundScore',
       'Vader_analysis', 'TextBlob_Polarity', 'TextBlob_Analysis',
       'SWN analysis', 'SWN_score', 'newScore', 'combinedAnalysis'],
      dtype='object')

In [119]:
newcols=['Data', 'Date', 'Time', 'tweetcaption', 'Lemma', 'Vader_negScore',
       'Vader_posScore', 'Vader_neuScore', 'Vader_compoundScore',
       'Vader_analysis', 'TextBlob_Polarity', 'TextBlob_Analysis',
       'SWN_score','SWN analysis', 'newScore', 'combinedAnalysis']
df_final = df_final.reindex(columns=newcols)
df_final.head()

Unnamed: 0,Data,Date,Time,tweetcaption,Lemma,Vader_negScore,Vader_posScore,Vader_neuScore,Vader_compoundScore,Vader_analysis,TextBlob_Polarity,TextBlob_Analysis,SWN_score,SWN analysis,newScore,combinedAnalysis
0,tuesdayvibes,7/14/2020,7:00:21,Love it here vacation vibes amazing beautiful ...,Love vacation vibe amaze beautiful cabo mexi...,0.041,0.159,0.8,0.9747,POSITIVE,0.142695,Positive,0.5,Positive,0.539132,Positive
1,realmeC11,7/14/2020,7:00:21,Best Camera Smartphone under 20k Please vote a...,Best Camera Smartphone k Please vote help re...,0.028,0.093,0.878,0.9349,POSITIVE,0.200033,Positive,0.25,Positive,0.461644,Positive
2,KPSharmaOli,7/14/2020,7:00:21,shree Why should we have a problem with the pe...,shree problem people problem Stupid Communis...,0.072,0.131,0.797,0.8522,POSITIVE,0.11875,Positive,-0.625,Negative,0.115317,Positive
3,RheaChakraborty,7/14/2020,7:00:21,Rhea Chakraborty s Heartbreaking Post On Susha...,Rhea Chakraborty Heartbreaking Post Sushant ...,0.114,0.173,0.713,0.9517,POSITIVE,0.142857,Positive,0.0,Neutral,0.364852,Positive
4,Stop_Transfer_Sunita_Yadav,7/14/2020,7:00:21,We stand for Sunita Yadav Stop the Transfer Wh...,stand Sunita Yadav Stop Transfer woman empow...,0.152,0.138,0.71,0.2023,POSITIVE,0.213228,Positive,0.0,Neutral,0.138509,Positive


In [121]:
print(df_final['TextBlob_Analysis'].value_counts())
print(df_final['Vader_analysis'].value_counts())
print(df_final['SWN analysis'].value_counts())
print(df_final['combinedAnalysis'].value_counts())

Positive    29343
Negative     5675
Neutral       248
Name: TextBlob_Analysis, dtype: int64
POSITIVE    27652
NEGATIVE     7468
NEUTRAL       146
Name: Vader_analysis, dtype: int64
Neutral     23928
Positive     8337
Negative     2991
Name: SWN analysis, dtype: int64
Positive    27851
Negative     7347
Neutral        68
Name: combinedAnalysis, dtype: int64
