# Sentimnet Analysis using Multinomial Naive Bayes and Tfidf vectorizer

In [0]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [0]:
!ls '/content/drive/My Drive/Final_Practicum/sentiment140'

All_Tweets_cleaned.csv	  senti_airline_naive_tfidf.ipynb
All_Tweets_cleaned.ipynb  Sentiment140_processing2.ipynb
All_Tweets.csv		  Sentiment_Analysis_Dataset.csv
AllTweets.csv		  Sentiment_cleaned.csv
LSTM_Base		  Tfidf_Naive
model.h5		  Tweets.ipynb


In [0]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.feature_extraction.text import TfidfVectorizer

###Reading cleaned dataset of Sentiment140 and Election dataset

In [0]:
data = pd.read_csv('/content/drive/My Drive/Final_Practicum/sentiment140/Sentiment_cleaned.csv', usecols=['cleaned_tweets', 'label'], encoding="ISO-8859-1")
data.head(10)

Unnamed: 0,label,cleaned_tweets
0,1,opsi meant sayhope left arm ok drakardnoir
1,0,I want ice cream
2,1,Im happy right
3,1,I contribute probably HA
4,1,Heading practicete Have great day everyone
5,0,saturday night plan hasnt happen really long time
6,1,Hi Beck Im back Brisy Weekend good although wo...
7,0,My family say music depress beach Haha
8,1,pretty bore Just thinkin Haley Williams right ...
9,0,oh well thats good Mine kinda cold wish still ...


In [0]:
All_Tweets = pd.read_csv('/content/drive/My Drive/Final_Practicum/sentiment140/All_Tweets_cleaned.csv', encoding="ISO-8859-1")
All_Tweets.head(10)

Unnamed: 0,tweet_id,text
0,1.10163e+18,Modi demonstrate Kashmir potentially flashpoin...
1,1.10163e+18,Vande mataram sir
2,1.10163e+18,You Like Jhansi Kiani For All Nationalist Indi...
3,1.10163e+18,If I run frm home still leave make battle life...
4,1.10163e+18,Meri oqat hai ki mein ek Indian hu teri oqat y...
5,1.10163e+18,Dont know retain Abhi would helpedeleasing put...
6,1.10163e+18,Jay hind salute Abhinandan
7,1.10163e+18,sir alot respect come pakistan say release ind...
8,1.10163e+18,UNICEFhtpstwitercomSampathTstatus
9,1.10163e+18,Vande besharam joke modi


### TfIdf Vectorization on text of both datasets

In [0]:
vectorizer = TfidfVectorizer(stop_words = 'english')

# Fit the training data and then return the matrix
vectorized_data = vectorizer.fit_transform(data['cleaned_tweets'].values.astype('U'))
vectorized_election_data = vectorizer.transform(All_Tweets['text'].values.astype('U'))



###Splitting labbelled dataset i.e. Sentiment140 into train and text

In [0]:
seed= 7
test_size=0.2
x_train,x_test, y_train, y_test= train_test_split(vectorized_data, data['label'], test_size=test_size, random_state=seed)

###Training the model using Multinomial Naive Bayes 

In [0]:
from sklearn.naive_bayes import MultinomialNB
nb = MultinomialNB()

In [0]:
nb.fit(x_train, y_train)

MultinomialNB(alpha=1.0, class_prior=None, fit_prior=True)

###Predicting the results on test data

In [0]:
y_pred_class = nb.predict(x_test)

###Model Evaluation through accuracy ,F1 score and confusion matrix

In [0]:
from sklearn import metrics

metrics.accuracy_score(y_test, y_pred_class)

0.754340625

In [0]:
metrics.f1_score(y_test, y_pred_class)

0.7526267924967667

In [0]:
metrics.confusion_matrix(y_test, y_pred_class)

array([[121803,  38470],
       [ 40141, 119586]])

### Prediction of sentiments on Election data

In [0]:
y_pred_Election_Tweets = nb.predict(vectorized_election_data)

In [0]:

All_Tweets['label']=pd.Series(y_pred_Election_Tweets.tolist())
All_Tweets.head(10)

Unnamed: 0,tweet_id,text,label
0,1.10163e+18,Modi demonstrate Kashmir potentially flashpoin...,1
1,1.10163e+18,Vande mataram sir,1
2,1.10163e+18,You Like Jhansi Kiani For All Nationalist Indi...,0
3,1.10163e+18,If I run frm home still leave make battle life...,0
4,1.10163e+18,Meri oqat hai ki mein ek Indian hu teri oqat y...,1
5,1.10163e+18,Dont know retain Abhi would helpedeleasing put...,0
6,1.10163e+18,Jay hind salute Abhinandan,1
7,1.10163e+18,sir alot respect come pakistan say release ind...,1
8,1.10163e+18,UNICEFhtpstwitercomSampathTstatus,1
9,1.10163e+18,Vande besharam joke modi,0


In [0]:
All_Tweets.to_csv('/content/drive/My Drive/Final_Practicum/sentiment140/Tfidf_Naive/new_Predicted_Tweets.csv', header=True, index=False)

In [0]:
data=pd.read_csv('/content/drive/My Drive/Final_Practicum/sentiment140/Tfidf_Naive/new_Predicted_Tweets.csv')

In [0]:
data.head()

Unnamed: 0,tweet_id,text,label
0,1.10163e+18,Modi demonstrate Kashmir potentially flashpoin...,1
1,1.10163e+18,Vande mataram sir,1
2,1.10163e+18,You Like Jhansi Kiani For All Nationalist Indi...,0
3,1.10163e+18,If I run frm home still leave make battle life...,0
4,1.10163e+18,Meri oqat hai ki mein ek Indian hu teri oqat y...,1


###Assigning a label as positive and negative to the integer values

In [0]:
data.label[data.label == 1] = 'positive'
data.label[data.label == 0] = 'negative'

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.


In [0]:
print(data.head(20))

        tweet_id                                               text     label
0   1.101630e+18  Modi demonstrate Kashmir potentially flashpoin...  positive
1   1.101630e+18                                  Vande mataram sir  positive
2   1.101630e+18  You Like Jhansi Kiani For All Nationalist Indi...  negative
3   1.101630e+18  If I run frm home still leave make battle life...  negative
4   1.101630e+18  Meri oqat hai ki mein ek Indian hu teri oqat y...  positive
5   1.101630e+18  Dont know retain Abhi would helpedeleasing put...  negative
6   1.101630e+18                         Jay hind salute Abhinandan  positive
7   1.101630e+18  sir alot respect come pakistan say release ind...  positive
8   1.101630e+18                  UNICEFhtpstwitercomSampathTstatus  positive
9   1.101630e+18                           Vande besharam joke modi  negative
10  1.101630e+18  Ho apena scoperto che Andrew durante la regist...  positive
11  1.101630e+18                                      Thanks Mod

###Saving the Predicted election data

In [0]:
data.to_csv('/content/drive/My Drive/Final_Practicum/sentiment140/Tfidf_Naive/Predicted_data.csv', header=True, index=False)