## Sentiment Analysis of Labeled Twitter Texts

In [8]:
import pandas as pd
import re
import matplotlib.pyplot as plt
import seaborn as sns
import string
import nltk
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
pd.set_option("display.max_colwidth", 200)
import numpy as np
import warnings
warnings.filterwarnings("ignore")
%matplotlib inline

## 1. Read tweets

In [2]:

# Parse the tab-separated training file: column 2 holds the sentiment label,
# column 3 the tweet text (the trailing newline of each line is kept as-is).
tweet=[]
label=[]
# The context manager closes the file handle even if a malformed line raises.
with open("train.txt", encoding ='utf-8') as data:
    for line in data:
        split_data=line.split("\t")
        label.append(split_data[2])
        tweet.append(split_data[3])

In [6]:
# Build the frame from a dict so the column order is deterministic
# ("tweet" first, then "label"). The previous `columns={...}` argument was a
# set, which has no defined order and is rejected by recent pandas releases.
my_data=pd.DataFrame({"tweet": tweet, "label": label})
my_data

Unnamed: 0,tweet,label
0,#ArianaGrande Ari By Ariana Grande 80% Full https://t.co/ylhCMETHHW #Singer #Actress https://t.co/lTrb1JQiEA\n,0
1,Ariana Grande KIIS FM Yours Truly CD listening party in Burbank https://t.co/ClQIcx8Z6V #ArianaGrande\n,1
2,Ariana Grande White House Easter Egg Roll in Washington https://t.co/jdjL9swWM8 #ArianaGrande\n,1
3,#CD #Musics Ariana Grande Sweet Like Candy 3.4 oz 100 ML Sealed In Box 100% Authenic New https://t.co/oFmp0bOvZy… https://t.co/WIHLch9KtK\n,1
4,SIDE TO SIDE 😘 @arianagrande #sidetoside #arianagrande #musically #comunidadgay #lgbt🌈 #LOTB… https://t.co/tEd8rftAxV\n,1
...,...,...
12374,@dansen17 update: Zac Efron kissing a puppy https://t.co/4hTw1U8Azs\n,1
12375,#zac efron sex pic skins michelle sex https://t.co/EQFbbfuJSA\n,0
12376,First Look at Neighbors 2 with Zac Efron Shirtless! (PHOTO)... https://t.co/5C68gj5D9V #zacefron\n,1
12377,zac efron poses nude #lovely libra porn https://t.co/ukneYt2Wl2\n,1


In [6]:
my_data.shape

(12379, 2)

In [7]:
# Number of tweets per sentiment label (class-balance check)
my_data.label.value_counts()

0     6194
-1    3545
1     2332
-2     177
2      131
Name: label, dtype: int64

## 2. Cleaning tweets

### 2.1. Removing usernames and URLs

In [11]:
def remove_input_pattern(pattern, input_string):
    """Return ``input_string`` with every match of the regex ``pattern`` removed."""
    return re.compile(pattern).sub("", input_string)

In [12]:
# URL part of the pattern taken from
# https://www.w3resource.com/python-exercises/re/python-re-exercise-42.php
# The final alternative additionally matches @username mentions.
url_or_mention = r"http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+|@[\w]+"
clean_tweets = [
    remove_input_pattern(url_or_mention, raw_tweet)
    for raw_tweet in my_data["tweet"]
]

In [13]:
# Keep the URL- and mention-free text next to the raw tweet for comparison
my_data["clean_tweets"]=clean_tweets

In [14]:
my_data.head()

Unnamed: 0,tweet,label,clean_tweets
0,#ArianaGrande Ari By Ariana Grande 80% Full https://t.co/ylhCMETHHW #Singer #Actress https://t.co/lTrb1JQiEA\n,0,#ArianaGrande Ari By Ariana Grande 80% Full #Singer #Actress \n
1,Ariana Grande KIIS FM Yours Truly CD listening party in Burbank https://t.co/ClQIcx8Z6V #ArianaGrande\n,1,Ariana Grande KIIS FM Yours Truly CD listening party in Burbank #ArianaGrande\n
2,Ariana Grande White House Easter Egg Roll in Washington https://t.co/jdjL9swWM8 #ArianaGrande\n,1,Ariana Grande White House Easter Egg Roll in Washington #ArianaGrande\n
3,#CD #Musics Ariana Grande Sweet Like Candy 3.4 oz 100 ML Sealed In Box 100% Authenic New https://t.co/oFmp0bOvZy… https://t.co/WIHLch9KtK\n,1,#CD #Musics Ariana Grande Sweet Like Candy 3.4 oz 100 ML Sealed In Box 100% Authenic New … \n
4,SIDE TO SIDE 😘 @arianagrande #sidetoside #arianagrande #musically #comunidadgay #lgbt🌈 #LOTB… https://t.co/tEd8rftAxV\n,1,SIDE TO SIDE 😘 #sidetoside #arianagrande #musically #comunidadgay #lgbt🌈 #LOTB… \n


### 2.2 Removing symbols and numbers

In [15]:
# Replace everything except ASCII letters and '#' with a space.
# regex=True is required: since pandas 2.0 `str.replace` treats the pattern
# as a literal string by default, which would leave the text unchanged.
my_data["clean_tweets"]=my_data["clean_tweets"].str.replace("[^A-Za-z#]"," ",regex=True)

In [16]:
my_data.head(10)

Unnamed: 0,tweet,label,clean_tweets
0,#ArianaGrande Ari By Ariana Grande 80% Full https://t.co/ylhCMETHHW #Singer #Actress https://t.co/lTrb1JQiEA\n,0,#ArianaGrande Ari By Ariana Grande Full #Singer #Actress
1,Ariana Grande KIIS FM Yours Truly CD listening party in Burbank https://t.co/ClQIcx8Z6V #ArianaGrande\n,1,Ariana Grande KIIS FM Yours Truly CD listening party in Burbank #ArianaGrande
2,Ariana Grande White House Easter Egg Roll in Washington https://t.co/jdjL9swWM8 #ArianaGrande\n,1,Ariana Grande White House Easter Egg Roll in Washington #ArianaGrande
3,#CD #Musics Ariana Grande Sweet Like Candy 3.4 oz 100 ML Sealed In Box 100% Authenic New https://t.co/oFmp0bOvZy… https://t.co/WIHLch9KtK\n,1,#CD #Musics Ariana Grande Sweet Like Candy oz ML Sealed In Box Authenic New
4,SIDE TO SIDE 😘 @arianagrande #sidetoside #arianagrande #musically #comunidadgay #lgbt🌈 #LOTB… https://t.co/tEd8rftAxV\n,1,SIDE TO SIDE #sidetoside #arianagrande #musically #comunidadgay #lgbt #LOTB
5,Hairspray Live! Previews at the Macy's Thanksgiving Day Parade! https://t.co/GaFTqInolL #arianagrande #televisionnbc\n,1,Hairspray Live Previews at the Macy s Thanksgiving Day Parade #arianagrande #televisionnbc
6,#LindsayLohan Is ‘Feeling Thankful’ After Blasting #ArianaGrande For Wearing ‘#TooMuch… https://t.co/Acf8ogvPxd https://t.co/EOSHtG9ay4\n,1,#LindsayLohan Is Feeling Thankful After Blasting #ArianaGrande For Wearing #TooMuch
7,I hate her but... I love her songs Dammit ._.#ArianaGrande\n,-1,I hate her but I love her songs Dammit #ArianaGrande
8,Ariana Grande 【Right There ft. Big Sean】#アリアナ #arianagrande https://t.co/4rZOKYDnut\n,0,Ariana Grande Right There ft Big Sean # #arianagrande
9,which one would you prefer to listen to for a whole day? 😍🤘🏼; i could never choose #arianagrande #IntoYou #SideToSide #songs #Poll\n,0,which one would you prefer to listen to for a whole day i could never choose #arianagrande #IntoYou #SideToSide #songs #Poll


## 3. Tokenizing and Lemmatizing tweets

In [22]:
# Whitespace-split every cleaned tweet into a list of tokens
tokenized_tweets = my_data['clean_tweets'].apply(str.split)
tokenized_tweets

0                                              [#ArianaGrande, Ari, By, Ariana, Grande, Full, #Singer, #Actress]
1                     [Ariana, Grande, KIIS, FM, Yours, Truly, CD, listening, party, in, Burbank, #ArianaGrande]
2                               [Ariana, Grande, White, House, Easter, Egg, Roll, in, Washington, #ArianaGrande]
3                     [#CD, #Musics, Ariana, Grande, Sweet, Like, Candy, oz, ML, Sealed, In, Box, Authenic, New]
4                          [SIDE, TO, SIDE, #sidetoside, #arianagrande, #musically, #comunidadgay, #lgbt, #LOTB]
                                                          ...                                                   
12374                                                                    [update, Zac, Efron, kissing, a, puppy]
12375                                                              [#zac, efron, sex, pic, skins, michelle, sex]
12376                                [First, Look, at, Neighbors, with, Zac, Efron, Shirtless, P

In [23]:
from nltk.corpus import wordnet

# One shared lemmatizer instance, reused for every token in the cells below
wordnet_lemmatizer = WordNetLemmatizer()

In [24]:
def get_wordnet_pos(word):
    """Return the WordNet POS constant for ``word``, as accepted by ``lemmatize()``.

    The word is tagged on its own with ``nltk.pos_tag``; only the upper-cased
    first letter of the resulting tag is inspected. Any tag that does not
    start with J, N, V or R falls back to noun.
    """
    penn_tag = nltk.pos_tag([word])[0][1]
    initial = penn_tag[0].upper()
    if initial == "J":
        return wordnet.ADJ
    if initial == "V":
        return wordnet.VERB
    if initial == "R":
        return wordnet.ADV
    # "N" and every unrecognised tag map to noun
    return wordnet.NOUN

In [25]:
def _lemmatize_tokens(tokens):
    """Lemmatize each token using the part of speech guessed by get_wordnet_pos."""
    return [
        wordnet_lemmatizer.lemmatize(token, get_wordnet_pos(token))
        for token in tokens
    ]

tokenized_tweets = tokenized_tweets.apply(_lemmatize_tokens)

In [26]:
tokenized_tweets.head()

0                             [#ArianaGrande, Ari, By, Ariana, Grande, Full, #Singer, #Actress]
1       [Ariana, Grande, KIIS, FM, Yours, Truly, CD, listen, party, in, Burbank, #ArianaGrande]
2              [Ariana, Grande, White, House, Easter, Egg, Roll, in, Washington, #ArianaGrande]
3    [#CD, #Musics, Ariana, Grande, Sweet, Like, Candy, oz, ML, Sealed, In, Box, Authenic, New]
4         [SIDE, TO, SIDE, #sidetoside, #arianagrande, #musically, #comunidadgay, #lgbt, #LOTB]
Name: clean_tweets, dtype: object

In [27]:
# Re-assemble each token list into one space-separated string.
# The join is vectorized and leaves `tokenized_tweets` itself untouched, so
# re-running this cell gives the same result; the former in-place loop turned
# the Series into strings, and a second run would then have joined the
# individual characters with spaces.
my_data['clean_tweets']=tokenized_tweets.apply(" ".join)

In [28]:
my_data.head(10)

Unnamed: 0,tweet,label,clean_tweets
0,#ArianaGrande Ari By Ariana Grande 80% Full https://t.co/ylhCMETHHW #Singer #Actress https://t.co/lTrb1JQiEA\n,0,#ArianaGrande Ari By Ariana Grande Full #Singer #Actress
1,Ariana Grande KIIS FM Yours Truly CD listening party in Burbank https://t.co/ClQIcx8Z6V #ArianaGrande\n,1,Ariana Grande KIIS FM Yours Truly CD listen party in Burbank #ArianaGrande
2,Ariana Grande White House Easter Egg Roll in Washington https://t.co/jdjL9swWM8 #ArianaGrande\n,1,Ariana Grande White House Easter Egg Roll in Washington #ArianaGrande
3,#CD #Musics Ariana Grande Sweet Like Candy 3.4 oz 100 ML Sealed In Box 100% Authenic New https://t.co/oFmp0bOvZy… https://t.co/WIHLch9KtK\n,1,#CD #Musics Ariana Grande Sweet Like Candy oz ML Sealed In Box Authenic New
4,SIDE TO SIDE 😘 @arianagrande #sidetoside #arianagrande #musically #comunidadgay #lgbt🌈 #LOTB… https://t.co/tEd8rftAxV\n,1,SIDE TO SIDE #sidetoside #arianagrande #musically #comunidadgay #lgbt #LOTB
5,Hairspray Live! Previews at the Macy's Thanksgiving Day Parade! https://t.co/GaFTqInolL #arianagrande #televisionnbc\n,1,Hairspray Live Previews at the Macy s Thanksgiving Day Parade #arianagrande #televisionnbc
6,#LindsayLohan Is ‘Feeling Thankful’ After Blasting #ArianaGrande For Wearing ‘#TooMuch… https://t.co/Acf8ogvPxd https://t.co/EOSHtG9ay4\n,1,#LindsayLohan Is Feeling Thankful After Blasting #ArianaGrande For Wearing #TooMuch
7,I hate her but... I love her songs Dammit ._.#ArianaGrande\n,-1,I hate her but I love her song Dammit #ArianaGrande
8,Ariana Grande 【Right There ft. Big Sean】#アリアナ #arianagrande https://t.co/4rZOKYDnut\n,0,Ariana Grande Right There ft Big Sean # #arianagrande
9,which one would you prefer to listen to for a whole day? 😍🤘🏼; i could never choose #arianagrande #IntoYou #SideToSide #songs #Poll\n,0,which one would you prefer to listen to for a whole day i could never choose #arianagrande #IntoYou #SideToSide #songs #Poll


## 4. Analysis of the tweets 

### 4.1 Most Common Words in Positive Tweets

In [33]:
pip install wordcloud

Collecting wordcloud
  Using cached wordcloud-1.8.1.tar.gz (220 kB)
  Preparing metadata (setup.py): started
  Preparing metadata (setup.py): finished with status 'done'


  error: subprocess-exited-with-error
  
  python setup.py bdist_wheel did not run successfully.
  exit code: 1
  
  [59 lines of output]
  running bdist_wheel
  running build
  running build_py
  UPDATING build\lib.win-amd64-3.9\wordcloud/_version.py
  set build\lib.win-amd64-3.9\wordcloud/_version.py to '1.8.1'
  running build_ext



Building wheels for collected packages: wordcloud
  Building wheel for wordcloud (setup.py): started
  Building wheel for wordcloud (setup.py): finished with status 'error'
  Running setup.py clean for wordcloud
Failed to build wordcloud
Installing collected packages: wordcloud
  Running setup.py install for wordcloud: started
  Running setup.py install for wordcloud: finished with status 'error'


  building 'wordcloud.query_integral_image' extension
  Traceback (most recent call last):
    File "<string>", line 2, in <module>
    File "<pip-setuptools-caller>", line 34, in <module>
    File "C:\Users\Beck\AppData\Local\Temp\pip-install-lum5ykk1\wordcloud_6905e65376c94876b3913dc048a3c89e\setup.py", line 9, in <module>
      setup(
    File "C:\Users\Beck\projectDir\venv\lib\site-packages\setuptools\__init__.py", line 155, in setup
      return distutils.core.setup(**attrs)
    File "C:\Users\Beck\projectDir\venv\lib\site-packages\setuptools\_distutils\core.py", line 148, in setup
      return run_commands(dist)
    File "C:\Users\Beck\projectDir\venv\lib\site-packages\setuptools\_distutils\core.py", line 163, in run_commands
      dist.run_commands()
    File "C:\Users\Beck\projectDir\venv\lib\site-packages\setuptools\_distutils\dist.py", line 967, in run_commands
      self.run_command(cmd)
    File "C:\Users\Beck\projectDir\venv\lib\site-packages\setuptools\_distutils\dist.py"

In [34]:
# Labels were read from the file as text; convert them to numbers first
my_data["label"] = pd.to_numeric(my_data["label"])

# Concatenate the cleaned text of every tweet labelled 1 into one string
is_label_one = my_data["label"] == 1
all_non_words = " ".join(my_data.loc[is_label_one, "clean_tweets"])

In [35]:
all_non_words

'Ariana Grande KIIS FM Yours Truly CD listen party in Burbank #ArianaGrande Ariana Grande White House Easter Egg Roll in Washington #ArianaGrande #CD #Musics Ariana Grande Sweet Like Candy oz ML Sealed In Box Authenic New SIDE TO SIDE #sidetoside #arianagrande #musically #comunidadgay #lgbt #LOTB Hairspray Live Previews at the Macy s Thanksgiving Day Parade #arianagrande #televisionnbc #LindsayLohan Is Feeling Thankful After Blasting #ArianaGrande For Wearing #TooMuch My idol be #littlemix #justinbieber #arianagrande #Beauty #ArianaGrande CHRISTMAS CHILL JAPAN ONLY #CD BONUS TRACK C #Deals #TopSeller #Beauty #ArianaGrande THE REMIX JAPAN ONLY #CD E #Deals #TopSeller One last time #ArianaGrande so much love for this woman ughh #arianagrande thanks God it s Friday #Thanksgiving #arianagrande #arianator New on Big fan of and #ArianaGrande #NickiMinaj #Barbies #Barbz #Arianators Love it #Chicago #cop gun down #BlackLivesMatter thug in #Chicago #LOL #NAACP #BlueLivesMatter #BlackTwitter Kee

In [36]:
from wordcloud import WordCloud, STOPWORDS

ModuleNotFoundError: No module named 'wordcloud'

In [None]:
# Word cloud of the positive-tweet text assembled in `all_non_words`.
# - `STOPWORDS` is the stop-word set shipped with the wordcloud package; the
#   name `stopwords` in this notebook is the nltk corpus object, not a set of words.
# - The text passed to generate() was previously `comment_words`, a name that
#   is never defined in this notebook.
wordcloud = WordCloud(width = 800, height = 800,
                background_color ='white',
                stopwords = STOPWORDS,
                min_font_size = 10).generate(all_non_words)
 
# plot the WordCloud image                      
plt.figure(figsize = (8, 8), facecolor = None)
plt.imshow(wordcloud)
plt.axis("off")
plt.tight_layout(pad = 0)
 
plt.show()

In [25]:
import sys
print(sys.executable)

C:\Users\Beck\projectDir\venv\Scripts\python.exe


In [26]:
#from wordcloud import WordCloud
#wordcloud=WordCloud(width=800,height=500,random_state=21,max_font_size=110).generate(all_non_words)
#fig=plt.figure(figsize=(10,7))
#plt.imshow(wordcloud,interpolation="bilinear")
#plt.axis("off")
#plt.show()

#### Most Common Words in Negative Tweets

In [27]:
# Concatenated text of every tweet labelled -1 (negative sentiment).
# The cleaned text lives in the `clean_tweets` column; `cl_tweets` is never
# created in this notebook.
all_racistwords=" ".join(my_data[my_data['label']==-1].clean_tweets)
wrd_cld=WordCloud(height=500,width=800,random_state=21,max_font_size=110).generate(all_racistwords)
fig=plt.figure(figsize=(10,7))
plt.imshow(wrd_cld,interpolation="bilinear")
plt.axis("off")
plt.show()

NameError: name 'WordCloud' is not defined

##### Checking the impact of Hashtags

In [None]:
## 1) Positive Tweets
positive=[]
for x in my_data[my_data["label"]==1].cl_tweets:
    all_hashtags=re.findall(r"#[\w]+",x)
    positive.append(all_hashtags)

In [None]:
# Flatten the per-tweet hashtag lists into one flat list in a single pass
# (sum(list_of_lists, []) copies the accumulator on every step).
# Items that are already plain strings are kept as-is, so re-running this
# cell is harmless.
positive=[
    tag
    for tags in positive
    for tag in (tags if isinstance(tags, list) else [tags])
]
positive[:7]

In [None]:
## 2) Negative Tweets
negative=[]
for x in my_data[my_data["label"]==-1].cl_tweets:
    all_hashtags=re.findall("#[\w]+",x)
    negative.append(all_hashtags)

In [None]:
negative[:6]

In [None]:
# Flatten the per-tweet hashtag lists into one flat list in a single pass
# (sum(list_of_lists, []) copies the accumulator on every step).
# Items that are already plain strings are kept as-is, so re-running this
# cell is harmless.
negative=[
    tag
    for tags in negative
    for tag in (tags if isinstance(tags, list) else [tags])
]

In [None]:
# Frequency distribution of the hashtags collected from positive tweets
a= nltk.FreqDist(positive)

In [None]:
# Two-column table: one row per distinct hashtag with its occurrence count
hashtag_counts = list(a.items())
df_positive = pd.DataFrame(hashtag_counts, columns=["Hashtags", "Count"])

In [None]:
df_positive.head()

In [None]:
# Same hashtag/count table for the negative tweets
b = nltk.FreqDist(negative)
df_negative = pd.DataFrame(list(b.items()), columns=["Hashtags", "Count"])
df_negative.head(10)

In [None]:
# Keep only the 20 most frequent hashtags of each class for plotting
df_positive = df_positive.nlargest(20, "Count")
df_negative = df_negative.nlargest(20, "Count")

### Plotting the top 20 hashtags in Positive tweets

In [None]:
# Bar chart of the most frequent hashtags in positive tweets
sns.set_theme(style="whitegrid")
fig, ax = plt.subplots(figsize=(20, 8))
sns.barplot(x="Hashtags", y="Count", data=df_positive, palette="Blues_d", ax=ax)
ax.set_title("Top 20 Hashtags for Positive Tweets")
plt.rc('font', size=16)
# Hashtags are long; vertical tick labels keep them from overlapping
plt.xticks(rotation=90)
plt.show()

In [None]:
# Bar chart of the most frequent hashtags in negative tweets
sns.set_theme(style="whitegrid")
fig, ax = plt.subplots(figsize=(20, 9))
sns.barplot(x="Hashtags", y="Count", data=df_negative, palette="Blues_d", ax=ax)
ax.set_title("Top 20 Hashtags for Negative Tweets")
plt.rc('font', size=14)
# Hashtags are long; vertical tick labels keep them from overlapping
plt.xticks(rotation=90)
plt.show()

##### Extracting Features using various approaches

In [None]:
from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
import gensim

#### Using Bag-of-Words Approach

In [None]:
# Bag-of-words counts over the cleaned tweets: drop terms that appear in more
# than 90% of tweets or in fewer than 2, drop English stop words, and keep at
# most 1000 features.
# Reads `clean_tweets`, the column this notebook actually creates.
bag_of_words=CountVectorizer(max_df=0.90,min_df=2,max_features=1000,stop_words="english")
bag_words=bag_of_words.fit_transform(my_data["clean_tweets"])

#### Using TF-IDF Approach

In [None]:
# TF-IDF weights over the cleaned tweets, with the same vocabulary limits as
# the bag-of-words vectorizer.
# Reads `clean_tweets`, the column this notebook actually creates.
tfidf_vectorizer = TfidfVectorizer(max_df=0.90, min_df=2, max_features=1000, stop_words='english')
tfidf = tfidf_vectorizer.fit_transform(my_data["clean_tweets"])
tfidf.shape

#### Using Word2Vec Approach

In [None]:
# Reads `clean_tweets`, the column this notebook actually creates.
tokenized_tweet = my_data["clean_tweets"].apply(lambda x: x.split()) # tokenizing

model_w2v = gensim.models.Word2Vec(
            tokenized_tweet,
            vector_size=200, # desired no. of features/independent variables 
            window=5, # context window size
            min_count=2,
            sg = 1, # 1 for skip-gram model
            hs = 0,
            negative = 10, # for negative sampling
            workers= 2, # no.of cores
            seed = 34)

# NOTE(review): passing the corpus to the constructor above already builds the
# vocabulary and trains the model, so this call continues training for 20 more
# epochs -- confirm that the extra passes are intended.
# NOTE(review): with workers > 1 the result may vary between runs despite the
# fixed seed -- confirm against the gensim documentation if exact
# reproducibility matters.
model_w2v.train(tokenized_tweet, total_examples= len(tokenized_tweet), epochs=20)

In [None]:
def word_vector(tokens, size, model=None):
    """Average the word embeddings of ``tokens`` into a single ``(1, size)`` row.

    Parameters
    ----------
    tokens : iterable of str
        Words of one tweet.
    size : int
        Length of each embedding vector; must match the model's vector size.
    model : object, optional
        Anything exposing a ``.wv`` mapping from word to vector. Defaults to
        the notebook-level ``model_w2v``.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(1, size)`` holding the mean vector of the tokens
        found in the vocabulary, or all zeros when none is found.
    """
    # Vectors are looked up through `.wv`: with the gensim 4 API used in this
    # notebook (`vector_size=`), indexing the model object itself raises
    # TypeError, which the KeyError handler below would not catch.
    keyed_vectors = (model_w2v if model is None else model).wv
    vec = np.zeros((1, size))
    count = 0
    for word in tokens:
        try:
            vec += keyed_vectors[word].reshape((1, size))
        except KeyError:
            # token is not in the vocabulary (e.g. dropped by min_count)
            continue
        count += 1
    if count:
        vec /= count
    return vec

In [None]:
wordvec_arrays = np.zeros((len(tokenized_tweet), 200))

In [None]:
for i in range(len(tokenized_tweet)):
    print(i, tokenized_tweet[i])
    break

In [None]:
# One averaged Word2Vec row per tweet (200 matches the model's vector_size).
# The call to word_vector had been commented out, which left every feature at
# its initial value of zero.
wordvec_arrays = np.zeros((len(tokenized_tweet), 200))
for i, tokens in enumerate(tokenized_tweet):
    wordvec_arrays[i, :] = word_vector(tokens, 200)

wordvec_df = pd.DataFrame(wordvec_arrays)
wordvec_df.shape

In [None]:
 vec += model_w2v.wv[word].reshape((1, size))

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score,accuracy_score

#### Building Model through Logistic Regression on Bag-of-Words Approach

In [None]:

# splitting data into training and validation set (30% held out, fixed seed)
xtrain_bow, xvalid_bow, ytrain, yvalid = train_test_split(bag_words,my_data['label'],  
                                                          random_state=42, 
                                                          test_size=0.3)
lreg = LogisticRegression()
lreg.fit(xtrain_bow, ytrain) # training the model

prediction = lreg.predict_proba(xvalid_bow) # one probability column per entry of lreg.classes_
# The labels have five classes (-2 .. 2), so thresholding a single probability
# column into 0/1 does not yield class labels. Pick the most probable class
# for each row instead. This also removes the `np.int` alias, which no longer
# exists in current NumPy.
prediction_int = lreg.classes_[prediction.argmax(axis=1)]


print("Accuracy Score ",accuracy_score(yvalid,prediction_int))

#### Building Model through Logistic Regression on TF-IDF Approach

In [None]:
# Select the TF-IDF rows belonging to the existing train/validation split.
# NOTE(review): uses the index labels as row positions -- assumes my_data has
# the default 0..n-1 index.
xtrain_tfidf = tfidf[ytrain.index]
xvalid_tfidf = tfidf[yvalid.index]
lreg.fit(xtrain_tfidf, ytrain)

prediction = lreg.predict_proba(xvalid_tfidf) # one probability column per entry of lreg.classes_
# The labels have five classes (-2 .. 2), so thresholding a single probability
# column into 0/1 does not yield class labels. Pick the most probable class
# for each row instead. This also removes the `np.int` alias, which no longer
# exists in current NumPy.
prediction_int = lreg.classes_[prediction.argmax(axis=1)]
print("Accuracy Score",accuracy_score(yvalid, prediction_int))

#### Building Model through Logistic Regression on Word2Vec Approach

In [None]:
# Select the Word2Vec feature rows belonging to the existing train/validation
# split.
# NOTE(review): uses the index labels as row positions -- assumes my_data has
# the default 0..n-1 index.
xtrain_w2v = wordvec_df.iloc[ytrain.index,:]
xvalid_w2v = wordvec_df.iloc[yvalid.index,:]

lreg.fit(xtrain_w2v, ytrain)

prediction = lreg.predict_proba(xvalid_w2v) # one probability column per entry of lreg.classes_
# The labels have five classes (-2 .. 2), so thresholding a single probability
# column into 0/1 does not yield class labels. Pick the most probable class
# for each row instead. This also removes the `np.int` alias, which no longer
# exists in current NumPy.
prediction_int = lreg.classes_[prediction.argmax(axis=1)]
print("Accuracy Score",accuracy_score(yvalid, prediction_int))

#### Building Model Through SVM on Bag Of Words Approach

In [None]:
from sklearn import svm

# Linear-kernel support vector classifier on the bag-of-words features
svc = svm.SVC(C=1, kernel='linear')
svc.fit(xtrain_bow, ytrain)

# Accuracy of the predicted labels on the validation split
prediction = svc.predict(xvalid_bow)
svc_accuracy = accuracy_score(yvalid, prediction)
print("Accuracy Score", svc_accuracy)

#### Building Model Through SVM on TF-IDF

In [None]:
# Linear-kernel support vector classifier on the TF-IDF features
svc = svm.SVC(C=1, kernel='linear')
svc.fit(xtrain_tfidf, ytrain)

# Accuracy of the predicted labels on the validation split
prediction = svc.predict(xvalid_tfidf)
svc_accuracy = accuracy_score(yvalid, prediction)
print("Accuracy Score", svc_accuracy)

#### Building Model Through SVM on Word2Vec

In [None]:
# Linear-kernel support vector classifier on the averaged Word2Vec features
svc = svm.SVC(C=1, kernel='linear')
svc.fit(xtrain_w2v, ytrain)

# Accuracy of the predicted labels on the validation split
prediction = svc.predict(xvalid_w2v)
svc_accuracy = accuracy_score(yvalid, prediction)
print("Accuracy Score", svc_accuracy)

#### Building Model through Random Forest On Bag Of Words Approach

In [None]:
from sklearn.ensemble import RandomForestClassifier

# 400-tree random forest (fixed seed) on the bag-of-words features
rf = RandomForestClassifier(random_state=11, n_estimators=400)
rf.fit(xtrain_bow, ytrain)

# Accuracy of the predicted labels on the validation split
prediction = rf.predict(xvalid_bow)
rf_accuracy = accuracy_score(yvalid, prediction)
print("Accuracy Score ", rf_accuracy)

#### Building Model through Random Forest On TF-IDF Approach

In [None]:
# 400-tree random forest (fixed seed) on the TF-IDF features
rf = RandomForestClassifier(random_state=11, n_estimators=400)
rf.fit(xtrain_tfidf, ytrain)

# Accuracy of the predicted labels on the validation split
prediction = rf.predict(xvalid_tfidf)
rf_accuracy = accuracy_score(yvalid, prediction)
print("Accuracy Score ", rf_accuracy)

#### Building Model through Random Forest On Word2Vec Approach

In [None]:
# 400-tree random forest (fixed seed) on the averaged Word2Vec features
rf = RandomForestClassifier(random_state=11, n_estimators=400)
rf.fit(xtrain_w2v, ytrain)

# Accuracy of the predicted labels on the validation split
prediction = rf.predict(xvalid_w2v)
rf_accuracy = accuracy_score(yvalid, prediction)
print("Accuracy Score ", rf_accuracy)

#### Building Model through Naive Bayes on Bag-of-Words Approach

In [None]:
from sklearn.naive_bayes import MultinomialNB
from sklearn.naive_bayes import GaussianNB

# Multinomial naive Bayes on the bag-of-words counts
nb = MultinomialNB()
nb.fit(xtrain_bow, ytrain)

# Accuracy of the predicted labels on the validation split
prediction = nb.predict(xvalid_bow)
nb_accuracy = accuracy_score(yvalid, prediction)
print("Accuracy Score ", nb_accuracy)

#### Building Model through Naive Bayes on TF-IDF Approach

In [None]:
# Multinomial naive Bayes on the TF-IDF features
nb = MultinomialNB()
nb.fit(xtrain_tfidf, ytrain)

# Accuracy of the predicted labels on the validation split
prediction = nb.predict(xvalid_tfidf)
nb_accuracy = accuracy_score(yvalid, prediction)
print("Accuracy Score ", nb_accuracy)

#### Building Model through Naive Bayes on Word2Vec Approach

In [None]:
# Gaussian naive Bayes on the averaged Word2Vec features
nb = GaussianNB()
nb.fit(xtrain_w2v, ytrain)

# Accuracy of the predicted labels on the validation split
prediction = nb.predict(xvalid_w2v)
nb_accuracy = accuracy_score(yvalid, prediction)
print("Accuracy Score", nb_accuracy)