# Sentiment Analysis using Naive Bayes Algorithms

In [78]:
# Importing Libraries

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [79]:
rev = pd.read_csv("rev.csv")

In [80]:
rev.head()

Unnamed: 0,review,sentiment
0,One of the other reviewers has mentioned that ...,positive
1,A wonderful little production. <br /><br />The...,positive
2,I thought this was a wonderful way to spend ti...,positive
3,Basically there's a family where a little boy ...,negative
4,"Petter Mattei's ""Love in the Time of Money"" is...",positive


In [81]:
# First row of review
rev["review"][0]

"One of the other reviewers has mentioned that after watching just 1 Oz episode you'll be hooked. They are right, as this is exactly what happened with me.<br /><br />The first thing that struck me about Oz was its brutality and unflinching scenes of violence, which set in right from the word GO. Trust me, this is not a show for the faint hearted or timid. This show pulls no punches with regards to drugs, sex or violence. Its is hardcore, in the classic use of the word.<br /><br />It is called OZ as that is the nickname given to the Oswald Maximum Security State Penitentary. It focuses mainly on Emerald City, an experimental section of the prison where all the cells have glass fronts and face inwards, so privacy is not high on the agenda. Em City is home to many..Aryans, Muslims, gangstas, Latinos, Christians, Italians, Irish and more....so scuffles, death stares, dodgy dealings and shady agreements are never far away.<br /><br />I would say the main appeal of the show is due to the fa

In [82]:
rev["sentiment"].value_counts()

negative    25000
positive    25000
Name: sentiment, dtype: int64

In [83]:
# rev["sentiment"].value_counts().plot.bar()

In [84]:
rev["sentiment"].replace({"positive":1,"negative":0 }, inplace = True)

In [85]:
rev["review"]

0        One of the other reviewers has mentioned that ...
1        A wonderful little production. <br /><br />The...
2        I thought this was a wonderful way to spend ti...
3        Basically there's a family where a little boy ...
4        Petter Mattei's "Love in the Time of Money" is...
                               ...                        
49995    I thought this movie did a down right good job...
49996    Bad plot, bad dialogue, bad acting, idiotic di...
49997    I am a Catholic taught in parochial elementary...
49998    I'm going to have to disagree with the previou...
49999    No one expects the Star Trek movies to be high...
Name: review, Length: 50000, dtype: object

In [86]:
# We have certain characters like < , >, / , which has to be cleaned

In [87]:
import re

In [88]:
# Creating a function to clean <>.* characters

def clean(text):
    rep = re.compile(r"<.*>")
    return re.sub(rep,"",text)

In [89]:
rev["review"].apply(clean)

0        One of the other reviewers has mentioned that ...
1        A wonderful little production. The realism rea...
2        I thought this was a wonderful way to spend ti...
3        Basically there's a family where a little boy ...
4        Petter Mattei's "Love in the Time of Money" is...
                               ...                        
49995    I thought this movie did a down right good job...
49996    Bad plot, bad dialogue, bad acting, idiotic di...
49997    I am a Catholic taught in parochial elementary...
49998    I'm going to have to disagree with the previou...
49999    No one expects the Star Trek movies to be high...
Name: review, Length: 50000, dtype: object

In [90]:
rev["review"] = rev["review"].apply(clean)

In [91]:
rev["review"][0]

"One of the other reviewers has mentioned that after watching just 1 Oz episode you'll be hooked. They are right, as this is exactly what happened with me.I would say the main appeal of the show is due to the fact that it goes where other shows wouldn't dare. Forget pretty pictures painted for mainstream audiences, forget charm, forget romance...OZ doesn't mess around. The first episode I ever saw struck me as so nasty it was surreal, I couldn't say I was ready for it, but as I watched more, I developed a taste for Oz, and got accustomed to the high levels of graphic violence. Not just violence, but injustice (crooked guards who'll be sold out for a nickel, inmates who'll kill on order and get away with it, well mannered, middle class inmates being turned into prison bitches due to their lack of street skills or prison experience) Watching Oz, you may become comfortable with what is uncomfortable viewing....thats if you can get in touch with your darker side."

In [92]:
# Creating a function to clean punctuation marks

def pure(text):
    st = ""
    for i in text:
        if i.isalnum(): 
            st+=i
        else:
            st += " "
    return st

In [93]:
pure(rev["review"][0])

'One of the other reviewers has mentioned that after watching just 1 Oz episode you ll be hooked  They are right  as this is exactly what happened with me I would say the main appeal of the show is due to the fact that it goes where other shows wouldn t dare  Forget pretty pictures painted for mainstream audiences  forget charm  forget romance   OZ doesn t mess around  The first episode I ever saw struck me as so nasty it was surreal  I couldn t say I was ready for it  but as I watched more  I developed a taste for Oz  and got accustomed to the high levels of graphic violence  Not just violence  but injustice  crooked guards who ll be sold out for a nickel  inmates who ll kill on order and get away with it  well mannered  middle class inmates being turned into prison bitches due to their lack of street skills or prison experience  Watching Oz  you may become comfortable with what is uncomfortable viewing    thats if you can get in touch with your darker side '

In [94]:
rev["review"] = rev["review"].apply(pure)

In [95]:
rev.head()

Unnamed: 0,review,sentiment
0,One of the other reviewers has mentioned that ...,1
1,A wonderful little production The realism rea...,1
2,I thought this was a wonderful way to spend ti...,1
3,Basically there s a family where a little boy ...,0
4,Petter Mattei s Love in the Time of Money is...,1


In [96]:
# transforming into lowercase
rev["review"] = rev["review"].str.lower()

In [97]:
# Removing stopwords

# stop words are most common words in any language (like articles, prepositions, pronouns, conjunctions, etc)
# and does not add much information to the text.

In [98]:
from nltk.corpus import stopwords

In [99]:
import nltk

In [100]:
# If stopwords are not downloaded

# nltk.download('stopwords')

In [101]:
stop_words = stopwords.words("english")

In [102]:
print(stop_words)

['i', 'me', 'my', 'myself', 'we', 'our', 'ours', 'ourselves', 'you', "you're", "you've", "you'll", "you'd", 'your', 'yours', 'yourself', 'yourselves', 'he', 'him', 'his', 'himself', 'she', "she's", 'her', 'hers', 'herself', 'it', "it's", 'its', 'itself', 'they', 'them', 'their', 'theirs', 'themselves', 'what', 'which', 'who', 'whom', 'this', 'that', "that'll", 'these', 'those', 'am', 'is', 'are', 'was', 'were', 'be', 'been', 'being', 'have', 'has', 'had', 'having', 'do', 'does', 'did', 'doing', 'a', 'an', 'the', 'and', 'but', 'if', 'or', 'because', 'as', 'until', 'while', 'of', 'at', 'by', 'for', 'with', 'about', 'against', 'between', 'into', 'through', 'during', 'before', 'after', 'above', 'below', 'to', 'from', 'up', 'down', 'in', 'out', 'on', 'off', 'over', 'under', 'again', 'further', 'then', 'once', 'here', 'there', 'when', 'where', 'why', 'how', 'all', 'any', 'both', 'each', 'few', 'more', 'most', 'other', 'some', 'such', 'no', 'nor', 'not', 'only', 'own', 'same', 'so', 'than', '

In [103]:
# Tokenizing sentences : means to split each word in a sentence, or we can also use split()

In [104]:
from nltk.tokenize import word_tokenize

In [105]:
print(word_tokenize(rev["review"][0]))

['one', 'of', 'the', 'other', 'reviewers', 'has', 'mentioned', 'that', 'after', 'watching', 'just', '1', 'oz', 'episode', 'you', 'll', 'be', 'hooked', 'they', 'are', 'right', 'as', 'this', 'is', 'exactly', 'what', 'happened', 'with', 'me', 'i', 'would', 'say', 'the', 'main', 'appeal', 'of', 'the', 'show', 'is', 'due', 'to', 'the', 'fact', 'that', 'it', 'goes', 'where', 'other', 'shows', 'wouldn', 't', 'dare', 'forget', 'pretty', 'pictures', 'painted', 'for', 'mainstream', 'audiences', 'forget', 'charm', 'forget', 'romance', 'oz', 'doesn', 't', 'mess', 'around', 'the', 'first', 'episode', 'i', 'ever', 'saw', 'struck', 'me', 'as', 'so', 'nasty', 'it', 'was', 'surreal', 'i', 'couldn', 't', 'say', 'i', 'was', 'ready', 'for', 'it', 'but', 'as', 'i', 'watched', 'more', 'i', 'developed', 'a', 'taste', 'for', 'oz', 'and', 'got', 'accustomed', 'to', 'the', 'high', 'levels', 'of', 'graphic', 'violence', 'not', 'just', 'violence', 'but', 'injustice', 'crooked', 'guards', 'who', 'll', 'be', 'sold'

In [106]:
# Creating a function to clean stopwords

sw = stopwords.words("english")
def rem_sw(text):
    lst = []
    token = word_tokenize(text)
    for i in token:
        if i not in sw:
            lst.append(i)
    return lst

In [107]:
rev["review"].apply(rem_sw)

0        [one, reviewers, mentioned, watching, 1, oz, e...
1        [wonderful, little, production, realism, reall...
2        [thought, wonderful, way, spend, time, hot, su...
3        [basically, family, little, boy, jake, thinks,...
4        [petter, mattei, love, time, money, visually, ...
                               ...                        
49995    [thought, movie, right, good, job, creative, o...
49996    [bad, plot, bad, dialogue, bad, acting, idioti...
49997    [catholic, taught, parochial, elementary, scho...
49998    [going, disagree, previous, comment, side, mal...
49999    [one, expects, star, trek, movies, high, art, ...
Name: review, Length: 50000, dtype: object

In [108]:
rev["review"] = rev["review"].apply(rem_sw)

In [109]:
# Stemming the words:

# Stemming means to reduce words in their base form

# The words care, cared and caring lie under the same stem ‘care’.

In [110]:
from nltk.stem import SnowballStemmer

In [111]:
ss = SnowballStemmer("english")

In [112]:
def stem_text(text_lst):
    lst = []
    for i in text_lst:
        lst.append(ss.stem(i))
    return " ".join(lst)

In [113]:
stem_text(rev["review"][0])

'one review mention watch 1 oz episod hook right exact happen would say main appeal show due fact goe show dare forget pretti pictur paint mainstream audienc forget charm forget romanc oz mess around first episod ever saw struck nasti surreal say readi watch develop tast oz got accustom high level graphic violenc violenc injustic crook guard sold nickel inmat kill order get away well manner middl class inmat turn prison bitch due lack street skill prison experi watch oz may becom comfort uncomfort view that get touch darker side'

In [114]:
rev["review"] = rev["review"].apply(stem_text)

In [115]:
rev.head()

Unnamed: 0,review,sentiment
0,one review mention watch 1 oz episod hook righ...,1
1,wonder littl product realism realli come home ...,1
2,thought wonder way spend time hot summer weeke...,1
3,basic famili littl boy jake think zombi closet...,0
4,petter mattei love time money visual stun film...,1


In [116]:
from sklearn.feature_extraction.text import CountVectorizer

In [117]:
# CountVectorizer creates a matrix in which each unique word is represented by a column of the matrix
# , and each text sample from the document is a row in the matrix.
# The value of each cell is the count of the word in that particular text sample

In [118]:
cv = CountVectorizer(max_features = 1000)

In [119]:
x = cv.fit_transform(rev["review"]).toarray()

In [120]:
x.shape

(50000, 1000)

In [121]:
y = rev["sentiment"]

In [122]:
from sklearn.model_selection import train_test_split

In [123]:
x_train,x_test, y_train, y_test = train_test_split(x,y, test_size = 0.2, random_state = 2022)

In [124]:
from sklearn.naive_bayes import MultinomialNB, GaussianNB, BernoulliNB

In [125]:
m,g,b = MultinomialNB(),GaussianNB(), BernoulliNB()

In [126]:
from sklearn.metrics import accuracy_score

In [127]:
# Applying MultinomialNB

m.fit(x_train, y_train)
predict = m.predict(x_test)
actual = y_test
accuracy_score(predict, actual)*100

81.28

In [128]:
# Applying GaussianNB

g.fit(x_train, y_train)
predict = g.predict(x_test)
actual = y_test
accuracy_score(predict, actual)*100

78.11

In [129]:
# Applying BernoulliNB

b.fit(x_train, y_train)
predict = b.predict(x_test)
actual = y_test
accuracy_score(predict, actual)*100

81.56