##### Import the basic libraries

In [1]:
import pandas as pd
import numpy as np

##### Load the dataset into a DataFrame

In [2]:
# Load the raw reviews; the preview below shows a `text` column and a `label` column.
df = pd.read_csv('movie.csv')

In [3]:
# Keep only the first 500 rows for the rest of the notebook, then preview the top of the frame.
df = df.head(500)
df.head()

Unnamed: 0,text,label
0,I grew up (b. 1965) watching and loving the Th...,0
1,"When I put this movie in my DVD player, and sa...",0
2,Why do people who do not know what a particula...,0
3,Even though I have great interest in Biblical ...,0
4,Im a die hard Dads Army fan and nothing will e...,1


##### Data Cleaning

In [24]:
import re
from nltk.corpus import stopwords
from nltk.stem.porter import PorterStemmer

# Imported here because it is only needed for the one-time corpus download fallback below.
import nltk

# Stemmer used to reduce each token to its root form during cleaning.
ps = PorterStemmer()

# English stop words as a set for fast membership tests.
# On a machine where the NLTK 'stopwords' corpus has never been downloaded,
# the lookup raises LookupError; fetch the corpus once and retry so the
# notebook survives Restart & Run All in a fresh environment.
try:
    all_stopwords = set(stopwords.words('english'))
except LookupError:
    nltk.download('stopwords')
    all_stopwords = set(stopwords.words('english'))

In [25]:
# Clean every review: keep letters only, lower-case, drop stop words,
# stem the remaining tokens and re-join them with single spaces.
sentence = []

for raw_text in df.text:
    tokens = re.sub('[^a-zA-Z]', ' ', raw_text).lower().split()
    stems = [ps.stem(token) for token in tokens if token not in all_stopwords]
    sentence.append(' '.join(stems))

In [26]:
# Display the cleaned reviews (one stemmed, space-joined string per review).
sentence

['grew b watch love thunderbird mate school watch play thunderbird school lunch school want virgil scott one want alan count becam art form took children see movi hope would get glimps love child bitterli disappoint high point snappi theme tune could compar origin score thunderbird thank earli saturday morn one televis channel still play rerun seri gerri anderson wife creat jonatha frake hand director chair version complet hopeless wast film utter rubbish cgi remak may accept replac marionett homo sapien subsp sapien huge error judgment',
 'put movi dvd player sat coke chip expect hope movi would contain strong point first movi awsom anim good flow stori excel voic cast funni comedi kick ass soundtrack disappoint found atlanti milo return read review first might let follow paragraph direct seen first movi enjoy primarili point mention br br first scene appear shock pick atlanti milo return display case local videoshop whatev expect music feel bad imit first movi voic cast replac fit on

##### Data Transformation

In [27]:
from sklearn.feature_extraction.text import CountVectorizer
# Bag-of-words vectorizer limited to 1420 features.
# NOTE(review): 1420 is a magic number with no stated rationale — confirm how it was chosen.
cv = CountVectorizer(max_features = 1420)

In [28]:
# Learn the vocabulary from the cleaned reviews and build the dense count matrix.
# NOTE(review): the vocabulary is fitted on all rows before the train/test split below.
counts = cv.fit_transform(sentence)
x = counts.toarray()
x.shape

(500, 1420)

In [29]:
# Target vector: the last column of df (`label` in the preview above).
y = df.iloc[:,-1].values

In [30]:
from sklearn.model_selection import train_test_split
# Hold out 20% of the rows for evaluation; random_state is fixed at 42.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.20, random_state = 42)

##### Model training

In [31]:
from sklearn.naive_bayes import GaussianNB
# Fit a Gaussian naive Bayes model on the training split; fit() returns the
# estimator itself, so construction and training can be chained.
classifier = GaussianNB().fit(x_train, y_train)
classifier

GaussianNB()

In [32]:
# Display the held-out feature matrix.
x_test

array([[0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       ...,
       [0, 0, 0, ..., 2, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0]], dtype=int64)

##### Testing model performance

In [33]:
# Predict a label for every held-out review.
y_pred = classifier.predict(x_test)

In [34]:
# Display the predicted labels for the held-out reviews.
y_pred

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1,
       0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0,
       0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1,
       1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1,
       0, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1], dtype=int64)

In [35]:
from sklearn.metrics import confusion_matrix, accuracy_score
# Compare true and predicted labels on the held-out set.
# Per scikit-learn's convention, rows are the true classes and columns the predicted ones.
cm = confusion_matrix(y_test, y_pred)
print(cm)

[[43 17]
 [14 26]]


In [36]:
# Fraction of held-out reviews whose predicted label matches the true label.
accuracy_score(y_test, y_pred)

0.69

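##### Predictions on new reviews

Note: the first review below has the same opening text as row 2 of `movie.csv` shown in the preview above, so it appears to be part of the data the model was built from rather than unseen data.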

In [44]:
# Raw review texts to score with the trained model.
# Quotation marks inside a review are written as \" — the CSV-style doubled
# quote ("") is parsed by Python as two adjacent string literals being
# concatenated, which silently drops the quote characters from the text.
reviews = [
    "Why do people who do not know what a particular time in the past was like feel the need to try to define that time for others? Replace Woodstock with the Civil War and the Apollo moon-landing with the Titanic sinking and you've got as realistic a flick as this formulaic soap opera populated entirely by low-life trash. Is this what kids who were too young to be allowed to go to Woodstock and who failed grade school composition do? \"I'll show those old meanies, I'll put out my own movie and prove that you don't have to know nuttin about your topic to still make money!\" Yeah, we already know that. The one thing watching this film did for me was to give me a little insight into underclass thinking. The next time I see a slut in a bar who looks like Diane Lane, I'm running the other way. It's child abuse to let parents that worthless raise kids. It's audience abuse to simply stick Woodstock and the moonlanding into a flick as if that ipso facto means the film portrays 1969.",
    "Amateur, no budget films can be surprisingly good ... this however is not one of them.<br /><br />Ah, another Brad Sykes atrocity. The acting is hideous, except for Emmy Smith who shows some promise. The camera \"direction\" needs serious reworking. And no more \"hold the camera and run\" gimmicks either; it just doesn't work. The special effects are unimaginative, there's a problem when the effect can be identified in real time. If you're going to rip off an ear, please don't let us see the actor's real ear beneath the blood. The scenery is bland and boring (same as Mr. Sykes other ventures), and the music is a cross between cheap motel porn and really bad guitar driven metal (see the scenery comment).<br /><br />Did I mention the lack of any real plot, or character development? Apparently, the scriptwriter didn't.<br /><br />Whoever is funding this guy ... please stop. I've seen some of his other \"home movies\" (which I will not plug) and they are just as bad. Normally, a \"director\" will grow and learn from his previous efforts ... not this guy. It's one thing to be an amateur filmmaker, but anyone can be a hack.<br /><br />Definitely not even a popcorn film ... of course, chewing on popcorn kernels would be less painful than this effort.<br /><br />Award: The worst ever military push-ups in a film.",
    "I had no idea what this film was about or even knew that it existed until about 1 month ago when I stumbled upon when I was searching for other films that stared Dominic Monaghan. I thought this film was a strange insight into the mind of a none sleeper and what his/her mind may be going through in the hours that they spend awake when the rest of the world around them is asleep,it was an interesting film and a good part was played by Dom.......I believe that even though this film you cannot buy anywhere (well I've never seen it anywhere) you must see it if you ever get the chance because it will really make you think about those people around us that cannot sleep and have to suffer night after night of not been able to sleep or only get about 1 hour of sleep every night so overall it was an interesting film of good substance.",
    "I have no idea why people are so crazy about the show. It is so boring. The jokes are not even close to what we usually say funny. It's like, Alex say something that is not funny nor interesting and then suddenly there's a laughing sound background. My friend and I just looked at each other with blank look as if we asked each other, \"What's so funny?!!\". Seriously, every time we watched that show, you wouldn't hear any laughing or coughing. Just a blank look. So we stop watching it. I am personally a fan of sitcoms, so I tried to watch the show. But the show us such a disappointment. This show might be one of the worst comedy sitcom ever...",
    "The word \"1st\" in the title has more ominous meaning for the viewers of this film than for its crime victims. At least they don't have to stick around and watch this interminable film reach its own demise.<br /><br />1st should refer to: 1st draft of a script; 1st takes used in each performance in the final film; 1st edit in post production; etcetera, etcetera.<br /><br />The movie is not cast too badly, it's just that everything about the film come off as worse than third rate, from the goofy script, to the wooden performances. And while suffering through this cobbled together film, by the 2 hour mark you want to be put out of your misery. At 160 minutes long it is readily apparent that it should have been edited to under 2 hours.<br /><br />Going into details concerning the lame script and acting serves little purposes. Even in the equally awful, Lake Placid, at least the performances Bill Pullman and Bridget Fonda constructed out of an extremely weak script, were nuanced enough to make you laugh at the movie. In 1st to Die, one ends up grieving only for the time lost in waiting to see what happens after the opening scene of the preparation of the female lead's suicide.<br /><br />The editing is so bad one is never introduced to one of the main characters, who I think (were never quite told) is a D.A. She just appears in one scene in the middle of a conversation. Obviously the scene where she is introduced to the viewer was dropped on the editor's floor. And no one realized that a character appearing out of nowhere was an unusual film ploy.<br /><br />In a word, don't waste your time with this one. My wife and I wish we didn't. But at least we created our own diversions by commenting in various places in the film like it was Mystery Science Theater. \"Meanwhile, in Cleveland . . . .\" !!!!",
    "(mild spoilers)<br /><br />This movie was filthy and stupid. It could have done well without the constant humping and nude sex. It was also very profane. I think that they had a good story developing, but they messed up the whole thing by overdoing it."
]

In [45]:
# Wrap the raw reviews in a single-column frame so predictions can be attached later.
dataframe = pd.DataFrame(reviews, columns=['Text'])
dataframe

Unnamed: 0,Text
0,Why do people who do not know what a particula...
1,"Amateur, no budget films can be surprisingly g..."
2,I had no idea what this film was about or even...
3,I have no idea why people are so crazy about t...
4,The word 1st in the title has more ominous mea...
5,(mild spoilers)<br /><br />This movie was filt...


In [46]:
# Clean the new reviews with the same steps used for the training text:
# letters only, lower-case, stop words removed, tokens stemmed, re-joined with spaces.
test_sent = [
    ' '.join(
        ps.stem(token)
        for token in re.sub('[^a-zA-Z]', ' ', text).lower().split()
        if token not in all_stopwords
    )
    for text in dataframe.Text
]

In [47]:
# Display the cleaned versions of the new reviews.
test_sent

['peopl know particular time past like feel need tri defin time other replac woodstock civil war apollo moon land titan sink got realist flick formula soap opera popul entir low life trash kid young allow go woodstock fail grade school composit show old meani put movi prove know nuttin topic still make money yeah alreadi know one thing watch film give littl insight underclass think next time see slut bar look like dian lane run way child abus let parent worthless rais kid audienc abus simpli stick woodstock moonland flick ipso facto mean film portray',
 'amateur budget film surprisingli good howev one br br ah anoth brad syke atroc act hideou except emmi smith show promis camera direct need seriou rework hold camera run gimmick either work special effect unimagin problem effect identifi real time go rip ear pleas let us see actor real ear beneath blood sceneri bland bore mr syke ventur music cross cheap motel porn realli bad guitar driven metal see sceneri comment br br mention lack re

In [48]:
# Sanity check: number of cleaned reviews.
len(test_sent)

6

In [49]:
# Vectorise the new reviews with the already-fitted vectorizer (transform only,
# no re-fitting), then densify for the classifier.
new_counts = cv.transform(test_sent)
x1 = new_counts.toarray()
x1

array([[0, 0, 0, ..., 1, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 1, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0]], dtype=int64)

In [50]:
# Predict a label for each new review and display the result.
y_pred_res = classifier.predict(x1)
y_pred_res

array([0, 1, 1, 0, 0, 0], dtype=int64)

In [51]:
# Attach the predicted labels to the review frame as a new `predictions` column (mutates `dataframe` in place).
dataframe['predictions'] = y_pred_res.tolist()

In [52]:
# Display the reviews alongside their predicted labels.
dataframe

Unnamed: 0,Text,predictions
0,Why do people who do not know what a particula...,0
1,"Amateur, no budget films can be surprisingly g...",1
2,I had no idea what this film was about or even...,1
3,I have no idea why people are so crazy about t...,0
4,The word 1st in the title has more ominous mea...,0
5,(mild spoilers)<br /><br />This movie was filt...,0


In [56]:
# Write the reviews and their predicted labels to a tab-separated UTF-8 file, omitting the index.
dataframe.to_csv('Sentiment_analysis_result.tsv', sep = '\t', index = False, encoding = 'UTF-8')