In [22]:
import pandas as pd

from sklearn.model_selection import train_test_split

import tensorflow as tf
from tensorflow import keras

from keras.preprocessing.text import Tokenizer
from keras.utils import pad_sequences

from keras import Sequential
from keras.layers import RNN, LSTM, Dense, BatchNormalization, Dropout, Embedding
from keras.callbacks import EarlyStopping

from keras.models import load_model

In [13]:
df = pd.read_csv('clean.csv')
df.head()

Unnamed: 0,text,label
0,grew b watching loving thunderbird mate school...,0
1,put movie dvd player sat coke chip expectation...,0
2,people know particular time past like feel nee...,0
3,even though great interest biblical movie bore...,0
4,die hard dad army fan nothing ever change got ...,1


In [14]:
df.shape

(40000, 2)

In [15]:
X_train, X_test, y_train, y_test = train_test_split(df.drop('label',axis = 1),df['label'],test_size=0.25,stratify=df['label'])

In [16]:
tokenizer = Tokenizer()
tokenizer.fit_on_texts(X_train['text'])

In [17]:
X_train_tokens = pad_sequences(tokenizer.texts_to_sequences(X_train['text']),maxlen=200)
X_test_tokens = pad_sequences(tokenizer.texts_to_sequences(X_test['text']),maxlen=200)

In [18]:
vocab_size = sum([x for _,x in tokenizer.word_counts.items()])

In [37]:
model = Sequential()
model.add(Embedding(input_dim = vocab_size, input_length = 200, output_dim = 50))
model.add(LSTM(50))
model.add(Dense(1,activation = 'sigmoid'))

model.compile(optimizer = 'adam',loss = 'binary_crossentropy',metrics = ['accuracy'])

model.summary()

Model: "sequential_4"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_4 (Embedding)     (None, 200, 50)           177541900 
                                                                 
 lstm_4 (LSTM)               (None, 50)                20200     
                                                                 
 dense_8 (Dense)             (None, 1)                 51        
                                                                 
Total params: 177,562,151
Trainable params: 177,562,151
Non-trainable params: 0
_________________________________________________________________


In [38]:
es = EarlyStopping(monitor='val_accuracy',mode = 'max',patience = 1,verbose = 1)
model.fit(X_train_tokens,y_train, validation_data= (X_test_tokens,y_test), batch_size=128,epochs=5,callbacks = [es])

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 3: early stopping


<keras.callbacks.History at 0x1d089deb310>

In [None]:
model.save('model.h5')

In [44]:
import pickle
pickle.dump(tokenizer,open('tokenizer.pkl','wb'))

In [50]:
model = load_model('model.h5')
tokenizer = pickle.load(open('tokenizer.pkl','rb'))

In [51]:
def predict_review(review):
    tokenized_review = pad_sequences(tokenizer.texts_to_sequences([review]),maxlen=200)
    prediction = model.predict(tokenized_review,verbose = 0)[0][0]
    if prediction>0.5:
        print('positive')
    else:
        print('negative')

In [52]:
s = """Detective Batman at its peak! Great storyline. 
Just as dark a universe as we've come to expect from DC. The gloomy, gritty, dark tone of this film is exactly what I wanted.
When you think the movie is over, there's more. Beautiful cinematography. Great score."""

predict_review(s)

positive


In [53]:
s = """Look at the profiles for the writers that wrote Madame Web. You'll see virtually every movie they've written has a bad rating and bad reviews. Why then does Hollywood continually hire these same writers for these big budget films? I could understand if they wrote movies for Asylum Films because all their movies are terrible.

Madame Web is god awful! Very difficult to sit through. None of the main characters are likeable. Even the villain is lame. It seems like the entire movie was written to set up a sequel. But why would people spend money to see the sequel of a movie that is one of the worst movies ever written? I'm shocked at how bad this movie is and if there is a sequel I certainly wont bother watching it.

Stop hiring bad writers, Hollywood!"""

predict_review(s)

negative
