In [1]:
import pandas as pd

from sklearn.model_selection import train_test_split

import tensorflow as tf
from tensorflow import keras

from tensorflow.keras.preprocessing.text import Tokenizer
from keras.utils import pad_sequences

from keras import Sequential
from keras.layers import SimpleRNN, LSTM, Dense, BatchNormalization, Dropout, Embedding
from keras.callbacks import EarlyStopping

from keras.saving import load_model
from keras.saving import save_model

import pickle

import warnings
warnings.filterwarnings('ignore')

In [4]:
df = pd.read_csv('clean.csv')
df.head()

Unnamed: 0,text,label
0,grew b watching loving thunderbird mate school...,0
1,put movie dvd player sat coke chip expectation...,0
2,people know particular time past like feel nee...,0
3,even though great interest biblical movie bore...,0
4,die hard dad army fan nothing ever change got ...,1


In [5]:
df.shape

(40000, 2)

In [6]:
X_train, X_test, y_train, y_test = train_test_split(df.drop('label',axis = 1),df['label'],test_size=0.25,stratify=df['label'],random_state=42)

In [7]:
tokenizer = Tokenizer()
tokenizer.fit_on_texts(X_train['text'])

In [8]:
X_train_tokens = pad_sequences(tokenizer.texts_to_sequences(X_train['text']),maxlen=200)
X_test_tokens = pad_sequences(tokenizer.texts_to_sequences(X_test['text']),maxlen=200)

In [9]:
vocab_size = sum([x for _,x in tokenizer.word_counts.items()])

In [52]:
model = Sequential()
model.add(Embedding(input_dim = vocab_size,output_dim = 2,input_shape = (200,)))
model.add(LSTM(8))
model.add(Dense(16,activation = 'relu'))
model.add(Dropout(0.2))
model.add(Dense(1,activation = 'sigmoid'))

model.compile(optimizer = keras.optimizers.Adam(learning_rate = 0.0001),loss = 'binary_crossentropy',metrics = ['accuracy'])

model.summary()

In [53]:
es = EarlyStopping(monitor='val_accuracy',mode = 'max',patience = 3,verbose = 1)
model.fit(X_train_tokens,y_train, validation_data= (X_test_tokens,y_test),epochs=100,callbacks = [es])

Epoch 1/100
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m72s[0m 75ms/step - accuracy: 0.5366 - loss: 0.6921 - val_accuracy: 0.7339 - val_loss: 0.6179
Epoch 2/100
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m71s[0m 76ms/step - accuracy: 0.7962 - loss: 0.5630 - val_accuracy: 0.8377 - val_loss: 0.4456
Epoch 3/100
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m70s[0m 75ms/step - accuracy: 0.8692 - loss: 0.3988 - val_accuracy: 0.8625 - val_loss: 0.3588
Epoch 4/100
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m71s[0m 75ms/step - accuracy: 0.8969 - loss: 0.3041 - val_accuracy: 0.8754 - val_loss: 0.3101
Epoch 5/100
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m71s[0m 75ms/step - accuracy: 0.9152 - loss: 0.2451 - val_accuracy: 0.8816 - val_loss: 0.2874
Epoch 6/100
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m71s[0m 75ms/step - accuracy: 0.9313 - loss: 0.2041 - val_accuracy: 0.8848 - val_loss: 0.2822
Epoch 7/10

<keras.src.callbacks.history.History at 0x1f430691650>

In [54]:
save_model(model,'model.keras')

In [16]:
model = load_model('model.keras')
tokenizer = pickle.load(open('tokenizer.pkl','rb'))

FileNotFoundError: [Errno 2] No such file or directory: 'tokenizer.pkl'

In [25]:
def predict_review(review):
    tokenized_review = pad_sequences(tokenizer.texts_to_sequences([review]),maxlen=200)
    prediction = model.predict(tokenized_review,verbose = 0)[0][0]
    if prediction>0.5:
        print('positive')
    else:
        print('negative')

In [26]:
s = """Detective Batman at its peak! Great storyline. 
Just as dark a universe as we've come to expect from DC. The gloomy, gritty, dark tone of this film is exactly what I wanted.
When you think the movie is over, there's more. Beautiful cinematography. Great score."""

predict_review(s)

positive


In [27]:
s = """Look at the profiles for the writers that wrote Madame Web. You'll see virtually every movie they've written has a bad rating and bad reviews. Why then does Hollywood continually hire these same writers for these big budget films? I could understand if they wrote movies for Asylum Films because all their movies are terrible.

Madame Web is god awful! Very difficult to sit through. None of the main characters are likeable. Even the villain is lame. It seems like the entire movie was written to set up a sequel. But why would people spend money to see the sequel of a movie that is one of the worst movies ever written? I'm shocked at how bad this movie is and if there is a sequel I certainly wont bother watching it.

Stop hiring bad writers, Hollywood!"""

predict_review(s)

negative
