In [1]:
import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings("ignore")

In [2]:
# Load the labelled reviews; the path is relative to the working directory.
data = pd.read_csv(filepath_or_buffer="Movie_Review.csv")
data

Unnamed: 0,review,sentiment
0,One of the other reviewers has mentioned that ...,positive
1,A wonderful little production. <br /><br />The...,positive
2,I thought this was a wonderful way to spend ti...,positive
3,Basically there's a family where a little boy ...,negative
4,"Petter Mattei's ""Love in the Time of Money"" is...",positive
...,...,...
49995,I thought this movie did a down right good job...,positive
49996,"Bad plot, bad dialogue, bad acting, idiotic di...",negative
49997,I am a Catholic taught in parochial elementary...,negative
49998,I'm going to have to disagree with the previou...,negative


In [3]:
data.shape

(50000, 2)

In [4]:
type(data)

pandas.core.frame.DataFrame

In [5]:
data["sentiment"].value_counts()

positive    25000
negative    25000
Name: sentiment, dtype: int64

In [6]:
# Encode the labels as integers (positive -> 1, negative -> 0).
# `data` is rebound instead of mutated with inplace=True, and the explicit
# astype pins the column to int64 rather than relying on replace() to
# downcast the object column on its own. Safe to re-run: once the labels
# are already 0/1 both steps are no-ops.
data = (
    data
    .replace({"sentiment": {"positive": 1, "negative": 0}})
    .astype({"sentiment": "int64"})
)

In [7]:
data

Unnamed: 0,review,sentiment
0,One of the other reviewers has mentioned that ...,1
1,A wonderful little production. <br /><br />The...,1
2,I thought this was a wonderful way to spend ti...,1
3,Basically there's a family where a little boy ...,0
4,"Petter Mattei's ""Love in the Time of Money"" is...",1
...,...,...
49995,I thought this movie did a down right good job...,1
49996,"Bad plot, bad dialogue, bad acting, idiotic di...",0
49997,I am a Catholic taught in parochial elementary...,0
49998,I'm going to have to disagree with the previou...,0


In [8]:
data["sentiment"].value_counts()

1    25000
0    25000
Name: sentiment, dtype: int64

In [9]:
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Embedding, LSTM
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

In [10]:
# Hold out 20% of the rows as the test set; random_state is fixed at 42.
train_data, test_data = train_test_split(
    data,
    test_size=0.2,
    random_state=42,
)

In [11]:
train_data.shape

(40000, 2)

In [12]:
test_data.shape

(10000, 2)

In [None]:
# Fit the tokenizer on the training reviews only. num_words=5000 matches the
# input_dim given to the Embedding layer when the model is built.
train_reviews = train_data["review"]
tokenizer = Tokenizer(num_words=5000)
tokenizer.fit_on_texts(train_reviews)

In [None]:
# Step 1: map each review to a list of integer word indices.
train_sequences = tokenizer.texts_to_sequences(train_data["review"])
test_sequences = tokenizer.texts_to_sequences(test_data["review"])

# Step 2: pad/truncate with maxlen=200 so every row has the same length.
X_train = pad_sequences(train_sequences, maxlen=200)
X_test = pad_sequences(test_sequences, maxlen=200)


In [None]:
X_train

In [None]:
X_test

In [None]:
# Target vectors: the sentiment column of each split.
Y_train, Y_test = train_data["sentiment"], test_data["sentiment"]

In [None]:
Y_train

In [None]:
# Embedding -> LSTM -> one sigmoid unit, built as a single layer list
# instead of successive .add() calls.
model = Sequential([
    Embedding(input_dim=5000, output_dim=128, input_length=200),
    LSTM(128, dropout=0.2, recurrent_dropout=0.2),
    Dense(1, activation="sigmoid"),
])

In [None]:
model.summary()

In [None]:
# Binary classification setup: binary cross-entropy loss, Adam optimizer,
# accuracy reported as the metric.
model.compile(
    optimizer="adam",
    loss="binary_crossentropy",
    metrics=["accuracy"],
)

In [None]:
# Train the model here so that the later save / evaluate / predict cells
# operate on fitted weights rather than the random initial ones.
# validation_split=0.2 holds back part of X_train for per-epoch validation.
# The returned History object is kept so the training curves can be inspected.
history = model.fit(X_train, Y_train, epochs=5, batch_size=64, validation_split=0.2)

In [None]:
model.save("model.h5")

In [None]:
import pickle

# Serialize the in-memory `model` object to model.pkl with pickle.
with open('model.pkl', 'wb') as pkl_file:
    pickle.dump(model, pkl_file)


In [None]:
model.save('saved_model.keras')

In [None]:
import joblib
joblib.dump(tokenizer, "tokenizer.pkl")

In [None]:
loss, accuracy = model.evaluate(X_test, Y_test)

In [None]:
print(loss)

In [None]:

print(accuracy)

In [None]:
def predictive_system(review):
    """Classify one review string as "positive" or "negative".

    Relies on the notebook-level `tokenizer`, `pad_sequences` and `model`
    objects. The text is converted to a single integer sequence, padded
    with maxlen=200 and scored by `model.predict`; a score strictly above
    0.5 is reported as "positive", anything else as "negative".

    Args:
        review: Raw review text.

    Returns:
        The string "positive" or "negative".
    """
    # Wrap the text in a list: the tokenizer is given a batch of one.
    encoded = tokenizer.texts_to_sequences([review])
    batch = pad_sequences(encoded, maxlen=200)
    # First row, first column of the prediction is the score for this review.
    score = model.predict(batch)[0][0]
    if score > 0.5:
        return "positive"
    return "negative"

In [None]:
predictive_system("This movie was fantastic and amazing")

In [None]:
predictive_system("Overall long and slow")

In [None]:
# Reload the pickled model. The context manager closes the file handle
# deterministically, and the result is bound to a name so it can be reused.
# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# only unpickle model.pkl files produced by this notebook.
with open('model.pkl', 'rb') as f:
    loaded_model = pickle.load(f)
loaded_model

In [None]:
pip install gradio

In [None]:
import gradio as gr
title = "MOVIE SENTIMENT ANALYSIS APPLICATION"

# Wrap predictive_system in a Gradio UI: one textbox in, one textbox out.
app = gr.Interface(
    fn=predictive_system,
    inputs="textbox",
    outputs="textbox",
    title=title,
)

# NOTE(review): share=True presumably requests a public share link in addition
# to the local server — confirm that exposing the model this way is intended.
app.launch(share=True)