In [None]:
import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings("ignore")

# Read the IMDB reviews CSV into a DataFrame
data = pd.read_csv(filepath_or_buffer="/content/IMDB Dataset.csv")

# Preview the leading rows, then report the (rows, columns) shape
print(data.head(), data.shape, sep="\n")


                                              review sentiment
0  One of the other reviewers has mentioned that ...  positive
1  A wonderful little production. <br /><br />The...  positive
2  I thought this was a wonderful way to spend ti...  positive
3  Basically there's a family where a little boy ...  negative
4  Petter Mattei's "Love in the Time of Money" is...  positive
(50000, 2)


In [None]:
# Display the last few rows of the dataset
print(data.tail())

# Count the sentiment values
print(data["sentiment"].value_counts())

# Map sentiment values to numerical format.
# The column is assigned back (rather than DataFrame.replace(..., inplace=True))
# and cast explicitly, so the labels do not depend on pandas' deprecated silent
# object -> int downcast inside replace(); they are int64 by construction.
# Re-running the cell is safe: values that are already 0/1 pass through replace()
# unchanged, and any unexpected label makes the cast fail loudly.
data["sentiment"] = (
    data["sentiment"].replace({"positive": 1, "negative": 0}).astype("int64")
)

# Display the first and last few rows after replacing sentiment values
print(data.head())
print(data.tail())

# Verify sentiment value counts
print(data["sentiment"].value_counts())


                                                  review sentiment
49995  I thought this movie did a down right good job...  positive
49996  Bad plot, bad dialogue, bad acting, idiotic di...  negative
49997  I am a Catholic taught in parochial elementary...  negative
49998  I'm going to have to disagree with the previou...  negative
49999  No one expects the Star Trek movies to be high...  negative
sentiment
positive    25000
negative    25000
Name: count, dtype: int64
                                              review  sentiment
0  One of the other reviewers has mentioned that ...          1
1  A wonderful little production. <br /><br />The...          1
2  I thought this was a wonderful way to spend ti...          1
3  Basically there's a family where a little boy ...          0
4  Petter Mattei's "Love in the Time of Money" is...          1
                                                  review  sentiment
49995  I thought this movie did a down right good job...          1
49996 

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing; the fixed random_state keeps the split repeatable
train_data, test_data = train_test_split(
    data,
    test_size=0.2,
    random_state=42,
)

# Report the (rows, columns) shape of each partition, training first
print(train_data.shape, test_data.shape, sep="\n")


(40000, 2)
(10000, 2)


In [None]:
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Build the vocabulary from the training reviews only (num_words=5000)
tokenizer = Tokenizer(num_words=5000)
tokenizer.fit_on_texts(train_data["review"])

# Encode each split as integer sequences and pad them to a length of 200
X_train, X_test = (
    pad_sequences(tokenizer.texts_to_sequences(split["review"]), maxlen=200)
    for split in (train_data, test_data)
)

# Matching sentiment labels for each split
Y_train, Y_test = train_data["sentiment"], test_data["sentiment"]


In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Embedding, LSTM

# Assemble the network as one layer stack: embedding -> LSTM -> single sigmoid unit
model = Sequential(
    [
        Embedding(input_dim=5000, output_dim=128, input_length=200),
        LSTM(128, dropout=0.2, recurrent_dropout=0.2),
        Dense(1, activation="sigmoid"),
    ]
)

# Show the layer-by-layer summary
model.summary()

# Configure training: Adam optimizer, binary cross-entropy loss, accuracy metric
model.compile(
    optimizer="adam",
    loss="binary_crossentropy",
    metrics=["accuracy"],
)


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, 200, 128)          640000    
                                                                 
 lstm (LSTM)                 (None, 128)               131584    
                                                                 
 dense (Dense)               (None, 1)                 129       
                                                                 
Total params: 771713 (2.94 MB)
Trainable params: 771713 (2.94 MB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [None]:
import joblib

# Fit for 5 epochs in batches of 64, with validation_split=0.2 of the training data
model.fit(
    X_train,
    Y_train,
    batch_size=64,
    epochs=5,
    validation_split=0.2,
)

# Persist the trained network and the fitted tokenizer to disk
model.save("model.h5")
joblib.dump(tokenizer, "tokenizer.pkl")


Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


['tokenizer.pkl']

In [None]:
# Score the model on the held-out test split and report loss, then accuracy
loss, accuracy = model.evaluate(X_test, Y_test)
print(f"Loss: {loss}", f"Accuracy: {accuracy}", sep="\n")


Loss: 0.36436212062835693
Accuracy: 0.8779000043869019


In [None]:
# Define the predictive system function
def predictive_system(review):
    """Classify a single review string as "positive" or "negative".

    The review is encoded with the global ``tokenizer``, padded with
    ``maxlen=200`` and scored by the global ``model``. A score strictly
    greater than 0.5 is reported as "positive", anything else as "negative".
    """
    encoded = pad_sequences(tokenizer.texts_to_sequences([review]), maxlen=200)
    # predict() is given a batch of one review; take its first output value
    score = model.predict(encoded)[0][0]
    if score > 0.5:
        return "positive"
    return "negative"

# Smoke-test the predictive system on a few short reviews, printing one label per review
for sample_review in (
    "This movie was fantastic and amazing",
    "A thrilling adventure with stunning visuals",
    "A visual masterpiece",
):
    print(predictive_system(sample_review))


positive
positive
positive


In [None]:
from tensorflow.keras.models import load_model
import joblib

# Reload the model and tokenizer from the same relative paths they were saved
# to ("model.h5" / "tokenizer.pkl"), so the round trip does not depend on the
# working directory being /content. load_model is taken from tensorflow.keras,
# the same namespace used to build and save the model.
model = load_model("model.h5")
# NOTE: joblib.load unpickles the file, which can execute arbitrary code.
# Only load a tokenizer.pkl that was produced by this notebook.
tokenizer = joblib.load("tokenizer.pkl")

# Redefine the predictive system function
def predictive_system(review):
    """Return "positive" or "negative" for one review string.

    Uses the global ``tokenizer`` to turn the review into a sequence, pads it
    with ``maxlen=200`` and feeds it to the global ``model``. The label is
    "positive" only when the model's output is strictly above 0.5.
    """
    token_ids = tokenizer.texts_to_sequences([review])
    model_input = pad_sequences(token_ids, maxlen=200)
    # One review in, so read the single value from the first output row
    probability = model.predict(model_input)[0][0]
    if probability > 0.5:
        sentiment = "positive"
    else:
        sentiment = "negative"
    return sentiment

# Run one review through the reloaded model and tokenizer and show its label
review_sentiment = predictive_system(review="Beautiful cinematography")
print(review_sentiment)


positive


In [None]:
# Install Gradio for creating a web interface
!pip install gradio

# Create a Gradio interface for the predictive system
import gradio as gr
title = "MOVIE SENTIMENT ANALYSIS APPLICATION"
app = gr.Interface(fn=predictive_system, inputs="textbox", outputs="textbox", title=title)

# Launch the Gradio app
app.launch(share=True)


Collecting gradio
  Downloading gradio-4.39.0-py3-none-any.whl.metadata (15 kB)
Collecting aiofiles<24.0,>=22.0 (from gradio)
  Downloading aiofiles-23.2.1-py3-none-any.whl.metadata (9.7 kB)
Collecting fastapi (from gradio)
  Downloading fastapi-0.111.1-py3-none-any.whl.metadata (26 kB)
Collecting ffmpy (from gradio)
  Downloading ffmpy-0.3.2.tar.gz (5.5 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting gradio-client==1.1.1 (from gradio)
  Downloading gradio_client-1.1.1-py3-none-any.whl.metadata (7.1 kB)
Collecting httpx>=0.24.1 (from gradio)
  Downloading httpx-0.27.0-py3-none-any.whl.metadata (7.2 kB)
Collecting orjson~=3.0 (from gradio)
  Downloading orjson-3.10.6-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (50 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m50.4/50.4 kB[0m [31m4.1 MB/s[0m eta [36m0:00:00[0m
Collecting pydub (from gradio)
  Downloading pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)
Collecting p

