# Read dataset

**Libraries**

In [27]:
import pandas as pd

In [31]:
data = pd.read_csv("imdb.csv")

# Data preprocessing

**Libraries**

In [33]:
import numpy as np
import re
from tensorflow.keras.preprocessing.text import one_hot
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.model_selection import train_test_split

In [35]:
data["sentiment"] = data["sentiment"].map({"positive": 1, "negative": 0})

In [37]:
data.head()

Unnamed: 0,review,sentiment
0,One of the other reviewers has mentioned that ...,1
1,A wonderful little production. <br /><br />The...,1
2,I thought this was a wonderful way to spend ti...,1
3,Basically there's a family where a little boy ...,0
4,"Petter Mattei's ""Love in the Time of Money"" is...",1


In [39]:
x = np.array(data["review"].values)
y = np.array(data["sentiment"].values)

# Preprocessing: Lowercasing & Removing Punctuation
x_filtered = [re.sub(r'[^\w\s]', ' ', review.lower()) for review in x]

In [41]:
# One-Hot Encoding each sentence
vocabulary_size = 5000
onehot_encoded = [one_hot(review, vocabulary_size) for review in x_filtered]

# Padding each encoded sentence to have a max_length=500
max_length = 500
x_padded = pad_sequences(onehot_encoded, maxlen=max_length, padding="post")

In [43]:
# Splitting into training and test sets
x_train, x_test, y_train, y_test = train_test_split(x_padded, y, test_size=0.2,random_state=42)

# Create LSTM

**Libraries**

In [45]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Embedding, LSTM

In [13]:
# Define Model
model = Sequential()
embeded_vector_size = 35

# Embedding Layer
model.add(Embedding(input_dim=vocabulary_size, output_dim=embeded_vector_size))

# LSTM Layer
model.add(LSTM(128))

# Output Layer
model.add(Dense(1, activation="sigmoid"))

# Explicitly build the model to define input shape
model.build(input_shape=(None, max_length))

# Compile Model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=["accuracy"])

# Print Model Summary
print(model.summary())


None


**Train model**

In [14]:
model.fit(x_train,y_train,epochs=10)

Epoch 1/10
[1m1250/1250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m26s[0m 18ms/step - accuracy: 0.5080 - loss: 0.6934
Epoch 2/10
[1m1250/1250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 17ms/step - accuracy: 0.5028 - loss: 0.6963
Epoch 3/10
[1m1250/1250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m42s[0m 18ms/step - accuracy: 0.5123 - loss: 0.6928
Epoch 4/10
[1m1250/1250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 17ms/step - accuracy: 0.5142 - loss: 0.6880
Epoch 5/10
[1m1250/1250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 17ms/step - accuracy: 0.5335 - loss: 0.6829
Epoch 6/10
[1m1250/1250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 18ms/step - accuracy: 0.5418 - loss: 0.6639
Epoch 7/10
[1m1250/1250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 18ms/step - accuracy: 0.6633 - loss: 0.5672
Epoch 8/10
[1m1250/1250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 18ms/step - accuracy: 0.8792 - loss: 0.2935
Epoch 9/

<keras.src.callbacks.history.History at 0x7c6c584ee0d0>

**Evaluate model**

In [20]:
for i in range(10):
    predicted = model.predict(x_test)[i]
    sentiment = 1 if predicted > 0.6 else 0

    print(f"Sample {i+1}:")
    print("PREDICTED :", sentiment)
    print("ACTUAL    :", y_test[i])
    print("-" * 20)


[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 7ms/step
Sample 1:
PREDICTED : 1
ACTUAL    : 1
--------------------
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 7ms/step
Sample 2:
PREDICTED : 1
ACTUAL    : 1
--------------------
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 7ms/step
Sample 3:
PREDICTED : 0
ACTUAL    : 0
--------------------
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 8ms/step
Sample 4:
PREDICTED : 1
ACTUAL    : 1
--------------------
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 7ms/step
Sample 5:
PREDICTED : 1
ACTUAL    : 0
--------------------
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 7ms/step
Sample 6:
PREDICTED : 1
ACTUAL    : 1
--------------------
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 7ms/step
Sample 7:
PREDICTED : 1
ACTUAL    : 1
--------------------
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s

# Save model

In [21]:
from google.colab import files
model.save("my_model.keras")
files.download("my_model.keras")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

# Load model

In [47]:
from tensorflow.keras.models import load_model
model = load_model("my_model.keras") 

In [49]:
for i in range(10):
    predicted = model.predict(x_test)[i]
    sentiment = 1 if predicted > 0.6 else 0

    print(f"Sample {i+1}:")
    print("PREDICTED :", sentiment)
    print("ACTUAL    :", y_test[i])
    print("-" * 20)


[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 130ms/step
Sample 1:
PREDICTED : 0
ACTUAL    : 1
--------------------
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 128ms/step
Sample 2:
PREDICTED : 0
ACTUAL    : 1
--------------------
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 130ms/step
Sample 3:
PREDICTED : 0
ACTUAL    : 0
--------------------
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 128ms/step
Sample 4:
PREDICTED : 0
ACTUAL    : 1
--------------------
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m42s[0m 133ms/step
Sample 5:
PREDICTED : 1
ACTUAL    : 0
--------------------
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 132ms/step
Sample 6:
PREDICTED : 0
ACTUAL    : 1
--------------------
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m43s[0m 138ms/step
Sample 7:
PREDICTED : 1
ACTUAL    : 1
--------------------
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━