In [None]:
# !pip install opendatasets

In [None]:
import opendatasets as od

In [None]:
# Fetch the IMDB 50k movie-review dataset from Kaggle into the working directory.
od.download(
    'https://www.kaggle.com/datasets/lakshmi25npathi/imdb-dataset-of-50k-movie-reviews'
)

Dataset URL: https://www.kaggle.com/datasets/lakshmi25npathi/imdb-dataset-of-50k-movie-reviews
Downloading imdb-dataset-of-50k-movie-reviews.zip to ./imdb-dataset-of-50k-movie-reviews


100%|██████████| 25.7M/25.7M [00:00<00:00, 1.28GB/s]







In [None]:
# Path to the CSV file produced by the download step. The dataset is downloaded
# into ./imdb-dataset-of-50k-movie-reviews, so a path relative to the working
# directory works wherever the notebook runs (not only under Colab's /content).
file_path = './imdb-dataset-of-50k-movie-reviews/IMDB Dataset.csv'

import pandas as pd
df = pd.read_csv(file_path)

# Show one randomly chosen row to confirm the file loaded correctly
df.sample(1)

Unnamed: 0,review,sentiment
28951,Those two main characters Erkan and Stefan are...,positive


In [None]:
from sklearn.model_selection import train_test_split

# Separate the target label from the remaining columns.
y = df['sentiment']
X = df.drop(columns=['sentiment'])

# Hold out 20% of the reviews for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X['review'],
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
import tensorflow as tf
import numpy as np
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout, GRU, SimpleRNN
from tensorflow.keras.models import Sequential
from tensorflow.keras.preprocessing.text import Tokenizer

In [None]:
# Build the vocabulary from the training reviews only. Fitting on the test
# reviews as well would leak test-set vocabulary into the model's inputs.
# Words that never appear in the training reviews are mapped to the '<OOV>'
# token instead of being silently dropped when texts are converted to sequences.
tokenizer = Tokenizer(oov_token='<OOV>')
tokenizer.fit_on_texts(X_train)

In [None]:
# Replace each review's text with its list of integer word indices.
# NOTE: this overwrites X_train / X_test, so the cell must not be run twice.
X_train, X_test = map(tokenizer.texts_to_sequences, (X_train, X_test))

In [None]:
# Vocabulary size for the Embedding layers: one slot per known word plus one
# extra slot, since the sequences are padded with index 0.
input_dim = 1 + len(tokenizer.word_index)

In [None]:
# Pad or truncate every encoded review to exactly 100 tokens.
X_train, X_test = (
    pad_sequences(encoded_reviews, maxlen=100)
    for encoded_reviews in (X_train, X_test)
)

In [None]:
# Encode the string labels as integers: 'positive' -> 1, anything else -> 0.
y_train, y_test = (
    np.where(labels == 'positive', 1, 0)
    for labels in (y_train, y_test)
)

In [None]:
# Deep RNN model: an embedding followed by three stacked SimpleRNN layers,
# each followed by dropout, and a single sigmoid unit for the binary label.
model = Sequential([
    Embedding(input_dim=input_dim, output_dim=128, input_length=100),
    # The first two recurrent layers emit the whole sequence so that the next
    # recurrent layer receives one vector per time step.
    SimpleRNN(64, return_sequences=True),
    Dropout(0.4),
    SimpleRNN(32, return_sequences=True),
    Dropout(0.4),
    # The last recurrent layer emits only its final output.
    SimpleRNN(16),
    Dropout(0.4),
    Dense(1, activation='sigmoid'),
])

model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)



In [None]:
# Train for 10 epochs in batches of 64; the held-out split is scored after each epoch.
history = model.fit(
    X_train,
    y_train,
    validation_data=(X_test, y_test),
    batch_size=64,
    epochs=10,
)

Epoch 1/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m25s[0m 29ms/step - accuracy: 0.5170 - loss: 0.7890 - val_accuracy: 0.7775 - val_loss: 0.4884
Epoch 2/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m36s[0m 25ms/step - accuracy: 0.8187 - loss: 0.4415 - val_accuracy: 0.8312 - val_loss: 0.4299
Epoch 3/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 25ms/step - accuracy: 0.8929 - loss: 0.2961 - val_accuracy: 0.8051 - val_loss: 0.4687
Epoch 4/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 26ms/step - accuracy: 0.9245 - loss: 0.2159 - val_accuracy: 0.8371 - val_loss: 0.4740
Epoch 5/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 26ms/step - accuracy: 0.9541 - loss: 0.1479 - val_accuracy: 0.7999 - val_loss: 0.5217
Epoch 6/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 25ms/step - accuracy: 0.9669 - loss: 0.1135 - val_accuracy: 0.8211 - val_loss: 0.5738
Epoch 7/10
[1m6

## Deep RNN (LSTM)

In [None]:
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.callbacks import EarlyStopping



# Stacked LSTM classifier: an embedding, three LSTM layers each followed by
# dropout, and a single sigmoid unit for the binary sentiment label.
model = Sequential()
# input_length matches the maxlen=100 used when X_train / X_test were padded.
model.add(Embedding(input_dim=input_dim, output_dim=128, input_length=100))

# The first two LSTM layers return the full sequence so the next recurrent
# layer receives one vector per time step; the last returns only its final output.
model.add(LSTM(128, return_sequences=True))
model.add(Dropout(0.2))
model.add(LSTM(128, return_sequences=True))
model.add(Dropout(0.2))
model.add(LSTM(32))
model.add(Dropout(0.2))

model.add(Dense(1, activation='sigmoid'))

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])


# Stop training once val_loss has failed to improve by at least 0.001 for
# 4 epochs in a row, then restore the weights from the best epoch.
callback = EarlyStopping(
    monitor='val_loss',
    min_delta=.001,
    patience=4,
    verbose=1,
    restore_best_weights=True
)


# Keras documents `callbacks` as a list of callback instances.
model.fit(X_train, y_train, epochs=10, batch_size=64, callbacks=[callback], validation_data=(X_test, y_test))



Epoch 1/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m25s[0m 27ms/step - accuracy: 0.7569 - loss: 0.4777 - val_accuracy: 0.8674 - val_loss: 0.3092
Epoch 2/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 28ms/step - accuracy: 0.9287 - loss: 0.1948 - val_accuracy: 0.8615 - val_loss: 0.3345
Epoch 3/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 27ms/step - accuracy: 0.9645 - loss: 0.1029 - val_accuracy: 0.8525 - val_loss: 0.4676
Epoch 4/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 26ms/step - accuracy: 0.9810 - loss: 0.0585 - val_accuracy: 0.8465 - val_loss: 0.4610
Epoch 5/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 26ms/step - accuracy: 0.9885 - loss: 0.0346 - val_accuracy: 0.8530 - val_loss: 0.5818
Epoch 5: early stopping
Restoring model weights from the end of the best epoch: 1.


<keras.src.callbacks.history.History at 0x78371ff57e10>

In [None]:
text = 'This movie touched my heart. I was kind of lost while i was seeing the movie. It made me remember my childhood. Nowadays its hard to find a movie like that.'
# Encode the review with the fitted tokenizer and pad it to the same length
# (100) that the training sequences were padded to.
token_text = tokenizer.texts_to_sequences([text])[0]
padded_text = pad_sequences([token_text], maxlen=100)
y_pred = model.predict(padded_text)
# y_pred holds one row per input review; read the single score as a scalar
# rather than testing the truth value of a whole array.
if y_pred[0][0] > 0.5:
  print('The sentiment is Positive')
else:
  # Report the negative case as well (a bare string expression prints nothing).
  print('The sentiment is Negative')

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 40ms/step
The sentiment is Positive


## Deep RNN (GRU)

In [None]:
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, GRU, Dense, Dropout
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.callbacks import EarlyStopping




# Stacked GRU classifier: an embedding, three GRU layers each followed by
# dropout, and a single sigmoid unit for the binary sentiment label.
model = Sequential()
model.add(Embedding(input_dim=input_dim, output_dim=128, input_length=100))

# The first two GRU layers return the full sequence so the next recurrent
# layer receives one vector per time step; the last returns only its final output.
model.add(GRU(128, return_sequences=True))
model.add(Dropout(0.2))
model.add(GRU(128, return_sequences=True))
model.add(Dropout(0.2))
model.add(GRU(32))
model.add(Dropout(0.2))

model.add(Dense(1, activation='sigmoid'))

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])


# Reuses the EarlyStopping instance `callback` created in the LSTM cell.
# Keras documents `callbacks` as a list of callback instances.
model.fit(X_train, y_train, epochs=10, batch_size=64, callbacks=[callback], validation_data=(X_test, y_test))

Epoch 1/10




[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 27ms/step - accuracy: 0.7287 - loss: 0.5106 - val_accuracy: 0.8536 - val_loss: 0.3290
Epoch 2/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 25ms/step - accuracy: 0.9266 - loss: 0.1954 - val_accuracy: 0.8657 - val_loss: 0.3146
Epoch 3/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 25ms/step - accuracy: 0.9720 - loss: 0.0840 - val_accuracy: 0.8595 - val_loss: 0.3822
Epoch 4/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 25ms/step - accuracy: 0.9870 - loss: 0.0393 - val_accuracy: 0.8533 - val_loss: 0.5926
Epoch 5/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 25ms/step - accuracy: 0.9927 - loss: 0.0219 - val_accuracy: 0.8508 - val_loss: 0.5602
Epoch 6/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 25ms/step - accuracy: 0.9961 - loss: 0.0128 - val_accuracy: 0.8544 - val_loss: 0.7885
Epoch 6: early stopping
Res

<keras.src.callbacks.history.History at 0x78371ff76c10>

In [None]:
# Score the most recently trained model on the held-out reviews.
# The result is unpacked as the loss followed by the compiled accuracy metric.
loss, accuracy = model.evaluate(
    X_test,
    y_test,
)
print(f'Test Accuracy : {accuracy:.4f}')

[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 8ms/step - accuracy: 0.8637 - loss: 0.3145
Test Accuracy : 0.8657


In [None]:
text = 'This movie touched my heart. I was kind of lost while i was seeing the movie. It made me remember my childhood. Nowadays its hard to find a movie like that.'
# Encode the review with the fitted tokenizer and pad it to the same length
# (100) that the training sequences were padded to.
token_text = tokenizer.texts_to_sequences([text])[0]
padded_text = pad_sequences([token_text], maxlen=100)
y_pred = model.predict(padded_text)
# y_pred holds one row per input review; read the single score as a scalar
# rather than testing the truth value of a whole array.
if y_pred[0][0] > 0.5:
  print('The sentiment is Positive')
else:
  # Report the negative case as well (a bare string expression prints nothing).
  print('The sentiment is Negative')

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 218ms/step
The sentiment is Positive


## GRUs have fewer parameters and are often a good fit for smaller datasets and simpler tasks, while LSTMs are generally preferred for larger datasets and more complex sequence tasks.

In [None]:
from tensorflow.keras.regularizers import l2
from tensorflow.keras.layers import LSTM

# Example of adding L2 regularization to an LSTM layer
# model.add(LSTM(units=..., return_sequences=..., kernel_regularizer=l2(0.001), recurrent_regularizer=l2(0.001)))

# You can apply regularization to one or more of the kernel, recurrent, and bias weights.
# Remember to experiment with the regularization strength (the value inside l2()).
## Optional: the L2 regularization shown above is a further way to reduce overfitting; it is not required.