# Part 0. Data Prepraration

In [1]:
from datasets import load_dataset
dataset = load_dataset("rotten_tomatoes")
train_dataset = dataset ['train']
validation_dataset = dataset ['validation']
test_dataset = dataset ['test']

  from .autonotebook import tqdm as notebook_tqdm


# Part 2. Model Training & Evaluation - RNN

In [2]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, SimpleRNN, Dense
from tensorflow.keras.preprocessing.sequence import pad_sequences
import gensim.downloader as api
import numpy as np
import nltk
import random

In [3]:
model = api.load("glove-wiki-gigaword-100")
vocab_size = len(model.index_to_key) + 1
embedding_dim = model.vector_size
word_index = {word: index+1 for index, word in enumerate(model.index_to_key)} # index 0 is reserved for padding
embedding_matrix = np.zeros((vocab_size, embedding_dim))

In [4]:
for word, idx in word_index.items():
    if word in model:
        embedding_matrix[idx] = model[word]

In [5]:
def tokenize(text, word_index):
    ls = nltk.word_tokenize(text)
    return [word_index[word] for word in ls if word in word_index]

X_train = [tokenize(text, word_index) for text in train_dataset['text']]
X_val = [tokenize(text, word_index) for text in validation_dataset['text']]
X_test = [tokenize(text, word_index) for text in test_dataset['text']]
max_length = max(len(seq) for seq in X_train)

In [6]:
X_train = pad_sequences(X_train, maxlen=max_length)
X_val = pad_sequences(X_val, maxlen=max_length)
X_test = pad_sequences(X_test, maxlen=max_length)

In [7]:
y_train = np.array(train_dataset['label'])
y_val = np.array(validation_dataset['label'])
y_test = np.array(test_dataset['label'])

Model Training - Grid Search

In [8]:
from tensorflow.keras.callbacks import Callback
best_accuracy = {}
class CustomCallback(Callback):
    accuracy = 0
    cur_key = ""
    epochs = 0
    optimizer = ""
    batch_size = 0
    best_model = None
    lr = 0
    def on_train_begin(self, logs=None):
        self.accuracy = 0

    def on_train_end(self, logs=None):
        global best_accuracy
        if self.accuracy > best_accuracy.get("accuracy", 0):
            best_accuracy = {
                "accuracy": self.accuracy,
                "epoch": self.epochs,
                "optimizer": self.optimizer,
                "batch_size": self.batch_size,
                "lr": self.lr
            }
            print("Saved best accuracy for current run:", self.accuracy, "at epoch", self.epochs)
            self.best_model.save(filepath="best_model.keras")
        print("Run completed on:")
        print(self.cur_key)
        print("Best accuracy for current run:", self.accuracy, "at epoch", self.epochs)
        print("Training ended")


    
    def on_epoch_end(self, epoch, logs=None):
        val_accuracy = logs['val_accuracy']
        if val_accuracy > self.accuracy:
            self.accuracy = val_accuracy
            self.epochs = epoch
            self.best_model = self.model

    def set_key(self, optimizer, batch_size, lr):
        self.optimizer = optimizer
        self.batch_size = batch_size
        self.lr = lr
        self.cur_key = f"optimizer: {optimizer}, batch_size: {batch_size}, lr: {lr}"

In [9]:
from tensorflow.keras.layers import Dropout
def train_model(optimizer, epochs, batch_size, lr):
    tf.random.set_seed(0)
    np.random.seed(0)
    random.seed(0)
    early_stopping = tf.keras.callbacks.EarlyStopping(
        monitor='val_accuracy',
        patience=3,
        restore_best_weights=True
    )
    custom_callback = CustomCallback()
    custom_callback.set_key(optimizer, batch_size, lr)
    model = Sequential([
        Embedding(input_dim=vocab_size,
                  output_dim=embedding_dim,
                  weights=[embedding_matrix],
                  trainable=False),  
        SimpleRNN(16, return_sequences=False),
        Dense(1, activation='sigmoid')
    ])
    if optimizer == 'adam': optimizer = tf.keras.optimizers.Adam(learning_rate=lr)
    elif optimizer == 'sgd': optimizer = tf.keras.optimizers.SGD(learning_rate=lr)
    elif optimizer == 'rmsprop': optimizer = tf.keras.optimizers.RMSprop(learning_rate=lr)
    else: optimizer = tf.keras.optimizers.Adagrad(learning_rate=lr)
    model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])
    history = model.fit(
        X_train, y_train,
        validation_data=(X_val, y_val),
        epochs=epochs,
        batch_size=batch_size,
        callbacks=[custom_callback, early_stopping],
        verbose=2
    )
    return model, history

In [10]:
for batch_size in [16, 32, 64, 128]:
    for lr in [0.005, 0.01, 0.05, 0.1]:
        for optimizer in ['adam', 'sgd', 'rmsprop', 'adagrad']:
            train_model(optimizer, 100, batch_size, lr)

Epoch 1/100
534/534 - 9s - 16ms/step - accuracy: 0.5737 - loss: 0.6776 - val_accuracy: 0.6323 - val_loss: 0.6495
Epoch 2/100
534/534 - 6s - 12ms/step - accuracy: 0.6385 - loss: 0.6445 - val_accuracy: 0.6463 - val_loss: 0.6390
Epoch 3/100
534/534 - 6s - 12ms/step - accuracy: 0.5896 - loss: 0.6656 - val_accuracy: 0.6285 - val_loss: 0.6385
Epoch 4/100
534/534 - 6s - 12ms/step - accuracy: 0.6163 - loss: 0.6522 - val_accuracy: 0.5685 - val_loss: 0.6736
Epoch 5/100
534/534 - 5s - 9ms/step - accuracy: 0.5978 - loss: 0.6644 - val_accuracy: 0.6341 - val_loss: 0.6477
Saved best accuracy for current run: 0.6463414430618286 at epoch 1
Run completed on:
optimizer: adam, batch_size: 16, lr: 0.005
Best accuracy for current run: 0.6463414430618286 at epoch 1
Training ended
Epoch 1/100
534/534 - 9s - 17ms/step - accuracy: 0.5013 - loss: 0.7042 - val_accuracy: 0.5272 - val_loss: 0.6950
Epoch 2/100
534/534 - 6s - 11ms/step - accuracy: 0.5279 - loss: 0.6915 - val_accuracy: 0.5338 - val_loss: 0.6886
Epoch 

In [11]:
best_accuracy

{'accuracy': 0.7439024448394775,
 'epoch': 40,
 'optimizer': 'adagrad',
 'batch_size': 64,
 'lr': 0.01}

#### Best model is trained with Optimizer: adagrad, Batch_size: 64, Learning_rate: 0.01 (Final Hidden State)

In [12]:
model, history = train_model(best_accuracy['optimizer'], 100, best_accuracy['batch_size'], best_accuracy['lr'])

Epoch 1/100
134/134 - 8s - 57ms/step - accuracy: 0.5062 - loss: 0.7016 - val_accuracy: 0.5300 - val_loss: 0.6923
Epoch 2/100
134/134 - 2s - 14ms/step - accuracy: 0.5325 - loss: 0.6890 - val_accuracy: 0.5516 - val_loss: 0.6866
Epoch 3/100
134/134 - 2s - 14ms/step - accuracy: 0.5532 - loss: 0.6849 - val_accuracy: 0.5525 - val_loss: 0.6835
Epoch 4/100
134/134 - 2s - 15ms/step - accuracy: 0.5612 - loss: 0.6820 - val_accuracy: 0.5629 - val_loss: 0.6811
Epoch 5/100
134/134 - 2s - 14ms/step - accuracy: 0.5652 - loss: 0.6795 - val_accuracy: 0.5610 - val_loss: 0.6790
Epoch 6/100
134/134 - 2s - 14ms/step - accuracy: 0.5716 - loss: 0.6772 - val_accuracy: 0.5600 - val_loss: 0.6770
Epoch 7/100
134/134 - 2s - 14ms/step - accuracy: 0.5766 - loss: 0.6748 - val_accuracy: 0.5704 - val_loss: 0.6750
Epoch 8/100
134/134 - 2s - 15ms/step - accuracy: 0.5817 - loss: 0.6724 - val_accuracy: 0.5732 - val_loss: 0.6729
Epoch 9/100
134/134 - 1s - 9ms/step - accuracy: 0.5870 - loss: 0.6697 - val_accuracy: 0.5826 - v

In [13]:
best_model = tf.keras.models.load_model("best_model.keras")
accuracy = best_model.evaluate(X_test, y_test)
print(f"Test accuracy: {accuracy[1] * 100:.2f}%")

[1m34/34[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - accuracy: 0.6484 - loss: 0.6370
Test accuracy: 71.67%


#### Mean pooling

In [14]:
from tensorflow.keras.layers import GlobalAveragePooling1D
def train_model(optimizer, epochs, batch_size, lr):
    tf.random.set_seed(0)
    np.random.seed(0)
    random.seed(0)
    early_stopping = tf.keras.callbacks.EarlyStopping(
        monitor='val_accuracy',
        patience=3,
        restore_best_weights=True
    )
    checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
        filepath="model_mean.keras", 
        monitor='val_accuracy',            
        save_best_only=True,           
        mode='max',                 
        save_weights_only=False,       
        verbose=1
    )
    model = Sequential([
        Embedding(input_dim=vocab_size,
                  output_dim=embedding_dim,
                  weights=[embedding_matrix],
                  trainable=False),
        SimpleRNN(16, return_sequences=True),
        GlobalAveragePooling1D(),
        Dense(1, activation='sigmoid')
    ])
    if optimizer == 'adam': optimizer = tf.keras.optimizers.Adam(learning_rate=lr)
    elif optimizer == 'sgd': optimizer = tf.keras.optimizers.SGD(learning_rate=lr)
    elif optimizer == 'rmsprop': optimizer = tf.keras.optimizers.RMSprop(learning_rate=lr)
    else: optimizer = tf.keras.optimizers.Adagrad(learning_rate=lr)
    model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])
    history = model.fit(
        X_train, y_train,
        validation_data=(X_val, y_val),
        epochs=epochs,
        batch_size=batch_size,
        callbacks=[checkpoint_callback, early_stopping]
    )
    return model, history

In [15]:
model, history = train_model("adagrad", 100, 64, 0.01)

Epoch 1/100
[1m130/134[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 15ms/step - accuracy: 0.4716 - loss: 0.6983
Epoch 1: val_accuracy improved from -inf to 0.48124, saving model to model_mean.keras
[1m134/134[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 24ms/step - accuracy: 0.4718 - loss: 0.6983 - val_accuracy: 0.4812 - val_loss: 0.6943
Epoch 2/100
[1m131/134[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 7ms/step - accuracy: 0.4877 - loss: 0.6949
Epoch 2: val_accuracy improved from 0.48124 to 0.49812, saving model to model_mean.keras
[1m134/134[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 18ms/step - accuracy: 0.4879 - loss: 0.6949 - val_accuracy: 0.4981 - val_loss: 0.6931
Epoch 3/100
[1m134/134[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step - accuracy: 0.5035 - loss: 0.6935
Epoch 3: val_accuracy improved from 0.49812 to 0.51782, saving model to model_mean.keras
[1m134/134[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3

In [16]:
best_model = tf.keras.models.load_model("model_mean.keras")
accuracy = best_model.evaluate(X_test, y_test)
print(f"Test accuracy: {accuracy[1] * 100:.2f}%")

[1m34/34[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - accuracy: 0.5962 - loss: 0.6690
Test accuracy: 64.26%


#### Max pooling

In [17]:
from tensorflow.keras.layers import GlobalMaxPooling1D
def train_model(optimizer, epochs, batch_size, lr):
    tf.random.set_seed(0)
    np.random.seed(0)
    random.seed(0)
    early_stopping = tf.keras.callbacks.EarlyStopping(
        monitor='val_accuracy',
        patience=3,
        restore_best_weights=True
    )
    checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
        filepath="model_max.keras", 
        monitor='val_accuracy',            
        save_best_only=True,           
        mode='max',                 
        save_weights_only=False,       
        verbose=1
    )
    model = Sequential([
        Embedding(input_dim=vocab_size,
                  output_dim=embedding_dim,
                  weights=[embedding_matrix],
                  trainable=False), 
        SimpleRNN(16, return_sequences=True),
        GlobalMaxPooling1D(),
        Dense(1, activation='sigmoid')
    ])
    if optimizer == 'adam': optimizer = tf.keras.optimizers.Adam(learning_rate=lr)
    elif optimizer == 'sgd': optimizer = tf.keras.optimizers.SGD(learning_rate=lr)
    elif optimizer == 'rmsprop': optimizer = tf.keras.optimizers.RMSprop(learning_rate=lr)
    else: optimizer = tf.keras.optimizers.Adagrad(learning_rate=lr)
    model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])
    history = model.fit(
        X_train, y_train,
        validation_data=(X_val, y_val),
        epochs=epochs,
        batch_size=batch_size,
        callbacks=[checkpoint_callback, early_stopping]
    )
    return model, history

In [18]:
model, history = train_model("adagrad", 100, 64, 0.01)

Epoch 1/100
[1m131/134[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 14ms/step - accuracy: 0.5103 - loss: 0.6999
Epoch 1: val_accuracy improved from -inf to 0.53377, saving model to model_max.keras
[1m134/134[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 29ms/step - accuracy: 0.5103 - loss: 0.6998 - val_accuracy: 0.5338 - val_loss: 0.6900
Epoch 2/100
[1m134/134[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - accuracy: 0.5525 - loss: 0.6867
Epoch 2: val_accuracy improved from 0.53377 to 0.55253, saving model to model_max.keras
[1m134/134[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 16ms/step - accuracy: 0.5525 - loss: 0.6867 - val_accuracy: 0.5525 - val_loss: 0.6854
Epoch 3/100
[1m131/134[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 9ms/step - accuracy: 0.5703 - loss: 0.6803
Epoch 3: val_accuracy improved from 0.55253 to 0.58443, saving model to model_max.keras
[1m134/134[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 20ms/

In [19]:
best_model = tf.keras.models.load_model("model_max.keras")
accuracy = best_model.evaluate(X_test, y_test)
print(f"Test accuracy: {accuracy[1] * 100:.2f}%")

[1m34/34[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - accuracy: 0.6748 - loss: 0.6061
Test accuracy: 71.20%


#### Dense layer

In [20]:
from tensorflow.keras.layers import Flatten
def train_model(optimizer, epochs, batch_size, lr):
    tf.random.set_seed(0)
    np.random.seed(0)
    random.seed(0)
    early_stopping = tf.keras.callbacks.EarlyStopping(
        monitor='val_accuracy',
        patience=3,
        restore_best_weights=True
    )
    checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
        filepath="model_dense.keras", 
        monitor='val_accuracy',            
        save_best_only=True,           
        mode='max',                 
        save_weights_only=False,       
        verbose=1
    )
    model = Sequential([
        Embedding(input_dim=vocab_size,
                  output_dim=embedding_dim,
                  weights=[embedding_matrix],
                  trainable=False),
        SimpleRNN(16, return_sequences=True),
        Flatten(),
        Dense(62, activation='relu'),
        Dense(1, activation='sigmoid')
    ])
    if optimizer == 'adam': optimizer = tf.keras.optimizers.Adam(learning_rate=lr)
    elif optimizer == 'sgd': optimizer = tf.keras.optimizers.SGD(learning_rate=lr)
    elif optimizer == 'rmsprop': optimizer = tf.keras.optimizers.RMSprop(learning_rate=lr)
    else: optimizer = tf.keras.optimizers.Adagrad(learning_rate=lr)
    model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])
    history = model.fit(
        X_train, y_train,
        validation_data=(X_val, y_val),
        epochs=epochs,
        batch_size=batch_size,
        callbacks=[checkpoint_callback, early_stopping]
    )
    return model, history

In [21]:
model, history = train_model("adagrad", 100, 64, 0.01)

Epoch 1/100
[1m134/134[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step - accuracy: 0.5386 - loss: 0.7048
Epoch 1: val_accuracy improved from -inf to 0.53471, saving model to model_dense.keras
[1m134/134[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 23ms/step - accuracy: 0.5388 - loss: 0.7046 - val_accuracy: 0.5347 - val_loss: 0.6956
Epoch 2/100
[1m129/134[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 8ms/step - accuracy: 0.6151 - loss: 0.6546
Epoch 2: val_accuracy improved from 0.53471 to 0.56191, saving model to model_dense.keras
[1m134/134[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 14ms/step - accuracy: 0.6153 - loss: 0.6544 - val_accuracy: 0.5619 - val_loss: 0.6918
Epoch 3/100
[1m128/134[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 7ms/step - accuracy: 0.6556 - loss: 0.6250
Epoch 3: val_accuracy improved from 0.56191 to 0.59568, saving model to model_dense.keras
[1m134/134[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m

In [22]:
best_model = tf.keras.models.load_model("model_dense.keras")
accuracy = best_model.evaluate(X_test, y_test)
print(f"Test accuracy: {accuracy[1] * 100:.2f}%")

[1m34/34[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - accuracy: 0.6216 - loss: 0.7153
Test accuracy: 71.48%
