In [17]:
import os
os.environ["CUBLAS_WORKSPACE_CONFIG"] = ":4096:8" 

In [18]:
import time

import sys
import numpy as np
import pandas as pd
pd.set_option("display.width", 120)
pd.set_option("display.max_columns", None)

from sklearn.model_selection import train_test_split
from tqdm import tqdm

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torch.optim.lr_scheduler import ReduceLROnPlateau

import torchtext
from torchtext.vocab import build_vocab_from_iterator, GloVe
from torchtext.data import get_tokenizer
from gensim.models import KeyedVectors

project_root = os.path.abspath(os.path.join(os.getcwd(), '..'))
if project_root not in sys.path:
    sys.path.append(project_root)
import src.config as config
from src.trainer import train_model, compute_accuracy, print_summary 

In [19]:
np.random.seed(config.RANDOM_SEED)
torch.manual_seed(config.RANDOM_SEED)
if torch.cuda.is_available():
    torch.cuda.manual_seed(config.RANDOM_SEED)

torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False
torch.use_deterministic_algorithms(True)

try:
    torch.backends.cuda.matmul.allow_tf32 = False
    torch.backends.cudnn.allow_tf32 = False
except Exception:
    pass

g = torch.Generator().manual_seed(config.RANDOM_SEED)

device = "cuda" if torch.cuda.is_available() else "cpu"

In [20]:
def build_vocab(x_train, tok_min_freq, embed_dim, embed_init, w2v_path):

    special_tokens = ['<unk>', '<pad>']
    vocab = build_vocab_from_iterator(
      x_train.tolist(),
      specials=special_tokens,
      min_freq=tok_min_freq
    )
    vocab.set_default_index(vocab['<unk>'])
    
    if embed_init == "random":
        return vocab

    elif embed_init == "glove":
        glove = GloVe(name='840B', dim=embed_dim)  
        vocab.vectors = glove.get_vecs_by_tokens(vocab.get_itos())
        return vocab

    elif embed_init == "w2v":
        
        try:
            w2v_vectors = KeyedVectors.load(str(w2v_path), mmap='r')
        except FileNotFoundError:
            raise FileNotFoundError(f"Word2Vec file not found at: {w2v_path}")
        
        tokens = vocab.get_itos()
        unk_idx = vocab['<unk>']
        pad_idx = vocab['<pad>']
        
        rng = np.random.default_rng(42)
        unk_vector = rng.normal(0.0, 0.1, size=(embed_dim,)).astype(np.float32)
        mat = np.full((len(tokens), embed_dim), unk_vector, dtype=np.float32)
        
        mat[pad_idx] = np.zeros(embed_dim, dtype=np.float32)
        mat[unk_idx] = unk_vector 
        
        w2v_itos = w2v_vectors.index_to_key
        common_tokens = set(tokens).intersection(set(w2v_itos))
        
        common_tokens_list = list(common_tokens)
        common_vectors = w2v_vectors[common_tokens_list]
        common_indices = [vocab[token] for token in common_tokens_list]
        mat[common_indices] = common_vectors

        vocab.vectors = torch.from_numpy(mat)
        return vocab
    
    else:
        raise ValueError(f"Unknown embed_init type: '{embed_init}'. Should be 'random', 'glove', or 'w2v'.")

In [21]:
class My_Dataset(Dataset):
    
    def __init__(self, x, y, vocab):
        self.x = x
        self.y = y
        self.vocab = vocab
           
    def __len__(self):
        return len(self.x)

    def encode_tokens(self, tokens):
        return self.vocab.lookup_indices(tokens)
    
    def __getitem__(self, idx):
        token = self.x[idx]
        label = self.y[idx]
        return self.encode_tokens(token), int(label)

In [22]:
class DataCollator:
    def __init__(self, pad_idx, max_len):
        self.pad_idx = pad_idx
        self.max_len = max_len

    def __call__(self, batch):
        ids_list, labels = zip(*batch)

        lengths = [len(ids) for ids in ids_list]
        batch_max_len = max(1, min(self.max_len, max(lengths)))
        batch_size = len(ids_list)

        x = torch.full(
            (batch_size, batch_max_len),
            fill_value=self.pad_idx,
            dtype=torch.long
        )
        l = torch.empty(batch_size, dtype=torch.long)

        for i, ids in enumerate(ids_list):
            L_eff = min(len(ids), batch_max_len)
            x[i, :L_eff] = torch.as_tensor(ids[:L_eff], dtype=torch.long)
            l[i] = L_eff

        y = torch.as_tensor(labels, dtype=torch.long)
        
        return {
            'x': x,               
            'label': y,                  
            'lengths': l                   
        }

In [23]:
class LSTMTextClassifier(nn.Module):
    def __init__(
        self,
        vocab_size,
        embed_dim,
        hidden_dim,
        hidden_fc_dim,
        num_layers=1,
        bidirectional=False,
        pad_idx=None,
        dropout_emb=0.2,
        dropout_rep=0.3,
        dropout_fc=0.3,
        dropout_lstm=0.3,
        num_lin_layers=1,
        pooling=None
    ):
        super().__init__()

        self.embedding = nn.Embedding(vocab_size, embed_dim, padding_idx=pad_idx)
        self.drop_emb = nn.Dropout(dropout_emb)

        self.lstm = nn.LSTM(
            input_size=embed_dim,
            hidden_size=hidden_dim,
            num_layers=num_layers,
            bidirectional=bidirectional,
            batch_first=True,
            dropout=(0.0 if num_layers <= 1 else dropout_lstm),
        )

        d_out = hidden_dim * (2 if bidirectional else 1)

        self.post = nn.Sequential(
            nn.LayerNorm(d_out),
            nn.Dropout(dropout_rep),
        )

        if num_lin_layers == 1:
            self.head = nn.Linear(d_out, 2)
        else:
            self.head = nn.Sequential(
                nn.Linear(d_out, hidden_fc_dim),
                nn.LayerNorm(hidden_fc_dim),
                nn.ReLU(),
                nn.Dropout(dropout_fc),
                nn.Linear(hidden_fc_dim, 2)
            )

        if pooling == "attention":
            self.W_a = nn.Linear(d_out, d_out)
            self.v_a = nn.Linear(d_out, 1, bias=False)
            self.attention_dropout = nn.Dropout(dropout_emb) 
            self.pool_fn = self.masked_attention
            
        elif pooling == "mean":
            self.pool_fn = self.masked_mean
            
        elif pooling == "max":
            self.pool_fn = self.masked_max
            
        else:
            raise ValueError(f"Unknown pooling type: '{embed_init}'. Should be 'attention', 'max', or 'mean'.")
            
            
    def masked_max(self, x, lengths):
        B, T, D = x.size()
        mask = (torch.arange(T, device=x.device, dtype=torch.long)[None, :] < lengths[:, None]).unsqueeze(-1)  
        x_masked = x.masked_fill(~mask, float("-inf"))
        return x_masked.max(dim=1)[0]   


    def masked_mean(self, x, lengths):
        B, T, D = x.size()
        mask = (torch.arange(T, device=x.device)[None, :] < lengths[:, None]).unsqueeze(-1)
        s = (x * mask).sum(dim=1)
        return s / lengths.clamp(min=1).unsqueeze(-1).to(x.dtype)

    def masked_attention(self, x, lengths):
        B, T, D = x.size()
        attn_h = self.attention_dropout(torch.tanh(self.W_a(x)))
        scores = self.v_a(attn_h).squeeze(2)
        mask = (torch.arange(T, device=x.device)[None, :] < lengths[:, None])
        scores = scores.masked_fill(~mask, float("-inf"))
        alpha = torch.softmax(scores, dim=1)
        rep = torch.bmm(alpha.unsqueeze(1), x).squeeze(1)
        return rep


    def forward(self, x, lengths):
        e = self.drop_emb(self.embedding(x))
        packed = nn.utils.rnn.pack_padded_sequence(
            e, lengths.cpu(), batch_first=True, enforce_sorted=False
        )
        packed_out, _ = self.lstm(packed)
        outputs, _ = nn.utils.rnn.pad_packed_sequence(
            packed_out, batch_first=True, total_length=x.size(1)
        )
        rep = self.pool_fn(outputs, lengths)
        rep = self.post(rep)
        return self.head(rep)

In [24]:
def run_model(
    x_train, y_train,
    x_val, y_val,
    x_test, y_test,
    embed_dim=128,
    embed_init="random",
    w2v_path=None,
    freez_embed=False,
    hidden_dim=256,
    hidden_fc_dim=128,
    num_layers=1,
    bidirectional=True,
    dropout_emb=0.35,
    dropout_rep=0.45,
    dropout_fc=0.45,
    dropout_lstm=0.45,
    epochs=7,
    weight_decay=1e-4,
    lr=1e-3,
    max_len=200,
    batch_size=32,
    device=None,
    num_lin_layers=1,
    tok_min_freq=10,
    pooling=None,
    patience=3,
):

    vocab = build_vocab(
        x_train,
        tok_min_freq=tok_min_freq,
        embed_init=embed_init,
        embed_dim=embed_dim,
        w2v_path=w2v_path,
    )

    train_dataset = My_Dataset(x_train, y_train, vocab)
    val_dataset = My_Dataset(x_val, y_val, vocab)
    test_dataset = My_Dataset(x_test, y_test, vocab)

    train_loader = DataLoader(
        train_dataset,
        batch_size=batch_size,
        shuffle=True,
        collate_fn=DataCollator(pad_idx=vocab['<pad>'], max_len=max_len),
        pin_memory=True,
        num_workers=0,
    )

    val_loader = DataLoader(
        val_dataset,
        batch_size=batch_size,
        shuffle=False,
        collate_fn=DataCollator(pad_idx=vocab['<pad>'], max_len=max_len),
        pin_memory=True,
        num_workers=0,
    )

    test_loader = DataLoader(
        test_dataset,
        batch_size=batch_size,
        shuffle=False,
        collate_fn=DataCollator(pad_idx=vocab['<pad>'], max_len=max_len),
        pin_memory=True,
        num_workers=0,
    )

    model = LSTMTextClassifier(
        vocab_size=len(vocab),
        embed_dim=embed_dim,
        hidden_dim=hidden_dim,
        hidden_fc_dim=hidden_fc_dim,
        num_layers=num_layers,
        bidirectional=bidirectional,
        pad_idx=vocab['<pad>'],
        dropout_emb=dropout_emb,
        dropout_rep=dropout_rep,
        dropout_fc=dropout_fc,
        dropout_lstm=dropout_lstm,
        num_lin_layers=num_lin_layers,
        pooling=pooling,
    )

    if embed_init != "random":
        model.embedding.weight.data.copy_(vocab.vectors)

    if freez_embed:
        model.embedding.weight.requires_grad = False

    optimizer = torch.optim.AdamW(model.parameters(), lr=lr, weight_decay=weight_decay)

    scheduler = ReduceLROnPlateau(
        optimizer,
        mode='max',      
        factor=0.1,      
        patience=3,     
        min_lr=1e-6      
)

    criterion = nn.CrossEntropyLoss(label_smoothing=0.05)

    summary = train_model(
        model=model,
        train_loader=train_loader,
        val_loader=val_loader,
        test_loader=test_loader,
        optimizer=optimizer,
        criterion=criterion,
        scheduler=scheduler,
        device=device,
        epochs=epochs,
        patience=patience,
    )

    print_summary(summary)
    return summary

In [25]:
tokenizer = get_tokenizer("basic_english")

# Data Import

In [26]:
imdb_train = pd.read_csv(config.CLEAN_IMDB_TRAIN_PATH)
imdb_val = pd.read_csv(config.CLEAN_IMDB_VAL_PATH)
imdb_test = pd.read_csv(config.CLEAN_IMDB_TEST_PATH)

imdb_x_train = imdb_train[config.TEXT_COL].apply(tokenizer)
imdb_y_train = imdb_train[config.LABEL_COL]
imdb_x_val = imdb_val[config.TEXT_COL].apply(tokenizer)
imdb_y_val = imdb_val[config.LABEL_COL]
imdb_x_test = imdb_test[config.TEXT_COL].apply(tokenizer)
imdb_y_test = imdb_test[config.LABEL_COL]
imdb_data = {
    "x_train": imdb_x_train,
    "y_train": imdb_y_train,
    "x_val": imdb_x_val,
    "y_val": imdb_y_val,
    "x_test": imdb_x_test,
    "y_test": imdb_y_test,
}

In [27]:
rt_train = pd.read_csv(config.CLEAN_RT_TRAIN_PATH)
rt_val = pd.read_csv(config.CLEAN_RT_VAL_PATH)
rt_test = pd.read_csv(config.CLEAN_RT_TEST_PATH)

rt_x_train = imdb_train[config.TEXT_COL].apply(tokenizer)
rt_y_train = imdb_train[config.LABEL_COL]
rt_x_val = imdb_val[config.TEXT_COL].apply(tokenizer)
rt_y_val = imdb_val[config.LABEL_COL]
rt_x_test = imdb_test[config.TEXT_COL].apply(tokenizer)
rt_y_test = imdb_test[config.LABEL_COL]
rt_data = {
    "x_train": rt_x_train,
    "y_train": rt_y_train,
    "x_val": rt_x_val,
    "y_val": rt_y_val,
    "x_test": rt_x_test,
    "y_test": rt_y_test,
}

# Experiments Across Embedding Settings

## Random Ammbding

### Frozen

#### IMDb

In [12]:
history = run_model(
    **imdb_data,
    
    embed_init = "random",
    embed_dim=200,
    hidden_dim=200,
    num_layers=2,
    bidirectional=True,
    freez_embed=True,
    pooling="attention",
    
    
    dropout_emb=0.35,
    dropout_rep=0.5,
    dropout_lstm=0.5,
    
    epochs=15,
    lr=1e-3,
    weight_decay=1e-4,
    max_len=900,
    batch_size=64,
    
    tok_min_freq=30,
    
    device=device,
)

                                                                                     

Stopping early at epoch 13 (no improvement for 3 epochs).
--- Epoch History ---


Unnamed: 0,Epoch,Train Loss,Train Acc,Val Acc,Test Acc,Time (s)
0,1,0.5858,70.04%,77.52%,78.09%,69.44
1,2,0.4341,82.17%,83.62%,83.85%,68.05
2,3,0.3799,85.58%,86.14%,86.14%,64.09
3,4,0.3479,87.72%,88.52%,88.24%,69.82
4,5,0.3147,89.59%,87.16%,87.02%,62.89
5,6,0.2887,91.20%,88.50%,88.46%,64.22
6,7,0.2638,92.66%,88.94%,88.88%,58.18
7,8,0.2539,93.09%,88.88%,88.80%,56.33
8,9,0.2372,93.84%,88.72%,88.98%,57.62
9,10,0.2237,94.84%,89.56%,89.45%,54.88


--- Training Summary ---
Total epochs run: 13
Total training time: 794.39 seconds

Best Validation Accuracy: 89.56% (at Epoch 10)
Induced Test Accuracy: 89.45% (at Epoch 10)
Best Ever Test Accuracy: 89.45% (at Epoch 10)



#### Rotten Tomatoes

In [13]:
history = run_model(
    **rt_data,
    
    embed_dim=125,
    hidden_dim=125,
    num_layers=2,
    bidirectional=True,
    freez_embed=True,
    
    dropout_emb=0.35,
    dropout_rep=0.55,
    dropout_lstm=0.55,
    
    epochs=15,
    lr=1e-3,
    weight_decay=1e-4,
    max_len=50,
    batch_size=25,
    
    tok_min_freq=3,
    pooling="attention",
    
    device=device,
)

                                                                                     

Stopping early at epoch 13 (no improvement for 3 epochs).
--- Epoch History ---


Unnamed: 0,Epoch,Train Loss,Train Acc,Val Acc,Test Acc,Time (s)
0,1,0.6672,59.69%,67.38%,67.57%,17.32
1,2,0.6017,67.61%,70.04%,69.61%,17.01
2,3,0.5637,71.60%,74.10%,73.47%,17.02
3,4,0.533,73.83%,76.40%,74.36%,16.94
4,5,0.5037,76.67%,75.88%,75.02%,17.81
5,6,0.4817,78.04%,77.18%,75.60%,17.06
6,7,0.4626,79.83%,76.90%,75.35%,32.86
7,8,0.4447,81.14%,77.30%,75.76%,18.56
8,9,0.433,82.02%,76.44%,74.71%,17.78
9,10,0.4188,82.55%,77.36%,75.58%,17.51


--- Training Summary ---
Total epochs run: 13
Total training time: 240.49 seconds

Best Validation Accuracy: 77.36% (at Epoch 10)
Induced Test Accuracy: 75.58% (at Epoch 10)
Best Ever Test Accuracy: 75.76% (at Epoch 8)



### Fine-Tuned Embeddings

#### IMDb

In [14]:
history = run_model(
     **imdb_data,
    
    embed_init = "random",
    embed_dim=200,
    hidden_dim=200,
    num_layers=2,
    bidirectional=True,
    pooling="attention",
    
    
    dropout_emb=0.35,
    dropout_rep=0.5,
    dropout_lstm=0.5,
    
    epochs=15,
    lr=1e-3,
    weight_decay=1e-4,
    max_len=900,
    batch_size=64,
    
    tok_min_freq=30,
    
    device=device,
)

                                                                                     

Stopping early at epoch 11 (no improvement for 3 epochs).
--- Epoch History ---


Unnamed: 0,Epoch,Train Loss,Train Acc,Val Acc,Test Acc,Time (s)
0,1,0.574,71.13%,84.98%,84.78%,60.01
1,2,0.3926,84.86%,84.78%,85.10%,59.0
2,3,0.3326,88.80%,86.10%,86.04%,67.9
3,4,0.3027,90.62%,88.94%,88.28%,57.54
4,5,0.2727,92.18%,90.62%,90.04%,59.13
5,6,0.248,93.54%,88.28%,87.84%,62.91
6,7,0.2275,94.55%,90.20%,89.55%,66.66
7,8,0.2117,95.56%,90.90%,90.07%,81.29
8,9,0.1967,96.28%,89.22%,88.57%,61.74
9,10,0.1859,96.84%,90.16%,89.62%,61.57


--- Training Summary ---
Total epochs run: 11
Total training time: 700.74 seconds

Best Validation Accuracy: 90.90% (at Epoch 8)
Induced Test Accuracy: 90.07% (at Epoch 8)
Best Ever Test Accuracy: 90.07% (at Epoch 8)



#### Rotten Tomatoes

In [15]:
history = run_model(
    **rt_data,
    
    embed_dim=125,
    hidden_dim=125,
    num_layers=2,
    bidirectional=True,
    
    dropout_emb=0.35,
    dropout_rep=0.55,
    dropout_lstm=0.55,
    
    epochs=15,
    lr=1e-3,
    weight_decay=1e-4,
    max_len=50,
    batch_size=25,
    
    tok_min_freq=3,
    pooling="attention",
    
    device=device,
)

                                                                                    

Stopping early at epoch 9 (no improvement for 3 epochs).
--- Epoch History ---


Unnamed: 0,Epoch,Train Loss,Train Acc,Val Acc,Test Acc,Time (s)
0,1,0.6467,62.64%,72.08%,70.78%,18.68
1,2,0.5442,73.20%,76.66%,74.49%,18.05
2,3,0.4785,78.86%,77.74%,75.77%,18.13
3,4,0.4302,82.30%,78.14%,75.87%,18.29
4,5,0.3924,84.82%,78.14%,76.30%,18.12
5,6,0.3585,87.14%,79.26%,76.29%,17.74
6,7,0.3262,88.94%,78.98%,76.14%,17.58
7,8,0.2979,90.50%,78.94%,76.00%,17.48
8,9,0.2786,91.83%,78.56%,75.27%,17.4


--- Training Summary ---
Total epochs run: 9
Total training time: 161.47 seconds

Best Validation Accuracy: 79.26% (at Epoch 6)
Induced Test Accuracy: 76.29% (at Epoch 6)
Best Ever Test Accuracy: 76.30% (at Epoch 5)



## Pre-trained GloVe Embeddings

### Frozen 

#### IMDb

In [12]:
history = run_model(
    **imdb_data,
    
    embed_init = "glove",
    embed_dim=300,
    hidden_dim=200,
    num_layers=2,
    bidirectional=True,
    pooling="attention",
    freez_embed=True,
    
    dropout_emb=0.35,
    dropout_rep=0.5,
    dropout_lstm=0.5,
    
    epochs=20,
    lr=1e-3,
    weight_decay=1e-4,
    max_len=900,
    batch_size=64,
    
    tok_min_freq=30,
    
    device=device,
)

                                                                                     

Stopping early at epoch 11 (no improvement for 3 epochs).
--- Epoch History ---


Unnamed: 0,Epoch,Train Loss,Train Acc,Val Acc,Test Acc,Time (s)
0,1,0.4709,79.29%,82.94%,83.24%,56.9
1,2,0.3679,86.84%,87.10%,87.05%,59.24
2,3,0.3421,88.28%,89.46%,89.86%,57.37
3,4,0.3353,88.60%,89.82%,90.18%,55.87
4,5,0.3153,89.69%,89.82%,89.78%,57.88
5,6,0.3005,90.53%,89.78%,90.22%,66.97
6,7,0.2876,91.36%,90.60%,90.30%,60.24
7,8,0.2796,91.84%,91.00%,91.07%,57.43
8,9,0.2663,92.56%,90.50%,90.90%,65.6
9,10,0.2504,93.55%,90.26%,90.70%,66.42


--- Training Summary ---
Total epochs run: 11
Total training time: 675.74 seconds

Best Validation Accuracy: 91.00% (at Epoch 8)
Induced Test Accuracy: 91.07% (at Epoch 8)
Best Ever Test Accuracy: 91.07% (at Epoch 8)



#### Rotten Tomatoes

In [12]:
history = run_model(
    **rt_data,
    
    embed_dim=300,
    hidden_dim=300,
    
    num_layers=1,
    bidirectional=True,
    embed_init = "glove",
    dropout_emb=0.35,
    dropout_rep=0.55,
    dropout_lstm=0.55,
    freez_embed=True,
    
    epochs=15,
    lr=1e-3,
    weight_decay=1e-4,
    max_len=50,
    batch_size=16,
    
    tok_min_freq=3,
    pooling="attention",
    
    device=device,
)

                                                                                       

Stopping early at epoch 13 (no improvement for 3 epochs).
--- Epoch History ---


Unnamed: 0,Epoch,Train Loss,Train Acc,Val Acc,Test Acc,Time (s)
0,1,0.5785,70.97%,76.30%,76.56%,21.68
1,2,0.5216,75.66%,76.06%,75.64%,21.43
2,3,0.4989,77.16%,77.22%,77.62%,21.17
3,4,0.4798,78.68%,79.28%,78.72%,20.78
4,5,0.4615,80.16%,79.28%,78.47%,20.69
5,6,0.443,81.10%,79.64%,78.90%,20.94
6,7,0.4233,82.48%,79.90%,78.65%,21.13
7,8,0.404,83.80%,79.50%,78.91%,21.53
8,9,0.3837,85.31%,79.44%,78.76%,21.36
9,10,0.3625,86.71%,80.48%,78.90%,20.78


--- Training Summary ---
Total epochs run: 13
Total training time: 273.56 seconds

Best Validation Accuracy: 80.48% (at Epoch 10)
Induced Test Accuracy: 78.90% (at Epoch 10)
Best Ever Test Accuracy: 78.91% (at Epoch 8)



### Fine-Tuned Embeddings
#### IMDb

In [13]:
history = run_model(
    **imdb_data,
    
    embed_init = "glove",
    embed_dim=300,
    hidden_dim=200,
    num_layers=2,
    bidirectional=True,
    pooling="attention",
    
    dropout_emb=0.35,
    dropout_rep=0.5,
    dropout_lstm=0.5,
    
    epochs=20,
    lr=1e-3,
    weight_decay=1e-4,
    max_len=900,
    batch_size=64,
    
    tok_min_freq=30,
    
    device=device,
)

                                                                                    

Stopping early at epoch 6 (no improvement for 3 epochs).
--- Epoch History ---


Unnamed: 0,Epoch,Train Loss,Train Acc,Val Acc,Test Acc,Time (s)
0,1,0.4885,77.49%,89.70%,89.74%,57.79
1,2,0.2997,90.94%,91.08%,91.06%,57.66
2,3,0.2529,93.41%,91.58%,91.01%,57.03
3,4,0.216,95.33%,91.14%,90.54%,58.24
4,5,0.1926,96.56%,90.42%,89.51%,57.92
5,6,0.175,97.58%,91.08%,90.03%,58.24


--- Training Summary ---
Total epochs run: 6
Total training time: 346.88 seconds

Best Validation Accuracy: 91.58% (at Epoch 3)
Induced Test Accuracy: 91.01% (at Epoch 3)
Best Ever Test Accuracy: 91.06% (at Epoch 2)



#### Rotten Tomatoes

In [14]:
history = run_model(
    **rt_data,
    
    embed_dim=300,
    hidden_dim=300,
    
    num_layers=1,
    bidirectional=True,
    embed_init = "glove",
    dropout_emb=0.35,
    dropout_rep=0.55,
    dropout_lstm=0.55,
    
    epochs=15,
    lr=1e-3,
    weight_decay=1e-4,
    max_len=50,
    batch_size=28,
    
    tok_min_freq=3,
    pooling="attention",
    
    device=device,
)

                                                                                    

Stopping early at epoch 5 (no improvement for 3 epochs).
--- Epoch History ---


Unnamed: 0,Epoch,Train Loss,Train Acc,Val Acc,Test Acc,Time (s)
0,1,0.5537,73.00%,79.16%,78.76%,14.66
1,2,0.4255,82.50%,81.18%,78.06%,14.6
2,3,0.3309,88.97%,80.58%,76.19%,14.43
3,4,0.2583,93.08%,79.84%,75.74%,14.56
4,5,0.2112,95.80%,79.38%,75.44%,14.64


--- Training Summary ---
Total epochs run: 5
Total training time: 72.89 seconds

Best Validation Accuracy: 81.18% (at Epoch 2)
Induced Test Accuracy: 78.06% (at Epoch 2)
Best Ever Test Accuracy: 78.76% (at Epoch 1)



## Pre-trained Word2Vec Embeddings (Per Corpus)
### Frozen 
#### IMDb

In [28]:
history = run_model(
    **imdb_data,
    
    embed_init = "w2v",
    w2v_path=config.W2V_VECTORS_PATH,
    freez_embed=True,
    embed_dim=256,
    hidden_dim=200,
    num_layers=2,
    bidirectional=True,
    pooling="attention",
    
    dropout_emb=0.35,
    dropout_rep=0.5,
    dropout_lstm=0.5,
    
    epochs=20,
    lr=1e-3,
    weight_decay=1e-4,
    max_len=900,
    batch_size=64,
    
    tok_min_freq=30,
    
    device=device,
)

                                                                                    

Stopping early at epoch 9 (no improvement for 3 epochs).
--- Epoch History ---


Unnamed: 0,Epoch,Train Loss,Train Acc,Val Acc,Test Acc,Time (s)
0,1,0.5354,73.41%,88.04%,88.26%,54.92
1,2,0.3658,86.88%,86.88%,86.53%,58.06
2,3,0.3437,88.27%,88.80%,88.86%,55.71
3,4,0.3328,88.66%,90.48%,90.84%,58.62
4,5,0.3195,89.27%,90.76%,91.01%,55.72
5,6,0.3128,89.89%,91.38%,91.29%,56.37
6,7,0.3034,90.40%,91.02%,91.08%,57.48
7,8,0.303,90.44%,89.32%,89.63%,54.98
8,9,0.2893,91.32%,91.20%,91.31%,56.58


--- Training Summary ---
Total epochs run: 9
Total training time: 508.44 seconds

Best Validation Accuracy: 91.38% (at Epoch 6)
Induced Test Accuracy: 91.29% (at Epoch 6)
Best Ever Test Accuracy: 91.31% (at Epoch 9)



#### Rotten Tomatoes

In [31]:
history = run_model(
    **rt_data,
    
    
    num_layers=2,
    bidirectional=True,
    hidden_dim=200,
    embed_init = "w2v",
    w2v_path=config.W2V_VECTORS_PATH,
    embed_dim=256,
    freez_embed=True,
    dropout_emb=0.35,
    dropout_rep=0.55,
    dropout_lstm=0.55,
    
    
    epochs=15,
    lr=1e-3,
    weight_decay=1e-4,
    max_len=50,
    batch_size=28,
    
    tok_min_freq=3,
    pooling="attention",
    
    device=device,
)

                                                                                     

Stopping early at epoch 14 (no improvement for 3 epochs).
--- Epoch History ---


Unnamed: 0,Epoch,Train Loss,Train Acc,Val Acc,Test Acc,Time (s)
0,1,0.5819,69.77%,74.50%,74.51%,15.96
1,2,0.5131,76.00%,78.28%,78.35%,15.91
2,3,0.4959,77.25%,76.36%,76.69%,15.88
3,4,0.483,78.14%,79.98%,79.97%,16.13
4,5,0.4725,78.88%,79.04%,78.46%,16.89
5,6,0.4614,79.48%,81.14%,80.06%,17.37
6,7,0.4518,80.63%,79.68%,78.23%,17.28
7,8,0.4385,81.33%,81.34%,80.14%,15.99
8,9,0.4257,82.31%,81.14%,80.66%,16.14
9,10,0.4148,83.05%,81.42%,80.72%,16.01


--- Training Summary ---
Total epochs run: 14
Total training time: 227.59 seconds

Best Validation Accuracy: 82.20% (at Epoch 11)
Induced Test Accuracy: 80.64% (at Epoch 11)
Best Ever Test Accuracy: 80.72% (at Epoch 10)



### Fine-Tuned Embeddings
#### IMDb

In [32]:
history = run_model(
    **imdb_data,
    
    embed_init = "w2v",
    w2v_path=config.W2V_VECTORS_PATH,
    embed_dim=256,
    
    hidden_dim=200,
    num_layers=2,
    bidirectional=True,
    pooling="attention",
    
    dropout_emb=0.35,
    dropout_rep=0.5,
    dropout_lstm=0.5,
    
    epochs=20,
    lr=1e-3,
    weight_decay=1e-4,
    max_len=900,
    batch_size=64,
    
    tok_min_freq=30,
    
    device=device,
)

                                                                                    

Stopping early at epoch 6 (no improvement for 3 epochs).
--- Epoch History ---


Unnamed: 0,Epoch,Train Loss,Train Acc,Val Acc,Test Acc,Time (s)
0,1,0.4795,77.20%,89.14%,89.00%,58.28
1,2,0.2933,91.10%,89.98%,88.90%,57.08
2,3,0.2497,93.58%,90.44%,90.14%,58.17
3,4,0.2257,95.03%,90.02%,89.85%,57.16
4,5,0.1943,96.56%,90.38%,90.19%,57.79
5,6,0.1738,97.48%,89.58%,89.12%,57.91


--- Training Summary ---
Total epochs run: 6
Total training time: 346.39 seconds

Best Validation Accuracy: 90.44% (at Epoch 3)
Induced Test Accuracy: 90.14% (at Epoch 3)
Best Ever Test Accuracy: 90.19% (at Epoch 5)



#### Rotten Tomatoes

In [33]:
history = run_model(
    **rt_data,
    
    
    
    num_layers=2,
    bidirectional=True,
    hidden_dim=200,
    embed_init = "w2v",
    w2v_path=config.W2V_VECTORS_PATH,
    embed_dim=256,
    
    dropout_emb=0.35,
    dropout_rep=0.55,
    dropout_lstm=0.55,
    
    
    epochs=15,
    lr=1e-3,
    weight_decay=1e-4,
    max_len=50,
    batch_size=28,
    
    tok_min_freq=3,
    pooling="attention",
    
    device=device,
)

                                                                                    

Stopping early at epoch 5 (no improvement for 3 epochs).
--- Epoch History ---


Unnamed: 0,Epoch,Train Loss,Train Acc,Val Acc,Test Acc,Time (s)
0,1,0.5671,71.48%,79.16%,78.47%,17.36
1,2,0.41,83.75%,81.32%,78.23%,17.27
2,3,0.3243,89.41%,79.64%,75.87%,17.48
3,4,0.254,93.29%,79.88%,75.05%,17.72
4,5,0.207,95.97%,78.92%,74.18%,17.92


--- Training Summary ---
Total epochs run: 5
Total training time: 87.75 seconds

Best Validation Accuracy: 81.32% (at Epoch 2)
Induced Test Accuracy: 78.23% (at Epoch 2)
Best Ever Test Accuracy: 78.47% (at Epoch 1)

