In [1]:
# This cell runs first on purpose: the cuBLAS workspace setting is exported
# before the notebook imports torch or touches CUDA. PyTorch's reproducibility
# notes list ":4096:8" as one of the accepted values when
# torch.use_deterministic_algorithms(True) is used on CUDA >= 10.2, which this
# notebook enables in its seeding cell.
import os
os.environ["CUBLAS_WORKSPACE_CONFIG"] = ":4096:8" 

In [2]:
# --- Standard library -------------------------------------------------------
import time
import sys

# --- Third-party (original relative order kept on purpose) ------------------
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from tqdm import tqdm

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torch.optim.lr_scheduler import ReduceLROnPlateau
from transformers import RobertaTokenizer, RobertaModel

# --- Display options --------------------------------------------------------
# Wider console width and no cap on the number of columns pandas will show.
pd.set_option("display.width", 120)
pd.set_option("display.max_columns", None)

# --- Project-local modules --------------------------------------------------
# The `src` package is resolved relative to the parent of the current working
# directory, so that directory is appended to sys.path (once) before the
# imports below. `os` is the module imported in the notebook's first cell.
parent_of_cwd = os.path.join(os.getcwd(), '..')
project_root = os.path.abspath(parent_of_cwd)
if project_root not in sys.path:
    sys.path.append(project_root)

import src.config as config
from src.trainer import train_model, compute_accuracy, print_summary

In [3]:
# Seed the NumPy and PyTorch global RNGs from the project-wide seed so a
# fresh-kernel "Run All" starts from the same random state every time.
np.random.seed(config.RANDOM_SEED)
torch.manual_seed(config.RANDOM_SEED)
if torch.cuda.is_available():
    # Seed every visible GPU, not only the currently selected one.
    torch.cuda.manual_seed_all(config.RANDOM_SEED)

# Ask cuDNN / PyTorch for deterministic kernels and disable the autotuner,
# whose kernel choice can vary between runs.
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False
torch.use_deterministic_algorithms(True)

# Turning TF32 off is best-effort (the flags may be unavailable on some torch
# builds), but a failure is reported instead of being swallowed silently so
# the reader knows the precision setting was not applied.
try:
    torch.backends.cuda.matmul.allow_tf32 = False
    torch.backends.cudnn.allow_tf32 = False
except Exception as tf32_error:
    print(f"Warning: could not disable TF32 ({tf32_error!r}); continuing with defaults.")

# Dedicated seeded generator.
# NOTE(review): nothing in the visible notebook code consumes `g`; pass it to
# a DataLoader (generator=g) or drop it.
g = torch.Generator().manual_seed(config.RANDOM_SEED)

# Plain device string handed to the training helper further down.
device = "cuda" if torch.cuda.is_available() else "cpu"

In [4]:
class Roberta_Dataset(Dataset):
    """Map-style dataset that tokenizes one text per lookup.

    Args:
        texts: Sized iterable of raw texts (list, NumPy array, pandas Series, ...).
        labels: Sized iterable of integer-convertible labels, aligned with ``texts``.
        tokenizer: Callable tokenizer; it is invoked with the keyword arguments
            shown in ``__getitem__`` and must return a mapping holding
            ``'input_ids'`` and ``'attention_mask'`` tensors.
        max_len: Length every example is padded / truncated to.

    Raises:
        ValueError: If ``texts`` and ``labels`` differ in length.
    """

    def __init__(self, texts, labels, tokenizer, max_len):
        # Materialize both inputs as lists so that __getitem__ always indexes
        # by position. Indexing a pandas Series with an int is label-based and
        # fails when the index is not 0..n-1 (e.g. after a split or filter).
        self.texts = list(texts)
        self.labels = list(labels)
        if len(self.texts) != len(self.labels):
            raise ValueError(
                f"texts and labels must have the same length, "
                f"got {len(self.texts)} and {len(self.labels)}"
            )
        self.tokenizer = tokenizer
        self.max_len = max_len

    def __len__(self):
        """Return the number of examples."""
        return len(self.texts)

    def __getitem__(self, idx):
        """Tokenize example ``idx`` and return its tensors.

        Returns:
            dict with 1-D ``'input_ids'`` and ``'attention_mask'`` tensors
            (the tokenizer output flattened) and a scalar ``torch.long``
            ``'label'`` tensor.
        """
        text = str(self.texts[idx])
        label = int(self.labels[idx])

        # Calling the tokenizer directly replaces the deprecated
        # ``encode_plus`` while taking the same keyword arguments.
        encoding = self.tokenizer(
            text,
            add_special_tokens=True,
            max_length=self.max_len,
            return_token_type_ids=False,
            padding='max_length',
            truncation=True,
            return_attention_mask=True,
            return_tensors='pt',
        )

        # return_tensors='pt' yields a leading batch axis of size 1; flatten
        # removes it so the DataLoader can stack examples itself.
        return {
            'input_ids': encoding['input_ids'].flatten(),
            'attention_mask': encoding['attention_mask'].flatten(),
            'label': torch.tensor(label, dtype=torch.long)
        }

In [5]:
class RobertaClassifier(nn.Module):
    """RoBERTa encoder followed by dropout and a single linear output layer.

    Args:
        dropout_rate: Dropout probability applied before the linear layer.
        model_name: Checkpoint identifier passed to
            ``RobertaModel.from_pretrained`` (defaults to ``'roberta-base'``).
        num_classes: Number of output logits (defaults to 2).
    """

    def __init__(self, dropout_rate=0.3, model_name='roberta-base', num_classes=2):
        super().__init__()

        # The encoder is loaded with its default configuration so that the
        # parameter names in saved state dicts stay the same as before.
        self.roberta = RobertaModel.from_pretrained(model_name)
        self.drop = nn.Dropout(dropout_rate)
        self.out = nn.Linear(self.roberta.config.hidden_size, num_classes)

    def forward(self, input_ids, attention_mask):
        """Return logits computed from the hidden state at sequence position 0.

        Args:
            input_ids: Token id tensor forwarded to the encoder.
            attention_mask: Attention mask tensor forwarded to the encoder.

        Returns:
            Output of the linear layer; its last dimension is ``num_classes``.
        """
        outputs = self.roberta(
            input_ids=input_ids,
            attention_mask=attention_mask
        )
        # Take the first token's vector from last_hidden_state directly; the
        # encoder's own pooled output is not used.
        first_token_state = outputs.last_hidden_state[:, 0, :]
        return self.out(self.drop(first_token_state))

In [6]:
def run_roberta_model(
    x_train, y_train,
    x_val, y_val,
    x_test, y_test,
    epochs=6,
    lr=2e-5,
    batch_size=32,
    dropout_rate=0.3,
    max_len=256,
    patience=2,
    device=None,
    seed=None,
):
    """Fine-tune a RobertaClassifier on the given splits and report the run.

    Args:
        x_train, x_val, x_test: Text columns; each must expose ``.values``
            (e.g. a pandas Series).
        y_train, y_val, y_test: Label columns; each must expose ``.values``.
        epochs: Forwarded to ``train_model``.
        lr: Learning rate for AdamW.
        batch_size: Batch size for all three DataLoaders.
        dropout_rate: Dropout probability of the classifier head.
        max_len: Token length every example is padded / truncated to.
        patience: Forwarded to ``train_model``.
        device: Device forwarded to ``train_model``. When ``None``, "cuda" is
            used if available, otherwise "cpu".
        seed: Optional int. When given, the training shuffle uses its own
            generator seeded with this value, so the batch order does not
            depend on whatever the global RNG state is when the call starts.
            ``None`` keeps the default (global RNG) behaviour.

    Returns:
        Tuple ``(summary, best_state)`` where ``summary`` is what
        ``train_model`` returned and ``best_state`` is
        ``summary.get("best_state_dict")`` (``None`` if that key is absent).
    """
    # Resolve the device here rather than forwarding None; how train_model
    # treats a None device is not visible from this notebook.
    if device is None:
        device = "cuda" if torch.cuda.is_available() else "cpu"

    tokenizer = RobertaTokenizer.from_pretrained('roberta-base')

    train_dataset = Roberta_Dataset(x_train.values, y_train.values, tokenizer, max_len)
    val_dataset = Roberta_Dataset(x_val.values, y_val.values, tokenizer, max_len)
    test_dataset = Roberta_Dataset(x_test.values, y_test.values, tokenizer, max_len)

    # generator=None is DataLoader's default, so omitting `seed` leaves the
    # shuffling exactly as before.
    shuffle_generator = None
    if seed is not None:
        shuffle_generator = torch.Generator().manual_seed(seed)

    train_loader = DataLoader(
        train_dataset,
        batch_size=batch_size,
        shuffle=True,
        generator=shuffle_generator,
    )
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

    model = RobertaClassifier(dropout_rate=dropout_rate)
    optimizer = torch.optim.AdamW(model.parameters(), lr=lr)
    criterion = nn.CrossEntropyLoss(label_smoothing=0.05)
    # mode='max': the scheduler treats a larger monitored value as progress.
    # NOTE(review): which metric is passed to scheduler.step() is decided
    # inside train_model; confirm it is one where higher is better.
    scheduler = ReduceLROnPlateau(optimizer, mode='max', factor=0.1, patience=1)

    summary = train_model(
        model=model,
        train_loader=train_loader,
        val_loader=val_loader,
        test_loader=test_loader,
        optimizer=optimizer,
        criterion=criterion,
        scheduler=scheduler,
        device=device,
        epochs=epochs,
        patience=patience,
    )
    best_induced_test_state = summary.get("best_state_dict")

    print_summary(summary)
    return summary, best_induced_test_state

# IMDb

In [7]:
# Load the three IMDb CSV splits whose paths are defined in src.config.
imdb_train = pd.read_csv(config.CLEAN_IMDB_TRAIN_PATH)
imdb_val = pd.read_csv(config.CLEAN_IMDB_VAL_PATH)
imdb_test = pd.read_csv(config.CLEAN_IMDB_TEST_PATH)

# Pull the text / label columns out of each split, one split per line.
imdb_x_train, imdb_y_train = imdb_train[config.TEXT_COL], imdb_train[config.LABEL_COL]
imdb_x_val, imdb_y_val = imdb_val[config.TEXT_COL], imdb_val[config.LABEL_COL]
imdb_x_test, imdb_y_test = imdb_test[config.TEXT_COL], imdb_test[config.LABEL_COL]

# Keyword bundle whose keys match run_roberta_model's data parameters, so it
# can be splatted into the call with **imdb_data.
imdb_data = dict(
    x_train=imdb_x_train,
    y_train=imdb_y_train,
    x_val=imdb_x_val,
    y_val=imdb_y_val,
    x_test=imdb_x_test,
    y_test=imdb_y_test,
)

In [9]:
# Fine-tune on IMDb. `history` receives the summary returned by
# run_roberta_model and `best_induced_test_state` receives
# summary.get("best_state_dict") from that same function.
# Long-running cell: the saved output of this cell reports a total training
# time of 5613.57 seconds.
history,  best_induced_test_state = run_roberta_model(
    **imdb_data,
    epochs=10,        
    lr=1e-6,         
    batch_size=32,
    dropout_rate=0.3,
    max_len=512,
    patience=2,
    device=device,
)
# Persist whatever state object was returned to the IMDb checkpoint path.
# NOTE(review): this would be None if the summary had no "best_state_dict"
# entry — confirm train_model always provides it.
torch.save( best_induced_test_state, str(config.BEST_ROBERTA_IMDB_PATH))

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
                                                                                    

Stopping early at epoch 8 (no improvement for 2 epochs).
--- Epoch History ---


Unnamed: 0,Epoch,Train Loss,Train Acc,Val Acc,Test Acc,Time (s)
0,1,0.4733,75.35%,92.22%,92.57%,700.32
1,2,0.2773,92.03%,93.42%,93.76%,693.26
2,3,0.2591,93.08%,93.56%,94.04%,695.8
3,4,0.2457,93.77%,93.34%,93.98%,692.27
4,5,0.2383,94.22%,93.82%,94.35%,693.3
5,6,0.2301,94.72%,94.20%,94.50%,712.69
6,7,0.2214,95.06%,93.88%,94.34%,713.81
7,8,0.2156,95.56%,94.06%,94.37%,712.12


--- Training Summary ---
Total epochs run: 8
Total training time: 5613.57 seconds

Best Validation Accuracy: 94.20% (at Epoch 6)
Induced Test Accuracy: 94.50% (at Epoch 6)
Best Ever Test Accuracy: 94.50% (at Epoch 6)



# Rotten Tomatoes

In [9]:
# Load the three Rotten Tomatoes CSV splits whose paths are defined in src.config.
rt_train = pd.read_csv(config.CLEAN_RT_TRAIN_PATH)
rt_val = pd.read_csv(config.CLEAN_RT_VAL_PATH)
rt_test = pd.read_csv(config.CLEAN_RT_TEST_PATH)

# Pull the text / label columns out of each split, one split per line.
rt_x_train, rt_y_train = rt_train[config.TEXT_COL], rt_train[config.LABEL_COL]
rt_x_val, rt_y_val = rt_val[config.TEXT_COL], rt_val[config.LABEL_COL]
rt_x_test, rt_y_test = rt_test[config.TEXT_COL], rt_test[config.LABEL_COL]

# Keyword bundle whose keys match run_roberta_model's data parameters, so it
# can be splatted into the call with **rt_data.
rt_data = dict(
    x_train=rt_x_train,
    y_train=rt_y_train,
    x_val=rt_x_val,
    y_val=rt_y_val,
    x_test=rt_x_test,
    y_test=rt_y_test,
)

In [12]:
# Fine-tune on Rotten Tomatoes. `history` receives the summary returned by
# run_roberta_model and `best_induced_test_state` receives
# summary.get("best_state_dict") from that same function.
# NOTE(review): both names are also assigned by the IMDb run; after this cell
# they refer to the Rotten Tomatoes results only.
history, best_induced_test_state = run_roberta_model(
    **rt_data,
    epochs=15,
    lr=1e-6,
    batch_size=16,
    dropout_rate=0.3,
    max_len=50,
    patience=2,
    device=device,
)
# The path is passed through str() exactly as the IMDb cell does, so both
# checkpoints are written the same way whatever type the config constant has.
torch.save(best_induced_test_state, str(config.BEST_ROBERTA_RT_PATH))

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
                                                                                     

Stopping early at epoch 10 (no improvement for 2 epochs).
--- Epoch History ---


Unnamed: 0,Epoch,Train Loss,Train Acc,Val Acc,Test Acc,Time (s)
0,1,0.6861,57.75%,79.73%,79.64%,46.0
1,2,0.4321,83.72%,86.86%,86.03%,52.93
2,3,0.3631,86.82%,87.75%,86.50%,44.26
3,4,0.3422,88.74%,87.75%,86.75%,45.07
4,5,0.3249,89.54%,88.86%,86.72%,48.91
5,6,0.3098,89.91%,89.09%,87.38%,48.82
6,7,0.295,90.79%,89.09%,87.41%,43.93
7,8,0.2862,91.51%,89.98%,87.26%,46.31
8,9,0.2748,92.48%,89.31%,87.32%,50.63
9,10,0.2612,92.96%,89.31%,87.19%,46.29


--- Training Summary ---
Total epochs run: 10
Total training time: 473.15 seconds

Best Validation Accuracy: 89.98% (at Epoch 8)
Induced Test Accuracy: 87.26% (at Epoch 8)
Best Ever Test Accuracy: 87.41% (at Epoch 7)

