In [1]:
!pip install optuna --quiet

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/383.6 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m383.6/383.6 kB[0m [31m15.5 MB/s[0m eta [36m0:00:00[0m
[?25h[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/231.9 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m231.9/231.9 kB[0m [31m12.3 MB/s[0m eta [36m0:00:00[0m
[?25h

In [2]:
import time
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset

from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.utils import to_categorical

from torch.optim import AdamW

from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix, ConfusionMatrixDisplay,accuracy_score
import optuna
import matplotlib.pyplot as plt
import os

In [3]:
from google.colab import drive
drive.mount('/content/drive', force_remount = True)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using:",device)

Mounted at /content/drive
Using: cuda


In [4]:
# Load data
train_df = pd.read_csv("/content/drive/My Drive/ADL Final Project/yelp_dataset_train.csv")
test_df = pd.read_csv("/content/drive/My Drive/ADL Final Project/yelp_dataset_test.csv")

# Sample 10% for tuning
df_sampled, _ = train_test_split(train_df, test_size = 0.90, stratify = train_df['class_index'], random_state = 42)

# Split sampled data into train/val for early stopping
train_small, val_small = train_test_split(df_sampled, test_size = 0.2, stratify = df_sampled['class_index'], random_state = 42)

In [5]:
# Step 1: Extract text and labels
texts = train_small['review_text'].tolist()
labels = train_small['class_index'].tolist()

# Step 2: Initialize tokenizer
vocab_size = 20000
tokenizer = Tokenizer(num_words = vocab_size, oov_token = "<OOV>")
tokenizer.fit_on_texts(texts)

# Step 3: Convert texts to sequences
train_sequences = tokenizer.texts_to_sequences(train_small['review_text'])
val_sequences = tokenizer.texts_to_sequences(val_small['review_text'])

# Step 4: Pad sequences to a fixed length
max_length = 512

train_padded = pad_sequences(train_sequences, maxlen = max_length, padding = 'post', truncating = 'post')
val_padded = pad_sequences(val_sequences, maxlen = max_length, padding = 'post', truncating = 'post')

In [6]:
y_train = train_small['class_index'].values
y_val = val_small['class_index'].values

In [7]:
class BiLSTMClassifier(nn.Module):
    def __init__(self, vocab_size, embedding_dim, hidden_dim, output_dim, dropout):
        super(BiLSTMClassifier, self).__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        self.lstm = nn.LSTM(embedding_dim, hidden_dim, batch_first = True, bidirectional = True)
        self.dropout = nn.Dropout(dropout)
        self.fc = nn.Linear(hidden_dim * 2, output_dim)  # *2 for bidirectional
        self.softmax = nn.Softmax(dim = 1)

    def forward(self, x):
        embedded = self.embedding(x)
        lstm_out, _ = self.lstm(embedded)
        pooled = torch.mean(lstm_out, dim = 1)
        out = self.dropout(pooled)
        out = self.fc(out)
        return self.softmax(out)

In [8]:
# Convert padded sequences and labels to PyTorch tensors

X_train_tensor = torch.tensor(train_padded, dtype = torch.long)
y_train_tensor = torch.tensor(y_train, dtype = torch.long)
X_val_tensor = torch.tensor(val_padded, dtype = torch.long)
y_val_tensor = torch.tensor(y_val, dtype = torch.long)

# Create Dataset objects
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
val_dataset = TensorDataset(X_val_tensor, y_val_tensor)

# Create DataLoaders
train_loader = DataLoader(train_dataset, batch_size = 128, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size = 128, shuffle=False)

In [9]:
# Objective function for Optuna
def objective(trial):
    start_time = time.time()

    # Hyperparameters for LSTM units, learning rate, and weight decay
    hidden_dim = trial.suggest_int("lstm_units", 64, 256)
    learning_rate = trial.suggest_float("learning_rate", 1e-5, 1e-2, log=True)
    weight_decay = trial.suggest_float("weight_decay", 1e-6, 1e-2, log=True)
    dropout_rate = 0.3  # Fixed dropout rate

    # Set fixed parameters
    vocab_size = 20000  # Adjust based on your dataset
    embed_dim = 100
    output_dim = 3  # 3 classes: negative, neutral, positive
    num_epochs = 3  # Number of epochs

    # Initialize model
    model = BiLSTMClassifier(vocab_size, embed_dim, hidden_dim, output_dim, dropout_rate).to(device)

    # Loss function and optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = AdamW(model.parameters(), lr=learning_rate, weight_decay=weight_decay)

    # Training loop
    model.train()
    for epoch in range(num_epochs):
        epoch_losses = []
        for batch in train_loader:  # train_loader from your sampled 10% training data
            inputs, targets = batch  # adjust based on your DataLoader output
            inputs, targets = inputs.to(device), targets.to(device)

            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            loss.backward()
            optimizer.step()

            epoch_losses.append(loss.item())
        avg_loss = sum(epoch_losses) / len(epoch_losses)
        print(f"Trial {trial.number}, Epoch {epoch + 1}/{num_epochs}, Loss: {avg_loss:.4f}")

    # Evaluation on validation set
    model.eval()
    all_preds = []
    all_targets = []
    with torch.no_grad():
        for batch in val_loader:  # val_loader from your sampled validation data
            inputs, targets = batch
            inputs, targets = inputs.to(device), targets.to(device)
            outputs = model(inputs)
            preds = torch.argmax(outputs, dim=1)
            all_preds.extend(preds.cpu().numpy())
            all_targets.extend(targets.cpu().numpy())

    val_accuracy = accuracy_score(all_targets, all_preds)
    val_loss = sum(epoch_losses) / len(epoch_losses)  # Average loss on validation
    elapsed_time = time.time() - start_time

    # Capture the trial information
    trial_info = {
        'trial_number': trial.number,
        'learning_rate': learning_rate,
        'weight_decay': weight_decay,
        'lstm_units': hidden_dim,
        'val_accuracy': val_accuracy,
        'val_loss': val_loss,
        'train_time': elapsed_time,
    }

    print(f"Trial {trial.number} - LR: {learning_rate} | WD: {weight_decay} | LSTM Units: {hidden_dim} | "
          f"Loss: {val_loss} | Accuracy: {val_accuracy} | Time: {elapsed_time:.2f}s")

    # Optuna maximizes accuracy
    return val_accuracy

study_path = "/content/drive/My Drive/ADL Final Project/optuna_study.db"

# Create the Optuna study
study = optuna.create_study(storage = f"sqlite:///{study_path}", direction = "maximize")

# Run the optimization
study.optimize(objective, n_trials = 20)

[I 2025-04-14 04:17:04,809] A new study created in RDB with name: no-name-5e4341b8-33e6-49b1-bcf4-25badf2c0fcd


Trial 0, Epoch 1/3, Loss: 1.0746
Trial 0, Epoch 2/3, Loss: 1.0286
Trial 0, Epoch 3/3, Loss: 0.8958


[I 2025-04-14 04:18:52,178] Trial 0 finished with value: 0.6789285714285714 and parameters: {'lstm_units': 232, 'learning_rate': 0.0008462171253028531, 'weight_decay': 3.3675872097409635e-06}. Best is trial 0 with value: 0.6789285714285714.


Trial 0 - LR: 0.0008462171253028531 | WD: 3.3675872097409635e-06 | LSTM Units: 232 | Loss: 0.8958458766737818 | Accuracy: 0.6789285714285714 | Time: 107.29s
Trial 1, Epoch 1/3, Loss: 1.0457
Trial 1, Epoch 2/3, Loss: 0.9696
Trial 1, Epoch 3/3, Loss: 0.8111


[I 2025-04-14 04:21:26,256] Trial 1 finished with value: 0.7582142857142857 and parameters: {'lstm_units': 141, 'learning_rate': 0.001513883127515937, 'weight_decay': 5.710195978844217e-05}. Best is trial 1 with value: 0.7582142857142857.


Trial 1 - LR: 0.001513883127515937 | WD: 5.710195978844217e-05 | LSTM Units: 141 | Loss: 0.8111318957216386 | Accuracy: 0.7582142857142857 | Time: 154.02s
Trial 2, Epoch 1/3, Loss: 1.0052
Trial 2, Epoch 2/3, Loss: 0.8148
Trial 2, Epoch 3/3, Loss: 0.7445


[I 2025-04-14 04:22:11,076] Trial 2 finished with value: 0.7738095238095238 and parameters: {'lstm_units': 113, 'learning_rate': 0.002416298685681744, 'weight_decay': 0.0009044123716065391}. Best is trial 2 with value: 0.7738095238095238.


Trial 2 - LR: 0.002416298685681744 | WD: 0.0009044123716065391 | LSTM Units: 113 | Loss: 0.7444599205096865 | Accuracy: 0.7738095238095238 | Time: 44.76s
Trial 3, Epoch 1/3, Loss: 1.0636
Trial 3, Epoch 2/3, Loss: 1.0136
Trial 3, Epoch 3/3, Loss: 0.9709


[I 2025-04-14 04:26:38,554] Trial 3 finished with value: 0.5723809523809524 and parameters: {'lstm_units': 192, 'learning_rate': 0.0006661853674379298, 'weight_decay': 0.0006198932608682206}. Best is trial 2 with value: 0.7738095238095238.


Trial 3 - LR: 0.0006661853674379298 | WD: 0.0006198932608682206 | LSTM Units: 192 | Loss: 0.9709296172109394 | Accuracy: 0.5723809523809524 | Time: 267.42s
Trial 4, Epoch 1/3, Loss: 1.0951
Trial 4, Epoch 2/3, Loss: 1.0606
Trial 4, Epoch 3/3, Loss: 1.0589


[I 2025-04-14 04:27:25,854] Trial 4 finished with value: 0.4642857142857143 and parameters: {'lstm_units': 123, 'learning_rate': 0.0001129390346704744, 'weight_decay': 4.299284821238262e-06}. Best is trial 2 with value: 0.7738095238095238.


Trial 4 - LR: 0.0001129390346704744 | WD: 4.299284821238262e-06 | LSTM Units: 123 | Loss: 1.0589278866129683 | Accuracy: 0.4642857142857143 | Time: 47.23s
Trial 5, Epoch 1/3, Loss: 1.0969
Trial 5, Epoch 2/3, Loss: 1.0949
Trial 5, Epoch 3/3, Loss: 1.0899


[I 2025-04-14 04:29:04,167] Trial 5 finished with value: 0.3985714285714286 and parameters: {'lstm_units': 223, 'learning_rate': 2.5465205836647278e-05, 'weight_decay': 3.1213600307451916e-06}. Best is trial 2 with value: 0.7738095238095238.


Trial 5 - LR: 2.5465205836647278e-05 | WD: 3.1213600307451916e-06 | LSTM Units: 223 | Loss: 1.08993610773703 | Accuracy: 0.3985714285714286 | Time: 98.26s
Trial 6, Epoch 1/3, Loss: 1.0834
Trial 6, Epoch 2/3, Loss: 1.0511
Trial 6, Epoch 3/3, Loss: 1.0647


[I 2025-04-14 04:31:00,987] Trial 6 finished with value: 0.49238095238095236 and parameters: {'lstm_units': 246, 'learning_rate': 0.0001611899416446787, 'weight_decay': 0.000588024465130978}. Best is trial 2 with value: 0.7738095238095238.


Trial 6 - LR: 0.0001611899416446787 | WD: 0.000588024465130978 | LSTM Units: 246 | Loss: 1.0646811682915054 | Accuracy: 0.49238095238095236 | Time: 116.75s
Trial 7, Epoch 1/3, Loss: 1.0648
Trial 7, Epoch 2/3, Loss: 1.0219
Trial 7, Epoch 3/3, Loss: 1.0111


[I 2025-04-14 04:35:27,207] Trial 7 finished with value: 0.5246428571428572 and parameters: {'lstm_units': 188, 'learning_rate': 0.00035159983081168504, 'weight_decay': 1.8742199100829313e-06}. Best is trial 2 with value: 0.7738095238095238.


Trial 7 - LR: 0.00035159983081168504 | WD: 1.8742199100829313e-06 | LSTM Units: 188 | Loss: 1.0111470555624582 | Accuracy: 0.5246428571428572 | Time: 266.15s
Trial 8, Epoch 1/3, Loss: 1.0974
Trial 8, Epoch 2/3, Loss: 1.0961
Trial 8, Epoch 3/3, Loss: 1.0943


[I 2025-04-14 04:37:23,844] Trial 8 finished with value: 0.37535714285714283 and parameters: {'lstm_units': 246, 'learning_rate': 1.8358459087333473e-05, 'weight_decay': 1.3494410779356944e-06}. Best is trial 2 with value: 0.7738095238095238.


Trial 8 - LR: 1.8358459087333473e-05 | WD: 1.3494410779356944e-06 | LSTM Units: 246 | Loss: 1.094265566126022 | Accuracy: 0.37535714285714283 | Time: 116.58s
Trial 9, Epoch 1/3, Loss: 1.0899
Trial 9, Epoch 2/3, Loss: 1.0575
Trial 9, Epoch 3/3, Loss: 1.0445


[I 2025-04-14 04:41:34,076] Trial 9 finished with value: 0.495 and parameters: {'lstm_units': 175, 'learning_rate': 0.00015214540285293223, 'weight_decay': 4.050007269254585e-05}. Best is trial 2 with value: 0.7738095238095238.


Trial 9 - LR: 0.00015214540285293223 | WD: 4.050007269254585e-05 | LSTM Units: 175 | Loss: 1.0444755606324954 | Accuracy: 0.495 | Time: 250.16s
Trial 10, Epoch 1/3, Loss: 0.9100
Trial 10, Epoch 2/3, Loss: 0.7555
Trial 10, Epoch 3/3, Loss: 0.7159


[I 2025-04-14 04:41:57,210] Trial 10 finished with value: 0.7889285714285714 and parameters: {'lstm_units': 74, 'learning_rate': 0.008020703537265418, 'weight_decay': 0.008137965779632793}. Best is trial 10 with value: 0.7889285714285714.


Trial 10 - LR: 0.008020703537265418 | WD: 0.008137965779632793 | LSTM Units: 74 | Loss: 0.7159416408139944 | Accuracy: 0.7889285714285714 | Time: 23.08s
Trial 11, Epoch 1/3, Loss: 0.9006
Trial 11, Epoch 2/3, Loss: 0.7579
Trial 11, Epoch 3/3, Loss: 0.7078


[I 2025-04-14 04:42:20,063] Trial 11 finished with value: 0.7922619047619047 and parameters: {'lstm_units': 71, 'learning_rate': 0.00927710409930187, 'weight_decay': 0.009550366555892127}. Best is trial 11 with value: 0.7922619047619047.


Trial 11 - LR: 0.00927710409930187 | WD: 0.009550366555892127 | LSTM Units: 71 | Loss: 0.7077555504588573 | Accuracy: 0.7922619047619047 | Time: 22.79s
Trial 12, Epoch 1/3, Loss: 0.8903
Trial 12, Epoch 2/3, Loss: 0.7900
Trial 12, Epoch 3/3, Loss: 0.7529


[I 2025-04-14 04:42:43,203] Trial 12 finished with value: 0.7886904761904762 and parameters: {'lstm_units': 73, 'learning_rate': 0.009593252897203764, 'weight_decay': 0.009752563590034137}. Best is trial 11 with value: 0.7922619047619047.


Trial 12 - LR: 0.009593252897203764 | WD: 0.009752563590034137 | LSTM Units: 73 | Loss: 0.7529441859332328 | Accuracy: 0.7886904761904762 | Time: 23.08s
Trial 13, Epoch 1/3, Loss: 0.8902
Trial 13, Epoch 2/3, Loss: 0.7636
Trial 13, Epoch 3/3, Loss: 0.7139


[I 2025-04-14 04:42:59,639] Trial 13 finished with value: 0.8002380952380952 and parameters: {'lstm_units': 64, 'learning_rate': 0.009885647542627366, 'weight_decay': 0.00959695196216962}. Best is trial 13 with value: 0.8002380952380952.


Trial 13 - LR: 0.009885647542627366 | WD: 0.00959695196216962 | LSTM Units: 64 | Loss: 0.713944719544835 | Accuracy: 0.8002380952380952 | Time: 16.38s
Trial 14, Epoch 1/3, Loss: 0.9541
Trial 14, Epoch 2/3, Loss: 0.8084
Trial 14, Epoch 3/3, Loss: 0.7426


[I 2025-04-14 04:43:16,041] Trial 14 finished with value: 0.7838095238095238 and parameters: {'lstm_units': 64, 'learning_rate': 0.004147126836136838, 'weight_decay': 0.0018714673056361812}. Best is trial 13 with value: 0.8002380952380952.


Trial 14 - LR: 0.004147126836136838 | WD: 0.0018714673056361812 | LSTM Units: 64 | Loss: 0.7425683297131904 | Accuracy: 0.7838095238095238 | Time: 16.34s
Trial 15, Epoch 1/3, Loss: 0.9580
Trial 15, Epoch 2/3, Loss: 0.8018
Trial 15, Epoch 3/3, Loss: 0.7839


[I 2025-04-14 04:43:57,337] Trial 15 finished with value: 0.7845238095238095 and parameters: {'lstm_units': 103, 'learning_rate': 0.003918111801898915, 'weight_decay': 0.0025122958460535236}. Best is trial 13 with value: 0.8002380952380952.


Trial 15 - LR: 0.003918111801898915 | WD: 0.0025122958460535236 | LSTM Units: 103 | Loss: 0.7838745033333057 | Accuracy: 0.7845238095238095 | Time: 41.23s
Trial 16, Epoch 1/3, Loss: 0.8901
Trial 16, Epoch 2/3, Loss: 0.7459
Trial 16, Epoch 3/3, Loss: 0.7071


[I 2025-04-14 04:44:24,283] Trial 16 finished with value: 0.7938095238095239 and parameters: {'lstm_units': 85, 'learning_rate': 0.009978769279933882, 'weight_decay': 0.0002008909037175462}. Best is trial 13 with value: 0.8002380952380952.


Trial 16 - LR: 0.009978769279933882 | WD: 0.0002008909037175462 | LSTM Units: 85 | Loss: 0.7071021867795589 | Accuracy: 0.7938095238095239 | Time: 26.89s
Trial 17, Epoch 1/3, Loss: 1.0426
Trial 17, Epoch 2/3, Loss: 0.9189
Trial 17, Epoch 3/3, Loss: 0.8305


[I 2025-04-14 04:44:50,602] Trial 17 finished with value: 0.7317857142857143 and parameters: {'lstm_units': 95, 'learning_rate': 0.001636542120469616, 'weight_decay': 0.00016795063732367538}. Best is trial 13 with value: 0.8002380952380952.


Trial 17 - LR: 0.001636542120469616 | WD: 0.00016795063732367538 | LSTM Units: 95 | Loss: 0.8304977498580295 | Accuracy: 0.7317857142857143 | Time: 26.25s
Trial 18, Epoch 1/3, Loss: 1.0973
Trial 18, Epoch 2/3, Loss: 1.0950
Trial 18, Epoch 3/3, Loss: 1.0868


[I 2025-04-14 04:47:24,715] Trial 18 finished with value: 0.40523809523809523 and parameters: {'lstm_units': 141, 'learning_rate': 4.254422876470329e-05, 'weight_decay': 1.4942873954598515e-05}. Best is trial 13 with value: 0.8002380952380952.


Trial 18 - LR: 4.254422876470329e-05 | WD: 1.4942873954598515e-05 | LSTM Units: 141 | Loss: 1.0867740201406153 | Accuracy: 0.40523809523809523 | Time: 154.05s
Trial 19, Epoch 1/3, Loss: 0.9438
Trial 19, Epoch 2/3, Loss: 0.7915
Trial 19, Epoch 3/3, Loss: 0.7348


[I 2025-04-14 04:47:50,046] Trial 19 finished with value: 0.7904761904761904 and parameters: {'lstm_units': 90, 'learning_rate': 0.004241544346115228, 'weight_decay': 0.00025980889070388466}. Best is trial 13 with value: 0.8002380952380952.


Trial 19 - LR: 0.004241544346115228 | WD: 0.00025980889070388466 | LSTM Units: 90 | Loss: 0.734802420828279 | Accuracy: 0.7904761904761904 | Time: 25.26s


In [15]:
best_params = study.best_params
print(f"Best hyperparameters: {best_params}")

Best hyperparameters: {'lstm_units': 64, 'learning_rate': 0.009885647542627366, 'weight_decay': 0.00959695196216962}
