In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.optim import Adam
import numpy as np
from transformers import BertTokenizer

from torch.utils.data import TensorDataset, DataLoader
import lightning as L
import mlflow

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

# Data has already been prepared for you
train = np.load('HA3_train_set.npz')
test = np.load('HA3_test_set.npz')
X_train = train['x_train']
y_train = train['y_train']
X_test = test['x_test']
y_test = test['y_test']

# Step 1: Convert to tensors
X_train_tensor = torch.tensor(X_train, dtype=torch.long)
y_train_tensor = torch.tensor(y_train, dtype=torch.long)
X_test_tensor = torch.tensor(X_test, dtype=torch.long)
y_test_tensor = torch.tensor(y_test, dtype=torch.long)


# Step 2: Create TensorDataset
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)

In [3]:
print("X_train.shape:", X_train.shape)
print("y_train.shape:", y_train.shape)
print("X_test.shape:", X_test.shape)
print("y_test.shape:", y_test.shape)

X_train.shape: (25000, 500)
y_train.shape: (25000,)
X_test.shape: (17500, 500)
y_test.shape: (17500,)


In [27]:
# Recurrent neural network (many-to-one)
class RNN(nn.Module):
    def __init__(self, embedding_dim, vocab_size, hidden_size, num_layers, num_classes):
        super(RNN, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.embedding_dim = embedding_dim
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        self.lstm = nn.LSTM(embedding_dim, hidden_size, num_layers, batch_first=True, bidirectional=True)
        self.dropout = nn.Dropout(0.01)
        self.layer_norm = nn.LayerNorm(hidden_size * 2)
        self.fc1 = nn.Linear(hidden_size * 2, hidden_size)
        self.fc2 = nn.Linear(hidden_size, num_classes)
        self.softmax = nn.LogSoftmax(dim=1)

    def forward(self, x):
        embedded = self.embedding(x)
        out, (final_hidden_state, final_cell_state) = self.lstm(embedded)
        #out = self.layer_norm(out[:, -1, :])
        #out = self.dropout(out)
        out = self.fc1(out[:, -1, :])
        #out = F.relu(out)
        #out = self.fc2(out)
        out = self.softmax(out)
        return out



In [28]:
def train_model(model, num_epochs, learning_rate, train_loader, test_loader):
    # Loss and optimizer
    criterion = nn.NLLLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
    best_accuracy = 0.0
    global_step = 0
    no_improvement_counter = 0
    patience = 10

    # Train the model
    total_step = len(train_loader)
    for epoch in range(num_epochs):
        for i, (comments, labels) in enumerate(train_loader):
            global_step += 1

            # Forward pass
            outputs = model(comments)
            loss = criterion(outputs, labels)

            # Backward and optimize
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            mlflow.log_metric("loss", loss.item(), step = global_step)

            if (i+1) % 10 == 0:
                print ('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'
                       .format(epoch+1, num_epochs, i+1, total_step, loss.item()))
                
                # Test the model
                with torch.no_grad():
                    correct = 0
                    total = 0
                    for comments, labels in test_loader:
                        outputs = model(comments)
                        _, predicted = torch.max(outputs.data, 1)
                        total += labels.size(0)
                        correct += (predicted == labels).sum().item()
                        accuracy = 100 * correct / total
                    mlflow.log_metric("Test accuracy vs number of training epochs", 100 * correct / total, step = global_step)

                # Save the best model
                if accuracy > best_accuracy:
                    best_accuracy = accuracy
                    mlflow.pytorch.log_model(pytorch_model=model, artifact_path="best-torch-rnn-model")
                    no_improvement_counter = 0
                else:
                    no_improvement_counter += 1

                print('Test Accuracy of the model on test comments: {} %'.format(100 * correct / total))

                # Early stopping criterion
                if no_improvement_counter >= patience:
                    print(f'Stopping early after {patience} evaluations without improvement.')
                    return best_accuracy
                
    return best_accuracy


In [29]:
import optuna

def objective(trial):

    num_layers = 1 ##trial.suggest_int('num_layers', 1, 2)
    hidden_size = 15 ##trial.suggest_int('hidden_size', 1, 30)
    batch_size = 758 ##trial.suggest_int('batch_size', 64, 1028)
    embedding_dim = 20 ##trial.suggest_int('embedding_dim', 1, 64)
    learning_rate = 0.036 ##trial.suggest_float('learning_rate', 0.001, 0.1)


    train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)

    num_classes = 2  # Fixed number of classes
    num_epochs = 100  # For faster trials, you might want to use more epochs in actual training
    vocab_size = tokenizer.vocab_size

    with mlflow.start_run():
        mlflow.log_param("num_hidden_layers", num_layers)
        mlflow.log_param("hidden_size", hidden_size)
        mlflow.log_param("batch_size", batch_size)
        mlflow.log_param("epochs", num_epochs)
        mlflow.log_param("learning_rate", learning_rate)
        mlflow.log_param("embedding_dim", embedding_dim)

        model = RNN(embedding_dim, vocab_size, hidden_size, num_layers, num_classes)
        print('----------------------------------------------------------')
        best_accuracy = train_model(model, num_epochs, learning_rate, train_loader, test_loader)  # Modify train_model to return accuracy

        mlflow.log_metric("best_accuracy", best_accuracy)

    return best_accuracy


In [26]:
# Create a study and optimize the objective function
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=1)

# Log the best hyperparameters
best_trial = study.best_trial
print(f'Best trial: {best_trial.value}')
print(f'Best hyperparameters: {best_trial.params}')



[I 2024-06-06 11:23:53,594] A new study created in memory with name: no-name-dff43fb0-e343-4010-8073-a20672301888


----------------------------------------------------------
Epoch [1/100], Step [10/33], Loss: 0.6798
Test Accuracy of the model on test comments: 53.94857142857143 %
Epoch [1/100], Step [20/33], Loss: 0.6791
Test Accuracy of the model on test comments: 56.23428571428571 %
Epoch [1/100], Step [30/33], Loss: 0.6717
Test Accuracy of the model on test comments: 61.12 %
Epoch [2/100], Step [10/33], Loss: 0.5792
Test Accuracy of the model on test comments: 68.46285714285715 %
Epoch [2/100], Step [20/33], Loss: 0.4986
Test Accuracy of the model on test comments: 75.56 %
Epoch [2/100], Step [30/33], Loss: 0.4410
Test Accuracy of the model on test comments: 80.48571428571428 %
Epoch [3/100], Step [10/33], Loss: 0.2527
Test Accuracy of the model on test comments: 83.55428571428571 %
Epoch [3/100], Step [20/33], Loss: 0.2702
Test Accuracy of the model on test comments: 85.86857142857143 %
Epoch [3/100], Step [30/33], Loss: 0.2278
Test Accuracy of the model on test comments: 83.33714285714285 %
Ep

[I 2024-06-06 11:45:21,196] Trial 0 finished with value: 86.52 and parameters: {}. Best is trial 0 with value: 86.52.


Test Accuracy of the model on test comments: 85.72571428571429 %
Stopping early after 10 evaluations without improvement.
Best trial: 86.52
Best hyperparameters: {}
