In [11]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import pandas as pd

import optuna
from sklearn.metrics import accuracy_score
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split

In [12]:
# Mock dataset for demonstration
class TextDataset(Dataset):
    # def __init__(self, vocabulary_size, sequence_length, num_samples):
    #     self.data = torch.randint(0, vocabulary_size, (num_samples, sequence_length))
    #     self.labels = torch.randint(0, 2, (num_samples,))
    def __init__(self, texts, labels, sequence_length):
        self.texts = texts
        self.labels = labels
        self.sequence_length = sequence_length
        self.vocab = self.build_vocab(texts)
        self.encoded_texts = [self.encode_text(text) for text in texts]

    def build_vocab(self, texts):
        unique_words = set(word for text in texts for word in text.lower().split())
        vocab = {word: i + 1 for i, word in enumerate(unique_words)}  # +1 for padding token at index 0
        return vocab
    
    def encode_text(self, text):
        return [self.vocab.get(word, 0) for word in text.lower().split()][:self.sequence_length] + [0] * (self.sequence_length - len(text.split()))

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, idx):
        return torch.tensor(self.encoded_texts[idx]), torch.tensor(self.labels[idx])
    

# Define the Q-network model
class QLearning(nn.Module):
    def __init__(self, vocabulary_size, embedding_dim, hidden_dim, dropout_rate=0.5):
        super(QLearning, self).__init__()
        self.embedding = nn.Embedding(vocabulary_size, embedding_dim)
        self.lstm = nn.LSTM(embedding_dim, hidden_dim, batch_first=True, bidirectional=True, num_layers=2)
        self.dropout = nn.Dropout(dropout_rate)
        # Two actions: 0 or 1
        self.fc = nn.Linear(hidden_dim * 2, 2)  

    def forward(self, x):
        embeds = self.embedding(x)
        lstm_out, _ = self.lstm(embeds)
        out = self.dropout(lstm_out[:, -1])
        q_values = self.fc(out)
        return q_values

def update_q_values(model, optimizer, states, actions, rewards, next_states, gamma=0.99):
    # Ensure actions tensor is in the correct shape for .gather()
    actions = actions.long().unsqueeze(-1)  # Adding a dimension to match q_values dimensions for gather

    model.eval()
    with torch.no_grad():
        # Get max Q value for next states, for all next states in the batch
        q_values_next = model(next_states).max(1)[0]
    model.train()

    # Get the predicted Q-values for the chosen actions. 
    # As actions are now correctly unsqueezed, this should work without issues.
    # q_values = model(states).gather(1, actions).squeeze(-1)  # squeeze(-1) to remove the extra dimension after gather
    q_values = model(states).gather(1, actions.unsqueeze(-1))
    
    # Calculate the target Q values for the current states
    # This is the immediate reward plus the discounted max future Q value
    # Note: We detach q_values_next from the graph as it's a target, not a variable we need gradients for
    target = rewards + gamma * q_values_next.detach()
    
    # Compute the loss between current Q values (for the chosen actions) and the target Q values
    loss = nn.functional.mse_loss(q_values, target)

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    return loss.item()

In [13]:
def train(model, device, train_loader, optimizer, epoch, gamma=0.99, log_interval=10):
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()

        # Predict Q-values for current states (data)
        current_q_values = model(data)

        # Simulate taking actions (classifying) and receiving rewards
        # In a real RL scenario, actions are taken based on a policy. Here, we simplify.
        _, predicted_actions = torch.max(current_q_values, 1)
        rewards = (predicted_actions == target).float()  # Reward is 1 for correct classification, 0 otherwise

        # Simulate next states (in practice, you might use a different strategy)
        # For simplicity, let's just roll data to simulate "next states"
        next_data = torch.roll(data, -1, 0)
        with torch.no_grad():
            future_q_values = model(next_data).max(1)[0]  # Use max Q-value for simplicity
        
        # Compute target Q-values
        target_q_values = rewards + gamma * future_q_values

        # Update model based on the difference between current Q-values and target Q-values
        # Assuming binary classification, gather Q-values for the taken actions
        action_indices = target.view(-1, 1).long()
        gathered_q_values = current_q_values.gather(1, action_indices).squeeze()

        loss = nn.functional.mse_loss(gathered_q_values, target_q_values.detach())
        loss.backward()
        optimizer.step()

        if batch_idx % log_interval == 0:
            print(f'Train Epoch: {epoch} [{batch_idx * len(data)}/{len(train_loader.dataset)}'
                  f' ({100. * batch_idx / len(train_loader):.0f}%)]\tLoss: {loss.item():.6f}')

# def train(model, device, train_loader, optimizer, epoch, gamma=0.99, log_interval=10):
#     model.train()
#     total_loss = 0
#     for batch_idx, (data, target) in enumerate(train_loader):
#         data, target = data.to(device), target.to(device)
#         optimizer.zero_grad()

#         # Predict Q-values for current states (data)
#         current_q_values = model(data)

#         # Simulate taking actions (classifying) and receiving rewards
#         _, predicted_actions = torch.max(current_q_values, 1)
#         rewards = (predicted_actions == target).float()  # Reward is 1 for correct classification, 0 otherwise

#         # Simulate next states (in practice, you might use a different strategy)
#         next_data = torch.roll(data, -1, 0)
#         with torch.no_grad():
#             future_q_values = model(next_data).max(1)[0]

#         # Here we manually construct what we need for update_q_values
#         actions = predicted_actions.view(-1, 1)
#         rewards = rewards.unsqueeze(-1)
#         next_states = next_data

#         # Update Q-values using the custom function, adapted to fit our setup
#         # Note: update_q_values function is assumed to be adapted for our context
#         loss = update_q_values(model, optimizer, data, actions, rewards, next_states, gamma)
#         total_loss += loss

#         if batch_idx % log_interval == 0:
#             print(f'Train Epoch: {epoch} [{batch_idx * len(data)}/{len(train_loader.dataset)}'
#                   f' ({100. * batch_idx / len(train_loader):.0f}%)]\tLoss: {total_loss / (batch_idx + 1):.6f}')

def validate(model, device, validation_loader):
    model.eval()
    validation_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in validation_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            validation_loss += nn.CrossEntropyLoss()(output, target).item()  # Sum up batch loss
            pred = output.argmax(dim=1, keepdim=True)  # Get the index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item()

    validation_loss /= len(validation_loader.dataset)
    validation_acc = correct / len(validation_loader.dataset)
    print(f'\nValidation set: Average loss: {validation_loss:.4f}, Accuracy: {correct}/{len(validation_loader.dataset)} ({100. * correct / len(validation_loader.dataset):.0f}%)\n')
    return validation_loss, validation_acc

def test(model, device, test_loader):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += nn.CrossEntropyLoss()(output, target).item()
            pred = output.argmax(dim=1, keepdim=True)
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)
    print(f'\nTest set: Average loss: {test_loss:.4f}, Accuracy: {correct}/{len(test_loader.dataset)} ({100. * correct / len(test_loader.dataset):.0f}%)\n')


In [14]:
df = pd.read_csv('dataset/sentiment_analysis.csv')

# Extracting texts and labels
texts = df['tweet'].tolist()
labels = df['label'].tolist()

# Splitting dataset into train+val and test
train_val_texts, test_texts, train_val_labels, test_labels = train_test_split(texts, labels, test_size=0.2, random_state=42)

# Splitting train+val into train and val
train_texts, validation_texts, train_labels, validation_labels = train_test_split(train_val_texts, train_val_labels, test_size=0.25, random_state=42)  # 0.25 x 0.8 = 0.2

# Creating datasets
sequence_length = 10  # Max number of words in a text
train_dataset = TextDataset(train_texts, train_labels, sequence_length)
validation_dataset = TextDataset(validation_texts, validation_labels, sequence_length)
test_dataset = TextDataset(test_texts, test_labels, sequence_length)

# Creating DataLoaders
batch_size = 4
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
validation_loader = DataLoader(validation_dataset, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

for data, label in train_loader:
    print(f"Validation Encoded text: {data}")
    print(f"Validation Label: {label}")
    break  # Just show one batch for brevity

for data, label in validation_loader:
    print(f"Validation Encoded text: {data}")
    print(f"Validation Label: {label}")
    break  # Just show one batch for brevity

for data, label in test_loader:
    print(f"Test Encoded text: {data}")
    print(f"Test Label: {label}")
    break  # Just show one batch for brevity

Validation Encoded text: tensor([[20655, 18571, 13064,  3125, 19348, 13014, 16230, 13924, 20400, 11550],
        [14625, 10428,  3125, 12991, 15537, 10920, 17104, 13213, 11874, 19899],
        [ 8654, 14999, 20606,  8966, 18599,  9186, 10363, 10770,   925,  4179],
        [ 1111,  5026, 12025, 13896, 19397,  8211,  3350, 18458, 15633, 17776]])
Validation Label: tensor([0, 1, 1, 1])
Validation Encoded text: tensor([[2026, 5854, 5043, 5005, 1440,  150, 5999, 5785, 8586,  248],
        [5434, 7852, 1896, 5684, 4545, 3561, 5345, 3043,   45,  924],
        [6821,  293, 7617, 1422, 8655, 2273, 2014, 4048, 2961, 7476],
        [5434, 1531, 1896,  411, 7369, 6574,  285, 1568, 3410, 6618]])
Validation Label: tensor([1, 1, 1, 1])
Test Encoded text: tensor([[6332, 6864, 6903, 2554, 6857, 7206, 3527, 4826, 8675, 8933],
        [3233, 5512, 5310, 5005, 4409, 2170, 5859, 1313, 4221, 1449],
        [6673, 3527, 8940, 4598, 5074, 5172, 7242, 6581, 2468, 2550],
        [9078, 4781, 8736, 3098, 5001, 22

In [15]:
# Parameters and Hyperparameters
vocabulary_size = 100000  # to adjust 
sequence_length = 50  # to adjust 
embedding_dim = 128
hidden_dim = 64
num_classes = 2
batch_size = 64
epochs = 5
learning_rate = 0.001

# Model, optimizer, and device setup
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = QLearning(vocabulary_size, embedding_dim, hidden_dim).to(device)
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Training loop
for epoch in range(1, epochs + 1):
    train(model, device, train_loader, optimizer, epoch)
    validate(model, device, validation_loader)

# After training, evaluate on the test set
test(model, device, test_loader)


Validation set: Average loss: 0.1455, Accuracy: 1182/1584 (75%)


Validation set: Average loss: 0.1564, Accuracy: 1182/1584 (75%)


Validation set: Average loss: 0.3445, Accuracy: 402/1584 (25%)


Validation set: Average loss: 0.3304, Accuracy: 402/1584 (25%)


Validation set: Average loss: 0.2281, Accuracy: 402/1584 (25%)


Test set: Average loss: 0.2249, Accuracy: 432/1584 (27%)



In [18]:
n_trials=5

def objective(trial):
    # Define the search space
    # vocabulary_size = trial.suggest_categorical('vocabulary_size', [5000, 10000, 20000, 40000])
    vocabulary_size = 100000
    batch_size = trial.suggest_categorical('batch_size', [32, 64, 128, 256])
    learning_rate = trial.suggest_float('learning_rate', 1e-5, 1e-1, log=True)
    embedding_dim = trial.suggest_categorical('embedding_dim', [64, 128, 256])
    hidden_dim = trial.suggest_categorical('hidden_dim', [32, 64, 128])
    optimizer_name = trial.suggest_categorical('optimizer', ['Adam', 'RMSprop', 'SGD'])
    dropout_rate = trial.suggest_float('dropout_rate', 0.0, 0.5)
    step_size = trial.suggest_int('step_size', 1, 100)
    gamma = trial.suggest_float('gamma', 0.1, 1.0, log=True)
    sequence_length = trial.suggest_categorical('sequence_length', [50, 100, 200, 400])

    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    validation_loader = DataLoader(validation_dataset, batch_size=batch_size, shuffle=False)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

    # Device configuration
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    
    # Model setup with trial suggestions
    model = QLearning(vocabulary_size, embedding_dim, hidden_dim, dropout_rate=dropout_rate).to(device)

    optimizer = getattr(optim, optimizer_name)(model.parameters(), lr=learning_rate)

    if optimizer_name == 'Adam':
        optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    elif optimizer_name == 'RMSprop':
        optimizer = optim.RMSprop(model.parameters(), lr=learning_rate)
    elif optimizer_name == 'SGD':
        optimizer = optim.SGD(model.parameters(), lr=learning_rate)
    
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=step_size, gamma=gamma)

    # Training loop
    epochs = 5 # Reduced for faster optimization cycles
    for epoch in range(1, epochs + 1):
        train(model, device, train_loader, optimizer, epoch)
        val_loss, val_accuracy = validate(model, device, validation_loader)
        scheduler.step()

    # Set custom attributes for the trial
    trial.set_user_attr("val_loss", val_loss)
    trial.set_user_attr("val_accuracy", val_accuracy)
    
    # print(f"Returning from validate: val_loss={val_loss}, val_accuracy={val_accuracy}")
    # return val_loss

    # Objective: maximize validation accuracy by minimizing its negative value
    return -val_accuracy  # Return the negative accuracy

study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials) 

print('Number of finished trials:', len(study.trials))
print('Best trial:')
trial = study.best_trial

# Retrieve the validation loss and accuracy from the best trial
best_val_loss = trial.user_attrs["val_loss"]
best_val_accuracy = trial.user_attrs["val_accuracy"]

print(f'Best Validation Loss: {best_val_loss}')
print(f'Best Validation Accuracy: {best_val_accuracy}')
print('Best Trial Parameters:')
for key, value in trial.params.items():
    print(f'    {key}: {value}')

[I 2024-04-02 20:57:17,521] A new study created in memory with name: no-name-ab7b57ca-6751-4f57-b2f4-0c88f8f12761



Validation set: Average loss: 0.0048, Accuracy: 1182/1584 (75%)


Validation set: Average loss: 0.0053, Accuracy: 1182/1584 (75%)


Validation set: Average loss: 0.0052, Accuracy: 1182/1584 (75%)


Validation set: Average loss: 0.0052, Accuracy: 1182/1584 (75%)



[I 2024-04-02 20:57:35,047] Trial 0 finished with value: -0.7462121212121212 and parameters: {'batch_size': 128, 'learning_rate': 0.0012591182670210136, 'embedding_dim': 128, 'hidden_dim': 32, 'optimizer': 'Adam', 'dropout_rate': 0.013259984128779212, 'step_size': 78, 'gamma': 0.2509806262222176, 'sequence_length': 200}. Best is trial 0 with value: -0.7462121212121212.



Validation set: Average loss: 0.0051, Accuracy: 1182/1584 (75%)


Validation set: Average loss: 0.0186, Accuracy: 1182/1584 (75%)


Validation set: Average loss: 0.0186, Accuracy: 1182/1584 (75%)


Validation set: Average loss: 0.0181, Accuracy: 1182/1584 (75%)


Validation set: Average loss: 0.0179, Accuracy: 1182/1584 (75%)



[I 2024-04-02 20:59:05,105] Trial 1 finished with value: -0.7462121212121212 and parameters: {'batch_size': 32, 'learning_rate': 0.001296619982995175, 'embedding_dim': 256, 'hidden_dim': 64, 'optimizer': 'RMSprop', 'dropout_rate': 0.05082897612082027, 'step_size': 29, 'gamma': 0.8831017758467231, 'sequence_length': 50}. Best is trial 0 with value: -0.7462121212121212.



Validation set: Average loss: 0.0181, Accuracy: 1182/1584 (75%)


Validation set: Average loss: 0.0101, Accuracy: 1182/1584 (75%)


Validation set: Average loss: 0.0099, Accuracy: 1182/1584 (75%)


Validation set: Average loss: 0.0100, Accuracy: 1182/1584 (75%)


Validation set: Average loss: 0.0097, Accuracy: 1182/1584 (75%)



[I 2024-04-02 20:59:18,526] Trial 2 finished with value: -0.7462121212121212 and parameters: {'batch_size': 64, 'learning_rate': 0.0004892930038408881, 'embedding_dim': 64, 'hidden_dim': 32, 'optimizer': 'RMSprop', 'dropout_rate': 0.4397029645972962, 'step_size': 4, 'gamma': 0.11731105218391365, 'sequence_length': 50}. Best is trial 0 with value: -0.7462121212121212.



Validation set: Average loss: 0.0097, Accuracy: 1182/1584 (75%)


Validation set: Average loss: 0.0141, Accuracy: 1182/1584 (75%)


Validation set: Average loss: 0.0115, Accuracy: 1182/1584 (75%)


Validation set: Average loss: 0.0564, Accuracy: 402/1584 (25%)


Validation set: Average loss: 0.0139, Accuracy: 1182/1584 (75%)



[I 2024-04-02 20:59:35,027] Trial 3 finished with value: -0.7462121212121212 and parameters: {'batch_size': 128, 'learning_rate': 0.07527339560856858, 'embedding_dim': 128, 'hidden_dim': 32, 'optimizer': 'RMSprop', 'dropout_rate': 0.10182729575093302, 'step_size': 45, 'gamma': 0.14614642750913567, 'sequence_length': 200}. Best is trial 0 with value: -0.7462121212121212.



Validation set: Average loss: 0.0190, Accuracy: 1182/1584 (75%)


Validation set: Average loss: 0.0343, Accuracy: 1182/1584 (75%)


Validation set: Average loss: 0.0311, Accuracy: 1182/1584 (75%)


Validation set: Average loss: 0.0296, Accuracy: 1182/1584 (75%)


Validation set: Average loss: 0.0258, Accuracy: 1182/1584 (75%)



[I 2024-04-02 21:00:25,901] Trial 4 finished with value: -0.7462121212121212 and parameters: {'batch_size': 32, 'learning_rate': 0.00017598356651484445, 'embedding_dim': 128, 'hidden_dim': 128, 'optimizer': 'RMSprop', 'dropout_rate': 0.4501877673997916, 'step_size': 68, 'gamma': 0.21208200504659475, 'sequence_length': 100}. Best is trial 0 with value: -0.7462121212121212.



Validation set: Average loss: 0.0216, Accuracy: 1182/1584 (75%)

Number of finished trials: 5
Best trial:
Best Validation Loss: 0.005118348532252842
Best Validation Accuracy: 0.7462121212121212
Best Trial Parameters:
    batch_size: 128
    learning_rate: 0.0012591182670210136
    embedding_dim: 128
    hidden_dim: 32
    optimizer: Adam
    dropout_rate: 0.013259984128779212
    step_size: 78
    gamma: 0.2509806262222176
    sequence_length: 200
