In [None]:
import torch
import torch.nn as nn
import torch.optim as optim

class CNN_LSTM_Attention_OvR_Model(nn.Module):
    """Text classifier: embedding -> parallel n-gram convolutions -> BiLSTM -> attention pooling -> linear head.

    The head emits one raw logit per class (no sigmoid/softmax), so the output can be
    fed to ``BCEWithLogitsLoss`` for one-vs-rest training or to ``argmax`` for prediction.

    Args:
        vocab_size: Number of rows requested for the embedding table.
        embedding_dim: Width of each word vector; also the width of every conv kernel.
        hidden_dim: Accepted for interface compatibility; no layer reads it.
        output_dim: Number of classes, i.e. the width of the output layer.
        kernel_sizes: Iterable of n-gram heights, one convolution per entry.
        num_filters: Output channels of each convolution.
        lstm_hidden_dim: Hidden size of each LSTM direction.
        num_layers: Number of stacked LSTM layers.
        dropout_rate: Dropout applied before the output layer and between LSTM layers.
        glove_weights: Float tensor used to initialise the (trainable) embedding table.
    """

    def __init__(self, vocab_size, embedding_dim, hidden_dim, output_dim, kernel_sizes, num_filters, lstm_hidden_dim, num_layers, dropout_rate, glove_weights):
        super(CNN_LSTM_Attention_OvR_Model, self).__init__()

        kernel_sizes = list(kernel_sizes)
        # Remembered so forward() can pad batches that are shorter than the widest kernel.
        self.max_kernel_size = max(kernel_sizes)

        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        # Copy the pretrained matrix: wrapping the caller's tensor directly would let
        # training update it in place, leaking fine-tuned weights into the next model
        # that is built from the same `glove_weights`.
        self.embedding.weight = nn.Parameter(glove_weights.detach().clone(), requires_grad=True)
        self.convs = nn.ModuleList([
            nn.Conv2d(in_channels=1, out_channels=num_filters, kernel_size=(k, embedding_dim))
            for k in kernel_sizes
        ])
        self.lstm = nn.LSTM(
            input_size=num_filters * len(kernel_sizes),
            hidden_size=lstm_hidden_dim,
            num_layers=num_layers,
            batch_first=True,
            # Inter-layer dropout only exists for stacked LSTMs; passing a non-zero
            # value with a single layer just triggers a warning.
            dropout=dropout_rate if num_layers > 1 else 0.0,
            bidirectional=True,
        )
        self.attention_fc = nn.Linear(lstm_hidden_dim * 2, 1)
        # Sized from the constructor argument rather than a module-level global.
        self.fc = nn.Linear(lstm_hidden_dim * 2, output_dim)
        self.dropout = nn.Dropout(dropout_rate)

    def forward(self, x):
        """Return class logits of shape (batch, output_dim) for token ids ``x`` of shape (batch, seq_len)."""
        embedded = self.embedding(x).unsqueeze(1)  # (batch, 1, seq_len, embedding_dim)

        # A convolution of height k needs at least k time steps; zero-pad short batches.
        shortfall = self.max_kernel_size - embedded.size(2)
        if shortfall > 0:
            embedded = nn.functional.pad(embedded, (0, 0, 0, shortfall))

        # Each map is (batch, num_filters, seq_len - k + 1).
        feature_maps = [torch.relu(conv(embedded)).squeeze(3) for conv in self.convs]

        # Different kernel heights give different lengths. Trim to the shortest and stack
        # along the channel axis so every time step carries
        # num_filters * len(kernel_sizes) features, matching the LSTM's input_size.
        steps = min(fm.size(2) for fm in feature_maps)
        conv_out = torch.cat([fm[:, :, :steps] for fm in feature_maps], dim=1)
        lstm_out, _ = self.lstm(conv_out.permute(0, 2, 1))  # (batch, steps, 2 * lstm_hidden_dim)

        # Attention pooling: one scalar score per time step, softmax over time,
        # then a weighted sum of the LSTM outputs.
        attention_scores = self.attention_fc(lstm_out).squeeze(-1)
        attention_weights = torch.softmax(attention_scores, dim=1)
        context_vector = torch.sum(lstm_out * attention_weights.unsqueeze(-1), dim=1)

        out = self.fc(self.dropout(context_vector))

        return out


In [None]:
from sklearn.model_selection import train_test_split
# Hold out 25% of train_df as the validation split; the fixed random_state makes the shuffle repeatable.
train_data, val_data = train_test_split(train_df, test_size=0.25, random_state=42)

In [None]:
embedding_dim = 100      # Width of each word vector (the GloVe file loaded in this notebook is the 100-d one)
hidden_dim = 145         # Constructor argument of CNN_LSTM_Attention_OvR_Model; accepted but not read by any layer
output_dim = 3           # Number of target classes
kernel_sizes = [2, 3, 4] # Heights (in tokens) of the parallel CNN kernels
num_filters = 100        # Output channels per CNN kernel size
lstm_hidden_dim = 203    # Hidden size of each LSTM direction
num_layers = 1         # Number of stacked LSTM layers
dropout_rate = 0.3       # Dropout probability
# Result of an earlier Optuna search, kept for reference. num_filters and dropout_rate above are rounded from these values.
#Best Parameters: {'hidden_dim': 145, 'num_filters': 98, 'lstm_hidden_dim': 203, 'learning_rate': 1.4197737369066888e-05, 'dropout_rate': 0.3404884780048647, 'kernel_sizes': [2, 3, 4], 'num_layers': 1}
#Best F1 Score: 0.32155648068635906


In [None]:
def load_glove_embeddings(glove_path, vocab, embedding_dim=100):
    """Build an embedding matrix for ``vocab`` from a GloVe text file.

    Args:
        glove_path: Path to a GloVe file with one ``word v1 v2 ... vN`` entry per line.
        vocab: Sized iterable of words; row ``i`` of the result belongs to the ``i``-th
            word yielded when iterating ``vocab``.
        embedding_dim: Expected length of every vector.

    Returns:
        A float32 tensor of shape ``(len(vocab), embedding_dim)``. Words missing from
        the file get a row drawn from a normal distribution with standard deviation 0.6.

    Raises:
        ValueError: If a vocabulary word's vector does not have ``embedding_dim`` entries.
    """
    # Only vocabulary words are ever looked up, so skip the rest of the file
    # instead of holding every GloVe vector in memory.
    wanted = set(vocab)
    glove_embeddings = {}
    # GloVe files are UTF-8; relying on the platform default encoding can fail on
    # non-ASCII tokens.
    with open(glove_path, 'r', encoding='utf-8') as f:
        for line in f:
            values = line.split()
            if not values:
                # Blank line (for example a trailing newline) carries no entry.
                continue
            word = values[0]
            if word not in wanted:
                continue
            if len(values) - 1 != embedding_dim:
                raise ValueError(
                    f"GloVe vector for {word!r} has {len(values) - 1} dimensions, "
                    f"expected {embedding_dim}"
                )
            glove_embeddings[word] = np.asarray(values[1:], dtype='float32')

    weights_matrix = np.zeros((len(vocab), embedding_dim), dtype='float32')
    for i, word in enumerate(vocab):
        vector = glove_embeddings.get(word)
        if vector is None:
            # Draw a random vector only for out-of-vocabulary words.
            vector = np.random.normal(scale=0.6, size=(embedding_dim,))
        weights_matrix[i] = vector

    return torch.tensor(weights_matrix, dtype=torch.float32)


In [None]:
# Build the (len(vocab), embedding_dim) float32 embedding matrix from the 100-d GloVe file.
# `vocab` is not defined in this section of the notebook; it must already exist in the kernel.
glove_weights = load_glove_embeddings("glove.6B.100d.txt", vocab, embedding_dim)

In [None]:
!pip install optuna

Collecting optuna
  Downloading optuna-4.1.0-py3-none-any.whl.metadata (16 kB)
Collecting alembic>=1.5.0 (from optuna)
  Downloading alembic-1.14.0-py3-none-any.whl.metadata (7.4 kB)
Collecting colorlog (from optuna)
  Downloading colorlog-6.9.0-py3-none-any.whl.metadata (10 kB)
Collecting Mako (from alembic>=1.5.0->optuna)
  Downloading Mako-1.3.6-py3-none-any.whl.metadata (2.9 kB)
Downloading optuna-4.1.0-py3-none-any.whl (364 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m364.4/364.4 kB[0m [31m9.7 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading alembic-1.14.0-py3-none-any.whl (233 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m233.5/233.5 kB[0m [31m8.2 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading colorlog-6.9.0-py3-none-any.whl (11 kB)
Downloading Mako-1.3.6-py3-none-any.whl (78 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m78.6/78.6 kB[0m [31m5.0 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: Mak

In [None]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [None]:
import optuna
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.metrics import f1_score

# Number of target classes. Duplicates output_dim (both are 3) — keep the two in sync.
n_classes = 3

class TextDataset(Dataset):
    """Map-style dataset pairing token-id sequences with their labels.

    Args:
        sequences: Indexable collection of token-id sequences (one per example).
        labels: Indexable collection of labels, aligned with ``sequences``.

    Raises:
        ValueError: If ``sequences`` and ``labels`` differ in length.
    """

    def __init__(self, sequences, labels):
        if len(sequences) != len(labels):
            raise ValueError(
                f"sequences and labels must have the same length, "
                f"got {len(sequences)} and {len(labels)}"
            )
        self.sequences = sequences
        self.labels = labels

    def __len__(self):
        return len(self.sequences)

    def __getitem__(self, idx):
        """Return ``(token_ids, label)`` where ``token_ids`` is a 1-D int64 tensor."""
        # Fix the dtype explicitly: with inference an empty sequence becomes float32,
        # which is not a valid index dtype for an embedding lookup.
        seq = torch.tensor(self.sequences[idx], dtype=torch.long)
        return seq, self.labels[idx]

def collate_batch(batch):
    """Collate ``(sequence, label)`` pairs into a zero-padded batch.

    Args:
        batch: List of ``(1-D tensor, label)`` pairs as returned by ``TextDataset``.

    Returns:
        ``(padded, labels)`` where ``padded`` is ``(batch, longest_sequence)`` with
        shorter sequences right-padded with 0, and ``labels`` is a 1-D tensor.
    """
    sequences = [sequence for sequence, _ in batch]
    targets = [target for _, target in batch]
    return pad_sequence(sequences, batch_first=True), torch.tensor(targets)


# Training split: reshuffled every epoch.
text_dataset = TextDataset(train_data['sequences'].tolist(), train_data['labels'].tolist())
train_loader = DataLoader(text_dataset, batch_size=256, shuffle=True, collate_fn=collate_batch)

# Held-out split (val_data) used for scoring. Evaluation does not depend on batch order,
# so shuffling is switched off to keep it deterministic.
text_dataset2 = TextDataset(val_data['sequences'].tolist(), val_data['labels'].tolist())
test_loader = DataLoader(text_dataset2, batch_size=256, shuffle=False, collate_fn=collate_batch)

def objective(trial, num_epochs=12):
    """Optuna objective: train one model with sampled hyperparameters and score it.

    Reads the module-level ``vocab_size``, ``embedding_dim``, ``output_dim``,
    ``glove_weights``, ``device``, ``train_loader`` and ``test_loader``.

    Args:
        trial: The ``optuna`` trial used to sample hyperparameters.
        num_epochs: Number of passes over ``train_loader``.

    Returns:
        Macro-averaged F1 of the argmax predictions on ``test_loader``.
    """
    hidden_dim = trial.suggest_int("hidden_dim", 64, 256)
    num_filters = trial.suggest_int("num_filters", 50, 200)
    lstm_hidden_dim = trial.suggest_int("lstm_hidden_dim", 64, 256)
    # suggest_float replaces the deprecated suggest_loguniform / suggest_uniform.
    learning_rate = trial.suggest_float("learning_rate", 1e-3, 1e-2, log=True)
    dropout_rate = trial.suggest_float("dropout_rate", 0.2, 0.5)
    # NOTE(review): Optuna warns about list-valued categorical choices; left as-is so
    # study.best_params keeps reporting kernel_sizes as a list.
    kernel_sizes = trial.suggest_categorical("kernel_sizes", [[2, 3, 4], [3, 4, 5], [2, 4, 6]])
    num_layers = trial.suggest_int("num_layers", 1, 3)

    model = CNN_LSTM_Attention_OvR_Model(
        vocab_size=vocab_size,
        embedding_dim=embedding_dim,
        hidden_dim=hidden_dim,
        output_dim=output_dim,
        kernel_sizes=kernel_sizes,
        num_filters=num_filters,
        lstm_hidden_dim=lstm_hidden_dim,
        num_layers=num_layers,
        dropout_rate=dropout_rate,
        glove_weights=glove_weights
    )
    model.to(device)
    criterion = nn.BCEWithLogitsLoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    model.train()
    for _ in range(num_epochs):
        for texts, labels in train_loader:
            texts = texts.to(device).long()
            labels = labels.to(device).long()

            optimizer.zero_grad()
            # One forward pass yields a logit per class.
            logits = model(texts)
            # One-vs-rest: each class column is a separate binary problem against
            # "label == class_id"; the per-class losses are summed before backprop.
            loss = sum(
                criterion(logits[:, class_id], (labels == class_id).float())
                for class_id in range(logits.size(1))
            )
            loss.backward()
            # The update must happen once per batch, inside the batch loop.
            optimizer.step()

    model.eval()
    all_preds = []
    all_labels = []

    with torch.no_grad():
        for padded_sequences, labels in test_loader:
            padded_sequences = padded_sequences.to(device).long()
            output = model(padded_sequences)
            # The predicted class is the one whose binary logit is largest.
            predicted_classes = torch.argmax(output, dim=1)
            all_preds.extend(predicted_classes.cpu().tolist())
            all_labels.extend(labels.cpu().tolist())

    f1 = f1_score(all_labels, all_preds, average="macro")
    return f1

# Maximise the macro-F1 returned by `objective` over 100 trials; no sampler or storage is passed, so Optuna's defaults apply.
study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=100)

# Report the best hyperparameter set found and its score.
print("Best Parameters:", study.best_params)
print("Best F1 Score:", study.best_value)


[I 2024-11-14 12:08:54,376] A new study created in memory with name: no-name-61fb0676-2d14-41e0-81d4-d77b217dfc2f
  learning_rate = trial.suggest_loguniform("learning_rate", 1e-5, 1e-2)
  dropout_rate = trial.suggest_uniform("dropout_rate", 0.2, 0.5)
[I 2024-11-14 12:09:20,908] Trial 0 finished with value: 0.22087526463849658 and parameters: {'hidden_dim': 248, 'num_filters': 78, 'lstm_hidden_dim': 236, 'learning_rate': 0.0014265056148906757, 'dropout_rate': 0.3434375232017183, 'kernel_sizes': [2, 3, 4], 'num_layers': 1}. Best is trial 0 with value: 0.22087526463849658.
  learning_rate = trial.suggest_loguniform("learning_rate", 1e-5, 1e-2)
  dropout_rate = trial.suggest_uniform("dropout_rate", 0.2, 0.5)
[I 2024-11-14 12:10:05,267] Trial 1 finished with value: 0.22087526463849658 and parameters: {'hidden_dim': 187, 'num_filters': 166, 'lstm_hidden_dim': 228, 'learning_rate': 0.00212597573038813, 'dropout_rate': 0.2752194449180221, 'kernel_sizes': [2, 3, 4], 'num_layers': 3}. Best is tr

Best Parameters: {'hidden_dim': 145, 'num_filters': 98, 'lstm_hidden_dim': 203, 'learning_rate': 1.4197737369066888e-05, 'dropout_rate': 0.3404884780048647, 'kernel_sizes': [2, 3, 4], 'num_layers': 1}
Best F1 Score: 0.32155648068635906
