# Load libraries and data

In [3]:
from pathlib import Path
import re
import numpy as np
import pandas as pd
from collections import Counter
import torch
import torch.nn as nn
import torch.optim as optim
# import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from sklearn.metrics import roc_auc_score
import wandb

In [4]:
train_data = pd.read_csv(Path("..", "data", "processed", "train.csv"))
val_data = pd.read_csv(Path("..", "data", "processed", "val.csv"))

# Parameters & wandb

In [5]:
EMBEDDING_DIM = 100
MAX_LEN = 150
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
print(f"Using {DEVICE}")

Using cuda


In [6]:
wandb.login()

[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: Currently logged in as: [33mdaniele-didino[0m to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


True

# Tokenizer

In [7]:
# Tokenizer (basic word splitting)
def basic_tokenizer(text: str) -> str:
    text = text.lower()
    text = re.sub(r'[^a-zA-Z0-9\s]', '', text) # Remove special characters
    return text.split()


def build_vocab(texts):
    token_counts = Counter()
    for text in texts:
        cleaned_text = basic_tokenizer(text)
        token_counts.update(cleaned_text)
    vocab = {word: idx + 2 for idx, (word, _) in enumerate(token_counts.most_common())}  # Reserve index 0 for padding, 1 for unknown
    vocab["<PAD>"] = 0
    vocab["<UNK>"] = 1
    return vocab


# Convert texts to numerical sequences
def encode_text(text, vocab, max_len=150):
    tokens = basic_tokenizer(text)
    encoded = [vocab.get(word, vocab["<UNK>"]) for word in tokens[:max_len]]
    return np.pad(encoded, (0, max_len - len(encoded)), constant_values=vocab["<PAD>"])[:max_len]

# Model

In [8]:
# Dataset Class
class ToxicDataset(Dataset):
    def __init__(self, texts, labels, vocab, max_len=150):
        texts = [encode_text(text, vocab, max_len) for text in texts]
        self.texts = [torch.tensor(text, dtype=torch.long) for text in texts]
        self.labels = torch.tensor(labels, dtype=torch.float32)

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, idx):
        return {
            "input_ids": self.texts[idx],
            "labels": self.labels[idx]
        }


# Load Pretrained Embeddings (GloVe)
def load_glove_embeddings(filepath, vocab, embedding_dim=100):
    embeddings = np.random.uniform(-0.25, 0.25, (len(vocab), embedding_dim))
    with open(filepath, 'r', encoding="utf-8") as f:
        for line in f:
            values = line.split()
            word = values[0]
            vector = np.asarray(values[1:], dtype="float32")
            if word in vocab:
                embeddings[vocab[word]] = vector
    return torch.tensor(embeddings, dtype=torch.float32)


# Model: CNN + GRU
class ToxicClassifier(nn.Module):
    def __init__(self, embedding_dim, hidden_dim, num_filters, kernel_size, dropout, num_classes):
        super().__init__()
        # Embedding layers
        self.embedding = nn.Embedding.from_pretrained(embedding_matrix, freeze=False)
        # CNN layer
        self.conv = nn.Conv1d(
            in_channels=embedding_dim,
            out_channels=num_filters,
            kernel_size=kernel_size,
            padding=1)
        self.pool = nn.AdaptiveMaxPool1d(50) # This reduces the sequence length
        # GRU layer
        self.gru = nn.GRU(
            input_size=num_filters,
            hidden_size=hidden_dim,
            batch_first=True,
            bidirectional=True)
        # Fully connected layer
        self.dropout = nn.Dropout(dropout)
        self.fc = nn.Linear(hidden_dim * 2, num_classes)

    def forward(self, x):
        x = self.embedding(x) # (batch_size, seq_len, embedding_dim)
        x = x.permute(0, 2, 1) # change shape for conv1d (batch_size, channels, seq_len)
        x = torch.relu(self.conv(x))
        x = self.pool(x)
        x = x.permute(0, 2, 1) # change shape back for GRU (batch_size, seq_len, channels)
        x, _ = self.gru(x)
        x = self.dropout(x[:, -1, :]) # take the last time step
        return self.fc(x)


# Training function
def model_train(model, train_loader, val_loader, criterion, optimizer, epochs, device):
    model.to(device)

    for epoch in range(epochs):
        model.train()  # set model to training mode
        total_train_loss = 0
        all_preds = []
        all_labels = []

        for batch in train_loader:
            input_ids = batch['input_ids'].to(device)
            labels = batch['labels'].to(device)

            optimizer.zero_grad()
            outputs = model(input_ids)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            total_train_loss += loss.item()
            outputs = torch.sigmoid(outputs)
            all_preds.append(outputs.cpu().detach().numpy())
            all_labels.append(labels.cpu().numpy())
        
        # Compute Loss
        train_loss = total_train_loss / len(train_loader)

        # Compute AUC_ROC
        all_preds = np.concatenate(all_preds, axis=0).T
        all_labels = np.concatenate(all_labels, axis=0).T
        train_roc_auc = np.mean(
            [roc_auc_score(y_true, y_pred) for y_true, y_pred in zip(all_labels, all_preds)]
        )

        # Validation Step
        model.eval()
        val_preds = []
        val_labels = []
        total_val_loss = 0
        with torch.no_grad():
            for batch in val_loader:
                input_ids = batch['input_ids'].to(device)
                labels = batch['labels'].to(device)
                outputs = model(input_ids)
                
                # Compute validation loss
                loss = criterion(outputs, labels)
                total_val_loss += loss.item()

                val_preds.append(outputs.cpu().numpy())
                val_labels.append(labels.cpu().numpy())
        
        # Compute validation loss
        val_loss = total_val_loss / len(val_loader)

        # Compute AUC_ROC
        val_preds = np.concatenate(val_preds, axis=0).T
        val_labels = np.concatenate(val_labels, axis=0).T
        val_roc_auc = np.mean(
            [roc_auc_score(y_true, y_pred) for y_true, y_pred in zip(val_labels, val_preds)]
        )

        print(f"Epoch {epoch + 1}/{epochs}")
        print(f"Train Loss: {train_loss:.4f} | AUC_ROC: {train_roc_auc:.4f}")
        print(f"Val Loss: {val_loss:.4f} | AUC_ROC: {val_roc_auc:.4f}")

        wandb.log({
            "epoch": epoch + 1,
            "train_loss": train_loss,
            "train_roc_auc": train_roc_auc,
            "val_loss": val_loss,
            "val_roc_auc": val_roc_auc,
        })

    return train_loss, train_roc_auc, val_loss, val_roc_auc


# wandb Sweeps

In [9]:
sweep_config = {
    "method": "bayes", # "random" or "grid" or "bayes"
    "metric": {"name": "val_roc_auc", "goal": "maximize"},
    "parameters": {
        "embed_dim": {"values": [EMBEDDING_DIM]},
        "hidden_dim": {"values": [64, 128, 256]},
        "kernel_size": {"min": 2, "max": 5},
        "num_filters": {"values": [64, 128, 256]},
        "dropout": {"min": 0.2, "max": 0.5},
        "learning_rate": {"min": 1e-4, "max": 1e-2, "distribution": "log_uniform_values"},
        "batch_size": {"values": [16, 32, 64]},
        "epochs": {"min": 1, "max": 5},
    }
}

# Create the sweep
sweep_id = wandb.sweep(sweep_config, project="toxic_comment_clf")

Create sweep with ID: wdc1dzb1
Sweep URL: https://wandb.ai/daniele-didino/toxic_comment_clf/sweeps/wdc1dzb1


In [10]:
train_input = train_data.comment_text.to_list()
train_labels = train_data.loc[:, ["toxic", "severe_toxic", "obscene", "threat", "insult", "identity_hate"]].values.tolist()

val_input = val_data.comment_text.to_list()
val_labels = val_data.loc[:,  ["toxic", "severe_toxic", "obscene", "threat", "insult", "identity_hate"]].values.tolist()

vocab = build_vocab(train_input)

embedding_matrix = load_glove_embeddings("../embedding/glove.6B.100d.txt", vocab, EMBEDDING_DIM)

# Prepare train dataset
train_dataset = ToxicDataset(train_input, train_labels, vocab, MAX_LEN)

# Prepare validation dataset
val_dataset = ToxicDataset(val_input, val_labels, vocab, MAX_LEN)

In [11]:
# Define the training function
def train_sweep():

    # EPOCHS = 2
    num_classes = 6  # toxic, severe_toxic, obscene, threat, insult, identity_hate

    with wandb.init() as run:
        config = wandb.config # sample hyperparameters
        
        # Initialize DataLoaders
        train_dataloader = DataLoader(train_dataset, batch_size=config["batch_size"], shuffle=True)
        val_dataloader = DataLoader(val_dataset, batch_size=config["batch_size"], shuffle=True)
    
        # Initialize model
        model = ToxicClassifier(
            config["embed_dim"],
            config["hidden_dim"],
            config["num_filters"],
            config["kernel_size"],
            config["dropout"],
            num_classes=num_classes)
        model.to(DEVICE)
    
        # Loss
        criterion = nn.BCEWithLogitsLoss() # Multi-label loss
    
        # Optimizer
        optimizer = optim.Adam(model.parameters(), lr=config["learning_rate"])
    
        # Training loop
        # _ = model_train(model, train_dataloader, val_dataloader, criterion, optimizer, EPOCHS, DEVICE)
        _ = model_train(model, train_dataloader, val_dataloader, criterion, optimizer, config["epochs"], DEVICE)

In [13]:
# Launch the sweep
wandb.agent(sweep_id, function=train_sweep, count=20)

[34m[1mwandb[0m: Agent Starting Run: iu4erau5 with config:
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	dropout: 0.2215374519536568
[34m[1mwandb[0m: 	embed_dim: 100
[34m[1mwandb[0m: 	epochs: 2
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	kernel_size: 4
[34m[1mwandb[0m: 	learning_rate: 0.00595487859017714
[34m[1mwandb[0m: 	num_filters: 256


Epoch 1/2
Train Loss: 0.0891 | AUC_ROC: 0.8632
Val Loss: 0.0732 | AUC_ROC: 0.9536
Epoch 2/2
Train Loss: 0.1105 | AUC_ROC: 0.7920
Val Loss: 0.0826 | AUC_ROC: 0.9335


0,1
epoch,▁█
train_loss,▁█
train_roc_auc,█▁
val_loss,▁█
val_roc_auc,█▁

0,1
epoch,2.0
train_loss,0.11054
train_roc_auc,0.79202
val_loss,0.08263
val_roc_auc,0.93354


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: ezchz1wd with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	dropout: 0.4445974334927478
[34m[1mwandb[0m: 	embed_dim: 100
[34m[1mwandb[0m: 	epochs: 4
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	kernel_size: 5
[34m[1mwandb[0m: 	learning_rate: 0.0008039270737862104
[34m[1mwandb[0m: 	num_filters: 128


Epoch 1/4
Train Loss: 0.0652 | AUC_ROC: 0.9484
Val Loss: 0.0524 | AUC_ROC: 0.9747
Epoch 2/4
Train Loss: 0.0441 | AUC_ROC: 0.9773
Val Loss: 0.0481 | AUC_ROC: 0.9784
Epoch 3/4
Train Loss: 0.0356 | AUC_ROC: 0.9855
Val Loss: 0.0506 | AUC_ROC: 0.9781
Epoch 4/4
Train Loss: 0.0294 | AUC_ROC: 0.9903
Val Loss: 0.0555 | AUC_ROC: 0.9777


0,1
epoch,▁▃▆█
train_loss,█▄▂▁
train_roc_auc,▁▆▇█
val_loss,▅▁▃█
val_roc_auc,▁█▇▇

0,1
epoch,4.0
train_loss,0.02937
train_roc_auc,0.99031
val_loss,0.05555
val_roc_auc,0.97765


[34m[1mwandb[0m: Agent Starting Run: w27vf93u with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	dropout: 0.44988501516771445
[34m[1mwandb[0m: 	embed_dim: 100
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	kernel_size: 5
[34m[1mwandb[0m: 	learning_rate: 0.0006247627042943908
[34m[1mwandb[0m: 	num_filters: 128


Epoch 1/5
Train Loss: 0.0742 | AUC_ROC: 0.9341
Val Loss: 0.0521 | AUC_ROC: 0.9754
Epoch 2/5
Train Loss: 0.0460 | AUC_ROC: 0.9750
Val Loss: 0.0509 | AUC_ROC: 0.9770
Epoch 3/5
Train Loss: 0.0387 | AUC_ROC: 0.9811
Val Loss: 0.0498 | AUC_ROC: 0.9777
Epoch 4/5
Train Loss: 0.0326 | AUC_ROC: 0.9867
Val Loss: 0.0512 | AUC_ROC: 0.9782
Epoch 5/5
Train Loss: 0.0275 | AUC_ROC: 0.9905
Val Loss: 0.0563 | AUC_ROC: 0.9770


0,1
epoch,▁▃▅▆█
train_loss,█▄▃▂▁
train_roc_auc,▁▆▇██
val_loss,▃▂▁▃█
val_roc_auc,▁▅▇█▅

0,1
epoch,5.0
train_loss,0.02753
train_roc_auc,0.99053
val_loss,0.05628
val_roc_auc,0.97702


[34m[1mwandb[0m: Agent Starting Run: sb8wqitz with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	dropout: 0.4854884152096631
[34m[1mwandb[0m: 	embed_dim: 100
[34m[1mwandb[0m: 	epochs: 4
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	kernel_size: 3
[34m[1mwandb[0m: 	learning_rate: 0.00023747992796155145
[34m[1mwandb[0m: 	num_filters: 256


Epoch 1/4
Train Loss: 0.0876 | AUC_ROC: 0.8961
Val Loss: 0.0537 | AUC_ROC: 0.9742
Epoch 2/4
Train Loss: 0.0511 | AUC_ROC: 0.9676
Val Loss: 0.0505 | AUC_ROC: 0.9765
Epoch 3/4
Train Loss: 0.0452 | AUC_ROC: 0.9746
Val Loss: 0.0504 | AUC_ROC: 0.9773
Epoch 4/4
Train Loss: 0.0403 | AUC_ROC: 0.9792
Val Loss: 0.0523 | AUC_ROC: 0.9772


0,1
epoch,▁▃▆█
train_loss,█▃▂▁
train_roc_auc,▁▇██
val_loss,█▁▁▅
val_roc_auc,▁▆██

0,1
epoch,4.0
train_loss,0.04032
train_roc_auc,0.9792
val_loss,0.05228
val_roc_auc,0.97719


[34m[1mwandb[0m: Agent Starting Run: rvy7a08h with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	dropout: 0.3994719481954938
[34m[1mwandb[0m: 	embed_dim: 100
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	kernel_size: 5
[34m[1mwandb[0m: 	learning_rate: 0.00024626058064443747
[34m[1mwandb[0m: 	num_filters: 256


Epoch 1/5
Train Loss: 0.0831 | AUC_ROC: 0.9107
Val Loss: 0.0546 | AUC_ROC: 0.9735
Epoch 2/5
Train Loss: 0.0503 | AUC_ROC: 0.9684
Val Loss: 0.0514 | AUC_ROC: 0.9755
Epoch 3/5
Train Loss: 0.0441 | AUC_ROC: 0.9768
Val Loss: 0.0531 | AUC_ROC: 0.9772
Epoch 4/5
Train Loss: 0.0390 | AUC_ROC: 0.9808
Val Loss: 0.0511 | AUC_ROC: 0.9776
Epoch 5/5
Train Loss: 0.0341 | AUC_ROC: 0.9853
Val Loss: 0.0537 | AUC_ROC: 0.9777


0,1
epoch,▁▃▅▆█
train_loss,█▃▂▂▁
train_roc_auc,▁▆▇██
val_loss,█▁▅▁▆
val_roc_auc,▁▄▇██

0,1
epoch,5.0
train_loss,0.03407
train_roc_auc,0.98532
val_loss,0.05372
val_roc_auc,0.97772


[34m[1mwandb[0m: Agent Starting Run: wfdw51sg with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	dropout: 0.4499568223919632
[34m[1mwandb[0m: 	embed_dim: 100
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	kernel_size: 3
[34m[1mwandb[0m: 	learning_rate: 0.00046893453624174
[34m[1mwandb[0m: 	num_filters: 256


Epoch 1/5
Train Loss: 0.0857 | AUC_ROC: 0.9007
Val Loss: 0.0527 | AUC_ROC: 0.9751
Epoch 2/5
Train Loss: 0.0491 | AUC_ROC: 0.9702
Val Loss: 0.0494 | AUC_ROC: 0.9772
Epoch 3/5
Train Loss: 0.0422 | AUC_ROC: 0.9786
Val Loss: 0.0500 | AUC_ROC: 0.9773
Epoch 4/5
Train Loss: 0.0362 | AUC_ROC: 0.9832
Val Loss: 0.0528 | AUC_ROC: 0.9780
Epoch 5/5
Train Loss: 0.0316 | AUC_ROC: 0.9866
Val Loss: 0.0573 | AUC_ROC: 0.9774


0,1
epoch,▁▃▅▆█
train_loss,█▃▂▂▁
train_roc_auc,▁▇▇██
val_loss,▄▁▁▄█
val_roc_auc,▁▆▆█▇

0,1
epoch,5.0
train_loss,0.03163
train_roc_auc,0.98661
val_loss,0.05731
val_roc_auc,0.97743


[34m[1mwandb[0m: Agent Starting Run: dd58aih1 with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	dropout: 0.4493794111087579
[34m[1mwandb[0m: 	embed_dim: 100
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	kernel_size: 4
[34m[1mwandb[0m: 	learning_rate: 0.0001054477709649774
[34m[1mwandb[0m: 	num_filters: 256


Epoch 1/5
Train Loss: 0.1196 | AUC_ROC: 0.8102
Val Loss: 0.0649 | AUC_ROC: 0.9651
Epoch 2/5
Train Loss: 0.0581 | AUC_ROC: 0.9604
Val Loss: 0.0552 | AUC_ROC: 0.9723
Epoch 3/5
Train Loss: 0.0516 | AUC_ROC: 0.9691
Val Loss: 0.0556 | AUC_ROC: 0.9740
Epoch 4/5
Train Loss: 0.0478 | AUC_ROC: 0.9739
Val Loss: 0.0516 | AUC_ROC: 0.9759
Epoch 5/5
Train Loss: 0.0443 | AUC_ROC: 0.9774
Val Loss: 0.0515 | AUC_ROC: 0.9766


0,1
epoch,▁▃▅▆█
train_loss,█▂▂▁▁
train_roc_auc,▁▇███
val_loss,█▃▃▁▁
val_roc_auc,▁▅▆██

0,1
epoch,5.0
train_loss,0.04433
train_roc_auc,0.97739
val_loss,0.05146
val_roc_auc,0.97663


[34m[1mwandb[0m: Agent Starting Run: n8fy7xy0 with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	dropout: 0.43062370912890424
[34m[1mwandb[0m: 	embed_dim: 100
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	kernel_size: 4
[34m[1mwandb[0m: 	learning_rate: 0.0006548267874973512
[34m[1mwandb[0m: 	num_filters: 256


Epoch 1/5
Train Loss: 0.0812 | AUC_ROC: 0.9138
Val Loss: 0.0529 | AUC_ROC: 0.9749
Epoch 2/5
Train Loss: 0.0482 | AUC_ROC: 0.9721
Val Loss: 0.0497 | AUC_ROC: 0.9772
Epoch 3/5
Train Loss: 0.0402 | AUC_ROC: 0.9798
Val Loss: 0.0518 | AUC_ROC: 0.9758
Epoch 4/5
Train Loss: 0.0341 | AUC_ROC: 0.9844
Val Loss: 0.0538 | AUC_ROC: 0.9755
Epoch 5/5
Train Loss: 0.0296 | AUC_ROC: 0.9879
Val Loss: 0.0576 | AUC_ROC: 0.9747


0,1
epoch,▁▃▅▆█
train_loss,█▄▂▂▁
train_roc_auc,▁▇▇██
val_loss,▄▁▃▅█
val_roc_auc,▂█▄▃▁

0,1
epoch,5.0
train_loss,0.02961
train_roc_auc,0.98788
val_loss,0.05764
val_roc_auc,0.97471


[34m[1mwandb[0m: Agent Starting Run: jougogde with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	dropout: 0.46321514172843686
[34m[1mwandb[0m: 	embed_dim: 100
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	kernel_size: 2
[34m[1mwandb[0m: 	learning_rate: 0.00011371364027140131
[34m[1mwandb[0m: 	num_filters: 128


Epoch 1/5
Train Loss: 0.1569 | AUC_ROC: 0.6674
Val Loss: 0.0722 | AUC_ROC: 0.9589
Epoch 2/5
Train Loss: 0.0636 | AUC_ROC: 0.9508
Val Loss: 0.0579 | AUC_ROC: 0.9701
Epoch 3/5
Train Loss: 0.0550 | AUC_ROC: 0.9629
Val Loss: 0.0537 | AUC_ROC: 0.9727
Epoch 4/5
Train Loss: 0.0509 | AUC_ROC: 0.9694
Val Loss: 0.0520 | AUC_ROC: 0.9743
Epoch 5/5
Train Loss: 0.0482 | AUC_ROC: 0.9725
Val Loss: 0.0515 | AUC_ROC: 0.9753


0,1
epoch,▁▃▅▆█
train_loss,█▂▁▁▁
train_roc_auc,▁████
val_loss,█▃▂▁▁
val_roc_auc,▁▆▇██

0,1
epoch,5.0
train_loss,0.04819
train_roc_auc,0.97252
val_loss,0.05148
val_roc_auc,0.97531


[34m[1mwandb[0m: Agent Starting Run: 5diwf70v with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	dropout: 0.4953053987426373
[34m[1mwandb[0m: 	embed_dim: 100
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	kernel_size: 4
[34m[1mwandb[0m: 	learning_rate: 0.00010563177809877102
[34m[1mwandb[0m: 	num_filters: 256


Epoch 1/5
Train Loss: 0.0981 | AUC_ROC: 0.8758
Val Loss: 0.0585 | AUC_ROC: 0.9694
Epoch 2/5
Train Loss: 0.0545 | AUC_ROC: 0.9660
Val Loss: 0.0591 | AUC_ROC: 0.9731
Epoch 3/5
Train Loss: 0.0491 | AUC_ROC: 0.9717
Val Loss: 0.0511 | AUC_ROC: 0.9755
Epoch 4/5
Train Loss: 0.0454 | AUC_ROC: 0.9750
Val Loss: 0.0505 | AUC_ROC: 0.9757
Epoch 5/5
Train Loss: 0.0422 | AUC_ROC: 0.9784
Val Loss: 0.0505 | AUC_ROC: 0.9765


0,1
epoch,▁▃▅▆█
train_loss,█▃▂▁▁
train_roc_auc,▁▇███
val_loss,▇█▁▁▁
val_roc_auc,▁▅▇▇█

0,1
epoch,5.0
train_loss,0.04218
train_roc_auc,0.97837
val_loss,0.05052
val_roc_auc,0.97652


[34m[1mwandb[0m: Agent Starting Run: chuxdggh with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	dropout: 0.45684281479646466
[34m[1mwandb[0m: 	embed_dim: 100
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	kernel_size: 4
[34m[1mwandb[0m: 	learning_rate: 0.00013266295892079753
[34m[1mwandb[0m: 	num_filters: 256


Epoch 1/5
Train Loss: 0.1006 | AUC_ROC: 0.8625
Val Loss: 0.0571 | AUC_ROC: 0.9714
Epoch 2/5
Train Loss: 0.0545 | AUC_ROC: 0.9646
Val Loss: 0.0518 | AUC_ROC: 0.9755
Epoch 3/5
Train Loss: 0.0483 | AUC_ROC: 0.9718
Val Loss: 0.0508 | AUC_ROC: 0.9763
Epoch 4/5
Train Loss: 0.0442 | AUC_ROC: 0.9756
Val Loss: 0.0499 | AUC_ROC: 0.9775
Epoch 5/5
Train Loss: 0.0403 | AUC_ROC: 0.9799
Val Loss: 0.0516 | AUC_ROC: 0.9775


0,1
epoch,▁▃▅▆█
train_loss,█▃▂▁▁
train_roc_auc,▁▇███
val_loss,█▃▂▁▃
val_roc_auc,▁▆▇██

0,1
epoch,5.0
train_loss,0.04027
train_roc_auc,0.97986
val_loss,0.05159
val_roc_auc,0.97753


[34m[1mwandb[0m: Agent Starting Run: 0dahmpu8 with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	dropout: 0.4893033397512304
[34m[1mwandb[0m: 	embed_dim: 100
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	kernel_size: 4
[34m[1mwandb[0m: 	learning_rate: 0.000183107723150314
[34m[1mwandb[0m: 	num_filters: 64


Epoch 1/5
Train Loss: 0.0907 | AUC_ROC: 0.8999
Val Loss: 0.0556 | AUC_ROC: 0.9710
Epoch 2/5
Train Loss: 0.0525 | AUC_ROC: 0.9678
Val Loss: 0.0533 | AUC_ROC: 0.9745
Epoch 3/5
Train Loss: 0.0471 | AUC_ROC: 0.9741
Val Loss: 0.0499 | AUC_ROC: 0.9764
Epoch 4/5
Train Loss: 0.0431 | AUC_ROC: 0.9775
Val Loss: 0.0497 | AUC_ROC: 0.9769
Epoch 5/5
Train Loss: 0.0395 | AUC_ROC: 0.9811
Val Loss: 0.0509 | AUC_ROC: 0.9772


0,1
epoch,▁▃▅▆█
train_loss,█▃▂▂▁
train_roc_auc,▁▇▇██
val_loss,█▅▁▁▂
val_roc_auc,▁▅▇██

0,1
epoch,5.0
train_loss,0.03947
train_roc_auc,0.98105
val_loss,0.05088
val_roc_auc,0.97718


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: gqq952fq with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	dropout: 0.4454069117837284
[34m[1mwandb[0m: 	embed_dim: 100
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	kernel_size: 5
[34m[1mwandb[0m: 	learning_rate: 0.00011899748978242172
[34m[1mwandb[0m: 	num_filters: 128


Epoch 1/5
Train Loss: 0.1118 | AUC_ROC: 0.8352
Val Loss: 0.0646 | AUC_ROC: 0.9667
Epoch 2/5
Train Loss: 0.0576 | AUC_ROC: 0.9593
Val Loss: 0.0556 | AUC_ROC: 0.9730
Epoch 3/5
Train Loss: 0.0511 | AUC_ROC: 0.9679
Val Loss: 0.0518 | AUC_ROC: 0.9756
Epoch 4/5
Train Loss: 0.0467 | AUC_ROC: 0.9731
Val Loss: 0.0539 | AUC_ROC: 0.9760
Epoch 5/5
Train Loss: 0.0436 | AUC_ROC: 0.9765
Val Loss: 0.0506 | AUC_ROC: 0.9768


0,1
epoch,▁▃▅▆█
train_loss,█▂▂▁▁
train_roc_auc,▁▇███
val_loss,█▃▂▃▁
val_roc_auc,▁▅▇▇█

0,1
epoch,5.0
train_loss,0.04359
train_roc_auc,0.97652
val_loss,0.0506
val_roc_auc,0.97679


[34m[1mwandb[0m: Agent Starting Run: s4jstzrg with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	dropout: 0.4998669567693553
[34m[1mwandb[0m: 	embed_dim: 100
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	kernel_size: 2
[34m[1mwandb[0m: 	learning_rate: 0.003300747864702935
[34m[1mwandb[0m: 	num_filters: 256


Epoch 1/5
Train Loss: 0.0722 | AUC_ROC: 0.9315
Val Loss: 0.0524 | AUC_ROC: 0.9737
Epoch 2/5
Train Loss: 0.0463 | AUC_ROC: 0.9734
Val Loss: 0.0538 | AUC_ROC: 0.9736
Epoch 3/5
Train Loss: 0.0386 | AUC_ROC: 0.9810
Val Loss: 0.0529 | AUC_ROC: 0.9748
Epoch 4/5
Train Loss: 0.0343 | AUC_ROC: 0.9847
Val Loss: 0.0579 | AUC_ROC: 0.9744
Epoch 5/5
Train Loss: 0.0322 | AUC_ROC: 0.9873
Val Loss: 0.0620 | AUC_ROC: 0.9729


0,1
epoch,▁▃▅▆█
train_loss,█▃▂▁▁
train_roc_auc,▁▆▇██
val_loss,▁▂▁▅█
val_roc_auc,▄▄█▇▁

0,1
epoch,5.0
train_loss,0.03224
train_roc_auc,0.9873
val_loss,0.06203
val_roc_auc,0.97285


[34m[1mwandb[0m: Agent Starting Run: iorl0q6m with config:
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	dropout: 0.4985148868742243
[34m[1mwandb[0m: 	embed_dim: 100
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	kernel_size: 5
[34m[1mwandb[0m: 	learning_rate: 0.0003819888583547725
[34m[1mwandb[0m: 	num_filters: 128


Epoch 1/5
Train Loss: 0.0707 | AUC_ROC: 0.9369
Val Loss: 0.0517 | AUC_ROC: 0.9760
Epoch 2/5
Train Loss: 0.0475 | AUC_ROC: 0.9727
Val Loss: 0.0492 | AUC_ROC: 0.9783
Epoch 3/5
Train Loss: 0.0402 | AUC_ROC: 0.9806
Val Loss: 0.0482 | AUC_ROC: 0.9795
Epoch 4/5
Train Loss: 0.0345 | AUC_ROC: 0.9861
Val Loss: 0.0509 | AUC_ROC: 0.9796
Epoch 5/5
Train Loss: 0.0298 | AUC_ROC: 0.9897
Val Loss: 0.0552 | AUC_ROC: 0.9781


0,1
epoch,▁▃▅▆█
train_loss,█▄▃▂▁
train_roc_auc,▁▆▇██
val_loss,▅▂▁▄█
val_roc_auc,▁▅██▅

0,1
epoch,5.0
train_loss,0.02977
train_roc_auc,0.98966
val_loss,0.05515
val_roc_auc,0.97812


[34m[1mwandb[0m: Agent Starting Run: q5m3pife with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	dropout: 0.4553126049789043
[34m[1mwandb[0m: 	embed_dim: 100
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	kernel_size: 5
[34m[1mwandb[0m: 	learning_rate: 0.0001733243731332818
[34m[1mwandb[0m: 	num_filters: 256


Epoch 1/5
Train Loss: 0.1161 | AUC_ROC: 0.8153
Val Loss: 0.0629 | AUC_ROC: 0.9685
Epoch 2/5
Train Loss: 0.0567 | AUC_ROC: 0.9609
Val Loss: 0.0550 | AUC_ROC: 0.9740
Epoch 3/5
Train Loss: 0.0496 | AUC_ROC: 0.9697
Val Loss: 0.0508 | AUC_ROC: 0.9761
Epoch 4/5
Train Loss: 0.0451 | AUC_ROC: 0.9748
Val Loss: 0.0518 | AUC_ROC: 0.9765
Epoch 5/5
Train Loss: 0.0411 | AUC_ROC: 0.9776
Val Loss: 0.0514 | AUC_ROC: 0.9766


0,1
epoch,▁▃▅▆█
train_loss,█▂▂▁▁
train_roc_auc,▁▇███
val_loss,█▃▁▂▁
val_roc_auc,▁▆███

0,1
epoch,5.0
train_loss,0.04106
train_roc_auc,0.97757
val_loss,0.05141
val_roc_auc,0.97661


[34m[1mwandb[0m: Agent Starting Run: 02isoht1 with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	dropout: 0.4907446795302052
[34m[1mwandb[0m: 	embed_dim: 100
[34m[1mwandb[0m: 	epochs: 4
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	kernel_size: 5
[34m[1mwandb[0m: 	learning_rate: 0.00010387660327337795
[34m[1mwandb[0m: 	num_filters: 256


Epoch 1/4
Train Loss: 0.1352 | AUC_ROC: 0.7321
Val Loss: 0.0698 | AUC_ROC: 0.9628
Epoch 2/4
Train Loss: 0.0632 | AUC_ROC: 0.9504
Val Loss: 0.0577 | AUC_ROC: 0.9709
Epoch 3/4
Train Loss: 0.0540 | AUC_ROC: 0.9625
Val Loss: 0.0534 | AUC_ROC: 0.9738
Epoch 4/4
Train Loss: 0.0492 | AUC_ROC: 0.9696
Val Loss: 0.0520 | AUC_ROC: 0.9754


0,1
epoch,▁▃▆█
train_loss,█▂▁▁
train_roc_auc,▁▇██
val_loss,█▃▂▁
val_roc_auc,▁▆▇█

0,1
epoch,4.0
train_loss,0.04923
train_roc_auc,0.96961
val_loss,0.05203
val_roc_auc,0.97536


[34m[1mwandb[0m: Agent Starting Run: 14gyywxl with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	dropout: 0.4717882142092425
[34m[1mwandb[0m: 	embed_dim: 100
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	kernel_size: 5
[34m[1mwandb[0m: 	learning_rate: 0.0001728585579652961
[34m[1mwandb[0m: 	num_filters: 128


Epoch 1/5
Train Loss: 0.0983 | AUC_ROC: 0.8718
Val Loss: 0.0563 | AUC_ROC: 0.9712
Epoch 2/5
Train Loss: 0.0536 | AUC_ROC: 0.9646
Val Loss: 0.0515 | AUC_ROC: 0.9750
Epoch 3/5
Train Loss: 0.0478 | AUC_ROC: 0.9733
Val Loss: 0.0523 | AUC_ROC: 0.9761
Epoch 4/5
Train Loss: 0.0434 | AUC_ROC: 0.9765
Val Loss: 0.0550 | AUC_ROC: 0.9764
Epoch 5/5
Train Loss: 0.0397 | AUC_ROC: 0.9794
Val Loss: 0.0504 | AUC_ROC: 0.9770


0,1
epoch,▁▃▅▆█
train_loss,█▃▂▁▁
train_roc_auc,▁▇███
val_loss,█▂▃▆▁
val_roc_auc,▁▆▇▇█

0,1
epoch,5.0
train_loss,0.03969
train_roc_auc,0.9794
val_loss,0.05038
val_roc_auc,0.97703


[34m[1mwandb[0m: Agent Starting Run: eedcjxex with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	dropout: 0.3845750159238698
[34m[1mwandb[0m: 	embed_dim: 100
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	kernel_size: 2
[34m[1mwandb[0m: 	learning_rate: 0.00013790983115644
[34m[1mwandb[0m: 	num_filters: 128


Epoch 1/5
Train Loss: 0.1152 | AUC_ROC: 0.8206
Val Loss: 0.0597 | AUC_ROC: 0.9680
Epoch 2/5
Train Loss: 0.0562 | AUC_ROC: 0.9634
Val Loss: 0.0532 | AUC_ROC: 0.9739
Epoch 3/5
Train Loss: 0.0502 | AUC_ROC: 0.9709
Val Loss: 0.0507 | AUC_ROC: 0.9758
Epoch 4/5
Train Loss: 0.0466 | AUC_ROC: 0.9740
Val Loss: 0.0525 | AUC_ROC: 0.9759
Epoch 5/5
Train Loss: 0.0438 | AUC_ROC: 0.9772
Val Loss: 0.0502 | AUC_ROC: 0.9767


0,1
epoch,▁▃▅▆█
train_loss,█▂▂▁▁
train_roc_auc,▁▇███
val_loss,█▃▁▃▁
val_roc_auc,▁▆▇▇█

0,1
epoch,5.0
train_loss,0.0438
train_roc_auc,0.97718
val_loss,0.05021
val_roc_auc,0.97671


[34m[1mwandb[0m: Agent Starting Run: nolwbj0r with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	dropout: 0.4369570767153954
[34m[1mwandb[0m: 	embed_dim: 100
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	kernel_size: 5
[34m[1mwandb[0m: 	learning_rate: 0.0001560681307012586
[34m[1mwandb[0m: 	num_filters: 128


Epoch 1/5
Train Loss: 0.0961 | AUC_ROC: 0.8824
Val Loss: 0.0567 | AUC_ROC: 0.9714
Epoch 2/5
Train Loss: 0.0544 | AUC_ROC: 0.9655
Val Loss: 0.0520 | AUC_ROC: 0.9750
Epoch 3/5
Train Loss: 0.0483 | AUC_ROC: 0.9717
Val Loss: 0.0504 | AUC_ROC: 0.9762
Epoch 4/5
Train Loss: 0.0442 | AUC_ROC: 0.9760
Val Loss: 0.0521 | AUC_ROC: 0.9765
Epoch 5/5
Train Loss: 0.0405 | AUC_ROC: 0.9794
Val Loss: 0.0506 | AUC_ROC: 0.9768


0,1
epoch,▁▃▅▆█
train_loss,█▃▂▁▁
train_roc_auc,▁▇▇██
val_loss,█▃▁▃▁
val_roc_auc,▁▆▇██

0,1
epoch,5.0
train_loss,0.04046
train_roc_auc,0.97943
val_loss,0.05063
val_roc_auc,0.9768
