# Load libraries and data

In [2]:
from pathlib import Path
import re
import numpy as np
import pandas as pd
from collections import Counter
import torch
import torch.nn as nn
import torch.optim as optim
# import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from sklearn.metrics import roc_auc_score
import wandb

In [3]:
train_data = pd.read_csv(Path("..", "data", "processed", "train.csv"))
val_data = pd.read_csv(Path("..", "data", "processed", "val.csv"))

# Parameters & wandb

In [4]:
EMBEDDING_DIM = 100
MAX_LEN = 150
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
print(f"Using {DEVICE}")

Using cuda


In [5]:
wandb.login()

[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: Currently logged in as: [33mdaniele-didino[0m to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


True

# Tokenizer

In [6]:
# Tokenizer (basic word splitting)
def basic_tokenizer(text: str) -> str:
    text = text.lower()
    text = re.sub(r'[^a-zA-Z0-9\s]', '', text) # Remove special characters
    return text.split()


def build_vocab(texts):
    token_counts = Counter()
    for text in texts:
        cleaned_text = basic_tokenizer(text)
        token_counts.update(cleaned_text)
    vocab = {word: idx + 2 for idx, (word, _) in enumerate(token_counts.most_common())}  # Reserve index 0 for padding, 1 for unknown
    vocab["<PAD>"] = 0
    vocab["<UNK>"] = 1
    return vocab


# Convert texts to numerical sequences
def encode_text(text, vocab, max_len=150):
    tokens = basic_tokenizer(text)
    encoded = [vocab.get(word, vocab["<UNK>"]) for word in tokens[:max_len]]
    return np.pad(encoded, (0, max_len - len(encoded)), constant_values=vocab["<PAD>"])[:max_len]

# Model

In [7]:
# Dataset Class
class ToxicDataset(Dataset):
    def __init__(self, texts, labels, vocab, max_len=150):
        texts = [encode_text(text, vocab, max_len) for text in texts]
        self.texts = [torch.tensor(text, dtype=torch.long) for text in texts]
        self.labels = torch.tensor(labels, dtype=torch.float32)

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, idx):
        return {
            "input_ids": self.texts[idx],
            "labels": self.labels[idx]
        }


# Load Pretrained Embeddings (GloVe)
def load_glove_embeddings(filepath, vocab, embedding_dim=100):
    embeddings = np.random.uniform(-0.25, 0.25, (len(vocab), embedding_dim))
    with open(filepath, 'r', encoding="utf-8") as f:
        for line in f:
            values = line.split()
            word = values[0]
            vector = np.asarray(values[1:], dtype="float32")
            if word in vocab:
                embeddings[vocab[word]] = vector
    return torch.tensor(embeddings, dtype=torch.float32)


# Model: CNN + GRU
class ToxicClassifier(nn.Module):
    def __init__(self, embedding_dim, hidden_dim, num_filters, kernel_size, dropout, num_classes):
        super().__init__()
        # Embedding layers
        self.embedding = nn.Embedding.from_pretrained(embedding_matrix, freeze=False)
        # CNN layer
        self.conv = nn.Conv1d(
            in_channels=embedding_dim,
            out_channels=num_filters,
            kernel_size=kernel_size,
            padding=1)
        self.pool = nn.AdaptiveMaxPool1d(50) # This reduces the sequence length
        # GRU layer
        self.gru = nn.GRU(
            input_size=num_filters,
            hidden_size=hidden_dim,
            batch_first=True,
            bidirectional=True)
        # Fully connected layer
        self.dropout = nn.Dropout(dropout)
        self.fc = nn.Linear(hidden_dim * 2, num_classes)

    def forward(self, x):
        x = self.embedding(x) # (batch_size, seq_len, embedding_dim)
        x = x.permute(0, 2, 1) # change shape for conv1d (batch_size, channels, seq_len)
        x = torch.relu(self.conv(x))
        x = self.pool(x)
        x = x.permute(0, 2, 1) # change shape back for GRU (batch_size, seq_len, channels)
        x, _ = self.gru(x)
        x = self.dropout(x[:, -1, :]) # take the last time step
        return self.fc(x)


# Training function
def model_train(model, train_loader, val_loader, criterion, optimizer, epochs, device):
    model.to(device)

    for epoch in range(epochs):
        model.train()  # set model to training mode
        total_train_loss = 0
        all_preds = []
        all_labels = []

        for batch in train_loader:
            input_ids = batch['input_ids'].to(device)
            labels = batch['labels'].to(device)

            optimizer.zero_grad()
            outputs = model(input_ids)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            total_train_loss += loss.item()
            outputs = torch.sigmoid(outputs)
            all_preds.append(outputs.cpu().detach().numpy())
            all_labels.append(labels.cpu().numpy())
        
        # Compute Loss
        train_loss = total_train_loss / len(train_loader)

        # Compute AUC_ROC
        all_preds = np.concatenate(all_preds, axis=0).T
        all_labels = np.concatenate(all_labels, axis=0).T
        train_roc_auc = np.mean(
            [roc_auc_score(y_true, y_pred) for y_true, y_pred in zip(all_labels, all_preds)]
        )

        # Validation Step
        model.eval()
        val_preds = []
        val_labels = []
        total_val_loss = 0
        with torch.no_grad():
            for batch in val_loader:
                input_ids = batch['input_ids'].to(device)
                labels = batch['labels'].to(device)
                outputs = model(input_ids)
                
                # Compute validation loss
                loss = criterion(outputs, labels)
                total_val_loss += loss.item()

                val_preds.append(outputs.cpu().numpy())
                val_labels.append(labels.cpu().numpy())
        
        # Compute validation loss
        val_loss = total_val_loss / len(val_loader)

        # Compute AUC_ROC
        val_preds = np.concatenate(val_preds, axis=0).T
        val_labels = np.concatenate(val_labels, axis=0).T
        val_roc_auc = np.mean(
            [roc_auc_score(y_true, y_pred) for y_true, y_pred in zip(val_labels, val_preds)]
        )

        print(f"Epoch {epoch + 1}/{epochs}")
        print(f"Train Loss: {train_loss:.4f} | AUC_ROC: {train_roc_auc:.4f}")
        print(f"Val Loss: {val_loss:.4f} | AUC_ROC: {val_roc_auc:.4f}")

        wandb.log({
            "epoch": epoch + 1,
            "train_loss": train_loss,
            "train_roc_auc": train_roc_auc,
            "val_loss": val_loss,
            "val_roc_auc": val_roc_auc,
        })

    return train_loss, train_roc_auc, val_loss, val_roc_auc


# wandb Sweeps

In [8]:
sweep_config = {
    "method": "bayes", # "random" or "grid" or "bayes"
    "metric": {"name": "val_roc_auc", "goal": "maximize"},
    "parameters": {
        "embed_dim": {"values": [EMBEDDING_DIM]},
        "hidden_dim": {"values": [64, 128, 256]},
        "kernel_size": {"min": 2, "max": 5},
        "num_filters": {"values": [64, 128, 256]},
        "dropout": {"min": 0.2, "max": 0.5},
        "learning_rate": {"min": 1e-4, "max": 1e-2, "distribution": "log_uniform_values"},
        "batch_size": {"values": [16, 32, 64]},
        "epochs": {"min": 1, "max": 5},
    }
}

# Create the sweep
sweep_id = wandb.sweep(sweep_config, project="toxic_comment_clf")

Create sweep with ID: k88nhahc
Sweep URL: https://wandb.ai/daniele-didino/toxic_comment_clf/sweeps/k88nhahc


In [9]:
train_input = train_data.comment_text.to_list()
train_labels = train_data.loc[:, ["toxic", "severe_toxic", "obscene", "threat", "insult", "identity_hate"]].values.tolist()

val_input = val_data.comment_text.to_list()
val_labels = val_data.loc[:,  ["toxic", "severe_toxic", "obscene", "threat", "insult", "identity_hate"]].values.tolist()

vocab = build_vocab(train_input)

embedding_matrix = load_glove_embeddings("../embedding/glove.6B.100d.txt", vocab, EMBEDDING_DIM)

# Prepare train dataset
train_dataset = ToxicDataset(train_input, train_labels, vocab, MAX_LEN)

# Prepare validation dataset
val_dataset = ToxicDataset(val_input, val_labels, vocab, MAX_LEN)

In [10]:
# Define the training function
def train_sweep():

    # EPOCHS = 2
    num_classes = 6  # toxic, severe_toxic, obscene, threat, insult, identity_hate

    with wandb.init() as run:
        config = wandb.config # sample hyperparameters
        
        # Initialize DataLoaders
        train_dataloader = DataLoader(train_dataset, batch_size=config["batch_size"], shuffle=True)
        val_dataloader = DataLoader(val_dataset, batch_size=config["batch_size"], shuffle=True)
    
        # Initialize model
        model = ToxicClassifier(
            config["embed_dim"],
            config["hidden_dim"],
            config["num_filters"],
            config["kernel_size"],
            config["dropout"],
            num_classes=num_classes)
        model.to(DEVICE)
    
        # Loss
        criterion = nn.BCEWithLogitsLoss() # Multi-label loss
    
        # Optimizer
        optimizer = optim.Adam(model.parameters(), lr=config["learning_rate"])
    
        # Training loop
        # _ = model_train(model, train_dataloader, val_dataloader, criterion, optimizer, EPOCHS, DEVICE)
        _ = model_train(model, train_dataloader, val_dataloader, criterion, optimizer, config["epochs"], DEVICE)

In [11]:
# Launch the sweep
wandb.agent(sweep_id, function=train_sweep, count=20)

[34m[1mwandb[0m: Agent Starting Run: mknohfsz with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	dropout: 0.23164020317945097
[34m[1mwandb[0m: 	embed_dim: 100
[34m[1mwandb[0m: 	epochs: 3
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	kernel_size: 2
[34m[1mwandb[0m: 	learning_rate: 0.007164277430900863
[34m[1mwandb[0m: 	num_filters: 256


Epoch 1/3
Train Loss: 0.1306 | AUC_ROC: 0.7147
Val Loss: 0.1348 | AUC_ROC: 0.7208
Epoch 2/3
Train Loss: 0.1318 | AUC_ROC: 0.7151
Val Loss: 0.1168 | AUC_ROC: 0.8338
Epoch 3/3
Train Loss: 0.1112 | AUC_ROC: 0.8044
Val Loss: 0.0865 | AUC_ROC: 0.9295


0,1
epoch,▁▅█
train_loss,██▁
train_roc_auc,▁▁█
val_loss,█▅▁
val_roc_auc,▁▅█

0,1
epoch,3.0
train_loss,0.11122
train_roc_auc,0.80438
val_loss,0.08652
val_roc_auc,0.92951


[34m[1mwandb[0m: Agent Starting Run: uoxn7p9z with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	dropout: 0.4496706039753011
[34m[1mwandb[0m: 	embed_dim: 100
[34m[1mwandb[0m: 	epochs: 1
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	kernel_size: 2
[34m[1mwandb[0m: 	learning_rate: 0.0005190657832714231
[34m[1mwandb[0m: 	num_filters: 128


Epoch 1/1
Train Loss: 0.0886 | AUC_ROC: 0.8953
Val Loss: 0.0536 | AUC_ROC: 0.9732


0,1
epoch,▁
train_loss,▁
train_roc_auc,▁
val_loss,▁
val_roc_auc,▁

0,1
epoch,1.0
train_loss,0.08855
train_roc_auc,0.8953
val_loss,0.05356
val_roc_auc,0.97315


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 930o4xk5 with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	dropout: 0.4355158369287524
[34m[1mwandb[0m: 	embed_dim: 100
[34m[1mwandb[0m: 	epochs: 4
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	kernel_size: 4
[34m[1mwandb[0m: 	learning_rate: 0.000919648441533851
[34m[1mwandb[0m: 	num_filters: 128


Epoch 1/4
Train Loss: 0.0660 | AUC_ROC: 0.9485
Val Loss: 0.0494 | AUC_ROC: 0.9774
Epoch 2/4
Train Loss: 0.0437 | AUC_ROC: 0.9794
Val Loss: 0.0489 | AUC_ROC: 0.9797
Epoch 3/4
Train Loss: 0.0346 | AUC_ROC: 0.9876
Val Loss: 0.0499 | AUC_ROC: 0.9799
Epoch 4/4
Train Loss: 0.0278 | AUC_ROC: 0.9926
Val Loss: 0.0577 | AUC_ROC: 0.9800


0,1
epoch,▁▃▆█
train_loss,█▄▂▁
train_roc_auc,▁▆▇█
val_loss,▁▁▂█
val_roc_auc,▁▇██

0,1
epoch,4.0
train_loss,0.02776
train_roc_auc,0.99263
val_loss,0.05766
val_roc_auc,0.98004


[34m[1mwandb[0m: Agent Starting Run: wa9camog with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	dropout: 0.4264666154461322
[34m[1mwandb[0m: 	embed_dim: 100
[34m[1mwandb[0m: 	epochs: 1
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	kernel_size: 4
[34m[1mwandb[0m: 	learning_rate: 0.0004069590390382843
[34m[1mwandb[0m: 	num_filters: 128


Epoch 1/1
Train Loss: 0.0794 | AUC_ROC: 0.9215
Val Loss: 0.0527 | AUC_ROC: 0.9741


0,1
epoch,▁
train_loss,▁
train_roc_auc,▁
val_loss,▁
val_roc_auc,▁

0,1
epoch,1.0
train_loss,0.07945
train_roc_auc,0.92148
val_loss,0.05273
val_roc_auc,0.97411


[34m[1mwandb[0m: Agent Starting Run: 3wpm4pwr with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	dropout: 0.4643962596563937
[34m[1mwandb[0m: 	embed_dim: 100
[34m[1mwandb[0m: 	epochs: 2
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	kernel_size: 5
[34m[1mwandb[0m: 	learning_rate: 0.001434926084664491
[34m[1mwandb[0m: 	num_filters: 128


Epoch 1/2
Train Loss: 0.0642 | AUC_ROC: 0.9506
Val Loss: 0.0502 | AUC_ROC: 0.9766
Epoch 2/2
Train Loss: 0.0423 | AUC_ROC: 0.9802
Val Loss: 0.0496 | AUC_ROC: 0.9806


0,1
epoch,▁█
train_loss,█▁
train_roc_auc,▁█
val_loss,█▁
val_roc_auc,▁█

0,1
epoch,2.0
train_loss,0.04233
train_roc_auc,0.98016
val_loss,0.04955
val_roc_auc,0.98064


[34m[1mwandb[0m: Agent Starting Run: rm9bbbqe with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	dropout: 0.403850996483261
[34m[1mwandb[0m: 	embed_dim: 100
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	kernel_size: 5
[34m[1mwandb[0m: 	learning_rate: 0.0002543928193473569
[34m[1mwandb[0m: 	num_filters: 64


Epoch 1/5
Train Loss: 0.0874 | AUC_ROC: 0.9126
Val Loss: 0.0588 | AUC_ROC: 0.9700
Epoch 2/5
Train Loss: 0.0509 | AUC_ROC: 0.9707
Val Loss: 0.0521 | AUC_ROC: 0.9745
Epoch 3/5
Train Loss: 0.0451 | AUC_ROC: 0.9767
Val Loss: 0.0498 | AUC_ROC: 0.9766
Epoch 4/5
Train Loss: 0.0407 | AUC_ROC: 0.9805
Val Loss: 0.0492 | AUC_ROC: 0.9777
Epoch 5/5
Train Loss: 0.0366 | AUC_ROC: 0.9844
Val Loss: 0.0519 | AUC_ROC: 0.9776


0,1
epoch,▁▃▅▆█
train_loss,█▃▂▂▁
train_roc_auc,▁▇▇██
val_loss,█▃▁▁▃
val_roc_auc,▁▅▇██

0,1
epoch,5.0
train_loss,0.03665
train_roc_auc,0.9844
val_loss,0.0519
val_roc_auc,0.97756


[34m[1mwandb[0m: Agent Starting Run: 6052ymt3 with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	dropout: 0.4934900000793439
[34m[1mwandb[0m: 	embed_dim: 100
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	kernel_size: 5
[34m[1mwandb[0m: 	learning_rate: 0.00038909462473586177
[34m[1mwandb[0m: 	num_filters: 128


Epoch 1/5
Train Loss: 0.0762 | AUC_ROC: 0.9327
Val Loss: 0.0547 | AUC_ROC: 0.9728
Epoch 2/5
Train Loss: 0.0479 | AUC_ROC: 0.9738
Val Loss: 0.0496 | AUC_ROC: 0.9766
Epoch 3/5
Train Loss: 0.0415 | AUC_ROC: 0.9801
Val Loss: 0.0516 | AUC_ROC: 0.9781
Epoch 4/5
Train Loss: 0.0357 | AUC_ROC: 0.9856
Val Loss: 0.0502 | AUC_ROC: 0.9787
Epoch 5/5
Train Loss: 0.0310 | AUC_ROC: 0.9886
Val Loss: 0.0554 | AUC_ROC: 0.9784


0,1
epoch,▁▃▅▆█
train_loss,█▄▃▂▁
train_roc_auc,▁▆▇██
val_loss,▇▁▃▂█
val_roc_auc,▁▆▇██

0,1
epoch,5.0
train_loss,0.03102
train_roc_auc,0.98859
val_loss,0.05539
val_roc_auc,0.9784


[34m[1mwandb[0m: Agent Starting Run: nmpxagoc with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	dropout: 0.4686180140554112
[34m[1mwandb[0m: 	embed_dim: 100
[34m[1mwandb[0m: 	epochs: 4
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	kernel_size: 4
[34m[1mwandb[0m: 	learning_rate: 0.00011550674802716038
[34m[1mwandb[0m: 	num_filters: 64


Epoch 1/4
Train Loss: 0.1125 | AUC_ROC: 0.8475
Val Loss: 0.0641 | AUC_ROC: 0.9620
Epoch 2/4
Train Loss: 0.0584 | AUC_ROC: 0.9603
Val Loss: 0.0565 | AUC_ROC: 0.9697
Epoch 3/4
Train Loss: 0.0522 | AUC_ROC: 0.9692
Val Loss: 0.0538 | AUC_ROC: 0.9725
Epoch 4/4
Train Loss: 0.0486 | AUC_ROC: 0.9732
Val Loss: 0.0521 | AUC_ROC: 0.9739


0,1
epoch,▁▃▆█
train_loss,█▂▁▁
train_roc_auc,▁▇██
val_loss,█▄▂▁
val_roc_auc,▁▆▇█

0,1
epoch,4.0
train_loss,0.04856
train_roc_auc,0.97322
val_loss,0.05213
val_roc_auc,0.9739


[34m[1mwandb[0m: Agent Starting Run: tn82vw0x with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	dropout: 0.4859380598670568
[34m[1mwandb[0m: 	embed_dim: 100
[34m[1mwandb[0m: 	epochs: 1
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	kernel_size: 5
[34m[1mwandb[0m: 	learning_rate: 0.005199142407436801
[34m[1mwandb[0m: 	num_filters: 64


Epoch 1/1
Train Loss: 0.0672 | AUC_ROC: 0.9409
Val Loss: 0.0570 | AUC_ROC: 0.9723


0,1
epoch,▁
train_loss,▁
train_roc_auc,▁
val_loss,▁
val_roc_auc,▁

0,1
epoch,1.0
train_loss,0.06718
train_roc_auc,0.94089
val_loss,0.05699
val_roc_auc,0.97226


[34m[1mwandb[0m: Agent Starting Run: o08qswj8 with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	dropout: 0.4773952226575746
[34m[1mwandb[0m: 	embed_dim: 100
[34m[1mwandb[0m: 	epochs: 4
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	kernel_size: 5
[34m[1mwandb[0m: 	learning_rate: 0.00017849138438529028
[34m[1mwandb[0m: 	num_filters: 64


Epoch 1/4
Train Loss: 0.0820 | AUC_ROC: 0.9233
Val Loss: 0.0555 | AUC_ROC: 0.9711
Epoch 2/4
Train Loss: 0.0515 | AUC_ROC: 0.9704
Val Loss: 0.0518 | AUC_ROC: 0.9744
Epoch 3/4
Train Loss: 0.0461 | AUC_ROC: 0.9754
Val Loss: 0.0504 | AUC_ROC: 0.9763
Epoch 4/4
Train Loss: 0.0421 | AUC_ROC: 0.9791
Val Loss: 0.0516 | AUC_ROC: 0.9771


0,1
epoch,▁▃▆█
train_loss,█▃▂▁
train_roc_auc,▁▇██
val_loss,█▃▁▃
val_roc_auc,▁▅▇█

0,1
epoch,4.0
train_loss,0.04211
train_roc_auc,0.97906
val_loss,0.05156
val_roc_auc,0.97707


[34m[1mwandb[0m: Agent Starting Run: nclgg2jy with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	dropout: 0.44833159490781305
[34m[1mwandb[0m: 	embed_dim: 100
[34m[1mwandb[0m: 	epochs: 3
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	kernel_size: 5
[34m[1mwandb[0m: 	learning_rate: 0.00017962438882573832
[34m[1mwandb[0m: 	num_filters: 128


Epoch 1/3
Train Loss: 0.0901 | AUC_ROC: 0.9042
Val Loss: 0.0579 | AUC_ROC: 0.9694
Epoch 2/3
Train Loss: 0.0525 | AUC_ROC: 0.9696
Val Loss: 0.0524 | AUC_ROC: 0.9746
Epoch 3/3
Train Loss: 0.0470 | AUC_ROC: 0.9748
Val Loss: 0.0503 | AUC_ROC: 0.9760


0,1
epoch,▁▅█
train_loss,█▂▁
train_roc_auc,▁▇█
val_loss,█▃▁
val_roc_auc,▁▇█

0,1
epoch,3.0
train_loss,0.04697
train_roc_auc,0.97477
val_loss,0.05034
val_roc_auc,0.97595


[34m[1mwandb[0m: Agent Starting Run: oxqo7kmk with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	dropout: 0.48184180719422326
[34m[1mwandb[0m: 	embed_dim: 100
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	kernel_size: 5
[34m[1mwandb[0m: 	learning_rate: 0.00013651911830324755
[34m[1mwandb[0m: 	num_filters: 256


Epoch 1/5
Train Loss: 0.0810 | AUC_ROC: 0.9229
Val Loss: 0.0563 | AUC_ROC: 0.9709
Epoch 2/5
Train Loss: 0.0516 | AUC_ROC: 0.9699
Val Loss: 0.0515 | AUC_ROC: 0.9753
Epoch 3/5
Train Loss: 0.0461 | AUC_ROC: 0.9757
Val Loss: 0.0522 | AUC_ROC: 0.9769
Epoch 4/5
Train Loss: 0.0417 | AUC_ROC: 0.9806
Val Loss: 0.0500 | AUC_ROC: 0.9789
Epoch 5/5
Train Loss: 0.0374 | AUC_ROC: 0.9848
Val Loss: 0.0545 | AUC_ROC: 0.9797


0,1
epoch,▁▃▅▆█
train_loss,█▃▂▂▁
train_roc_auc,▁▆▇██
val_loss,█▃▃▁▆
val_roc_auc,▁▅▆▇█

0,1
epoch,5.0
train_loss,0.03738
train_roc_auc,0.98477
val_loss,0.05446
val_roc_auc,0.97967


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: apu5xjr1 with config:
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	dropout: 0.4734899976595548
[34m[1mwandb[0m: 	embed_dim: 100
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	kernel_size: 5
[34m[1mwandb[0m: 	learning_rate: 0.00012282196779418814
[34m[1mwandb[0m: 	num_filters: 256


Epoch 1/5
Train Loss: 0.0735 | AUC_ROC: 0.9388
Val Loss: 0.0564 | AUC_ROC: 0.9726
Epoch 2/5
Train Loss: 0.0501 | AUC_ROC: 0.9715
Val Loss: 0.0520 | AUC_ROC: 0.9754
Epoch 3/5
Train Loss: 0.0447 | AUC_ROC: 0.9768
Val Loss: 0.0497 | AUC_ROC: 0.9775
Epoch 4/5
Train Loss: 0.0398 | AUC_ROC: 0.9825
Val Loss: 0.0512 | AUC_ROC: 0.9788
Epoch 5/5
Train Loss: 0.0354 | AUC_ROC: 0.9867
Val Loss: 0.0509 | AUC_ROC: 0.9796


0,1
epoch,▁▃▅▆█
train_loss,█▄▃▂▁
train_roc_auc,▁▆▇▇█
val_loss,█▃▁▃▂
val_roc_auc,▁▄▆▇█

0,1
epoch,5.0
train_loss,0.03537
train_roc_auc,0.98675
val_loss,0.05094
val_roc_auc,0.97955


[34m[1mwandb[0m: Agent Starting Run: j99585zc with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	dropout: 0.4994835519831928
[34m[1mwandb[0m: 	embed_dim: 100
[34m[1mwandb[0m: 	epochs: 4
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	kernel_size: 5
[34m[1mwandb[0m: 	learning_rate: 0.00025166815670713704
[34m[1mwandb[0m: 	num_filters: 64


Epoch 1/4
Train Loss: 0.0762 | AUC_ROC: 0.9325
Val Loss: 0.0545 | AUC_ROC: 0.9713
Epoch 2/4
Train Loss: 0.0494 | AUC_ROC: 0.9722
Val Loss: 0.0505 | AUC_ROC: 0.9756
Epoch 3/4
Train Loss: 0.0440 | AUC_ROC: 0.9775
Val Loss: 0.0488 | AUC_ROC: 0.9764
Epoch 4/4
Train Loss: 0.0393 | AUC_ROC: 0.9814
Val Loss: 0.0505 | AUC_ROC: 0.9777


0,1
epoch,▁▃▆█
train_loss,█▃▂▁
train_roc_auc,▁▇▇█
val_loss,█▃▁▃
val_roc_auc,▁▆▇█

0,1
epoch,4.0
train_loss,0.03934
train_roc_auc,0.98144
val_loss,0.05047
val_roc_auc,0.97766


[34m[1mwandb[0m: Agent Starting Run: c5jb2jbg with config:
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	dropout: 0.44454032970149704
[34m[1mwandb[0m: 	embed_dim: 100
[34m[1mwandb[0m: 	epochs: 3
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	kernel_size: 5
[34m[1mwandb[0m: 	learning_rate: 0.00010599475743294708
[34m[1mwandb[0m: 	num_filters: 128


Epoch 1/3
Train Loss: 0.0805 | AUC_ROC: 0.9221
Val Loss: 0.0575 | AUC_ROC: 0.9707
Epoch 2/3
Train Loss: 0.0521 | AUC_ROC: 0.9691
Val Loss: 0.0556 | AUC_ROC: 0.9745
Epoch 3/3
Train Loss: 0.0467 | AUC_ROC: 0.9754
Val Loss: 0.0509 | AUC_ROC: 0.9768


0,1
epoch,▁▅█
train_loss,█▂▁
train_roc_auc,▁▇█
val_loss,█▆▁
val_roc_auc,▁▅█

0,1
epoch,3.0
train_loss,0.04669
train_roc_auc,0.97542
val_loss,0.05087
val_roc_auc,0.97677


[34m[1mwandb[0m: Agent Starting Run: gx80u28b with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	dropout: 0.494821372775571
[34m[1mwandb[0m: 	embed_dim: 100
[34m[1mwandb[0m: 	epochs: 4
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	kernel_size: 5
[34m[1mwandb[0m: 	learning_rate: 0.002481672479230934
[34m[1mwandb[0m: 	num_filters: 128


Epoch 1/4
Train Loss: 0.0651 | AUC_ROC: 0.9482
Val Loss: 0.0514 | AUC_ROC: 0.9756
Epoch 2/4
Train Loss: 0.0436 | AUC_ROC: 0.9779
Val Loss: 0.0549 | AUC_ROC: 0.9765
Epoch 3/4
Train Loss: 0.0353 | AUC_ROC: 0.9844
Val Loss: 0.0545 | AUC_ROC: 0.9747
Epoch 4/4
Train Loss: 0.0305 | AUC_ROC: 0.9879
Val Loss: 0.0592 | AUC_ROC: 0.9749


0,1
epoch,▁▃▆█
train_loss,█▄▂▁
train_roc_auc,▁▆▇█
val_loss,▁▄▄█
val_roc_auc,▅█▁▂

0,1
epoch,4.0
train_loss,0.03049
train_roc_auc,0.98792
val_loss,0.0592
val_roc_auc,0.97488


[34m[1mwandb[0m: Agent Starting Run: 6ddvtni1 with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	dropout: 0.4974983617620834
[34m[1mwandb[0m: 	embed_dim: 100
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	kernel_size: 5
[34m[1mwandb[0m: 	learning_rate: 0.0001047016084355755
[34m[1mwandb[0m: 	num_filters: 256


Epoch 1/5
Train Loss: 0.1007 | AUC_ROC: 0.8767
Val Loss: 0.0603 | AUC_ROC: 0.9669
Epoch 2/5
Train Loss: 0.0563 | AUC_ROC: 0.9646
Val Loss: 0.0545 | AUC_ROC: 0.9724
Epoch 3/5
Train Loss: 0.0505 | AUC_ROC: 0.9721
Val Loss: 0.0522 | AUC_ROC: 0.9744
Epoch 4/5
Train Loss: 0.0465 | AUC_ROC: 0.9752
Val Loss: 0.0527 | AUC_ROC: 0.9753
Epoch 5/5
Train Loss: 0.0435 | AUC_ROC: 0.9781
Val Loss: 0.0516 | AUC_ROC: 0.9762


0,1
epoch,▁▃▅▆█
train_loss,█▃▂▁▁
train_roc_auc,▁▇███
val_loss,█▃▂▂▁
val_roc_auc,▁▅▇▇█

0,1
epoch,5.0
train_loss,0.04353
train_roc_auc,0.97806
val_loss,0.05158
val_roc_auc,0.97619


[34m[1mwandb[0m: Agent Starting Run: apuxpza2 with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	dropout: 0.4834350527052873
[34m[1mwandb[0m: 	embed_dim: 100
[34m[1mwandb[0m: 	epochs: 3
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	kernel_size: 5
[34m[1mwandb[0m: 	learning_rate: 0.0002042997934962757
[34m[1mwandb[0m: 	num_filters: 128


Epoch 1/3
Train Loss: 0.0776 | AUC_ROC: 0.9300
Val Loss: 0.0543 | AUC_ROC: 0.9725
Epoch 2/3
Train Loss: 0.0503 | AUC_ROC: 0.9719
Val Loss: 0.0507 | AUC_ROC: 0.9759
Epoch 3/3
Train Loss: 0.0445 | AUC_ROC: 0.9771
Val Loss: 0.0508 | AUC_ROC: 0.9770


0,1
epoch,▁▅█
train_loss,█▂▁
train_roc_auc,▁▇█
val_loss,█▁▁
val_roc_auc,▁▆█

0,1
epoch,3.0
train_loss,0.04455
train_roc_auc,0.97707
val_loss,0.05082
val_roc_auc,0.97697


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: tng82le9 with config:
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	dropout: 0.49624636175478953
[34m[1mwandb[0m: 	embed_dim: 100
[34m[1mwandb[0m: 	epochs: 1
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	kernel_size: 5
[34m[1mwandb[0m: 	learning_rate: 0.00014458484009016086
[34m[1mwandb[0m: 	num_filters: 64


Epoch 1/1
Train Loss: 0.0770 | AUC_ROC: 0.9323
Val Loss: 0.0550 | AUC_ROC: 0.9709


0,1
epoch,▁
train_loss,▁
train_roc_auc,▁
val_loss,▁
val_roc_auc,▁

0,1
epoch,1.0
train_loss,0.07698
train_roc_auc,0.93233
val_loss,0.05502
val_roc_auc,0.97089


[34m[1mwandb[0m: Agent Starting Run: n8z1rfva with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	dropout: 0.484136792473341
[34m[1mwandb[0m: 	embed_dim: 100
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	kernel_size: 5
[34m[1mwandb[0m: 	learning_rate: 0.00025718111048783296
[34m[1mwandb[0m: 	num_filters: 128


Epoch 1/5
Train Loss: 0.0836 | AUC_ROC: 0.9179
Val Loss: 0.0547 | AUC_ROC: 0.9722
Epoch 2/5
Train Loss: 0.0503 | AUC_ROC: 0.9706
Val Loss: 0.0519 | AUC_ROC: 0.9751
Epoch 3/5
Train Loss: 0.0448 | AUC_ROC: 0.9769
Val Loss: 0.0503 | AUC_ROC: 0.9770
Epoch 4/5
Train Loss: 0.0397 | AUC_ROC: 0.9814
Val Loss: 0.0511 | AUC_ROC: 0.9781
Epoch 5/5
Train Loss: 0.0350 | AUC_ROC: 0.9860
Val Loss: 0.0523 | AUC_ROC: 0.9787


0,1
epoch,▁▃▅▆█
train_loss,█▃▂▂▁
train_roc_auc,▁▆▇██
val_loss,█▃▁▂▄
val_roc_auc,▁▄▆▇█

0,1
epoch,5.0
train_loss,0.03496
train_roc_auc,0.98598
val_loss,0.05226
val_roc_auc,0.97875
