In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os
from IPython.display import display, Markdown
from collections import Counter
import random
from lightgbm import LGBMClassifier

In [2]:
import torch
from torch import nn
from torch.utils.data import DataLoader, Dataset
import lightning as L
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from torchmetrics.classification import AUROC
from lightning.pytorch.callbacks import EarlyStopping

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
EXP_NAME = 'gemma_att'

In [4]:
os.getcwd()

'/net/tscratch/people/plgmatpat'

In [5]:
context_df = None

for f_ in os.listdir(os.path.join(EXP_NAME)):
    
    if f_.startswith('attension'):

        if context_df is None:
            context_df = pd.read_parquet(os.path.join(EXP_NAME, f_))
        else:
            context_df = pd.concat((context_df, pd.read_parquet(os.path.join(EXP_NAME, f_))))

In [6]:
context_df['dataset'].value_counts()

dataset
cnndm          19400
nq              8030
xsum            7776
poquad_v2       6247
hotpotqa_en     3121
bioask          3056
hotpotqa_pl     2208
polqa           1869
Name: count, dtype: int64

In [7]:
context_df = context_df[context_df['dataset'].isin(['xsum', 'nq'])]

In [8]:
context_df['label'].value_counts()

label
0    14012
1     1794
Name: count, dtype: int64

In [9]:
import torch
from torch.utils.data import Dataset

class HalluDataset(Dataset):
    def __init__(self, data, labels):
        self.data = data
        self.labels = labels

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        return torch.tensor(self.data[idx], dtype=torch.float32), torch.tensor(self.labels[idx], dtype=torch.long)

In [10]:
class LSTMModel(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim, num_layers=1):
        super().__init__()
        self.lstm = nn.LSTM(input_dim, hidden_dim, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_dim, output_dim)
        self.hidden_dim = hidden_dim

    def forward(self, x):

        if len(x.shape) == 2:
            x = x.unsqueeze(0)

        _, (hn, _) = self.lstm(x)  # hn is (num_layers, batch_size, hidden_dim)
        out = self.fc(hn[-1])  # Use the last layer's hidden state
        return out

In [11]:
class LSTMClassifier(L.LightningModule):
    def __init__(self, input_dim, hidden_dim, output_dim, lr=5e-4, num_layers=1, cls_weight=None):
        super().__init__()
        self.model = LSTMModel(input_dim, hidden_dim, output_dim, num_layers=num_layers)
        self.criterion = nn.BCEWithLogitsLoss(pos_weight=cls_weight)
        self.lr = lr

        self.test_dataloader = None

        # AUROC metric for binary classification
        self.train_auc = AUROC(task="binary")
        self.val_auc = AUROC(task="binary")
        self.test_auc = AUROC(task="binary")
        self.save_hyperparameters()

    def forward(self, x):
        return self.model(x)

    def training_step(self, batch, batch_idx):
        data, labels = batch
        preds = self(data)
        labels = labels.view(-1, 1).float() 

        if preds.size() != labels.size():
            preds = preds.view_as(labels)

        loss = self.criterion(preds, labels)
        # Calculate AUC during training
        prob = torch.sigmoid(preds)
        auc = self.train_auc(prob, labels)
        self.log("train_loss", loss, on_step=False, on_epoch=True, prog_bar=True)
        self.log("train_auc", auc, on_step=False, on_epoch=True, prog_bar=True)
        return loss

    def validation_step(self, batch, batch_idx):
        data, labels = batch
        preds = self(data)
        labels = labels.view(-1, 1).float() 


        loss = self.criterion(preds, labels)
        # Calculate AUC during validation
        prob = torch.sigmoid(preds)
        auc = self.val_auc(prob, labels)
        self.log("val_loss", loss, prog_bar=True)
        self.log("val_auc", auc, prog_bar=True)
        return loss
    
    def test_step(self, batch, batch_idx):
        data, labels = batch
        preds = self(data)
        labels = labels.view(-1, 1).float() 
        loss = self.criterion(preds, labels)
        # Calculate AUC during validation
        prob = torch.sigmoid(preds)
        auc = self.val_auc(prob, labels)
        self.log("test_loss", loss, prog_bar=True)
        self.log("test_auc", auc, prog_bar=True)
        return loss
    
    def on_train_epoch_end(self):
        for i, batch in enumerate(self.test_dataloader):
            batch = self.transfer_batch_to_device(batch, self.device, 0)
            self.test_step(batch, i)

    def configure_optimizers(self):
        return torch.optim.Adam(self.parameters(), lr=self.lr)

In [12]:
train_cols = [col for col in context_df.columns if col not in ['dataset', 'label']]

In [13]:
from sklearn.preprocessing import RobustScaler, MinMaxScaler
from collections import Counter
from pytorch_lightning.loggers import TensorBoardLogger

In [14]:
SPLIT_VAL = True
INPUT_DIM = 16
GEMMA_LAYERS = 42
HIDDEN_DIM = 64
OUTPUT_DIM = 1
NUM_LSTM_LAYERS = 2
LR = 1e-3
MAX_EPOCHS = 25
BATCH_SIZE = 1024

In [15]:
from lightning.pytorch.loggers import WandbLogger 
import wandb

In [16]:
validation_results = []

# Loop over each unique dataset
for dataset in context_df['dataset'].unique():

    # run = wandb.init(entity=dataset, project="hallu_project")

    in_dist_sample = context_df.loc[context_df['dataset'] != dataset]
    out_dist_sample = context_df.loc[context_df['dataset'] == dataset]

    X_train, X_test = in_dist_sample[train_cols], out_dist_sample[train_cols]
    y_train, y_test = in_dist_sample['label'].to_numpy().astype(np.int64), out_dist_sample['label'].to_numpy().astype(np.int64)

    X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=42, stratify=y_train)

    counter = Counter(y_train)
    pos_weight = torch.tensor([counter[0] / counter[1]], dtype=torch.float32)

    # rus = RandomUnderSampler(random_state=42)
    # X_train, y_train = rus.fit_resample(X_train, y_train)

    scaler = RobustScaler()

    X_train = scaler.fit_transform(X_train).reshape(-1, GEMMA_LAYERS, INPUT_DIM)
    X_val = scaler.transform(X_val).reshape(-1, GEMMA_LAYERS, INPUT_DIM)
    X_test = scaler.transform(X_test).reshape(-1, GEMMA_LAYERS, INPUT_DIM)

    train_dataset = HalluDataset(X_train, y_train)
    val_dataset = HalluDataset(X_val, y_val)
    test_dataset = HalluDataset(X_test, y_test)

    train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=16)
    val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, num_workers=16)
    test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, num_workers=16)


    model = LSTMClassifier(
        input_dim=INPUT_DIM, 
        hidden_dim=HIDDEN_DIM, 
        output_dim=OUTPUT_DIM, 
        lr=LR, 
        num_layers=NUM_LSTM_LAYERS,
        cls_weight=pos_weight
    )

    model.test_dataloader = test_loader

    wandb_logger = WandbLogger(
        # name=f"lstm_on_{dataset}",
        project="hallu_project",
        group=f"lstm_hallu",
        reinit=True
    )

    wandb_logger.experiment.config.update(
        {
            "trained_on": set(context_df['dataset'].unique()) - {dataset},
            "tested_on": dataset,
        }
    )

    # wandb_logger.log()

    trainer = L.Trainer(
        max_epochs=MAX_EPOCHS,
        accelerator="gpu" if torch.cuda.is_available() else "cpu",
        log_every_n_steps=10,
        callbacks=[EarlyStopping(
            monitor="val_loss",  # The metric to monitor
            patience=7,          # Number of epochs with no improvement after which training will stop
            verbose=True,        # Display a message when stopping
            mode="min",          # Minimize the monitored metric (for loss)
        )],
        logger=wandb_logger
    )

    trainer.fit(model, train_loader, val_loader)

    metrics = trainer.callback_metrics
    res = {k: v.item() for k, v in metrics.items()}

    test_res = trainer.test(model, test_loader)[0]
    print(f"Test Loss: {test_res['test_loss']}, Test AUC: {test_res['test_auc']}")

    validation_results.append({
        'dataset': dataset,
        'train_loss': res['train_loss'],
        'val_loss': res['val_loss'],
        'test_loss': res['test_loss'],
        'train_auc': res['train_auc'],
        'val_auc': res['val_auc'],
        'test_auc': res['test_auc']
    })

    wandb_logger.finalize("success")
    wandb.finish()

[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: Currently logged in as: [33mmatpat[0m ([33mhallucination[0m). Use [1m`wandb login --relogin`[0m to force relogin


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
You are using a CUDA device ('NVIDIA A100-SXM4-40GB') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name      | Type              | Params | Mode 
--------------------------------------------------------
0 | model     | LSTMModel         | 54.3 K | train
1 | criterion | BCEWithLogitsLoss | 0      | train
2 | train_auc | BinaryAUROC       | 0      | train
3 | val_auc   | BinaryAUROC       | 0      | train
4 | test_auc  | BinaryAUROC       | 0      | train
--------------------------------------------------------
54.3 K    Trainable params
0         Non-trainable params
5

Sanity Checking: |          | 0/? [00:00<?, ?it/s]



                                                                           

/net/tscratch/people/plgmatpat/miniconda3/envs/hallu/lib/python3.10/site-packages/lightning/pytorch/loops/fit_loop.py:298: The number of training batches (7) is smaller than the logging interval Trainer(log_every_n_steps=10). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.


Epoch 0: 100%|██████████| 7/7 [00:01<00:00,  4.14it/s, v_num=lx4t, val_loss=1.240, val_auc=0.532, train_loss=1.240, train_auc=0.497]

Metric val_loss improved. New best score: 1.236


Epoch 1: 100%|██████████| 7/7 [00:02<00:00,  2.46it/s, v_num=lx4t, val_loss=1.240, val_auc=0.529, train_loss=1.240, train_auc=0.526, test_loss=1.320, test_auc=0.387]

Metric val_loss improved by 0.001 >= min_delta = 0.0. New best score: 1.235


Epoch 2: 100%|██████████| 7/7 [00:02<00:00,  2.39it/s, v_num=lx4t, val_loss=1.230, val_auc=0.533, train_loss=1.240, train_auc=0.526, test_loss=1.340, test_auc=0.389]

Metric val_loss improved by 0.001 >= min_delta = 0.0. New best score: 1.235


Epoch 3: 100%|██████████| 7/7 [00:02<00:00,  2.62it/s, v_num=lx4t, val_loss=1.230, val_auc=0.538, train_loss=1.240, train_auc=0.534, test_loss=1.340, test_auc=0.390]

Metric val_loss improved by 0.001 >= min_delta = 0.0. New best score: 1.234


Epoch 4: 100%|██████████| 7/7 [00:02<00:00,  2.57it/s, v_num=lx4t, val_loss=1.230, val_auc=0.552, train_loss=1.230, train_auc=0.548, test_loss=1.340, test_auc=0.390]

Metric val_loss improved by 0.001 >= min_delta = 0.0. New best score: 1.233


Epoch 5: 100%|██████████| 7/7 [00:02<00:00,  2.67it/s, v_num=lx4t, val_loss=1.230, val_auc=0.556, train_loss=1.230, train_auc=0.575, test_loss=1.340, test_auc=0.389]

Metric val_loss improved by 0.003 >= min_delta = 0.0. New best score: 1.230


Epoch 6: 100%|██████████| 7/7 [00:02<00:00,  2.61it/s, v_num=lx4t, val_loss=1.230, val_auc=0.553, train_loss=1.230, train_auc=0.559, test_loss=1.350, test_auc=0.387]

Metric val_loss improved by 0.001 >= min_delta = 0.0. New best score: 1.229


Epoch 8: 100%|██████████| 7/7 [00:02<00:00,  2.57it/s, v_num=lx4t, val_loss=1.220, val_auc=0.567, train_loss=1.220, train_auc=0.615, test_loss=1.360, test_auc=0.385]

Metric val_loss improved by 0.009 >= min_delta = 0.0. New best score: 1.220


Epoch 9: 100%|██████████| 7/7 [00:02<00:00,  2.61it/s, v_num=lx4t, val_loss=1.220, val_auc=0.587, train_loss=1.190, train_auc=0.634, test_loss=1.370, test_auc=0.386]

Metric val_loss improved by 0.002 >= min_delta = 0.0. New best score: 1.218


Epoch 10: 100%|██████████| 7/7 [00:02<00:00,  2.53it/s, v_num=lx4t, val_loss=1.220, val_auc=0.597, train_loss=1.180, train_auc=0.643, test_loss=1.380, test_auc=0.384]

Metric val_loss improved by 0.003 >= min_delta = 0.0. New best score: 1.215


Epoch 11: 100%|██████████| 7/7 [00:02<00:00,  2.47it/s, v_num=lx4t, val_loss=1.210, val_auc=0.606, train_loss=1.180, train_auc=0.651, test_loss=1.400, test_auc=0.380]

Metric val_loss improved by 0.007 >= min_delta = 0.0. New best score: 1.208


Epoch 13: 100%|██████████| 7/7 [00:02<00:00,  2.55it/s, v_num=lx4t, val_loss=1.200, val_auc=0.621, train_loss=1.160, train_auc=0.679, test_loss=1.350, test_auc=0.394]

Metric val_loss improved by 0.009 >= min_delta = 0.0. New best score: 1.199


Epoch 15: 100%|██████████| 7/7 [00:02<00:00,  2.37it/s, v_num=lx4t, val_loss=1.200, val_auc=0.625, train_loss=1.140, train_auc=0.687, test_loss=1.430, test_auc=0.378]

Metric val_loss improved by 0.002 >= min_delta = 0.0. New best score: 1.198


Epoch 17: 100%|██████████| 7/7 [00:02<00:00,  2.47it/s, v_num=lx4t, val_loss=1.190, val_auc=0.633, train_loss=1.130, train_auc=0.699, test_loss=1.390, test_auc=0.477]

Metric val_loss improved by 0.009 >= min_delta = 0.0. New best score: 1.189


Epoch 18: 100%|██████████| 7/7 [00:03<00:00,  2.31it/s, v_num=lx4t, val_loss=1.180, val_auc=0.632, train_loss=1.120, train_auc=0.707, test_loss=1.310, test_auc=0.586]

Metric val_loss improved by 0.005 >= min_delta = 0.0. New best score: 1.184


Epoch 22: 100%|██████████| 7/7 [00:02<00:00,  2.51it/s, v_num=lx4t, val_loss=1.180, val_auc=0.644, train_loss=1.100, train_auc=0.720, test_loss=1.320, test_auc=0.576]

Metric val_loss improved by 0.001 >= min_delta = 0.0. New best score: 1.182


Epoch 24: 100%|██████████| 7/7 [00:02<00:00,  2.46it/s, v_num=lx4t, val_loss=1.190, val_auc=0.641, train_loss=1.080, train_auc=0.729, test_loss=1.410, test_auc=0.593]

`Trainer.fit` stopped: `max_epochs=25` reached.


Epoch 24: 100%|██████████| 7/7 [00:04<00:00,  1.64it/s, v_num=lx4t, val_loss=1.190, val_auc=0.641, train_loss=1.080, train_auc=0.729, test_loss=1.410, test_auc=0.593]

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]





SLURM auto-requeueing enabled. Setting signal handlers.


Testing DataLoader 0: 100%|██████████| 8/8 [00:00<00:00, 85.51it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        test_auc            0.5270460844039917
        test_loss            1.354711651802063
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
Test Loss: 1.354711651802063, Test AUC: 0.5270460844039917


0,1
epoch,▁▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▇▇▇▇▇▇▇██
test_auc,▁▁▁▁▁▁▁▁▁▁▁▁▂▁▁▄▄█▄▂▂▇▇█▆▆
test_loss,▁▂▂▂▂▂▂▂▂▃▃▂▂▃▄▂▃▁▃█▇▁▃▄▂▂
train_auc,▁▂▂▂▃▃▃▄▅▅▅▆▆▆▇▇▇▇▇█▇████
train_loss,██████▇▇▇▆▅▅▅▄▄▃▃▃▂▂▂▂▂▁▁
trainer/global_step,▁▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇████
val_auc,▁▁▁▂▂▃▂▂▃▅▅▆▆▇▆▇▇▇▇▇▇████
val_loss,█████▇▇█▆▆▅▄▅▃▃▃▄▂▁▂▅▂▁▃▃

0,1
epoch,25.0
test_auc,0.52705
test_loss,1.35471
train_auc,0.72862
train_loss,1.08386
trainer/global_step,175.0
val_auc,0.64109
val_loss,1.19477




GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name      | Type              | Params | Mode 
--------------------------------------------------------
0 | model     | LSTMModel         | 54.3 K | train
1 | criterion | BCEWithLogitsLoss | 0      | train
2 | train_auc | BinaryAUROC       | 0      | train
3 | val_auc   | BinaryAUROC       | 0      | train
4 | test_auc  | BinaryAUROC       | 0      | train
--------------------------------------------------------
54.3 K    Trainable params
0         Non-trainable params
54.3 K    Total params
0.217     Total estimated model params size (MB)
SLURM auto-requeueing enabled. Setting signal handlers.


Sanity Checking: |          | 0/? [00:00<?, ?it/s]



                                                                           

/net/tscratch/people/plgmatpat/miniconda3/envs/hallu/lib/python3.10/site-packages/lightning/pytorch/loops/fit_loop.py:298: The number of training batches (7) is smaller than the logging interval Trainer(log_every_n_steps=10). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.


Epoch 0: 100%|██████████| 7/7 [00:01<00:00,  4.34it/s, v_num=1qqe, val_loss=1.200, val_auc=0.598, train_loss=1.210, train_auc=0.592]

Metric val_loss improved. New best score: 1.199


Epoch 1: 100%|██████████| 7/7 [00:02<00:00,  2.45it/s, v_num=1qqe, val_loss=1.180, val_auc=0.607, train_loss=1.180, train_auc=0.637, test_loss=1.160, test_auc=0.491]

Metric val_loss improved by 0.018 >= min_delta = 0.0. New best score: 1.181


Epoch 3: 100%|██████████| 7/7 [00:02<00:00,  2.50it/s, v_num=1qqe, val_loss=1.160, val_auc=0.618, train_loss=1.150, train_auc=0.649, test_loss=1.260, test_auc=0.492]

Metric val_loss improved by 0.017 >= min_delta = 0.0. New best score: 1.164


Epoch 4: 100%|██████████| 7/7 [00:02<00:00,  2.48it/s, v_num=1qqe, val_loss=1.160, val_auc=0.626, train_loss=1.150, train_auc=0.653, test_loss=1.200, test_auc=0.498]

Metric val_loss improved by 0.007 >= min_delta = 0.0. New best score: 1.158


Epoch 5: 100%|██████████| 7/7 [00:02<00:00,  2.42it/s, v_num=1qqe, val_loss=1.150, val_auc=0.639, train_loss=1.140, train_auc=0.660, test_loss=1.180, test_auc=0.501]

Metric val_loss improved by 0.005 >= min_delta = 0.0. New best score: 1.152


Epoch 6: 100%|██████████| 7/7 [00:02<00:00,  2.45it/s, v_num=1qqe, val_loss=1.150, val_auc=0.651, train_loss=1.140, train_auc=0.668, test_loss=1.170, test_auc=0.506]

Metric val_loss improved by 0.005 >= min_delta = 0.0. New best score: 1.148


Epoch 7: 100%|██████████| 7/7 [00:02<00:00,  2.55it/s, v_num=1qqe, val_loss=1.140, val_auc=0.665, train_loss=1.140, train_auc=0.675, test_loss=1.180, test_auc=0.511]

Metric val_loss improved by 0.006 >= min_delta = 0.0. New best score: 1.142


Epoch 8: 100%|██████████| 7/7 [00:02<00:00,  2.65it/s, v_num=1qqe, val_loss=1.130, val_auc=0.678, train_loss=1.130, train_auc=0.684, test_loss=1.170, test_auc=0.522]

Metric val_loss improved by 0.009 >= min_delta = 0.0. New best score: 1.133


Epoch 9: 100%|██████████| 7/7 [00:03<00:00,  2.21it/s, v_num=1qqe, val_loss=1.120, val_auc=0.692, train_loss=1.120, train_auc=0.691, test_loss=1.160, test_auc=0.528]

Metric val_loss improved by 0.016 >= min_delta = 0.0. New best score: 1.118


Epoch 10: 100%|██████████| 7/7 [00:02<00:00,  2.44it/s, v_num=1qqe, val_loss=1.100, val_auc=0.700, train_loss=1.110, train_auc=0.697, test_loss=1.160, test_auc=0.543]

Metric val_loss improved by 0.018 >= min_delta = 0.0. New best score: 1.100


Epoch 11: 100%|██████████| 7/7 [00:02<00:00,  2.44it/s, v_num=1qqe, val_loss=1.100, val_auc=0.735, train_loss=1.110, train_auc=0.698, test_loss=1.230, test_auc=0.535]

Metric val_loss improved by 0.004 >= min_delta = 0.0. New best score: 1.096


Epoch 12: 100%|██████████| 7/7 [00:02<00:00,  2.37it/s, v_num=1qqe, val_loss=1.090, val_auc=0.738, train_loss=1.110, train_auc=0.709, test_loss=1.260, test_auc=0.520]

Metric val_loss improved by 0.004 >= min_delta = 0.0. New best score: 1.092


Epoch 13: 100%|██████████| 7/7 [00:02<00:00,  2.39it/s, v_num=1qqe, val_loss=1.080, val_auc=0.738, train_loss=1.110, train_auc=0.714, test_loss=1.190, test_auc=0.551]

Metric val_loss improved by 0.009 >= min_delta = 0.0. New best score: 1.083


Epoch 14: 100%|██████████| 7/7 [00:02<00:00,  2.37it/s, v_num=1qqe, val_loss=1.080, val_auc=0.742, train_loss=1.110, train_auc=0.718, test_loss=1.180, test_auc=0.552]

Metric val_loss improved by 0.001 >= min_delta = 0.0. New best score: 1.081


Epoch 15: 100%|██████████| 7/7 [00:02<00:00,  2.50it/s, v_num=1qqe, val_loss=1.080, val_auc=0.746, train_loss=1.100, train_auc=0.720, test_loss=1.170, test_auc=0.551]

Metric val_loss improved by 0.004 >= min_delta = 0.0. New best score: 1.078


Epoch 16: 100%|██████████| 7/7 [00:02<00:00,  2.41it/s, v_num=1qqe, val_loss=1.060, val_auc=0.741, train_loss=1.090, train_auc=0.722, test_loss=1.180, test_auc=0.551]

Metric val_loss improved by 0.014 >= min_delta = 0.0. New best score: 1.064


Epoch 17: 100%|██████████| 7/7 [00:02<00:00,  2.39it/s, v_num=1qqe, val_loss=1.050, val_auc=0.751, train_loss=1.080, train_auc=0.729, test_loss=1.180, test_auc=0.553]

Metric val_loss improved by 0.015 >= min_delta = 0.0. New best score: 1.049


Epoch 18: 100%|██████████| 7/7 [00:03<00:00,  2.32it/s, v_num=1qqe, val_loss=1.040, val_auc=0.758, train_loss=1.070, train_auc=0.730, test_loss=1.200, test_auc=0.555]

Metric val_loss improved by 0.012 >= min_delta = 0.0. New best score: 1.037


Epoch 21: 100%|██████████| 7/7 [00:03<00:00,  2.32it/s, v_num=1qqe, val_loss=1.030, val_auc=0.762, train_loss=1.050, train_auc=0.746, test_loss=1.170, test_auc=0.550]

Metric val_loss improved by 0.009 >= min_delta = 0.0. New best score: 1.028


Epoch 24: 100%|██████████| 7/7 [00:02<00:00,  2.38it/s, v_num=1qqe, val_loss=1.020, val_auc=0.767, train_loss=1.030, train_auc=0.757, test_loss=1.220, test_auc=0.533]

Metric val_loss improved by 0.008 >= min_delta = 0.0. New best score: 1.021
`Trainer.fit` stopped: `max_epochs=25` reached.


Epoch 24: 100%|██████████| 7/7 [00:04<00:00,  1.55it/s, v_num=1qqe, val_loss=1.020, val_auc=0.767, train_loss=1.030, train_auc=0.757, test_loss=1.220, test_auc=0.533]

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]





SLURM auto-requeueing enabled. Setting signal handlers.


Testing DataLoader 0: 100%|██████████| 8/8 [00:00<00:00, 73.39it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        test_auc            0.5456796884536743
        test_loss            1.154437780380249
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
Test Loss: 1.154437780380249, Test AUC: 0.5456796884536743


0,1
epoch,▁▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▇▇▇▇▇▇▇██
test_auc,▁▁▁▂▂▃▃▄▅▇▆▄▇█▇▇████▇▇▇▆▇▇
test_loss,▂██▄▃▂▃▂▁▂▆█▄▃▂▃▃▄▅▄▂▁▁▅▁▁
train_auc,▁▃▃▃▄▄▄▅▅▅▅▅▆▆▆▆▇▇▇▇▇████
train_loss,█▇▆▆▆▅▅▅▅▅▄▄▄▄▄▄▄▃▃▃▃▂▂▂▁
trainer/global_step,▁▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▅▆▆▆▆▇▇▇▇▇▇▇██
val_auc,▁▁▂▂▂▃▃▄▄▅▅▇▇▇▇▇▇▇█████▇█
val_loss,█▇▇▇▆▆▆▆▅▅▄▄▄▃▃▃▃▂▂▃▂▁▂▂▁

0,1
epoch,25.0
test_auc,0.54568
test_loss,1.15444
train_auc,0.75719
train_loss,1.02767
trainer/global_step,175.0
val_auc,0.76703
val_loss,1.0205
