# Multimodal Sentiment Analysis

In [7]:
!pip install transformers




In [21]:
!pip install albumentations
!pip install torch
!pip install torchmetrics


Collecting torchmetrics
  Downloading torchmetrics-1.4.2-py3-none-any.whl.metadata (19 kB)
Collecting lightning-utilities>=0.8.0 (from torchmetrics)
  Downloading lightning_utilities-0.11.7-py3-none-any.whl.metadata (5.2 kB)

In [8]:
import copy
import gc
import os
import random
from collections import defaultdict
from typing import Dict, Optional, Tuple

import albumentations as A
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import wandb
from albumentations.pytorch import ToTensorV2
from colorama import Back, Fore, Style
from PIL import Image
from sklearn.model_selection import StratifiedKFold
from torch import nn, optim
from torch.optim import lr_scheduler
from torch.utils.data import DataLoader, Dataset
from torchmetrics import AUROC, Accuracy, F1Score, Precision, Recall
from tqdm import tqdm
from transformers import AutoModel, AutoTokenizer

import warnings
warnings.simplefilter('ignore')

from PIL import ImageFile
ImageFile.LOAD_TRUNCATED_IMAGES = True

c_ = Fore.GREEN
sr_ = Style.RESET_ALL

# Config

In [10]:
class Config:
    seed = 101
    debug = False  # set debug=False for Full Training
    exp_name = "vit/sbert"
    model_name = "vit-sbert-multimodal"
    backbone = "google/vit-base-patch16-224+sentence-transformers/all-mpnet-base-v2-ep10"
    tokenizer = "sentence-transformers/all-mpnet-base-v2"
    image_encoder = "google/vit-base-patch16-224"
    train_bs = 16
    valid_bs = 32
    img_size = [224, 224]
    max_len = 128
    epochs = 10
    competition = "memotions-7k"

    # Optimizers
    optimizer     = 'Adam'
    learning_rate = 3e-4
    rho           = 0.9
    eps           = 1e-6
    lr_decay      = 0
    betas         = (0.9, 0.999)
    momentum      = 0
    alpha         = 0.99

    # Scheduler
    scheduler     = 'CosineAnnealingLR'
    min_lr        = 1e-6
    T_max         = int(30000/train_bs*epochs)+50
    T_0           = 25
    warmup_epochs = 0
    weight_decay  = 1e-6

    # Config
    n_accumulate  = max(1, 32//train_bs)
    num_folds     = 5
    num_classes   = 3

    device        = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Utils

## Seed

In [11]:
def set_seed(seed: int = 42):
    np.random.seed(seed)
    random.seed(seed)
    torch.manual_seed(seed)
    os.environ["PYTHONHASHSEED"] = str(seed)
    print(">>> SEEDED <<<")

set_seed(Config.seed)

>>> SEEDED <<<


## WandB

In [12]:
# Import wandb library for logging and tracking experiments
!pip install wandb

import wandb

# Try to get the API key from Kaggle secrets
try:
    from kaggle_secrets import UserSecretsClient
    user_secrets = UserSecretsClient()
    api_key = user_secrets.get_secret("WANDB")
    # Login to wandb with the API key
    wandb.login(key=api_key)
    # Set anonymous mode to None
    anonymous = None
except:
    # If Kaggle secrets are not available, set anonymous mode to 'must'
    anonymous = 'must'
    # Login to wandb anonymously and relogin if needed
    wandb.login(anonymous=anonymous, relogin=True)



wandb: Using wandb-core as the SDK backend. Please refer to https://wandb.me/wandb-core for more information.
wandb: Appending key for api.wandb.ai to your netrc file: C:\Users\pavani\_netrc


## Get Optimizer

In [13]:
def get_optimizer(model: nn.Module):
    """
    Returns the optimizer based on the Config files.
    """
    if Config.optimizer == "Adadelta":
        optimizer = optim.Adadelta(
            model.parameters(), lr=Config.learning_rate, rho=Config.rho, eps=Config.eps
        )
    elif Config.optimizer == "Adagrad":
        optimizer = optim.Adagrad(
            model.parameters(),
            lr=Config.learning_rate,
            lr_decay=Config.lr_decay,
            weight_decay=Config.weight_decay,
        )
    elif Config.optimizer == "Adam":
        optimizer = optim.Adam(
            model.parameters(),
            lr=Config.learning_rate,
            betas=Config.betas,
            eps=Config.eps,
        )
    elif Config.optimizer == "RMSProp":
        optimizer = optim.RMSprop(
            model.parameters(),
            lr=Config.learning_rate,
            alpha=Config.alpha,
            eps=Config.eps,
            weight_decay=Config.weight_decay,
            momentum=Config.momentum,
        )
    else:
        raise NotImplementedError(
            f"The optimizer {Config.optimizer} has not been implemented."
        )
    return optimizer

## Get Scheduler

In [14]:
def get_scheduler(optimizer: optim):
    """
    A method which returns the required schedulers.
        - Extracted from Awsaf's Kaggle.
    """
    if Config.scheduler == "CosineAnnealingLR":
        scheduler = lr_scheduler.CosineAnnealingLR(
            optimizer=optimizer, T_max=Config.T_max, eta_min=Config.min_lr
        )
    elif Config.scheduler == "CosineAnnealingWarmRestarts":
        scheduler = lr_scheduler.CosineAnnealingWarmRestarts(
            optimizer=optimizer, T_0=Config.T_0, eta_min=Config.eta_min
        )
    elif Config.scheduler == "ReduceLROnPlateau":
        scheduler = lr_scheduler.ReduceLROnPlateau(
            optimizer=optimizer,
            mode="min",
            factor=0.1,
            patience=10,
            threshold=0.0001,
            min_lr=Config.min_lr,
        )
    elif Config.scheduler == "ExponentialLR":
        scheduler = lr_scheduler.ExponentialLR(optimizer=optimizer, gamma=0.85)
    elif Config.scheduler is None:
        scheduler = None
    else:
        raise NotImplementedError(
            "The Scheduler you have asked has not been implemented"
        )
    return scheduler

# Data

## Create Folds

In [18]:
!pip install kaggle



In [27]:
!mkdir output


In [33]:
def create_folds():
    df = pd.read_csv('/kaggle/input/memotion-dataset-7k/memotion_dataset_7k/labels.csv')
    df = df.drop('Unnamed: 0', axis=1)
    df = df.sample(frac=1).reset_index(drop=True)
    df['label'] = df['offensive']
    df['label'] = np.where(df['label'] == 'hateful_offensive', 'very_offensive', df['label'])
    
    mskf = StratifiedKFold(n_splits=5)

    df['kfold'] = -1
    for fold, (train, valid) in enumerate(mskf.split(X=df, y=df['label'])):
        df.loc[valid, 'kfold'] = fold
    
    df['label'] = df['label'].map({
        'not_offensive': 0, 
        'slight': 1, 
        'very_offensive': 2
    })

    df.to_csv('folds.csv', index=False)

create_folds()

FileNotFoundError: [Errno 2] No such file or directory: '/kaggle/input/memotion-dataset-7k/memotion_dataset_7k/labels.csv'

# Dataset and DataLoaders

In [35]:
class MemotionDataset(Dataset):
    def __init__(self, df: pd.DataFrame) -> None:
        super().__init__()
        self.df = df
        self.tokenizer = AutoTokenizer.from_pretrained(Config.tokenizer)
        self.transforms = A.Compose([
            A.Resize(height=Config.img_size[0], width=Config.img_size[1]),
            ToTensorV2(),
        ])

    def __len__(self) -> int:
        return self.df.shape[0]
    
    def __getitem__(self, ix: int) -> Dict[str, torch.Tensor]:
        row = self.df.iloc[ix]

        # Image
        image_path = os.path.join('/kaggle/input/memotion-dataset-7k/memotion_dataset_7k/images', row['image_name'])
        img = np.array(Image.open(image_path).convert('RGB'))
        img = self.transforms(image=img)['image']

        # Text
        text = str(row['text_corrected']).lower()
        out = self.tokenizer(
            text=text, 
            max_length=Config.max_len,
            padding='max_length',
            truncation=True,
            return_tensors="pt"
        )
        # __import__('pprint').pprint(out)

        return  {
            'image': img, 
            'input_ids': out['input_ids'].squeeze(),
            'attention_mask': out['attention_mask'].squeeze(),
            'label': torch.LongTensor([row['label']]).squeeze()
        }

# Model

## Image Encoder

In [9]:
class ImageEncoder(nn.Module):
    def __init__(self, *args, **kwargs) -> None:
        super().__init__(*args, **kwargs)
        self.encoder = AutoModel.from_pretrained(Config.image_encoder)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        x = self.encoder.forward(x)["pooler_output"]
        return x

## Text Encoder

In [10]:
class TextEncoder(nn.Module):
    def __init__(self, *args, **kwargs) -> None:
        super().__init__(*args, **kwargs)
        self.encoder = AutoModel.from_pretrained(Config.tokenizer)

    def forward(
        self, input_ids: torch.Tensor, attention_mask: torch.Tensor
    ) -> torch.Tensor:
        x = self.encoder.forward(input_ids=input_ids, attention_mask=attention_mask)
        return x["pooler_output"]

## Memotion Model

In [11]:
class MemotionModel(nn.Module):
    def __init__(self, *args, **kwargs) -> None:
        super().__init__(*args, **kwargs)
        self.image_encoder = ImageEncoder()
        self.text_encoder = TextEncoder()
        self.alpha_img = torch.randn(size=(1,), requires_grad=True, device=Config.device)
        self.alpha_txt = torch.randn(size=(1,), requires_grad=True, device=Config.device)
        self.fc1 = nn.Linear(768, 512)
        self.fc2 = nn.Linear(512, 128)
        self.fc3 = nn.Linear(128, 3)
        self.dropout = nn.Dropout(p=0.2)

    def forward(
        self, image: torch.Tensor, input_ids: torch.Tensor, attention_mask: torch.Tensor, label: Optional[torch.Tensor] = None,
    ) -> torch.Tensor:
        img_out = self.image_encoder.forward(image)
        txt_out = self.text_encoder.forward(
            input_ids=input_ids, attention_mask=attention_mask
        )
        wt_emb = self.alpha_txt * txt_out + self.alpha_img * img_out
        x = self.fc1(self.dropout(wt_emb))
        x = self.fc2(self.dropout(x))
        return self.fc3(x)

# Training

## Train One Epoch

In [12]:
def train_one_epoch(
    model: nn.Module,
    optimizer: optim,
    dataloader: DataLoader,
    scheduler=None,
) -> float:
    model.train()
    dataset_size = 0
    running_loss = 0

    criterion = nn.CrossEntropyLoss()
    accuracy_metric = Accuracy(task="multiclass", num_classes=Config.num_classes)
    precision_metric = Precision(task="multiclass", num_classes=Config.num_classes)
    recall_metric = Recall(task="multiclass", num_classes=Config.num_classes)
    auroc_metric = AUROC(task="multiclass", num_classes=Config.num_classes)
    f1_metrics = F1Score(task="multiclass", num_classes=Config.num_classes)

    pbar = tqdm(enumerate(dataloader), total=len(dataloader), desc=f"(train) ")
    for step, batch in pbar:
        batch = {k: v.to(Config.device) for k, v in batch.items()}
        labels = batch["label"]
        yHat = model.forward(**batch)

        optimizer.zero_grad()
        loss = criterion(yHat, labels)
        loss.backward()
        optimizer.step()

        if scheduler is not None:
            scheduler.step()

        running_loss += loss.item() * labels.shape[0]
        dataset_size += labels.shape[0]

        epoch_loss = running_loss / dataset_size

        out = torch.argmax(yHat, axis=1)
        accuracy = accuracy_metric(out.cpu(), labels.cpu())
        precision = precision_metric(out.cpu(), labels.cpu())
        recall = recall_metric(out.cpu(), labels.cpu())
        auroc = auroc_metric(F.softmax(yHat, dim=1).cpu(), labels.cpu())
        f1 = f1_metrics(out.cpu(), labels.cpu())
        current_lr = optimizer.param_groups[0]["lr"]

        wandb.log(
            {
                "train/loss": epoch_loss,
                "train/accuracy": accuracy,
                "train/precision": precision,
                "train/recall": recall,
                "train/auroc": auroc,
                "train/f1": f1,
                "train/current_lr": current_lr,
            },
            step=step,
        )

        pbar.set_postfix(epoch_loss=f"{epoch_loss:.5f}", current_lr=f"{current_lr:.5f}")

    return epoch_loss

## Validate One Epoch

In [13]:
@torch.no_grad()
def validate_one_epoch(
    model: nn.Module, dataloader: DataLoader
) -> Tuple[float, dict]:
    model.eval()
    dataset_size = 0
    running_loss = 0

    criterion = nn.CrossEntropyLoss()
    accuracy_metric = Accuracy(task="multiclass", num_classes=Config.num_classes)
    precision_metric = Precision(task="multiclass", num_classes=Config.num_classes)
    recall_metric = Recall(task="multiclass", num_classes=Config.num_classes)
    auroc_metric = AUROC(task="multiclass", num_classes=Config.num_classes)
    f1_metrics = F1Score(task="multiclass", num_classes=Config.num_classes)

    val_scores = defaultdict(list)

    pbar = tqdm(enumerate(dataloader), total=len(dataloader), desc=f"(valid) ")
    for step, batch in pbar:
        batch = {k: v.to(Config.device) for k, v in batch.items()}
        labels = batch["label"]
        yHat = model.forward(**batch)

        loss = criterion(yHat, labels)

        running_loss += loss.item() * labels.shape[0]
        dataset_size += labels.shape[0]

        epoch_loss = running_loss / dataset_size

        out = torch.argmax(yHat, axis=1)
        accuracy = accuracy_metric(out.cpu(), labels.cpu())
        precision = precision_metric(out.cpu(), labels.cpu())
        recall = recall_metric(out.cpu(), labels.cpu())
        auroc = auroc_metric(F.softmax(yHat, dim=1).cpu(), labels.cpu())
        f1 = f1_metrics(out.cpu(), labels.cpu())

        val_scores["accuracy"].append(accuracy)
        val_scores["precision"].append(precision)
        val_scores["recall"].append(recall)
        val_scores["auroc"].append(auroc)
        val_scores["f1"].append(f1)

        wandb.log(
            {
                "valid/loss": epoch_loss,
                "valid/accuracy": accuracy,
                "valid/precision": precision,
                "valid/recall": recall,
                "valid/auroc": auroc,
                "valid/f1": f1,
            },
            step=step,
        )

    return epoch_loss, val_scores

## Train One Fold

In [14]:
def run_training(
    model: nn.Module,
    optimizer: optim,
    trainloader: DataLoader,
    validloader: DataLoader,
    run: wandb,
    fold: int,
    scheduler: lr_scheduler = None,
) -> Tuple[nn.Module, defaultdict]:
    wandb.watch(models=[model], log_freq=100)

    best_model_wts = copy.deepcopy(model.state_dict())
    best_loss = np.inf
    best_epoch = -1
    history = defaultdict(list)

    for epoch in range(Config.epochs):
        gc.collect()
        print(f"\t\t\t\t########## EPOCH [{epoch+1}/{Config.epochs}] ##########")
        train_loss = train_one_epoch(
            model=model,
            optimizer=optimizer,
            scheduler=scheduler,
            dataloader=trainloader,
        )
        valid_loss, valid_scores = validate_one_epoch(
            model=model, dataloader=validloader
        )

        wandb.log(
            {
                "train/epoch/loss": train_loss,
                "valid/epoch/loss": valid_loss,
                "valid/epoch/accuracy": np.mean(valid_scores["accuracy"]),
                "valid/epoch/precision": np.mean(valid_scores["precision"]),
                "valid/epoch/recall": np.mean(valid_scores["recall"]),
                "valid/epoch/auroc": np.mean(valid_scores["auroc"]),
                "valid/epoch/f1": np.mean(valid_scores["f1"]),
                "current_lr": optimizer.param_groups[0]["lr"],
            }
        )

        history["accuracy"].append(np.mean(valid_scores["accuracy"]))
        history["precision"].append(np.mean(valid_scores["precision"]))
        history["recall"].append(np.mean(valid_scores["recall"]))
        history["auroc"].append(np.mean(valid_scores["auroc"]))
        history["f1"].append(np.mean(valid_scores["f1"]))

        print(
            f'Valid Accuracy: {np.mean(valid_scores["accuracy"]):.5f} | Valid Loss: {valid_loss:.5f}'
        )

        if valid_loss < best_loss:
            print(
                f"{c_}Validation Score Improved from {best_loss:.5f} to {valid_loss:.5f}"
            )
            best_epoch = epoch + 1
            best_loss = valid_loss
            run.summary["Best Loss"] = best_loss
            run.summary["Best Epoch"] = best_epoch
            run.summary["Best Accuracy"] = np.mean(valid_scores["accuracy"])
            run.summary["Best Precision"] = np.mean(valid_scores["precision"])
            run.summary["Best Recall"] = np.mean(valid_scores["recall"])
            run.summary["Best AUROC"] = np.mean(valid_scores["auroc"])
            run.summary["Best F1 Score"] = np.mean(valid_scores["f1"])

            best_model_wts = copy.deepcopy(model.state_dict())
            PATH = f"models/best/best_epoch-{fold:02d}.bin"
            torch.save(obj=best_model_wts, f=PATH)
            wandb.save(PATH)
            print(f"MODEL SAVED!{sr_}")

        last_model_wts = copy.deepcopy(model.state_dict())
        PATH = f"models/last/last_epoch-{fold:02d}.bin"
        torch.save(last_model_wts, PATH)

    model.load_state_dict(best_model_wts, strict=True)
    torch.save(history, f=f"history/fold-{fold:02d}.pth")
    return model, history

# Run Training

In [15]:
%%time
def prepare_dataloaders(fold) -> Tuple[DataLoader]:
    df = pd.read_csv('folds.csv')
    train_df = df[df['kfold'] != fold].reset_index(drop=True)
    valid_df = df[df['kfold'] == fold].reset_index(drop=True)
    
    train_dataset = MemotionDataset(df=train_df)
    valid_dataset = MemotionDataset(df=valid_df)
    
    train_dataloader = DataLoader(dataset=train_dataset, batch_size=Config.train_bs, shuffle=True)
    valid_dataloader = DataLoader(dataset=valid_dataset, batch_size=Config.valid_bs, shuffle=False)
    
    return train_dataloader, valid_dataloader


train, valid = prepare_dataloaders(2)

Downloading (…)okenizer_config.json:   0%|          | 0.00/363 [00:00<?, ?B/s]

Downloading (…)solve/main/vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

CPU times: user 266 ms, sys: 36.7 ms, total: 303 ms
Wall time: 4.5 s


In [16]:
os.makedirs('models')
os.makedirs('history')
os.makedirs('models/best')
os.makedirs('models/last')

In [17]:
for fold in range(Config.num_folds):
    print('#'*15)
    print(f'### Fold [{fold+1}/{Config.num_folds}]')
    print('#'*15)
    
    run = wandb.init(
        project='multimodal-sentiment-analysis',
        config={k:v for k, v in dict(vars(Config)).items() if '__' not in k},
        name=f'FOLD-{fold+1}|MODEL-{Config.backbone}', 
        group=f'FOLD-{fold+1}|MODEL-{Config.backbone}'
    )
    
    trainloader, validloader = prepare_dataloaders(fold=fold)
    
    model = MemotionModel().to(Config.device)
    optimizer = get_optimizer(model=model)
    scheduler = get_scheduler(optimizer=optimizer)
    
    model, history = run_training(model=model, optimizer=optimizer, 
                                  trainloader=trainloader, validloader=validloader, 
                                  run=run, fold=fold, scheduler=scheduler)
    run.finish()

[34m[1mwandb[0m: Currently logged in as: [33maaparajit02[0m. Use [1m`wandb login --relogin`[0m to force relogin


###############
### Fold [1/5]
###############


Downloading (…)lve/main/config.json:   0%|          | 0.00/69.7k [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/346M [00:00<?, ?B/s]

Some weights of the model checkpoint at google/vit-base-patch16-224 were not used when initializing ViTModel: ['classifier.bias', 'classifier.weight']
- This IS expected if you are initializing ViTModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing ViTModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of ViTModel were not initialized from the model checkpoint at google/vit-base-patch16-224 and are newly initialized: ['vit.pooler.dense.weight', 'vit.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Downloading (…)lve/main/config.json:   0%|          | 0.00/571 [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/438M [00:00<?, ?B/s]

				########## EPOCH [1/10] ##########


(train) : 100%|██████████| 350/350 [04:24<00:00,  1.32it/s, current_lr=0.00030, epoch_loss=1.08468]
(valid) : 100%|██████████| 44/44 [00:30<00:00,  1.43it/s]


Valid Accuracy: 0.38565 | Valid Loss: 1.08211
[32mValidation Score Improved from inf to 1.08211
MODEL SAVED![0m
				########## EPOCH [2/10] ##########


(train) : 100%|██████████| 350/350 [03:51<00:00,  1.51it/s, current_lr=0.00030, epoch_loss=1.08166]
(valid) : 100%|██████████| 44/44 [00:25<00:00,  1.75it/s]


Valid Accuracy: 0.38565 | Valid Loss: 1.10442
				########## EPOCH [3/10] ##########


(train) : 100%|██████████| 350/350 [03:50<00:00,  1.52it/s, current_lr=0.00030, epoch_loss=1.08306]
(valid) : 100%|██████████| 44/44 [00:25<00:00,  1.71it/s]


Valid Accuracy: 0.36972 | Valid Loss: 1.08082
[32mValidation Score Improved from 1.08211 to 1.08082
MODEL SAVED![0m
				########## EPOCH [4/10] ##########


(train) : 100%|██████████| 350/350 [03:51<00:00,  1.51it/s, current_lr=0.00030, epoch_loss=1.08142]
(valid) : 100%|██████████| 44/44 [00:25<00:00,  1.73it/s]


Valid Accuracy: 0.38565 | Valid Loss: 1.07830
[32mValidation Score Improved from 1.08082 to 1.07830
MODEL SAVED![0m
				########## EPOCH [5/10] ##########


(train) : 100%|██████████| 350/350 [03:57<00:00,  1.47it/s, current_lr=0.00029, epoch_loss=1.08031]
(valid) : 100%|██████████| 44/44 [00:26<00:00,  1.64it/s]


Valid Accuracy: 0.38565 | Valid Loss: 1.08212
				########## EPOCH [6/10] ##########


(train) : 100%|██████████| 350/350 [03:51<00:00,  1.51it/s, current_lr=0.00029, epoch_loss=1.07931]
(valid) : 100%|██████████| 44/44 [00:25<00:00,  1.73it/s]


Valid Accuracy: 0.38565 | Valid Loss: 1.08308
				########## EPOCH [7/10] ##########


(train) : 100%|██████████| 350/350 [03:50<00:00,  1.52it/s, current_lr=0.00029, epoch_loss=1.07985]
(valid) : 100%|██████████| 44/44 [00:25<00:00,  1.73it/s]


Valid Accuracy: 0.38565 | Valid Loss: 1.07846
				########## EPOCH [8/10] ##########


(train) : 100%|██████████| 350/350 [03:51<00:00,  1.51it/s, current_lr=0.00028, epoch_loss=1.08017]
(valid) : 100%|██████████| 44/44 [00:25<00:00,  1.74it/s]


Valid Accuracy: 0.38565 | Valid Loss: 1.07907
				########## EPOCH [9/10] ##########


(train) : 100%|██████████| 350/350 [03:49<00:00,  1.52it/s, current_lr=0.00028, epoch_loss=1.07995]
(valid) : 100%|██████████| 44/44 [00:25<00:00,  1.73it/s]


Valid Accuracy: 0.38565 | Valid Loss: 1.07826
[32mValidation Score Improved from 1.07830 to 1.07826
MODEL SAVED![0m
				########## EPOCH [10/10] ##########


(train) : 100%|██████████| 350/350 [03:52<00:00,  1.51it/s, current_lr=0.00028, epoch_loss=1.07887]
(valid) : 100%|██████████| 44/44 [00:25<00:00,  1.75it/s]


Valid Accuracy: 0.37883 | Valid Loss: 1.07945


0,1
current_lr,██▇▇▆▅▅▃▂▁
train/accuracy,▆▅▅▇▅▅▃▄▅▆▄▄▄▆▅▂▃█▅▄▅▅▅▄▅▇▅▅▃▅▆▅▆▆▅▆▁▅▃▄
train/auroc,▆▅▃▆▅▅▇▇▆█▅▄█▃▇▄▇▂▅▅▆▅▆▇▅▁▅▄▁▆▅▇▅▇▄▆▆▆▇▅
train/current_lr,███████████▇▇▇▇▇▇▇▇▆▆▆▆▆▅▅▅▅▄▄▄▄▃▃▃▂▂▂▁▁
train/epoch/loss,█▄▆▄▃▂▂▃▂▁
train/f1,▆▅▅▇▅▅▃▄▅▆▄▄▄▆▅▂▃█▅▄▅▅▅▄▅▇▅▅▃▅▆▅▆▆▅▆▁▅▃▄
train/loss,█▆▄▄▄▄▃▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train/precision,▆▅▅▇▅▅▃▄▅▆▄▄▄▆▅▂▃█▅▄▅▅▅▄▅▇▅▅▃▅▆▅▆▆▅▆▁▅▃▄
train/recall,▆▅▅▇▅▅▃▄▅▆▄▄▄▆▅▂▃█▅▄▅▅▅▄▅▇▅▅▃▅▆▅▆▆▅▆▁▅▃▄
valid/epoch/accuracy,██▁██████▅

0,1
Best AUROC,0.5012
Best Accuracy,0.38565
Best Epoch,9.0
Best F1 Score,0.38565
Best Loss,1.07826
Best Precision,0.38565
Best Recall,0.38565
current_lr,0.00028
train/accuracy,0.55556
train/auroc,0.33333


###############
### Fold [2/5]
###############


Some weights of the model checkpoint at google/vit-base-patch16-224 were not used when initializing ViTModel: ['classifier.bias', 'classifier.weight']
- This IS expected if you are initializing ViTModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing ViTModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of ViTModel were not initialized from the model checkpoint at google/vit-base-patch16-224 and are newly initialized: ['vit.pooler.dense.weight', 'vit.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


				########## EPOCH [1/10] ##########


(train) : 100%|██████████| 350/350 [04:02<00:00,  1.44it/s, current_lr=0.00030, epoch_loss=1.08252]
(valid) : 100%|██████████| 44/44 [00:26<00:00,  1.67it/s]


Valid Accuracy: 0.39038 | Valid Loss: 1.08013
[32mValidation Score Improved from inf to 1.08013
MODEL SAVED![0m
				########## EPOCH [2/10] ##########


(train) : 100%|██████████| 350/350 [03:52<00:00,  1.51it/s, current_lr=0.00030, epoch_loss=1.08129]
(valid) : 100%|██████████| 44/44 [00:26<00:00,  1.66it/s]


Valid Accuracy: 0.39038 | Valid Loss: 1.07858
[32mValidation Score Improved from 1.08013 to 1.07858
MODEL SAVED![0m
				########## EPOCH [3/10] ##########


(train) : 100%|██████████| 350/350 [03:51<00:00,  1.51it/s, current_lr=0.00030, epoch_loss=1.07985]
(valid) : 100%|██████████| 44/44 [00:26<00:00,  1.66it/s]


Valid Accuracy: 0.39038 | Valid Loss: 1.08140
				########## EPOCH [4/10] ##########


(train) : 100%|██████████| 350/350 [03:51<00:00,  1.51it/s, current_lr=0.00030, epoch_loss=1.08060]
(valid) : 100%|██████████| 44/44 [00:26<00:00,  1.67it/s]


Valid Accuracy: 0.37028 | Valid Loss: 1.07855
[32mValidation Score Improved from 1.07858 to 1.07855
MODEL SAVED![0m
				########## EPOCH [5/10] ##########


(train) : 100%|██████████| 350/350 [03:51<00:00,  1.51it/s, current_lr=0.00029, epoch_loss=1.07987]
(valid) : 100%|██████████| 44/44 [00:27<00:00,  1.62it/s]


Valid Accuracy: 0.39038 | Valid Loss: 1.07966
				########## EPOCH [6/10] ##########


(train) : 100%|██████████| 350/350 [03:55<00:00,  1.49it/s, current_lr=0.00029, epoch_loss=1.07951]
(valid) : 100%|██████████| 44/44 [00:27<00:00,  1.62it/s]


Valid Accuracy: 0.39038 | Valid Loss: 1.07946
				########## EPOCH [7/10] ##########


(train) : 100%|██████████| 350/350 [03:54<00:00,  1.50it/s, current_lr=0.00029, epoch_loss=1.07939]
(valid) : 100%|██████████| 44/44 [00:27<00:00,  1.61it/s]


Valid Accuracy: 0.36700 | Valid Loss: 1.08262
				########## EPOCH [8/10] ##########


(train) : 100%|██████████| 350/350 [03:53<00:00,  1.50it/s, current_lr=0.00028, epoch_loss=1.07931]
(valid) : 100%|██████████| 44/44 [00:27<00:00,  1.61it/s]


Valid Accuracy: 0.39038 | Valid Loss: 1.07954
				########## EPOCH [9/10] ##########


(train) : 100%|██████████| 350/350 [03:53<00:00,  1.50it/s, current_lr=0.00028, epoch_loss=1.07915]
(valid) : 100%|██████████| 44/44 [00:26<00:00,  1.65it/s]


Valid Accuracy: 0.38896 | Valid Loss: 1.08017
				########## EPOCH [10/10] ##########


(train) : 100%|██████████| 350/350 [03:53<00:00,  1.50it/s, current_lr=0.00028, epoch_loss=1.07846]
(valid) : 100%|██████████| 44/44 [00:27<00:00,  1.62it/s]


Valid Accuracy: 0.39038 | Valid Loss: 1.07869


0,1
current_lr,██▇▇▆▅▅▃▂▁
train/accuracy,▂▆▆▅▄▅▃▆▅▆▅▆▆▁▅▄▇█▇▃▆▅▂▅▄▅▇▅▆▅▇▅▅▅▅▇▃▄▅▃
train/auroc,▁▆▃▅▄▄▅▅▄▆█▄▇▅▃▃▅▄▆▅█▅▄▄▅▄▇▆▅▁▃▄▃▃▆▄▄▄▂▃
train/current_lr,███████████▇▇▇▇▇▇▇▇▆▆▆▆▆▅▅▅▅▄▄▄▄▃▃▃▂▂▂▁▁
train/epoch/loss,█▆▃▅▃▃▃▂▂▁
train/f1,▂▆▆▅▄▅▃▆▅▆▅▆▆▁▅▄▇█▇▃▆▅▂▅▄▅▇▅▆▅▇▅▅▅▅▇▃▄▅▃
train/loss,█▅▄▃▁▁▂▃▃▂▂▂▂▂▂▂▂▁▁▁▁▁▂▂▂▁▁▂▂▂▁▂▁▁▁▂▂▂▁▁
train/precision,▂▆▆▅▄▅▃▆▅▆▅▆▆▁▅▄▇█▇▃▆▅▂▅▄▅▇▅▆▅▇▅▅▅▅▇▃▄▅▃
train/recall,▂▆▆▅▄▅▃▆▅▆▅▆▆▁▅▄▇█▇▃▆▅▂▅▄▅▇▅▆▅▇▅▅▅▅▇▃▄▅▃
valid/epoch/accuracy,███▂██▁███

0,1
Best AUROC,0.48309
Best Accuracy,0.37028
Best Epoch,4.0
Best F1 Score,0.37028
Best Loss,1.07855
Best Precision,0.37028
Best Recall,0.37028
current_lr,0.00028
train/accuracy,0.44444
train/auroc,0.625


###############
### Fold [3/5]
###############


Some weights of the model checkpoint at google/vit-base-patch16-224 were not used when initializing ViTModel: ['classifier.bias', 'classifier.weight']
- This IS expected if you are initializing ViTModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing ViTModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of ViTModel were not initialized from the model checkpoint at google/vit-base-patch16-224 and are newly initialized: ['vit.pooler.dense.weight', 'vit.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


				########## EPOCH [1/10] ##########


(train) : 100%|██████████| 350/350 [04:07<00:00,  1.41it/s, current_lr=0.00030, epoch_loss=1.08322]
(valid) : 100%|██████████| 44/44 [00:25<00:00,  1.70it/s]


Valid Accuracy: 0.39017 | Valid Loss: 1.07834
[32mValidation Score Improved from inf to 1.07834
MODEL SAVED![0m
				########## EPOCH [2/10] ##########


(train) : 100%|██████████| 350/350 [03:55<00:00,  1.48it/s, current_lr=0.00030, epoch_loss=1.08018]
(valid) : 100%|██████████| 44/44 [00:26<00:00,  1.67it/s]


Valid Accuracy: 0.39017 | Valid Loss: 1.07880
				########## EPOCH [3/10] ##########


(train) : 100%|██████████| 350/350 [03:54<00:00,  1.50it/s, current_lr=0.00030, epoch_loss=1.08021]
(valid) : 100%|██████████| 44/44 [00:26<00:00,  1.66it/s]


Valid Accuracy: 0.39017 | Valid Loss: 1.07821
[32mValidation Score Improved from 1.07834 to 1.07821
MODEL SAVED![0m
				########## EPOCH [4/10] ##########


(train) : 100%|██████████| 350/350 [03:56<00:00,  1.48it/s, current_lr=0.00030, epoch_loss=1.07922]
(valid) : 100%|██████████| 44/44 [00:25<00:00,  1.72it/s]


Valid Accuracy: 0.39017 | Valid Loss: 1.08013
				########## EPOCH [5/10] ##########


(train) : 100%|██████████| 350/350 [03:55<00:00,  1.49it/s, current_lr=0.00029, epoch_loss=1.07965]
(valid) : 100%|██████████| 44/44 [00:26<00:00,  1.68it/s]


Valid Accuracy: 0.39017 | Valid Loss: 1.08027
				########## EPOCH [6/10] ##########


(train) : 100%|██████████| 350/350 [03:55<00:00,  1.49it/s, current_lr=0.00029, epoch_loss=1.07991]
(valid) : 100%|██████████| 44/44 [00:26<00:00,  1.68it/s]


Valid Accuracy: 0.39017 | Valid Loss: 1.07874
				########## EPOCH [7/10] ##########


(train) : 100%|██████████| 350/350 [03:54<00:00,  1.49it/s, current_lr=0.00029, epoch_loss=1.07958]
(valid) : 100%|██████████| 44/44 [00:26<00:00,  1.68it/s]


Valid Accuracy: 0.39017 | Valid Loss: 1.07829
				########## EPOCH [8/10] ##########


(train) : 100%|██████████| 350/350 [03:55<00:00,  1.49it/s, current_lr=0.00028, epoch_loss=1.07901]
(valid) : 100%|██████████| 44/44 [00:26<00:00,  1.69it/s]


Valid Accuracy: 0.39017 | Valid Loss: 1.07888
				########## EPOCH [9/10] ##########


(train) : 100%|██████████| 350/350 [03:55<00:00,  1.49it/s, current_lr=0.00028, epoch_loss=1.07914]
(valid) : 100%|██████████| 44/44 [00:25<00:00,  1.70it/s]


Valid Accuracy: 0.37048 | Valid Loss: 1.07867
				########## EPOCH [10/10] ##########


(train) : 100%|██████████| 350/350 [03:57<00:00,  1.47it/s, current_lr=0.00028, epoch_loss=1.07899]
(valid) : 100%|██████████| 44/44 [00:26<00:00,  1.66it/s]


Valid Accuracy: 0.39017 | Valid Loss: 1.07843


0,1
current_lr,██▇▇▆▅▅▃▂▁
train/accuracy,▅▄▃▆▅▅▅▅▃▆▆▅▃▅▄▆▃▆▁▅█▄▂▂▆▄▃▄▄▁▄▄▆▅▄▃▂▂▁▄
train/auroc,▆▃▂▄▄▄▂▁▃▆▇▅█▅▃▃▄▄█▄▆▁▅▄▆▅▁▅▃▄▆▃▅▄▃▄▄▁▄▅
train/current_lr,███████████▇▇▇▇▇▇▇▇▆▆▆▆▆▅▅▅▅▄▄▄▄▃▃▃▂▂▂▁▁
train/epoch/loss,█▃▃▁▂▃▂▁▁▁
train/f1,▅▄▃▆▅▅▅▅▃▆▆▅▃▅▄▆▃▆▁▅█▄▂▂▆▄▃▄▄▁▄▄▆▅▄▃▂▂▁▄
train/loss,▁▆█▅▃▄▅▅▅▄▄▄▄▄▄▄▄▅▅▄▃▄▄▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃
train/precision,▅▄▃▆▅▅▅▅▃▆▆▅▃▅▄▆▃▆▁▅█▄▂▂▆▄▃▄▄▁▄▄▆▅▄▃▂▂▁▄
train/recall,▅▄▃▆▅▅▅▅▃▆▆▅▃▅▄▆▃▆▁▅█▄▂▂▆▄▃▄▄▁▄▄▆▅▄▃▂▂▁▄
valid/epoch/accuracy,████████▁█

0,1
Best AUROC,0.52006
Best Accuracy,0.39017
Best Epoch,3.0
Best F1 Score,0.39017
Best Loss,1.07821
Best Precision,0.39017
Best Recall,0.39017
current_lr,0.00028
train/accuracy,0.6
train/auroc,0.125


###############
### Fold [4/5]
###############


Some weights of the model checkpoint at google/vit-base-patch16-224 were not used when initializing ViTModel: ['classifier.bias', 'classifier.weight']
- This IS expected if you are initializing ViTModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing ViTModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of ViTModel were not initialized from the model checkpoint at google/vit-base-patch16-224 and are newly initialized: ['vit.pooler.dense.weight', 'vit.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


				########## EPOCH [1/10] ##########


(train) : 100%|██████████| 350/350 [04:08<00:00,  1.41it/s, current_lr=0.00030, epoch_loss=1.08429]
(valid) : 100%|██████████| 44/44 [00:26<00:00,  1.68it/s]


Valid Accuracy: 0.36790 | Valid Loss: 1.08092
[32mValidation Score Improved from inf to 1.08092
MODEL SAVED![0m
				########## EPOCH [2/10] ##########


(train) : 100%|██████████| 350/350 [03:58<00:00,  1.47it/s, current_lr=0.00030, epoch_loss=1.08080]
(valid) : 100%|██████████| 44/44 [00:25<00:00,  1.73it/s]


Valid Accuracy: 0.36790 | Valid Loss: 1.07912
[32mValidation Score Improved from 1.08092 to 1.07912
MODEL SAVED![0m
				########## EPOCH [3/10] ##########


(train) : 100%|██████████| 350/350 [03:55<00:00,  1.49it/s, current_lr=0.00030, epoch_loss=1.08157]
(valid) : 100%|██████████| 44/44 [00:26<00:00,  1.67it/s]


Valid Accuracy: 0.39205 | Valid Loss: 1.07896
[32mValidation Score Improved from 1.07912 to 1.07896
MODEL SAVED![0m
				########## EPOCH [4/10] ##########


(train) : 100%|██████████| 350/350 [03:57<00:00,  1.48it/s, current_lr=0.00030, epoch_loss=1.07922]
(valid) : 100%|██████████| 44/44 [00:25<00:00,  1.70it/s]


Valid Accuracy: 0.39205 | Valid Loss: 1.07929
				########## EPOCH [5/10] ##########


(train) : 100%|██████████| 350/350 [03:55<00:00,  1.49it/s, current_lr=0.00029, epoch_loss=1.07959]
(valid) : 100%|██████████| 44/44 [00:26<00:00,  1.69it/s]


Valid Accuracy: 0.39205 | Valid Loss: 1.07965
				########## EPOCH [6/10] ##########


(train) : 100%|██████████| 350/350 [03:56<00:00,  1.48it/s, current_lr=0.00029, epoch_loss=1.07895]
(valid) : 100%|██████████| 44/44 [00:25<00:00,  1.70it/s]


Valid Accuracy: 0.39205 | Valid Loss: 1.07949
				########## EPOCH [7/10] ##########


(train) : 100%|██████████| 350/350 [03:55<00:00,  1.48it/s, current_lr=0.00029, epoch_loss=1.08039]
(valid) : 100%|██████████| 44/44 [00:26<00:00,  1.68it/s]


Valid Accuracy: 0.39205 | Valid Loss: 1.07881
[32mValidation Score Improved from 1.07896 to 1.07881
MODEL SAVED![0m
				########## EPOCH [8/10] ##########


(train) : 100%|██████████| 350/350 [03:57<00:00,  1.48it/s, current_lr=0.00028, epoch_loss=1.07952]
(valid) : 100%|██████████| 44/44 [00:25<00:00,  1.70it/s]


Valid Accuracy: 0.39205 | Valid Loss: 1.07885
				########## EPOCH [9/10] ##########


(train) : 100%|██████████| 350/350 [03:55<00:00,  1.49it/s, current_lr=0.00028, epoch_loss=1.07916]
(valid) : 100%|██████████| 44/44 [00:25<00:00,  1.70it/s]


Valid Accuracy: 0.39205 | Valid Loss: 1.07877
[32mValidation Score Improved from 1.07881 to 1.07877
MODEL SAVED![0m
				########## EPOCH [10/10] ##########


(train) : 100%|██████████| 350/350 [03:56<00:00,  1.48it/s, current_lr=0.00028, epoch_loss=1.07920]
(valid) : 100%|██████████| 44/44 [00:25<00:00,  1.72it/s]


Valid Accuracy: 0.39205 | Valid Loss: 1.07918


0,1
current_lr,██▇▇▆▅▅▃▂▁
train/accuracy,▅▄▄▇▅▄▆▄▄▃▂▃▃▃▂▅▆▃▃▄▄▅▆▄▆▄▅▁▄▄▃▄▅▅▄▅█▇▄▇
train/auroc,▆▂▄▆▅▁▃▇▆▅▇▆▇▃█▅▅▇▆▆▄▄▇▅▆▅▃▆▅▃▃▄▆▅██▄▄▄▂
train/current_lr,███████████▇▇▇▇▇▇▇▇▆▆▆▆▆▅▅▅▅▄▄▄▄▃▃▃▂▂▂▁▁
train/epoch/loss,█▃▄▁▂▁▃▂▁▁
train/f1,▅▄▄▇▅▄▆▄▄▃▂▃▃▃▂▅▆▃▃▄▄▅▆▄▆▄▅▁▄▄▃▄▅▅▄▅█▇▄▇
train/loss,█▁▂▁▂▂▁▃▃▃▃▃▃▃▂▂▃▃▃▂▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▂▂▃▃
train/precision,▅▄▄▇▅▄▆▄▄▃▂▃▃▃▂▅▆▃▃▄▄▅▆▄▆▄▅▁▄▄▃▄▅▅▄▅█▇▄▇
train/recall,▅▄▄▇▅▄▆▄▄▃▂▃▃▃▂▅▆▃▃▄▄▅▆▄▆▄▅▁▄▄▃▄▅▅▄▅█▇▄▇
valid/epoch/accuracy,▁▁████████

0,1
Best AUROC,0.48886
Best Accuracy,0.39205
Best Epoch,9.0
Best F1 Score,0.39205
Best Loss,1.07877
Best Precision,0.39205
Best Recall,0.39205
current_lr,0.00028
train/accuracy,0.1
train/auroc,0.74636


###############
### Fold [5/5]
###############


Some weights of the model checkpoint at google/vit-base-patch16-224 were not used when initializing ViTModel: ['classifier.bias', 'classifier.weight']
- This IS expected if you are initializing ViTModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing ViTModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of ViTModel were not initialized from the model checkpoint at google/vit-base-patch16-224 and are newly initialized: ['vit.pooler.dense.weight', 'vit.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


				########## EPOCH [1/10] ##########


(train) : 100%|██████████| 350/350 [04:08<00:00,  1.41it/s, current_lr=0.00030, epoch_loss=1.08615]
(valid) : 100%|██████████| 44/44 [00:26<00:00,  1.68it/s]


Valid Accuracy: 0.38688 | Valid Loss: 1.08114
[32mValidation Score Improved from inf to 1.08114
MODEL SAVED![0m
				########## EPOCH [2/10] ##########


(train) : 100%|██████████| 350/350 [03:56<00:00,  1.48it/s, current_lr=0.00030, epoch_loss=1.08220]
(valid) : 100%|██████████| 44/44 [00:26<00:00,  1.66it/s]


Valid Accuracy: 0.38688 | Valid Loss: 1.08114
				########## EPOCH [3/10] ##########


(train) : 100%|██████████| 350/350 [03:54<00:00,  1.49it/s, current_lr=0.00030, epoch_loss=1.08103]
(valid) : 100%|██████████| 44/44 [00:26<00:00,  1.69it/s]


Valid Accuracy: 0.37016 | Valid Loss: 1.10939
				########## EPOCH [4/10] ##########


(train) : 100%|██████████| 350/350 [03:58<00:00,  1.47it/s, current_lr=0.00030, epoch_loss=1.08040]
(valid) : 100%|██████████| 44/44 [00:26<00:00,  1.67it/s]


Valid Accuracy: 0.38688 | Valid Loss: 1.07986
[32mValidation Score Improved from 1.08114 to 1.07986
MODEL SAVED![0m
				########## EPOCH [5/10] ##########


(train) : 100%|██████████| 350/350 [03:57<00:00,  1.48it/s, current_lr=0.00029, epoch_loss=1.08049]
(valid) : 100%|██████████| 44/44 [00:26<00:00,  1.65it/s]


Valid Accuracy: 0.38688 | Valid Loss: 1.07871
[32mValidation Score Improved from 1.07986 to 1.07871
MODEL SAVED![0m
				########## EPOCH [6/10] ##########


(train) : 100%|██████████| 350/350 [03:57<00:00,  1.47it/s, current_lr=0.00029, epoch_loss=1.07978]
(valid) : 100%|██████████| 44/44 [00:26<00:00,  1.66it/s]


Valid Accuracy: 0.38688 | Valid Loss: 1.07933
				########## EPOCH [7/10] ##########


(train) : 100%|██████████| 350/350 [03:55<00:00,  1.48it/s, current_lr=0.00029, epoch_loss=1.07980]
(valid) : 100%|██████████| 44/44 [00:26<00:00,  1.67it/s]


Valid Accuracy: 0.37016 | Valid Loss: 1.08148
				########## EPOCH [8/10] ##########


(train) : 100%|██████████| 350/350 [03:56<00:00,  1.48it/s, current_lr=0.00028, epoch_loss=1.08042]
(valid) : 100%|██████████| 44/44 [00:26<00:00,  1.66it/s]


Valid Accuracy: 0.38688 | Valid Loss: 1.07958
				########## EPOCH [9/10] ##########


(train) : 100%|██████████| 350/350 [03:55<00:00,  1.49it/s, current_lr=0.00028, epoch_loss=1.07983]
(valid) : 100%|██████████| 44/44 [00:26<00:00,  1.66it/s]


Valid Accuracy: 0.39011 | Valid Loss: 1.08082
				########## EPOCH [10/10] ##########


(train) : 100%|██████████| 350/350 [03:55<00:00,  1.48it/s, current_lr=0.00028, epoch_loss=1.07965]
(valid) : 100%|██████████| 44/44 [00:26<00:00,  1.69it/s]


Valid Accuracy: 0.38688 | Valid Loss: 1.07946


0,1
current_lr,██▇▇▆▅▅▃▂▁
train/accuracy,▃▇▇▄▆▄▆▆▆▅▆▆▆▁▅▆▅▆▇▄▇▆▃▆▇▆█▆▆█▇▃▆▅▅▆▄▆▇▆
train/auroc,▆▃▇▄▃▂▆▄▄▆▂▁▅▁▄▆█▃▃▆▃▇▄▅▅▂▆▄▅▄▃▃▄▄▁▅▃▆▄▂
train/current_lr,███████████▇▇▇▇▇▇▇▇▆▆▆▆▆▅▅▅▅▄▄▄▄▃▃▃▂▂▂▁▁
train/epoch/loss,█▄▂▂▂▁▁▂▁▁
train/f1,▃▇▇▄▆▄▆▆▆▅▆▆▆▁▅▆▅▆▇▄▇▆▃▆▇▆█▆▆█▇▃▆▅▅▆▄▆▇▆
train/loss,█▄▄▃▃▂▂▂▂▁▁▁▁▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train/precision,▃▇▇▄▆▄▆▆▆▅▆▆▆▁▅▆▅▆▇▄▇▆▃▆▇▆█▆▆█▇▃▆▅▅▆▄▆▇▆
train/recall,▃▇▇▄▆▄▆▆▆▅▆▆▆▁▅▆▅▆▇▄▇▆▃▆▇▆█▆▆█▇▃▆▅▅▆▄▆▇▆
valid/epoch/accuracy,▇▇▁▇▇▇▁▇█▇

0,1
Best AUROC,0.47933
Best Accuracy,0.38688
Best Epoch,5.0
Best F1 Score,0.38688
Best Loss,1.07871
Best Precision,0.38688
Best Recall,0.38688
current_lr,0.00028
train/accuracy,0.2
train/auroc,0.55778


In [18]:
import shutil
try:
    !rm -rf ./wandb
except:
    pass

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
