In [1]:
!pip install -q medmnist

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m87.2/87.2 kB[0m [31m3.2 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
  Building wheel for fire (setup.py) ... [?25l[?25hdone


In [2]:
import numpy as np
import pandas as pd
import os
import medmnist
from medmnist import INFO, Evaluator
import torch
from torch.utils.data import Dataset, Subset, DataLoader
from pathlib import Path
import random
import timm
import torch.nn as nn
import torch.nn.functional as F
import torchvision.transforms as transforms
from torch.optim import AdamW
from torch.optim.lr_scheduler import OneCycleLR
from torch.nn.utils import clip_grad_norm_
from sklearn.metrics import f1_score, recall_score  # recall_score 추가
from tqdm import tqdm
from torch.cuda.amp import GradScaler, autocast
from collections import defaultdict

# --- Configuration ---
config = {
    'backbone': 'convnext_tiny.in12k_ft_in1k',
    'pretrained': True,
    'batch_size': 512,
    'num_epochs': 30,
    'lr': 1e-3,
    'optimizer': 'AdamW',
    'weight_decay': 0.05,
    'gradient_clip_norm': 1.0,
    'early_stopping_patience': 7,
    'scheduler': 'onecycle',
    'num_workers': 4,
    'use_amp': True,
    'head_type': 'bottleneck',
    'dropout_rate': 0.2,
    'stochastic_depth_rate': 0.1,
    'use_focal_loss': False,  # Focal Loss 사용 여부
    'focal_loss_gamma': 2.0, # Focal Loss 감마 값
    'early_stopping_target_task': 'breastmnist',  # 조기 종료 기준 작업
    'early_stopping_metric': 'recall',  # 조기 종료 기준 지표 (recall 또는 f1)
}

DEBUG = False  # Set to True for faster debugging (fewer epochs/data)
if DEBUG:
    config['num_epochs'] = 1
    config['batch_size'] = 64

# --- Data Loading ---
def load_medmnist_from_npz(data_flag, debug=False):
    data_path = Path('/kaggle/input/tensor-reloaded-multi-task-med-mnist/data') / f'{data_flag}.npz'
    data = np.load(data_path)
    info = INFO[data_flag]
    n_classes = len(info['label'])

    class NPZDataset(Dataset):
        def __init__(self, images, labels=None):
            self.images = images
            self.labels = labels

        def __len__(self):
            return len(self.images)

        def __getitem__(self, idx):
            image = self.images[idx]
            label = self.labels[idx] if self.labels is not None else None
            return image, label  # Return NumPy arrays

    if debug:
        # Use a smaller subset of the training data for debugging
        train_size = len(data['train_images'])
        indices = list(range(train_size))
        random.seed(42)  # For reproducibility
        debug_size = int(0.1 * train_size)  # Use 10% of the data
        debug_indices = random.sample(indices, debug_size)

        train_images = data['train_images'][debug_indices]
        train_labels = data['train_labels'][debug_indices] if 'train_labels' in data else None
        train_dataset = NPZDataset(train_images, train_labels)
    else:
        train_dataset = NPZDataset(data['train_images'], data.get('train_labels'))

    val_dataset = NPZDataset(data['val_images'], data.get('val_labels'))
    test_dataset = NPZDataset(data['test_images'], data.get('test_labels'))  # Include test labels

    return train_dataset, val_dataset, test_dataset, info

DATASETS = [
    'pathmnist', 'dermamnist', 'octmnist', 'pneumoniamnist', 'retinamnist',
    'breastmnist', 'bloodmnist', 'tissuemnist', 'organamnist', 'organcmnist',
    'organsmnist'
]

def load_all_datasets(debug=False):
    datasets = {}
    for data_flag in DATASETS:
        print(f"Loading {data_flag}...")
        train, val, test, info = load_medmnist_from_npz(data_flag, debug=debug)
        datasets[data_flag] = {
            'train': train,
            'val': val,
            'test': test,
            'info': info
        }
    return datasets

def calculate_class_weights(datasets):
    weights = {}
    for data_flag in DATASETS:
        labels = datasets[data_flag]['train'].labels
        if labels is None:  # Handle cases where labels are missing
            weights[data_flag] = None
            continue
        if isinstance(labels, torch.Tensor):
            labels = labels.numpy()  # Convert to NumPy array if needed
        num_classes = len(datasets[data_flag]['info']['label'])
        class_counts = np.bincount(labels.flatten(), minlength=num_classes)
        total = class_counts.sum()
        raw_weights = total / (class_counts + 1e-6)  # Avoid division by zero
        normalized_weights = raw_weights / raw_weights.mean()  # Normalize
        weights[data_flag] = torch.FloatTensor(normalized_weights)
    return weights

# --- Model Definition ---
class SelfAttention(nn.Module):
    """Simple self-attention layer."""
    def __init__(self, dim, heads=8, dropout=0.0):
        super().__init__()
        self.heads = heads
        self.scale = dim ** -0.5  # Scaled dot-product attention
        self.to_qkv = nn.Linear(dim, dim * 3, bias=False)  # Learnable projections
        self.attend = nn.Softmax(dim=-1)  # Softmax for attention weights
        self.dropout = nn.Dropout(dropout)
        self.to_out = nn.Sequential(nn.Linear(dim, dim), nn.Dropout(dropout))

    def forward(self, x):
        b, n, _, h = *x.shape, self.heads
        qkv = self.to_qkv(x).chunk(3, dim=-1)  # Split into Q, K, V
        q, k, v = map(lambda t: t.reshape(b, n, h, -1).permute(0, 2, 1, 3), qkv)
        dots = torch.matmul(q, k.transpose(-1, -2)) * self.scale  # Scaled dot product
        attn = self.attend(dots)
        attn = self.dropout(attn)
        out = torch.matmul(attn, v)  # Weighted sum of values
        out = out.permute(0, 2, 1, 3).reshape(b, n, -1)  # Concatenate heads
        return self.to_out(out)

class MedMNISTMultiTaskModel(nn.Module):
    def __init__(self, backbone_name, pretrained, head_type, dropout_rate, stochastic_depth_rate):
        super().__init__()
        self.task_outputs = {d: len(INFO[d]['label']) for d in DATASETS}
        self.backbone = timm.create_model(
            backbone_name, pretrained=pretrained, num_classes=0, drop_path_rate=stochastic_depth_rate
        )
        # Adapt the input layer to handle potential grayscale images:
        self.backbone.stem[0] = nn.Conv2d(3, self.backbone.stem[0].out_channels, kernel_size=3, stride=1, padding=1)
        feat_dim = self.backbone.num_features  # Get feature dimension from backbone

        self.heads = nn.ModuleDict()
        for task, num_classes in self.task_outputs.items():
            if head_type == 'simple':
                head = nn.Sequential(
                    nn.LayerNorm(feat_dim),
                    nn.Linear(feat_dim, num_classes)
                )
            elif head_type == 'bottleneck':
                head = nn.Sequential(
                    nn.LayerNorm(feat_dim),
                    nn.Linear(feat_dim, feat_dim // 4),
                    nn.GELU(),
                    nn.Dropout(dropout_rate),
                    nn.Linear(feat_dim // 4, num_classes)
                )
            elif head_type == 'attention':
                head = nn.Sequential(
                    nn.LayerNorm(feat_dim),
                    nn.Linear(feat_dim, feat_dim//2),
                    nn.GELU(),
                    SelfAttention(feat_dim//2, heads=4, dropout=dropout_rate),
                    nn.Linear(feat_dim // 2, num_classes)
                )
            else:
                raise ValueError(f"Invalid head_type: {head_type}")
            self.heads[task] = head

    def forward(self, x, task_ids=None):
        features = self.backbone(x)
        if task_ids is not None:  # Training/validation: task-specific outputs
            outputs = torch.zeros(len(task_ids), max(self.task_outputs.values())).to(features.device)
            for i, task_id in enumerate(task_ids):
                task_name = DATASETS[task_id]
                task_output = self.heads[task_name](features[i:i+1])
                outputs[i, :self.task_outputs[task_name]] = task_output.squeeze(0)
            return outputs
        else:  # Potentially for inference/feature extraction
            return {task: head(features) for task, head in self.heads.items()}

# --- Dataset ---
class MedMNISTMultiDataset(Dataset):
    def __init__(self, datasets, split='train', transform=None):
        super().__init__()
        self.datasets = datasets
        self.split = split
        self.transform = transform
        self.dataset_indices = []
        # Create a combined list of (dataset_index, sample_index)
        for dataset_idx, (name, dataset_dict) in enumerate(datasets.items()):
            dataset = dataset_dict[split]
            n_samples = len(dataset)
            self.dataset_indices.extend([(dataset_idx, i) for i in range(n_samples)])

    def __len__(self):
        return len(self.dataset_indices)

    def __getitem__(self, idx):
        dataset_idx, sample_idx = self.dataset_indices[idx]
        dataset_name = DATASETS[dataset_idx]
        dataset = self.datasets[dataset_name][self.split]
        image, label = dataset[sample_idx]  # Get NumPy arrays

        # Convert NumPy array to PIL Image
        if isinstance(image, np.ndarray):
            # Handle both grayscale (H, W) and color (H, W, C) images
            if image.ndim == 2:  # Grayscale
                image = transforms.functional.to_pil_image(image, mode='L') # Explicitly specify mode
            elif image.ndim == 3:
                 # Transpose if channels are first
                if image.shape[0] in [1, 3]:
                    image = image.transpose(1, 2, 0)
                image = transforms.functional.to_pil_image(image)


        if self.transform:
            image = self.transform(image)  # Apply transforms

        # Convert label to tensor if it exists
        if label is not None:
            label = torch.tensor(label, dtype=torch.long)
        else:
            label = torch.tensor(-1, dtype=torch.long)  # Dummy label for test set

        return image, label, torch.tensor(dataset_idx, dtype=torch.long)  # Return task ID

# --- Trainer ---
class Trainer:
    def __init__(self, model, train_dataset, val_dataset, config, device='cuda'):
        self.model = model.to(device)
        self.device = device
        self.config = config
        self.scaler = GradScaler(enabled=config['use_amp']) # Use standard GradScaler
        # 조기 종료 기준 관련 변수 초기화
        if self.config['early_stopping_metric'] == 'recall':
          self.best_val_metric = 0.0
        elif self.config["early_stopping_metric"] == "f1":
          self.best_val_metric = 0.0  # Assuming higher is better (e.g., F1 score)
        self.early_stopping_counter = 0
        self.train_loader = DataLoader(
            train_dataset, batch_size=config['batch_size'], shuffle=True,
            num_workers=config['num_workers'], pin_memory=True, persistent_workers=True
        )
        self.val_loader = DataLoader(
            val_dataset, batch_size=config['batch_size'], shuffle=False,
            num_workers=config['num_workers'], pin_memory=True, persistent_workers=True
        )
        self.class_weights = calculate_class_weights(datasets)

        # Optimizer
        if config['optimizer'] == 'AdamW':
            self.optimizer = AdamW(model.parameters(), lr=config['lr'], weight_decay=config['weight_decay'])
        else:
            raise ValueError(f"Invalid optimizer: {config['optimizer']}")

        # Scheduler
        if config['scheduler'] == 'onecycle':
            self.scheduler = OneCycleLR(
                self.optimizer, max_lr=config['lr'], epochs=config['num_epochs'],
                steps_per_epoch=len(self.train_loader)
            )
        else:
            raise ValueError(f"Invalid scheduler: {config['scheduler']}")

        # Loss function (Focal Loss or CrossEntropyLoss)
        if config['use_focal_loss']:
            self.criterion = self.focal_loss  # Use the defined focal_loss method
        else:
            self.criterion = torch.nn.CrossEntropyLoss() # Default criterion

    # Focal Loss implementation (as a method within Trainer)
    def focal_loss(self, outputs, labels, class_weights=None):
        gamma = self.config['focal_loss_gamma']
        ce_loss = F.cross_entropy(outputs, labels, reduction='none', weight=class_weights)
        pt = torch.exp(-ce_loss)
        focal_loss = (1 - pt)**gamma * ce_loss
        return focal_loss.mean()

    def train_epoch(self):
        self.model.train()
        total_loss = 0
        task_predictions = {task: {'preds': [], 'targets': []} for task in DATASETS}
        pbar = tqdm(self.train_loader, desc='Training', dynamic_ncols=True)

        for images, labels, task_ids in pbar:
            images, labels, task_ids = images.to(self.device), labels.to(self.device), task_ids.to(self.device)

            with autocast(enabled=self.config['use_amp']):
                outputs = self.model(images, task_ids)
                losses = []
                for i, (output, label, task_id) in enumerate(zip(outputs, labels, task_ids)):
                    task_name = DATASETS[task_id]
                    num_classes = self.model.task_outputs[task_name]
                    task_output = output[:num_classes].unsqueeze(0)  # Ensure correct shape
                    task_label = label.view(-1) # Ensure correct shape

                    class_weight = self.class_weights.get(task_name)
                    if class_weight is not None:
                        class_weight = class_weight.to(self.device)

                    # Use Focal Loss if enabled, otherwise use CrossEntropyLoss
                    if self.config['use_focal_loss']:
                      loss = self.focal_loss(task_output, task_label, class_weight)
                    else:
                      loss_fn = torch.nn.CrossEntropyLoss(weight=class_weight)
                      loss = loss_fn(task_output, task_label)

                    losses.append(loss)

                loss = torch.stack(losses).mean() # Use mean for multi-task loss

            self.optimizer.zero_grad()
            self.scaler.scale(loss).backward()
            clip_grad_norm_(self.model.parameters(), self.config['gradient_clip_norm'])
            self.scaler.step(self.optimizer)
            self.scaler.update()
            self.scheduler.step()

            total_loss += loss.item()
            pbar.set_postfix({'loss': total_loss / (pbar.n + 1)})

            for i, (output, label, task_id) in enumerate(zip(outputs, labels, task_ids)):
                task_name = DATASETS[task_id]
                num_classes = self.model.task_outputs[task_name]
                pred = output[:num_classes].argmax(dim=0).cpu().item()
                target = label.item()
                task_predictions[task_name]['preds'].append(pred)
                task_predictions[task_name]['targets'].append(target)

        # Calculate F1 scores for each task
        task_f1_scores = {
            task: f1_score(task_predictions[task]['targets'], task_predictions[task]['preds'], average='macro')
            if len(task_predictions[task]['preds']) > 0 else 0.0 for task in DATASETS
        }
        return total_loss / len(self.train_loader), task_f1_scores

    @torch.no_grad()
    def validate(self):
        self.model.eval()
        total_loss = 0
        task_predictions = {task: {'preds': [], 'targets': []} for task in DATASETS}

        pbar = tqdm(self.val_loader, desc='Validating', dynamic_ncols=True)
        for images, labels, task_ids in pbar:
            images, labels, task_ids = images.to(self.device), labels.to(self.device), task_ids.to(self.device)
            labels = labels.view(-1).long()  # Flatten labels
            outputs = self.model(images, task_ids)
            losses = []

            for task_name in set(DATASETS[tid.item()] for tid in task_ids):  # Iterate through unique tasks
                task_mask = torch.tensor([DATASETS[tid.item()] == task_name for tid in task_ids], device=self.device)
                if not task_mask.any():  # Skip if no samples for this task in batch
                    continue
                task_outputs = outputs[task_mask]
                task_labels = labels[task_mask]
                n_classes = self.model.task_outputs[task_name]

                class_weight = self.class_weights.get(task_name)  # Use .get()
                if class_weight is not None:
                    class_weight = class_weight.to(self.device)

                # Use Focal Loss if enabled
                if self.config['use_focal_loss']:
                    task_loss = self.focal_loss(task_outputs[:, :n_classes], task_labels, class_weight)
                else:
                    loss_fn = torch.nn.CrossEntropyLoss(weight=class_weight)
                    task_loss = loss_fn(task_outputs[:, :n_classes], task_labels)

                losses.append(task_loss)

            loss = torch.stack(losses).mean() # Use mean for multi-task loss
            total_loss += loss.item()

            for i, (task_id, label) in enumerate(zip(task_ids, labels)):
                task_name = DATASETS[task_id.item()]
                n_classes = self.model.task_outputs[task_name]
                pred = outputs[i, :n_classes].argmax(dim=0).cpu().item()
                task_predictions[task_name]['preds'].append(pred)
                task_predictions[task_name]['targets'].append(label.cpu().item())

        # Calculate F1 scores and Recall for each task.
        task_f1_scores = {}
        task_recall_scores = {} # recall 추가
        for task in DATASETS:
            if len(task_predictions[task]['preds']) > 0:
                task_f1_scores[task] = f1_score(task_predictions[task]['targets'], task_predictions[task]['preds'], average='macro')
                task_recall_scores[task] = recall_score(task_predictions[task]['targets'], task_predictions[task]['preds'], average='macro') # recall 계산
            else:
                task_f1_scores[task] = 0.0
                task_recall_scores[task] = 0.0

        # 조기 종료 metric 계산
        if self.config['early_stopping_metric'] == 'recall':
          target_task = self.config['early_stopping_target_task']
          val_metric = task_recall_scores[target_task] if target_task in task_recall_scores else 0.0

        elif self.config['early_stopping_metric'] == 'f1':
            # Compute the *harmonic mean* of the F1 scores.
            val_f1_values = list(task_f1_scores.values())
            val_metric = len(val_f1_values) / sum(1 / f1 if f1 > 0 else 1e-6 for f1 in val_f1_values)

        return total_loss / len(self.val_loader), val_metric, task_f1_scores, task_recall_scores  # Return individual task scores


    def train(self):
        for epoch in range(self.config['num_epochs']):
            print(f"\nEpoch {epoch+1}/{self.config['num_epochs']}")
            train_loss, train_f1_scores = self.train_epoch()
            val_loss, val_metric, val_f1_scores, val_recall_scores = self.validate()

            # Calculate harmonic mean of training F1 scores
            train_f1_values = list(train_f1_scores.values())
            train_f1_mean = len(train_f1_values) / sum(1 / f1 if f1 > 0 else 1e-6 for f1 in train_f1_values)

            print(f"Train Loss: {train_loss:.4f} | Train F1 (Harmonic): {train_f1_mean:.4f}")
            print(f"Val Loss:   {val_loss:.4f} | Val Metric ({self.config['early_stopping_metric']}):   {val_metric:.4f}")

            # Print individual task F1 scores and recall scores
            for task in DATASETS:
                if task in val_f1_scores:
                    print(f"  - {task}: F1 = {val_f1_scores[task]:.4f}, Recall = {val_recall_scores[task]:.4f}")  # recall 출력

            if val_metric > self.best_val_metric:
                self.best_val_metric = val_metric
                self.early_stopping_counter = 0
                torch.save({
                    'epoch': epoch,
                    'model_state_dict': self.model.state_dict(),
                    'optimizer_state_dict': self.optimizer.state_dict(),
                    'scheduler_state_dict': self.scheduler.state_dict(),
                    'best_metric': self.best_val_metric,  # best_f1 대신 best_metric
                    'config': self.config,
                }, 'best_model.pth')
                print("Saved best model.")
            else:
                self.early_stopping_counter += 1
                print(f"Early stopping counter: {self.early_stopping_counter}/{self.config['early_stopping_patience']}")
                if self.early_stopping_counter >= self.config['early_stopping_patience']:
                    print("Early stopping triggered.")
                    break

# --- Data Augmentation ---
# Data augmentation transforms: consistent 3-channel input
train_transforms = transforms.Compose([
    transforms.RandomResizedCrop(size=(28, 28), scale=(0.8, 1.0), ratio=(0.9, 1.1)),
    transforms.RandomRotation(degrees=(-15, 15)),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.05),
    transforms.RandomApply([transforms.GaussianBlur(kernel_size=3)], p=0.2),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
])

val_transforms = transforms.Compose([
    # For validation/test, we usually just resize and normalize
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
])

# --- Load Data, Create Datasets, Model, and Trainer ---
datasets = load_all_datasets(debug=DEBUG)

# Add Grayscale transform *before* ToTensor and Normalize
def get_transforms(train=True):
  transforms_list = []
  if train:
    transforms_list.extend([
        transforms.RandomResizedCrop(size=(28, 28), scale=(0.8, 1.0), ratio=(0.9, 1.1)),
        transforms.RandomRotation(degrees=(-15, 15)),
        transforms.RandomHorizontalFlip(p=0.5),
        transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.05),
        transforms.RandomApply([transforms.GaussianBlur(kernel_size=3)], p=0.2),
    ])
  # else:  # No additional transforms needed for val/test besides Grayscale and Normalize
      # transforms_list.extend([])
  transforms_list.extend([
    transforms.Grayscale(num_output_channels=3),  # Convert to 3 channels *BEFORE* normalization
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
  ])
  return transforms.Compose(transforms_list)


train_dataset = MedMNISTMultiDataset(datasets, split='train', transform=get_transforms(train=True))
val_dataset = MedMNISTMultiDataset(datasets, split='val', transform=get_transforms(train=False))
test_dataset = MedMNISTMultiDataset(datasets, split='test', transform=get_transforms(train=False))

model = MedMNISTMultiTaskModel(
    backbone_name=config['backbone'],
    pretrained=config['pretrained'],
    head_type=config['head_type'],
    dropout_rate=config['dropout_rate'],
    stochastic_depth_rate=config['stochastic_depth_rate']
)
trainer = Trainer(model, train_dataset, val_dataset, config)

# --- Train the Model ---
trainer.train()

# --- Submission Generation (Example - Adapt as Needed) ---
def load_best_model(checkpoint_path, model, device='cuda'):
    try:
        checkpoint = torch.load(checkpoint_path, map_location=device)
        model.load_state_dict(checkpoint['model_state_dict'])
        best_metric = checkpoint['best_metric'] # best_f1 대신 best_metric
        print(f"Loaded best model from epoch {checkpoint['epoch']} with metric: {best_metric:.4f}") # metric 종류 출력
        return model, best_metric
    except FileNotFoundError:
        print(f"No checkpoint found at {checkpoint_path}")
        return None, 0.0  # Or handle the error as appropriate
    except Exception as e:
        print(f"Error loading checkpoint: {str(e)}")
        return None, 0.0
def create_submission(model, test_dataset, config, device='cuda'):
    if model is None:  # Check if the model is loaded
        print("Cannot create submission: No model loaded.")
        return None

    model.eval()
    all_predictions = []
    test_loader = DataLoader(
        test_dataset, batch_size=config['batch_size'], shuffle=False,
        num_workers=config['num_workers'], pin_memory=True, persistent_workers=True
    )
    task_counters = {task: 0 for task in DATASETS}
    global_id = 0 # global id

    with torch.no_grad(), autocast(enabled=config['use_amp']):
        for batch in tqdm(test_loader, desc='Generating predictions', dynamic_ncols=True):
            images, _, task_ids = batch  # Unpack the batch, ignore labels
            images, task_ids = images.to(device, non_blocking=True), task_ids.to(device, non_blocking=True)
            unique_tasks = torch.unique(task_ids)

            for task_idx in unique_tasks:
                task_name = DATASETS[task_idx]
                # Filter the batch to only include samples from the current task
                mask = task_ids == task_idx
                if mask.any():  # Ensure there are samples for this task
                  task_images = images[mask]
                  features = model.backbone(task_images)
                  outputs = model.heads[task_name](features)
                  preds = outputs.argmax(dim=1).cpu().numpy()
                  n_preds = len(preds)

                  task_start_idx = task_counters[task_name]

                  # Create a list of dictionaries for this batch
                  batch_predictions = [
                      {
                          'id': global_id + i,
                          'label': int(pred),  # Convert to int
                          'task_name': task_name,  # Store task name
                          'id_image_in_task': task_start_idx + i,
                      }
                      for i, pred in enumerate(preds)
                  ]
                  all_predictions.extend(batch_predictions)
                  task_counters[task_name] += n_preds
                  global_id += n_preds


    # Convert to DataFrame and save
    df = pd.DataFrame(all_predictions)
    df = df[['id', 'label', 'task_name', 'id_image_in_task']]  # Ensure correct column order
    df.to_csv('submission.csv', index=False)
    print(f"\nSubmission saved with {len(df)} total predictions")
    return df
# --- Load Best Model and Create Submission ---

model, best_metric = load_best_model('best_model.pth', model)  # best_f1 대신 best_metric
submission_df = create_submission(model, test_dataset, config)

if submission_df is not None:
  print(submission_df.head())

Loading pathmnist...
Loading dermamnist...
Loading octmnist...
Loading pneumoniamnist...
Loading retinamnist...
Loading breastmnist...
Loading bloodmnist...
Loading tissuemnist...
Loading organamnist...
Loading organcmnist...
Loading organsmnist...


model.safetensors:   0%|          | 0.00/114M [00:00<?, ?B/s]

  self.scaler = GradScaler(enabled=config['use_amp']) # Use standard GradScaler



Epoch 1/30


  with autocast(enabled=self.config['use_amp']):
Training: 100%|██████████| 859/859 [16:17<00:00,  1.14s/it, loss=1.32]
Validating: 100%|██████████| 116/116 [01:19<00:00,  1.46it/s]


Train Loss: 1.3166 | Train F1 (Harmonic): 0.2892
Val Loss:   1.2339 | Val Metric (recall):   0.5238
  - pathmnist: F1 = 0.6693, Recall = 0.6724
  - dermamnist: F1 = 0.1390, Recall = 0.1565
  - octmnist: F1 = 0.6249, Recall = 0.6135
  - pneumoniamnist: F1 = 0.8515, Recall = 0.8958
  - retinamnist: F1 = 0.2266, Recall = 0.2504
  - breastmnist: F1 = 0.4708, Recall = 0.5238
  - bloodmnist: F1 = 0.6050, Recall = 0.6398
  - tissuemnist: F1 = 0.3839, Recall = 0.3816
  - organamnist: F1 = 0.7826, Recall = 0.8020
  - organcmnist: F1 = 0.6326, Recall = 0.6790
  - organsmnist: F1 = 0.4250, Recall = 0.4812
Saved best model.

Epoch 2/30


  with autocast(enabled=self.config['use_amp']):
Training: 100%|██████████| 859/859 [16:25<00:00,  1.15s/it, loss=0.886]
Validating: 100%|██████████| 116/116 [01:19<00:00,  1.46it/s]


Train Loss: 0.8856 | Train F1 (Harmonic): 0.3983
Val Loss:   0.9663 | Val Metric (recall):   0.5000
  - pathmnist: F1 = 0.7587, Recall = 0.7516
  - dermamnist: F1 = 0.1813, Recall = 0.1906
  - octmnist: F1 = 0.7336, Recall = 0.7209
  - pneumoniamnist: F1 = 0.9190, Recall = 0.9394
  - retinamnist: F1 = 0.1497, Recall = 0.2106
  - breastmnist: F1 = 0.4222, Recall = 0.5000
  - bloodmnist: F1 = 0.7610, Recall = 0.7534
  - tissuemnist: F1 = 0.4654, Recall = 0.4481
  - organamnist: F1 = 0.9320, Recall = 0.9253
  - organcmnist: F1 = 0.8289, Recall = 0.8490
  - organsmnist: F1 = 0.6194, Recall = 0.6602
Early stopping counter: 1/7

Epoch 3/30


  with autocast(enabled=self.config['use_amp']):
Training: 100%|██████████| 859/859 [16:22<00:00,  1.14s/it, loss=0.75]
Validating: 100%|██████████| 116/116 [01:19<00:00,  1.46it/s]


Train Loss: 0.7502 | Train F1 (Harmonic): 0.4664
Val Loss:   0.8516 | Val Metric (recall):   0.5238
  - pathmnist: F1 = 0.8369, Recall = 0.8403
  - dermamnist: F1 = 0.2782, Recall = 0.2695
  - octmnist: F1 = 0.7655, Recall = 0.7521
  - pneumoniamnist: F1 = 0.8035, Recall = 0.8780
  - retinamnist: F1 = 0.2305, Recall = 0.2670
  - breastmnist: F1 = 0.4708, Recall = 0.5238
  - bloodmnist: F1 = 0.7644, Recall = 0.7618
  - tissuemnist: F1 = 0.5098, Recall = 0.5072
  - organamnist: F1 = 0.9819, Recall = 0.9814
  - organcmnist: F1 = 0.9143, Recall = 0.9173
  - organsmnist: F1 = 0.7233, Recall = 0.7386
Early stopping counter: 2/7

Epoch 4/30


  with autocast(enabled=self.config['use_amp']):
Training: 100%|██████████| 859/859 [16:23<00:00,  1.15s/it, loss=0.728]
Validating: 100%|██████████| 116/116 [01:19<00:00,  1.46it/s]


Train Loss: 0.7275 | Train F1 (Harmonic): 0.5118
Val Loss:   0.8108 | Val Metric (recall):   0.6341
  - pathmnist: F1 = 0.8299, Recall = 0.8323
  - dermamnist: F1 = 0.2374, Recall = 0.2400
  - octmnist: F1 = 0.7780, Recall = 0.7662
  - pneumoniamnist: F1 = 0.9272, Recall = 0.9108
  - retinamnist: F1 = 0.1903, Recall = 0.2264
  - breastmnist: F1 = 0.6518, Recall = 0.6341
  - bloodmnist: F1 = 0.8549, Recall = 0.8517
  - tissuemnist: F1 = 0.5258, Recall = 0.5175
  - organamnist: F1 = 0.9772, Recall = 0.9772
  - organcmnist: F1 = 0.9510, Recall = 0.9523
  - organsmnist: F1 = 0.7692, Recall = 0.7747
Saved best model.

Epoch 5/30


  with autocast(enabled=self.config['use_amp']):
Training: 100%|██████████| 859/859 [16:26<00:00,  1.15s/it, loss=0.688]
Validating: 100%|██████████| 116/116 [01:19<00:00,  1.46it/s]


Train Loss: 0.6878 | Train F1 (Harmonic): 0.5310
Val Loss:   0.8034 | Val Metric (recall):   0.5238
  - pathmnist: F1 = 0.8300, Recall = 0.8301
  - dermamnist: F1 = 0.2922, Recall = 0.2783
  - octmnist: F1 = 0.7789, Recall = 0.7553
  - pneumoniamnist: F1 = 0.8988, Recall = 0.9362
  - retinamnist: F1 = 0.1233, Recall = 0.1963
  - breastmnist: F1 = 0.4708, Recall = 0.5238
  - bloodmnist: F1 = 0.8113, Recall = 0.8007
  - tissuemnist: F1 = 0.5292, Recall = 0.5258
  - organamnist: F1 = 0.9856, Recall = 0.9840
  - organcmnist: F1 = 0.9712, Recall = 0.9742
  - organsmnist: F1 = 0.7944, Recall = 0.8053
Early stopping counter: 1/7

Epoch 6/30


  with autocast(enabled=self.config['use_amp']):
Training: 100%|██████████| 859/859 [16:18<00:00,  1.14s/it, loss=0.67]
Validating: 100%|██████████| 116/116 [01:19<00:00,  1.47it/s]


Train Loss: 0.6703 | Train F1 (Harmonic): 0.5558
Val Loss:   0.9778 | Val Metric (recall):   0.6429
  - pathmnist: F1 = 0.8294, Recall = 0.8340
  - dermamnist: F1 = 0.3122, Recall = 0.2725
  - octmnist: F1 = 0.6647, Recall = 0.6389
  - pneumoniamnist: F1 = 0.9395, Recall = 0.9534
  - retinamnist: F1 = 0.1241, Recall = 0.2000
  - breastmnist: F1 = 0.6641, Recall = 0.6429
  - bloodmnist: F1 = 0.8053, Recall = 0.8145
  - tissuemnist: F1 = 0.4603, Recall = 0.4557
  - organamnist: F1 = 0.9698, Recall = 0.9736
  - organcmnist: F1 = 0.9317, Recall = 0.9372
  - organsmnist: F1 = 0.7804, Recall = 0.8104
Saved best model.

Epoch 7/30


  with autocast(enabled=self.config['use_amp']):
Training: 100%|██████████| 859/859 [16:20<00:00,  1.14s/it, loss=0.664]
Validating: 100%|██████████| 116/116 [01:19<00:00,  1.46it/s]


Train Loss: 0.6644 | Train F1 (Harmonic): 0.5510
Val Loss:   0.7732 | Val Metric (recall):   0.7569
  - pathmnist: F1 = 0.8453, Recall = 0.8453
  - dermamnist: F1 = 0.2878, Recall = 0.2896
  - octmnist: F1 = 0.7631, Recall = 0.7336
  - pneumoniamnist: F1 = 0.9389, Recall = 0.9485
  - retinamnist: F1 = 0.2425, Recall = 0.2673
  - breastmnist: F1 = 0.7647, Recall = 0.7569
  - bloodmnist: F1 = 0.8491, Recall = 0.8436
  - tissuemnist: F1 = 0.5306, Recall = 0.5240
  - organamnist: F1 = 0.9846, Recall = 0.9851
  - organcmnist: F1 = 0.9670, Recall = 0.9661
  - organsmnist: F1 = 0.8278, Recall = 0.8228
Saved best model.

Epoch 8/30


  with autocast(enabled=self.config['use_amp']):
Training: 100%|██████████| 859/859 [16:51<00:00,  1.18s/it, loss=0.654]
Validating: 100%|██████████| 116/116 [01:19<00:00,  1.45it/s]


Train Loss: 0.6543 | Train F1 (Harmonic): 0.5461
Val Loss:   0.7842 | Val Metric (recall):   0.6817
  - pathmnist: F1 = 0.8500, Recall = 0.8461
  - dermamnist: F1 = 0.2886, Recall = 0.2794
  - octmnist: F1 = 0.7689, Recall = 0.7458
  - pneumoniamnist: F1 = 0.9073, Recall = 0.9413
  - retinamnist: F1 = 0.2093, Recall = 0.2422
  - breastmnist: F1 = 0.7111, Recall = 0.6817
  - bloodmnist: F1 = 0.8597, Recall = 0.8718
  - tissuemnist: F1 = 0.5171, Recall = 0.5313
  - organamnist: F1 = 0.9777, Recall = 0.9762
  - organcmnist: F1 = 0.9550, Recall = 0.9562
  - organsmnist: F1 = 0.7929, Recall = 0.8206
Early stopping counter: 1/7

Epoch 9/30


  with autocast(enabled=self.config['use_amp']):
Training: 100%|██████████| 859/859 [16:48<00:00,  1.17s/it, loss=0.639]
Validating: 100%|██████████| 116/116 [01:20<00:00,  1.45it/s]


Train Loss: 0.6395 | Train F1 (Harmonic): 0.5574
Val Loss:   0.8288 | Val Metric (recall):   0.7419
  - pathmnist: F1 = 0.8709, Recall = 0.8681
  - dermamnist: F1 = 0.2553, Recall = 0.2346
  - octmnist: F1 = 0.7307, Recall = 0.7176
  - pneumoniamnist: F1 = 0.9321, Recall = 0.9447
  - retinamnist: F1 = 0.2387, Recall = 0.2580
  - breastmnist: F1 = 0.7565, Recall = 0.7419
  - bloodmnist: F1 = 0.8800, Recall = 0.8686
  - tissuemnist: F1 = 0.5165, Recall = 0.5077
  - organamnist: F1 = 0.9905, Recall = 0.9899
  - organcmnist: F1 = 0.9670, Recall = 0.9689
  - organsmnist: F1 = 0.7980, Recall = 0.8192
Early stopping counter: 2/7

Epoch 10/30


  with autocast(enabled=self.config['use_amp']):
Training: 100%|██████████| 859/859 [16:35<00:00,  1.16s/it, loss=0.627]
Validating: 100%|██████████| 116/116 [01:19<00:00,  1.46it/s]


Train Loss: 0.6269 | Train F1 (Harmonic): 0.5682
Val Loss:   0.7869 | Val Metric (recall):   0.6579
  - pathmnist: F1 = 0.8235, Recall = 0.8233
  - dermamnist: F1 = 0.2970, Recall = 0.3011
  - octmnist: F1 = 0.8158, Recall = 0.7933
  - pneumoniamnist: F1 = 0.9042, Recall = 0.9460
  - retinamnist: F1 = 0.3056, Recall = 0.3521
  - breastmnist: F1 = 0.6823, Recall = 0.6579
  - bloodmnist: F1 = 0.8695, Recall = 0.8838
  - tissuemnist: F1 = 0.5110, Recall = 0.5024
  - organamnist: F1 = 0.9902, Recall = 0.9900
  - organcmnist: F1 = 0.9696, Recall = 0.9682
  - organsmnist: F1 = 0.8214, Recall = 0.8266
Early stopping counter: 3/7

Epoch 11/30


  with autocast(enabled=self.config['use_amp']):
Training: 100%|██████████| 859/859 [16:26<00:00,  1.15s/it, loss=0.592]
Validating: 100%|██████████| 116/116 [01:19<00:00,  1.46it/s]


Train Loss: 0.5918 | Train F1 (Harmonic): 0.6002
Val Loss:   0.7161 | Val Metric (recall):   0.6905
  - pathmnist: F1 = 0.8821, Recall = 0.8849
  - dermamnist: F1 = 0.3387, Recall = 0.3168
  - octmnist: F1 = 0.8317, Recall = 0.8093
  - pneumoniamnist: F1 = 0.9250, Recall = 0.9540
  - retinamnist: F1 = 0.2252, Recall = 0.3144
  - breastmnist: F1 = 0.7247, Recall = 0.6905
  - bloodmnist: F1 = 0.8931, Recall = 0.8982
  - tissuemnist: F1 = 0.5650, Recall = 0.5495
  - organamnist: F1 = 0.9842, Recall = 0.9843
  - organcmnist: F1 = 0.9685, Recall = 0.9697
  - organsmnist: F1 = 0.8099, Recall = 0.8351
Early stopping counter: 4/7

Epoch 12/30


  with autocast(enabled=self.config['use_amp']):
Training: 100%|██████████| 859/859 [16:24<00:00,  1.15s/it, loss=0.597]
Validating: 100%|██████████| 116/116 [01:19<00:00,  1.47it/s]


Train Loss: 0.5972 | Train F1 (Harmonic): 0.6062
Val Loss:   0.7086 | Val Metric (recall):   0.7293
  - pathmnist: F1 = 0.8510, Recall = 0.8499
  - dermamnist: F1 = 0.2997, Recall = 0.2678
  - octmnist: F1 = 0.8106, Recall = 0.7849
  - pneumoniamnist: F1 = 0.9247, Recall = 0.9516
  - retinamnist: F1 = 0.2611, Recall = 0.2831
  - breastmnist: F1 = 0.7641, Recall = 0.7293
  - bloodmnist: F1 = 0.8489, Recall = 0.8286
  - tissuemnist: F1 = 0.5824, Recall = 0.5870
  - organamnist: F1 = 0.9854, Recall = 0.9862
  - organcmnist: F1 = 0.9762, Recall = 0.9746
  - organsmnist: F1 = 0.7911, Recall = 0.8168
Early stopping counter: 5/7

Epoch 13/30


  with autocast(enabled=self.config['use_amp']):
Training: 100%|██████████| 859/859 [16:22<00:00,  1.14s/it, loss=0.577]
Validating: 100%|██████████| 116/116 [01:19<00:00,  1.46it/s]


Train Loss: 0.5769 | Train F1 (Harmonic): 0.6027
Val Loss:   0.7019 | Val Metric (recall):   0.7444
  - pathmnist: F1 = 0.8824, Recall = 0.8845
  - dermamnist: F1 = 0.3665, Recall = 0.3236
  - octmnist: F1 = 0.8248, Recall = 0.8434
  - pneumoniamnist: F1 = 0.9625, Recall = 0.9614
  - retinamnist: F1 = 0.3398, Recall = 0.3481
  - breastmnist: F1 = 0.7743, Recall = 0.7444
  - bloodmnist: F1 = 0.9081, Recall = 0.9052
  - tissuemnist: F1 = 0.5275, Recall = 0.5384
  - organamnist: F1 = 0.9860, Recall = 0.9867
  - organcmnist: F1 = 0.9716, Recall = 0.9735
  - organsmnist: F1 = 0.8399, Recall = 0.8477
Early stopping counter: 6/7

Epoch 14/30


  with autocast(enabled=self.config['use_amp']):
Training: 100%|██████████| 859/859 [16:24<00:00,  1.15s/it, loss=0.562]
Validating: 100%|██████████| 116/116 [01:19<00:00,  1.46it/s]


Train Loss: 0.5621 | Train F1 (Harmonic): 0.6207
Val Loss:   0.6635 | Val Metric (recall):   0.7682
  - pathmnist: F1 = 0.8839, Recall = 0.8838
  - dermamnist: F1 = 0.3542, Recall = 0.3433
  - octmnist: F1 = 0.8443, Recall = 0.8351
  - pneumoniamnist: F1 = 0.9515, Recall = 0.9646
  - retinamnist: F1 = 0.2771, Recall = 0.2932
  - breastmnist: F1 = 0.7974, Recall = 0.7682
  - bloodmnist: F1 = 0.9098, Recall = 0.9164
  - tissuemnist: F1 = 0.5773, Recall = 0.5763
  - organamnist: F1 = 0.9869, Recall = 0.9856
  - organcmnist: F1 = 0.9792, Recall = 0.9808
  - organsmnist: F1 = 0.8031, Recall = 0.8281
Saved best model.

Epoch 15/30


  with autocast(enabled=self.config['use_amp']):
Training: 100%|██████████| 859/859 [16:22<00:00,  1.14s/it, loss=0.544]
Validating: 100%|██████████| 116/116 [01:18<00:00,  1.47it/s]


Train Loss: 0.5443 | Train F1 (Harmonic): 0.6231
Val Loss:   0.7663 | Val Metric (recall):   0.8158
  - pathmnist: F1 = 0.8828, Recall = 0.8835
  - dermamnist: F1 = 0.3283, Recall = 0.2979
  - octmnist: F1 = 0.8209, Recall = 0.7876
  - pneumoniamnist: F1 = 0.8496, Recall = 0.9113
  - retinamnist: F1 = 0.3198, Recall = 0.3230
  - breastmnist: F1 = 0.8406, Recall = 0.8158
  - bloodmnist: F1 = 0.9125, Recall = 0.9094
  - tissuemnist: F1 = 0.5240, Recall = 0.5198
  - organamnist: F1 = 0.9889, Recall = 0.9897
  - organcmnist: F1 = 0.9698, Recall = 0.9719
  - organsmnist: F1 = 0.8409, Recall = 0.8455
Saved best model.

Epoch 16/30


  with autocast(enabled=self.config['use_amp']):
Training: 100%|██████████| 859/859 [16:30<00:00,  1.15s/it, loss=0.504]
Validating: 100%|██████████| 116/116 [01:19<00:00,  1.46it/s]


Train Loss: 0.5038 | Train F1 (Harmonic): 0.6544
Val Loss:   0.6450 | Val Metric (recall):   0.7920
  - pathmnist: F1 = 0.9029, Recall = 0.9012
  - dermamnist: F1 = 0.3785, Recall = 0.3610
  - octmnist: F1 = 0.8591, Recall = 0.8341
  - pneumoniamnist: F1 = 0.9612, Recall = 0.9746
  - retinamnist: F1 = 0.3169, Recall = 0.3308
  - breastmnist: F1 = 0.8194, Recall = 0.7920
  - bloodmnist: F1 = 0.9239, Recall = 0.9241
  - tissuemnist: F1 = 0.6004, Recall = 0.5923
  - organamnist: F1 = 0.9853, Recall = 0.9848
  - organcmnist: F1 = 0.9809, Recall = 0.9813
  - organsmnist: F1 = 0.8342, Recall = 0.8586
Early stopping counter: 1/7

Epoch 17/30


  with autocast(enabled=self.config['use_amp']):
Training: 100%|██████████| 859/859 [16:27<00:00,  1.15s/it, loss=0.477]
Validating: 100%|██████████| 116/116 [01:19<00:00,  1.46it/s]


Train Loss: 0.4773 | Train F1 (Harmonic): 0.6709
Val Loss:   0.6713 | Val Metric (recall):   0.6579
  - pathmnist: F1 = 0.9159, Recall = 0.9168
  - dermamnist: F1 = 0.3429, Recall = 0.3321
  - octmnist: F1 = 0.8572, Recall = 0.8336
  - pneumoniamnist: F1 = 0.9682, Recall = 0.9785
  - retinamnist: F1 = 0.3154, Recall = 0.3142
  - breastmnist: F1 = 0.6823, Recall = 0.6579
  - bloodmnist: F1 = 0.9271, Recall = 0.9240
  - tissuemnist: F1 = 0.5667, Recall = 0.5573
  - organamnist: F1 = 0.9878, Recall = 0.9873
  - organcmnist: F1 = 0.9838, Recall = 0.9836
  - organsmnist: F1 = 0.8503, Recall = 0.8587
Early stopping counter: 2/7

Epoch 18/30


  with autocast(enabled=self.config['use_amp']):
Training: 100%|██████████| 859/859 [16:46<00:00,  1.17s/it, loss=0.465]
Validating: 100%|██████████| 116/116 [01:19<00:00,  1.46it/s]


Train Loss: 0.4647 | Train F1 (Harmonic): 0.7025
Val Loss:   0.6295 | Val Metric (recall):   0.8221
  - pathmnist: F1 = 0.9058, Recall = 0.9056
  - dermamnist: F1 = 0.4125, Recall = 0.3813
  - octmnist: F1 = 0.8569, Recall = 0.8345
  - pneumoniamnist: F1 = 0.9632, Recall = 0.9710
  - retinamnist: F1 = 0.3106, Recall = 0.3165
  - breastmnist: F1 = 0.8319, Recall = 0.8221
  - bloodmnist: F1 = 0.9168, Recall = 0.9205
  - tissuemnist: F1 = 0.5941, Recall = 0.5926
  - organamnist: F1 = 0.9859, Recall = 0.9856
  - organcmnist: F1 = 0.9787, Recall = 0.9783
  - organsmnist: F1 = 0.8251, Recall = 0.8521
Saved best model.

Epoch 19/30


  with autocast(enabled=self.config['use_amp']):
Training: 100%|██████████| 859/859 [16:27<00:00,  1.15s/it, loss=0.456]
Validating: 100%|██████████| 116/116 [01:19<00:00,  1.46it/s]


Train Loss: 0.4556 | Train F1 (Harmonic): 0.7043
Val Loss:   0.6199 | Val Metric (recall):   0.7682
  - pathmnist: F1 = 0.9003, Recall = 0.9017
  - dermamnist: F1 = 0.4150, Recall = 0.3942
  - octmnist: F1 = 0.8637, Recall = 0.8476
  - pneumoniamnist: F1 = 0.9209, Recall = 0.9539
  - retinamnist: F1 = 0.3016, Recall = 0.3145
  - breastmnist: F1 = 0.7974, Recall = 0.7682
  - bloodmnist: F1 = 0.9218, Recall = 0.9215
  - tissuemnist: F1 = 0.6010, Recall = 0.5987
  - organamnist: F1 = 0.9913, Recall = 0.9914
  - organcmnist: F1 = 0.9827, Recall = 0.9853
  - organsmnist: F1 = 0.8373, Recall = 0.8469
Early stopping counter: 1/7

Epoch 20/30


  with autocast(enabled=self.config['use_amp']):
Training: 100%|██████████| 859/859 [16:23<00:00,  1.15s/it, loss=0.462]
Validating: 100%|██████████| 116/116 [01:19<00:00,  1.47it/s]


Train Loss: 0.4619 | Train F1 (Harmonic): 0.7055
Val Loss:   0.6324 | Val Metric (recall):   0.7531
  - pathmnist: F1 = 0.9116, Recall = 0.9113
  - dermamnist: F1 = 0.3424, Recall = 0.3002
  - octmnist: F1 = 0.8591, Recall = 0.8314
  - pneumoniamnist: F1 = 0.9519, Recall = 0.9695
  - retinamnist: F1 = 0.3391, Recall = 0.3471
  - breastmnist: F1 = 0.7886, Recall = 0.7531
  - bloodmnist: F1 = 0.9265, Recall = 0.9275
  - tissuemnist: F1 = 0.5950, Recall = 0.5963
  - organamnist: F1 = 0.9900, Recall = 0.9898
  - organcmnist: F1 = 0.9820, Recall = 0.9854
  - organsmnist: F1 = 0.8439, Recall = 0.8485
Early stopping counter: 2/7

Epoch 21/30


  with autocast(enabled=self.config['use_amp']):
Training: 100%|██████████| 859/859 [16:42<00:00,  1.17s/it, loss=0.46]
Validating: 100%|██████████| 116/116 [01:19<00:00,  1.46it/s]


Train Loss: 0.4597 | Train F1 (Harmonic): 0.6917
Val Loss:   0.6368 | Val Metric (recall):   0.8008
  - pathmnist: F1 = 0.9084, Recall = 0.9062
  - dermamnist: F1 = 0.3695, Recall = 0.3370
  - octmnist: F1 = 0.8619, Recall = 0.8602
  - pneumoniamnist: F1 = 0.9585, Recall = 0.9685
  - retinamnist: F1 = 0.3386, Recall = 0.3550
  - breastmnist: F1 = 0.8342, Recall = 0.8008
  - bloodmnist: F1 = 0.9224, Recall = 0.9349
  - tissuemnist: F1 = 0.5918, Recall = 0.5827
  - organamnist: F1 = 0.9903, Recall = 0.9897
  - organcmnist: F1 = 0.9843, Recall = 0.9866
  - organsmnist: F1 = 0.8443, Recall = 0.8457
Early stopping counter: 3/7

Epoch 22/30


  with autocast(enabled=self.config['use_amp']):
Training: 100%|██████████| 859/859 [16:35<00:00,  1.16s/it, loss=0.428]
Validating: 100%|██████████| 116/116 [01:19<00:00,  1.46it/s]


Train Loss: 0.4278 | Train F1 (Harmonic): 0.7194
Val Loss:   0.6152 | Val Metric (recall):   0.8095
  - pathmnist: F1 = 0.9280, Recall = 0.9272
  - dermamnist: F1 = 0.4147, Recall = 0.3845
  - octmnist: F1 = 0.8717, Recall = 0.8510
  - pneumoniamnist: F1 = 0.9728, Recall = 0.9762
  - retinamnist: F1 = 0.3898, Recall = 0.4097
  - breastmnist: F1 = 0.8496, Recall = 0.8095
  - bloodmnist: F1 = 0.9367, Recall = 0.9406
  - tissuemnist: F1 = 0.6136, Recall = 0.6086
  - organamnist: F1 = 0.9907, Recall = 0.9905
  - organcmnist: F1 = 0.9898, Recall = 0.9906
  - organsmnist: F1 = 0.8464, Recall = 0.8521
Early stopping counter: 4/7

Epoch 23/30


  with autocast(enabled=self.config['use_amp']):
Training: 100%|██████████| 859/859 [16:35<00:00,  1.16s/it, loss=0.39]
Validating: 100%|██████████| 116/116 [01:19<00:00,  1.46it/s]


Train Loss: 0.3898 | Train F1 (Harmonic): 0.7492
Val Loss:   0.5888 | Val Metric (recall):   0.8246
  - pathmnist: F1 = 0.9295, Recall = 0.9296
  - dermamnist: F1 = 0.4298, Recall = 0.4030
  - octmnist: F1 = 0.8837, Recall = 0.8658
  - pneumoniamnist: F1 = 0.9540, Recall = 0.9683
  - retinamnist: F1 = 0.3843, Recall = 0.3907
  - breastmnist: F1 = 0.8556, Recall = 0.8246
  - bloodmnist: F1 = 0.9394, Recall = 0.9440
  - tissuemnist: F1 = 0.6263, Recall = 0.6236
  - organamnist: F1 = 0.9916, Recall = 0.9909
  - organcmnist: F1 = 0.9881, Recall = 0.9889
  - organsmnist: F1 = 0.8491, Recall = 0.8605
Saved best model.

Epoch 24/30


  with autocast(enabled=self.config['use_amp']):
Training: 100%|██████████| 859/859 [16:39<00:00,  1.16s/it, loss=0.374]
Validating: 100%|██████████| 116/116 [01:19<00:00,  1.45it/s]


Train Loss: 0.3738 | Train F1 (Harmonic): 0.7575
Val Loss:   0.6311 | Val Metric (recall):   0.8070
  - pathmnist: F1 = 0.9285, Recall = 0.9283
  - dermamnist: F1 = 0.4015, Recall = 0.3625
  - octmnist: F1 = 0.8823, Recall = 0.8696
  - pneumoniamnist: F1 = 0.9752, Recall = 0.9775
  - retinamnist: F1 = 0.3459, Recall = 0.3726
  - breastmnist: F1 = 0.8260, Recall = 0.8070
  - bloodmnist: F1 = 0.9385, Recall = 0.9349
  - tissuemnist: F1 = 0.6091, Recall = 0.5979
  - organamnist: F1 = 0.9923, Recall = 0.9919
  - organcmnist: F1 = 0.9857, Recall = 0.9875
  - organsmnist: F1 = 0.8485, Recall = 0.8532
Early stopping counter: 1/7

Epoch 25/30


  with autocast(enabled=self.config['use_amp']):
Training: 100%|██████████| 859/859 [16:34<00:00,  1.16s/it, loss=0.355]
Validating: 100%|██████████| 116/116 [01:19<00:00,  1.46it/s]


Train Loss: 0.3547 | Train F1 (Harmonic): 0.7635
Val Loss:   0.6053 | Val Metric (recall):   0.8246
  - pathmnist: F1 = 0.9329, Recall = 0.9324
  - dermamnist: F1 = 0.4184, Recall = 0.3881
  - octmnist: F1 = 0.8863, Recall = 0.8731
  - pneumoniamnist: F1 = 0.9656, Recall = 0.9723
  - retinamnist: F1 = 0.3457, Recall = 0.3697
  - breastmnist: F1 = 0.8556, Recall = 0.8246
  - bloodmnist: F1 = 0.9394, Recall = 0.9398
  - tissuemnist: F1 = 0.6217, Recall = 0.6157
  - organamnist: F1 = 0.9933, Recall = 0.9931
  - organcmnist: F1 = 0.9893, Recall = 0.9897
  - organsmnist: F1 = 0.8534, Recall = 0.8538
Early stopping counter: 2/7

Epoch 26/30


  with autocast(enabled=self.config['use_amp']):
Training: 100%|██████████| 859/859 [17:15<00:00,  1.21s/it, loss=0.347]
Validating: 100%|██████████| 116/116 [01:20<00:00,  1.44it/s]


Train Loss: 0.3472 | Train F1 (Harmonic): 0.7801
Val Loss:   0.6334 | Val Metric (recall):   0.8396
  - pathmnist: F1 = 0.9307, Recall = 0.9308
  - dermamnist: F1 = 0.4328, Recall = 0.4036
  - octmnist: F1 = 0.8883, Recall = 0.8740
  - pneumoniamnist: F1 = 0.9517, Recall = 0.9670
  - retinamnist: F1 = 0.3622, Recall = 0.3584
  - breastmnist: F1 = 0.8608, Recall = 0.8396
  - bloodmnist: F1 = 0.9408, Recall = 0.9445
  - tissuemnist: F1 = 0.6143, Recall = 0.6031
  - organamnist: F1 = 0.9929, Recall = 0.9926
  - organcmnist: F1 = 0.9896, Recall = 0.9892
  - organsmnist: F1 = 0.8449, Recall = 0.8451
Saved best model.

Epoch 27/30


  with autocast(enabled=self.config['use_amp']):
Training: 100%|██████████| 859/859 [17:45<00:00,  1.24s/it, loss=0.335]
Validating: 100%|██████████| 116/116 [01:21<00:00,  1.42it/s]


Train Loss: 0.3345 | Train F1 (Harmonic): 0.7768
Val Loss:   0.6165 | Val Metric (recall):   0.8008
  - pathmnist: F1 = 0.9316, Recall = 0.9316
  - dermamnist: F1 = 0.4267, Recall = 0.3996
  - octmnist: F1 = 0.8929, Recall = 0.8822
  - pneumoniamnist: F1 = 0.9610, Recall = 0.9722
  - retinamnist: F1 = 0.3613, Recall = 0.3612
  - breastmnist: F1 = 0.8342, Recall = 0.8008
  - bloodmnist: F1 = 0.9418, Recall = 0.9440
  - tissuemnist: F1 = 0.6212, Recall = 0.6133
  - organamnist: F1 = 0.9938, Recall = 0.9935
  - organcmnist: F1 = 0.9881, Recall = 0.9893
  - organsmnist: F1 = 0.8551, Recall = 0.8602
Early stopping counter: 1/7

Epoch 28/30


  with autocast(enabled=self.config['use_amp']):
Training: 100%|██████████| 859/859 [17:59<00:00,  1.26s/it, loss=0.322]
Validating: 100%|██████████| 116/116 [01:21<00:00,  1.42it/s]


Train Loss: 0.3221 | Train F1 (Harmonic): 0.7949
Val Loss:   0.6159 | Val Metric (recall):   0.8333
  - pathmnist: F1 = 0.9345, Recall = 0.9350
  - dermamnist: F1 = 0.4278, Recall = 0.3990
  - octmnist: F1 = 0.8956, Recall = 0.8804
  - pneumoniamnist: F1 = 0.9610, Recall = 0.9722
  - retinamnist: F1 = 0.3545, Recall = 0.3512
  - breastmnist: F1 = 0.8711, Recall = 0.8333
  - bloodmnist: F1 = 0.9425, Recall = 0.9443
  - tissuemnist: F1 = 0.6269, Recall = 0.6202
  - organamnist: F1 = 0.9928, Recall = 0.9924
  - organcmnist: F1 = 0.9886, Recall = 0.9889
  - organsmnist: F1 = 0.8529, Recall = 0.8558
Early stopping counter: 2/7

Epoch 29/30


  with autocast(enabled=self.config['use_amp']):
Training: 100%|██████████| 859/859 [17:59<00:00,  1.26s/it, loss=0.318]
Validating: 100%|██████████| 116/116 [01:22<00:00,  1.41it/s]


Train Loss: 0.3184 | Train F1 (Harmonic): 0.7815
Val Loss:   0.6158 | Val Metric (recall):   0.8095
  - pathmnist: F1 = 0.9335, Recall = 0.9337
  - dermamnist: F1 = 0.4531, Recall = 0.4252
  - octmnist: F1 = 0.8981, Recall = 0.8849
  - pneumoniamnist: F1 = 0.9634, Recall = 0.9735
  - retinamnist: F1 = 0.3734, Recall = 0.3684
  - breastmnist: F1 = 0.8496, Recall = 0.8095
  - bloodmnist: F1 = 0.9417, Recall = 0.9439
  - tissuemnist: F1 = 0.6260, Recall = 0.6193
  - organamnist: F1 = 0.9930, Recall = 0.9926
  - organcmnist: F1 = 0.9898, Recall = 0.9897
  - organsmnist: F1 = 0.8537, Recall = 0.8552
Early stopping counter: 3/7

Epoch 30/30


  with autocast(enabled=self.config['use_amp']):
Training: 100%|██████████| 859/859 [18:03<00:00,  1.26s/it, loss=0.316]
Validating: 100%|██████████| 116/116 [01:21<00:00,  1.42it/s]
  checkpoint = torch.load(checkpoint_path, map_location=device)


Train Loss: 0.3164 | Train F1 (Harmonic): 0.7905
Val Loss:   0.6168 | Val Metric (recall):   0.8333
  - pathmnist: F1 = 0.9332, Recall = 0.9334
  - dermamnist: F1 = 0.4524, Recall = 0.4235
  - octmnist: F1 = 0.8985, Recall = 0.8855
  - pneumoniamnist: F1 = 0.9634, Recall = 0.9735
  - retinamnist: F1 = 0.3660, Recall = 0.3612
  - breastmnist: F1 = 0.8711, Recall = 0.8333
  - bloodmnist: F1 = 0.9418, Recall = 0.9443
  - tissuemnist: F1 = 0.6255, Recall = 0.6198
  - organamnist: F1 = 0.9928, Recall = 0.9924
  - organcmnist: F1 = 0.9888, Recall = 0.9887
  - organsmnist: F1 = 0.8540, Recall = 0.8554
Early stopping counter: 4/7


  with torch.no_grad(), autocast(enabled=config['use_amp']):


Loaded best model from epoch 25 with metric: 0.8396


Generating predictions: 100%|██████████| 190/190 [00:41<00:00,  4.58it/s]



Submission saved with 96941 total predictions
   id  label  task_name  id_image_in_task
0   0      8  pathmnist                 0
1   1      4  pathmnist                 1
2   2      4  pathmnist                 2
3   3      3  pathmnist                 3
4   4      4  pathmnist                 4
