このNotebookは、モデルを学習させるために作られたものである。

# 1. Configuration

In [1]:
# Run Configuration
RUN_NAME = "efficientnetv2-s-meta"
SAVE_DIR = "../results/" + RUN_NAME
TEST_RUN = False
SEED = 42
DEVICE = "cuda"

# Input Data Configuration
IMAGE_SIZE = 256
NUM_SLICES = 32
BATCH_SIZE = 5
LABEL_NAMES = [
    # 13 classes
    'Left Infraclinoid Internal Carotid Artery',
    'Right Infraclinoid Internal Carotid Artery',
    'Left Supraclinoid Internal Carotid Artery',
    'Right Supraclinoid Internal Carotid Artery',
    'Left Middle Cerebral Artery',
    'Right Middle Cerebral Artery',
    'Anterior Communicating Artery',
    'Left Anterior Cerebral Artery',
    'Right Anterior Cerebral Artery',
    'Left Posterior Communicating Artery',
    'Right Posterior Communicating Artery',
    'Basilar Tip',
    'Other Posterior Circulation',
    # 'Aneurysm Present',
]
NUM_LABELS = len(LABEL_NAMES)

# Training Configuration
NUM_EPOCHS = 25
PATIENCE = 5


In [2]:
RUN_NAME = RUN_NAME + f'-{IMAGE_SIZE}-{NUM_SLICES}'

# Weights & Biases Configuration
if TEST_RUN:
    USE_WANDB = False
else:
    USE_WANDB = True
WANDB_INIT = {
    'project': 'RSNA-IAD',
    'group': 'Image Classification',
    'job_type': 'training_model',
    'save_code': True,
}
ARTIFACT = {
    'name': RUN_NAME,
    'type': 'model, optimizer, scheduler',
}


In [3]:
class Configuration:
    
    # Run
    run_name = RUN_NAME
    save_dir = SAVE_DIR
    test_run = TEST_RUN
    seed = SEED
    device = DEVICE
    
    # Input Data
    image_size = IMAGE_SIZE
    num_slices = NUM_SLICES
    batch_size = BATCH_SIZE
    label_names = LABEL_NAMES
    num_labels = NUM_LABELS
    
    # Training
    num_epochs = NUM_EPOCHS
    patience = PATIENCE
    
    # Weights & Biases
    use_wandb = USE_WANDB
    wandb_init = WANDB_INIT
    artifact = ARTIFACT

CFG = Configuration


# 2. Import

In [4]:
import os
import random
import warnings
warnings.filterwarnings('ignore')
from pathlib import Path
from collections import defaultdict
from typing import List, Dict, Optional, Tuple
from IPython.display import display
import datetime
import time
from tqdm.notebook import tqdm

# Data handling
import numpy as np
import polars as pl
import pandas as pd
from sklearn.model_selection import StratifiedShuffleSplit, StratifiedKFold
from skmultilearn.model_selection import iterative_train_test_split

# Medical imaging
import pydicom
import cv2

# Machine Lerning 
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.amp import autocast
import torchvision
import timm

# Transformations
import albumentations as A
from albumentations.pytorch import ToTensorV2

# Visualization
import matplotlib.pyplot as plt
import seaborn as sns
import PIL.Image as Image

# Experiment Management
import wandb

# Competition API
# import kaggle_evaluation.rsna_inference_server


In [5]:
# datetime for unique checkpoint filenames
date_time = datetime.datetime.now()
date_time = date_time.strftime('%Y-%m-%d_%H-%M-%S')

# Set device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")


Using device: cuda


In [6]:
def set_random_seeds(seed=CFG.seed, deterministic=False):
    """
    Set random seed.
    
    Args:
        seed (int): Seed to be used.
        deterministic (bool): Whether to set the deterministic option for
            CUDNN backend, i.e., set `torch.backends.cudnn.deterministic`
            to True and `torch.backends.cudnn.benchmark` to False.
            Default: False.
    """
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    
    if deterministic:
        torch.backends.cudnn.benchmark = True

set_random_seeds(seed=CFG.seed, deterministic=True)


# 3. Weights & Biases

In [7]:
if CFG.use_wandb:
    os.environ['WANDB_NOTEBOOK_NAME'] = CFG.run_name
    wandb.login()
    run = wandb.init(**CFG.wandb_init)
    artifact = wandb.Artifact(**CFG.artifact)
    print(f"WANDB is enabled. Run name: {CFG.run_name}")
else:
    run = None
    artifact = None
    print("WANDB is disabled.")


[34m[1mwandb[0m: Currently logged in as: [33mataracsia[0m to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


WANDB is enabled. Run name: efficientnetv2-s-meta-256-32


In [8]:
def alert_by_wandb(title='', text=''):
    wandb.alert(title, text)


# 4. Model

In [9]:
class EfficientNetV2WithMetaModel(nn.Module):
    def __init__(self, model_name, pretrained=False,
                 num_classes=CFG.num_labels, drop_rate=0.3,
                 drop_path_rate=0.2):
        super().__init__()
        self.model_name = model_name
        
        if model_name == 'efficientnetv2':
            self.backbone = timm.create_model(
                'efficientnetv2_s',
                pretrained=pretrained,
                drop_rate=drop_rate,
                drop_path_rate=drop_path_rate,
                num_classes=0)
            
            # input layer modification: 3 channels -> CFG.num_slices channels
            self.backbone.conv_stem = nn.Conv2d(
                in_channels=CFG.num_slices,
                out_channels=24,
                kernel_size=3,
                stride=2,
                padding=1
            )
        else:
            raise ValueError(f"Model {model_name} is not supported.")
        
        self.meta_features = nn.Sequential(
            nn.Linear(2, 16),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(16, 32),
            nn.ReLU()
        )
        
        # According to "LB #1"
        self.classifier = nn.Sequential(
            nn.Linear(1280 + 32, 512),
            nn.BatchNorm1d(512),
            nn.ReLU(),
            nn.Dropout(drop_rate),
            nn.Linear(512, 256),
            nn.BatchNorm1d(256),
            nn.ReLU(),
            nn.Dropout(drop_rate),
            nn.Linear(256, num_classes)
        )
        
    def forward(self, images, meta): 
        image_features = self.backbone(images)
        meta_fieatures = self.meta_features(meta)
        x = torch.cat([image_features, meta_fieatures], dim=1)
        x = self.classifier(x)
        x = torch.nn.Sigmoid()(x)
        return x

model = EfficientNetV2WithMetaModel(
    model_name='efficientnetv2',
    pretrained=False
)
model


EfficientNetV2WithMetaModel(
  (backbone): EfficientNet(
    (conv_stem): Conv2d(32, 24, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
    (bn1): BatchNormAct2d(
      24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
      (drop): Identity()
      (act): SiLU(inplace=True)
    )
    (blocks): Sequential(
      (0): Sequential(
        (0): ConvBnAct(
          (conv): Conv2d(24, 24, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn1): BatchNormAct2d(
            24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
            (drop): Identity()
            (act): SiLU(inplace=True)
          )
          (aa): Identity()
          (drop_path): Identity()
        )
        (1): ConvBnAct(
          (conv): Conv2d(24, 24, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn1): BatchNormAct2d(
            24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
            (drop): Identity()
 

-> timm.createmodel(num_classes=0)とすると、最後のnn.Linear()がnn.Identity()になる。

In [10]:
model.to(device)

is_in_cuda_list = []

for name, parameter in model.named_parameters():
    # determination of cuda and its storage
    is_in_cuda_list.append(parameter.is_cuda)
    
if all(is_in_cuda_list):
    print('All parameters is in cuda')
        
else:
    print('One of the parameters is not in the cuda.')


All parameters is in cuda


In [11]:
# Optimizer
optimizer = torch.optim.AdamW(model.parameters())

# Loss Function
criterion = nn.BCEWithLogitsLoss()

# Schedulers
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
    optimizer,
    T_max=CFG.num_epochs,
    eta_min=1e-6
)


# 5. Dataset

In [12]:
# SeriesInstanceUID list
series_list = os.listdir(f'../series_npy/{CFG.image_size}')

# .npy path DataFrame
image_path_df = pd.read_csv(f'../npy_path/image_{CFG.image_size}_path_df.csv')

# Meta DataFrame
meta_df = pd.read_csv('../meta_data/meta.csv')

# Label DataFrame
label_df = pd.read_csv(f'../train.csv')
label_df = label_df[['SeriesInstanceUID'] + CFG.label_names]


In [13]:
meta_df.loc[meta_df['SeriesInstanceUID'] == '1.2.826.0.1.3680043.8.498.98697915765488213704603518081182644986']


Unnamed: 0,SeriesInstanceUID,modalisy,age,sex
4296,1.2.826.0.1.3680043.8.498.98697915765488213704...,MR,60,1


In [14]:
def minmax_scaling(images: torch.Tensor) -> torch.Tensor:
    if torch.max(images) > 1.0:
        images = images / 255
    return images


In [15]:
# for training
train_transform = A.Compose(
    [
        # # Elastic Transform <- あとで試したい
        # A.ElasticTransform( p=0.5),
        
        # Rotation
        A.Rotate(limit=(-3, 3), p=0.5, border_mode=cv2.BORDER_WRAP,  # cv2.BORDER_WRAP,
                 seed=CFG.seed
        ),
        
        # Normalization
        A.Normalize(normalization='min_max'),
        
        # ToTensor
        ToTensorV2(),
    ]
)

# for inference
inference_transform = A.Compose(
    [
        A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ToTensorV2()
    ]
)
    
# for TTA
tta_transform = A.Compose(
    [
        A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
            
        # Horizontal flip
        A.HorizontalFlip(p=1.0),
        A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        
        # Vertical flip
        A.VerticalFlip(p=1.0),
        A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        
        # 90 degree rotation
        A.RandomRotate90(p=1.0),
        A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        
        # ↓ Original
        # Sharpen
        A.Sharpen(alpha=(0, 1.0), p=1.0),
        A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        
        ToTensorV2(),
    ]
)


In [16]:
class BaseDataset(torch.utils.data.Dataset):
    '''
    Datasetの__getitem__()は、num_slicesの枚数分だけ画像を出力する。
    
    Arguments:
    - series_list: 画像のSeriesInstanceUIDのリスト
    - image_path_df: 画像のパスを含むDataFrame
    - meta_df: 患者のメタデータが入ったDataFrame
    - label_df: ラベルが入ったDataFrame
    - num_slices: 1つのシリーズから抽出するスライス数
    - transforms: 画像変換のためのAlbumentationsのComposeオブジェクト
    '''
    def __init__(self,
                 series_list: list,
                 image_path_df=image_path_df,
                 meta_df=meta_df,
                 label_df=label_df,
                 transforms=None
        ):
        self.series_list = series_list
        self.image_path_df = image_path_df
        self.meta_df = meta_df
        self.label_df = label_df
        self.transforms = transforms
        self.num_slices = CFG.num_slices

    def __len__(self):
        return len(self.series_list)

    def __getitem__(self, index):
        # Index to SeriesInstanceUID
        series_id = self.series_list[index]
        
        # SeriesInstanceUID to Image Path
        image_path_df = self.image_path_df[
            self.image_path_df['series_id'] == series_id
        ].reset_index(drop=True)
        
        # Load Images
        indices = np.linspace(0,
                              len(image_path_df) - 1,
                              self.num_slices).astype(np.int32)
        # Stack images to (H, W, CFG.num_slices)
        images = []
        for i in indices:
            image_path = image_path_df.loc[i, 'npy_path']
            image = np.load(image_path).astype(np.uint8)
            images.append(image)
        images = np.stack(images, axis=-1)
        
        # Transform
        if self.transforms:
            # ToTensorV2はnumpy.ndarrayをtorch.Tensorに変換する
            augmented = self.transforms(image=images)
            images = augmented['image']
        else:
            images = torch.tensor(images, dtype=torch.float32)
            images = torch.permute(images, (2, 0, 1))
            # Normlization
            if torch.max(images) > 1.0:
                images = images / 255
                
        # Meta data
        meta = self.meta_df.loc[
            self.meta_df['SeriesInstanceUID'] == series_id, ['age', 'sex']
        ]
        age = min(meta['age'].values[0], 100)
        age = age / 100
        sex = meta['sex'].values[0]
        meta = torch.tensor([age, sex], dtype=torch.float32)

        # Labels
        labels = self.label_df.loc[
            self.label_df['SeriesInstanceUID']==series_id, \
                CFG.label_names].values
        labels = torch.tensor(labels, dtype=torch.float32)
        labels = torch.squeeze(labels, dim=0)
        
        return (images, meta, labels)


# 6. DataLoader

In [17]:
def build_dataloaders():

    series = label_df[["SeriesInstanceUID"]].values
    labels = label_df[CFG.label_names].values

    if CFG.test_run:
        # As the absolute number of data points cannot be specified,
        # split is executed in two stages.
        train_series, train_labels, val_series, _ = iterative_train_test_split(
            series, labels, test_size=(1/len(series)) \
                * 2
        )
        _, _, train_series, train_labels = iterative_train_test_split(
            train_series, train_labels, test_size=(1/len(series)) \
                * 2
        )
        
    else:
        train_series, _, val_series, _ = \
            iterative_train_test_split(
                series, labels, test_size=0.2
            )

    # 2 dimensions -> 1 dimension
    train_series, val_series = train_series.flatten(), val_series.flatten()
    # train_series = [
        # '1.2.826.0.1.3680043.8.498.10034081836061566510187499603024895557',
        # '1.2.826.0.1.3680043.8.498.10034081836061566510187499603024895557'
    # ]
    print(f"Train size: {len(train_series)}, Val size: {len(val_series)}")

    # train_image_path_df = \
    #     image_path_df[image_path_df['series_id'] \
    #     .isin(train_series)].reset_index(drop=True)
    # val_image_path_df = \
    #     image_path_df[image_path_df['series_id'] \
    #     .isin(val_series)].reset_index(drop=True)
        
    # train_label_df = label_df[label_df['SeriesInstanceUID']
    #     .isin(train_series)].set_index('SeriesInstanceUID')
    # val_label_df = label_df[label_df['SeriesInstanceUID']
    #     .isin(val_series)].set_index('SeriesInstanceUID')

    train_dataset = BaseDataset(
        series_list=train_series,
        transforms=train_transform
    )
    val_dataset = BaseDataset(
        series_list=val_series,
        transforms=train_transform # tta_transform
    )
    train_dataloader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=CFG.batch_size,
        shuffle=True,
        num_workers=0
    )
    val_dataloader = torch.utils.data.DataLoader(
        val_dataset,
        batch_size=CFG.batch_size,
        shuffle=False,
        num_workers=0
    )

    return train_dataset, train_dataloader, val_dataloader


In [18]:
def build_dataloaders():

    series = label_df[["SeriesInstanceUID"]].values
    labels = label_df[CFG.label_names].values

    if CFG.test_run:
        # As the absolute number of data points cannot be specified,
        # split is executed in two stages.
        train_series, train_labels, val_series, _ = iterative_train_test_split(
            series, labels, test_size=(1/len(series)) \
                * 2
        )
        _, _, train_series, train_labels = iterative_train_test_split(
            train_series, train_labels, test_size=(1/len(series)) \
                * 2
        )
        
    else:
        train_series, _, val_series, _ = iterative_train_test_split(
            series, labels, test_size=0.2
        )

    # 2 dimensions -> 1 dimension
    train_series, val_series = train_series.flatten(), val_series.flatten()
    # train_series = [
        # '1.2.826.0.1.3680043.8.498.10034081836061566510187499603024895557',
        # '1.2.826.0.1.3680043.8.498.10034081836061566510187499603024895557'
    # ]
    print(f"Train size: {len(train_series)}, Val size: {len(val_series)}")

    # train_image_path_df = \
    #     image_path_df[image_path_df['series_id'] \
    #     .isin(train_series)].reset_index(drop=True)
    # val_image_path_df = \
    #     image_path_df[image_path_df['series_id'] \
    #     .isin(val_series)].reset_index(drop=True)
        
    # train_label_df = label_df[label_df['SeriesInstanceUID']
    #     .isin(train_series)].set_index('SeriesInstanceUID')
    # val_label_df = label_df[label_df['SeriesInstanceUID']
    #     .isin(val_series)].set_index('SeriesInstanceUID')

    # Datasets
    train_dataset = BaseDataset(
        series_list=train_series,
        transforms=train_transform
    )
    val_dataset = BaseDataset(
        series_list=val_series,
        transforms=train_transform # or tta_transform
    )
    
    # Dataloaders
    train_dataloader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=CFG.batch_size,
        shuffle=True,
        num_workers=0
    )
    val_dataloader = torch.utils.data.DataLoader(
        val_dataset,
        batch_size=CFG.batch_size,
        shuffle=False,
        num_workers=0
    )

    return  train_dataloader, val_dataloader


In [19]:
train_dataloader, val_dataloader = build_dataloaders()


Train size: 3478, Val size: 870


In [20]:
# _, ax = plt.subplots(1, 2, figsize=(12, 6))

# # 元の画像とどのくらい違いがあるかを確認

# # 元の画像(.npy)
# src = np.load(f'../series_npy/{CFG.image_size}/1.2.826.0.1.3680043.8.498.10034081836061566510187499603024895557/00012.npy')
# print(np.unique(src))
# ax[0].imshow(src)

# # Datasetから取り出した画像
# images, _ = train_dataset[0]
# image = images[8].numpy()  # shape: [H, W]

# # 0-1のfloatなら0-255に変換
# if image.max() <= 1.0:
#     image = (image * 255).astype(np.uint8)
# else:
#     image = image.astype(np.uint8)

# ax[1].imshow(image)


In [21]:
# pil_image = Image.fromarray(image)
# display(pil_image)


# 7. Functions

In [22]:
# count execution time for one epoch
def count_time(start:float) -> float:
    
    elapsed_time = time.time() - start
    elapsed_time /= 60
    
    return elapsed_time


In [23]:
# to save model, optimizer, scheduler
def save_checkpoint(model, optimizer, scheduler, path=""):
    
    path = path.replace('\\', '/')
    
    checkpoint = {
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
        'scheduler_state_dict': scheduler.state_dict() if scheduler is not None else None,
    }
    torch.save(checkpoint, path)

# to load model, optimizer, scheduler
def load_checkpoint(model, optimizer, scheduler, path=""):
    
    path = path.replace('\\', '/')
    
    checkpoint = torch.load(path)
    
    model.load_state_dict(checkpoint['model_state_dict'])
    optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
    if scheduler is not None and checkpoint['scheduler_state_dict'] is not None:
        scheduler.load_state_dict(checkpoint['scheduler_state_dict'])
    
    return model, optimizer, scheduler


In [24]:
# to log losses to W&B
def log_by_wandb(time, losses):
    epoch_data = {
        'time': time,
        'loss': losses,
    }
    wandb.log(epoch_data)


In [25]:
# to log checkpoint
def log_artifact(run=run, artifact=artifact, checkpoint_path=""):
    artifact.add_file(checkpoint_path)
    run.log_artifact(artifact)
    print('Artifact was logged to W&B')


# 8. Training

In [26]:
def train_one_epoch(epoch: int) -> Tuple[float, float]:
    
    print(f'----- Epoch {epoch + 1} -----')
    
    # Training
    train_losses = []
    model.train()
    
    for images, meta, labels in tqdm(train_dataloader):    
        images = images.to(device)
        meta = meta.to(device)
        labels = labels.to(device)
        optimizer.zero_grad()
        with autocast(device_type=CFG.device):
            outputs = model(images, meta)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            train_losses.append(loss.item())
    
    mean_train_loss = np.mean(train_losses)
    print(f'Last output:', outputs[0])
    print(f'Mean Train Loss: {mean_train_loss:.4f}')
    
    # Validation
    val_losses = []
    model.eval()
    
    with torch.no_grad():
        for images, meta, labels in tqdm(val_dataloader):
            images = images.to(device)
            meta = meta.to(device)
            labels = labels.to(device)
            with autocast(device_type=CFG.device):
                outputs = model(images, meta)
                loss = criterion(outputs, labels)
                val_losses.append(loss.item())
        
    mean_val_loss = np.mean(val_losses)
    print(f'Mean Validation Loss: {mean_val_loss:.4f}')
        
    scheduler.step()
        
    return mean_train_loss, mean_val_loss


In [27]:
def main():
    
    os.makedirs(CFG.save_dir, exist_ok=True)
    best_val_loss = np.inf
    
    for epoch in range(CFG.num_epochs):
        
        # Train & Validation
        start_time = time.time()
        train_loss, val_loss = train_one_epoch(epoch)
        elapsed_time = count_time(start_time)
        print(f'Elapsed time: {elapsed_time}')
        
        # Log to W&B
        if CFG.use_wandb:
            losses = {
                'train_loss': train_loss,
                'val_loss': val_loss
            }
            log_by_wandb(elapsed_time, losses)
        
        # Save all checkpoints
        checkpoint_path = os.path.join(
            CFG.save_dir,
            f'checkpoint_{date_time}.pth'
        )
        checkpoint_path = checkpoint_path.replace('\\', '/')
        save_checkpoint(model, optimizer, scheduler, checkpoint_path)
        print(f'Checkpoint saved at {checkpoint_path}')
        
        # Save best checkpoint
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            best_checkpoint_path = os.path.join(
                CFG.save_dir,
                f'best_checkpoint_{date_time}.pth'
            )
            best_checkpoint_path = best_checkpoint_path.replace('\\', '/')
            save_checkpoint(model, optimizer, scheduler, best_checkpoint_path)
            print(f'Best checkpoint updated at {best_checkpoint_path}')
    
    # Log artifact to W&B
    if CFG.use_wandb:
        log_artifact(run, artifact, best_checkpoint_path)


In [28]:
main()


----- Epoch 1 -----


  0%|          | 0/696 [00:00<?, ?it/s]

Last output: tensor([1.5557e-05, 1.0729e-05, 2.4498e-05, 2.6286e-05, 1.3709e-05, 2.3723e-05,
        2.2829e-05, 1.4961e-05, 1.3232e-05, 5.6624e-06, 9.1791e-06, 1.8060e-05,
        1.1563e-05], device='cuda:0', dtype=torch.float16,
       grad_fn=<SelectBackward0>)
Mean Train Loss: 0.7004


  0%|          | 0/174 [00:00<?, ?it/s]

Mean Validation Loss: 0.6934
Elapsed time: 38.08044960101446
Checkpoint saved at ../results/efficientnetv2-s-meta/checkpoint_2025-09-28_00-48-38.pth
Best checkpoint updated at ../results/efficientnetv2-s-meta/best_checkpoint_2025-09-28_00-48-38.pth
----- Epoch 2 -----


  0%|          | 0/696 [00:00<?, ?it/s]

Last output: tensor([3.2187e-06, 3.6955e-06, 3.8147e-06, 2.5034e-06, 7.2122e-06, 4.2915e-06,
        6.6757e-06, 3.3975e-06, 3.7551e-06, 2.9206e-06, 7.5698e-06, 2.3842e-06,
        3.7551e-06], device='cuda:0', dtype=torch.float16,
       grad_fn=<SelectBackward0>)
Mean Train Loss: 0.6933


  0%|          | 0/174 [00:00<?, ?it/s]

Mean Validation Loss: 0.6932
Elapsed time: 35.77650348742803
Checkpoint saved at ../results/efficientnetv2-s-meta/checkpoint_2025-09-28_00-48-38.pth
Best checkpoint updated at ../results/efficientnetv2-s-meta/best_checkpoint_2025-09-28_00-48-38.pth
----- Epoch 3 -----


  0%|          | 0/696 [00:00<?, ?it/s]

Last output: tensor([7.0333e-06, 1.9252e-05, 1.8358e-05, 3.0696e-05, 1.5199e-05, 1.8775e-05,
        2.1756e-05, 1.7345e-05, 1.2994e-05, 1.2636e-05, 1.0967e-05, 1.6332e-05,
        1.5438e-05], device='cuda:0', dtype=torch.float16,
       grad_fn=<SelectBackward0>)
Mean Train Loss: 0.6932


  0%|          | 0/174 [00:00<?, ?it/s]

Mean Validation Loss: 0.6932
Elapsed time: 34.34937460422516
Checkpoint saved at ../results/efficientnetv2-s-meta/checkpoint_2025-09-28_00-48-38.pth
Best checkpoint updated at ../results/efficientnetv2-s-meta/best_checkpoint_2025-09-28_00-48-38.pth
----- Epoch 4 -----


  0%|          | 0/696 [00:00<?, ?it/s]

Last output: tensor([0.0002, 0.0003, 0.0003, 0.0003, 0.0003, 0.0002, 0.0002, 0.0002, 0.0003,
        0.0004, 0.0002, 0.0002, 0.0002], device='cuda:0', dtype=torch.float16,
       grad_fn=<SelectBackward0>)
Mean Train Loss: 0.6932


  0%|          | 0/174 [00:00<?, ?it/s]

Mean Validation Loss: 0.6932
Elapsed time: 34.25769175291062
Checkpoint saved at ../results/efficientnetv2-s-meta/checkpoint_2025-09-28_00-48-38.pth
Best checkpoint updated at ../results/efficientnetv2-s-meta/best_checkpoint_2025-09-28_00-48-38.pth
----- Epoch 5 -----


  0%|          | 0/696 [00:00<?, ?it/s]

Last output: tensor([7.8678e-06, 5.8413e-06, 4.8876e-06, 6.7949e-06, 5.1856e-06, 3.9935e-06,
        6.7949e-06, 9.5367e-06, 6.4969e-06, 3.0398e-06, 2.4438e-06, 3.9339e-06,
        6.1989e-06], device='cuda:0', dtype=torch.float16,
       grad_fn=<SelectBackward0>)
Mean Train Loss: 0.6932


  0%|          | 0/174 [00:00<?, ?it/s]

Mean Validation Loss: 0.6932
Elapsed time: 34.30522790352504
Checkpoint saved at ../results/efficientnetv2-s-meta/checkpoint_2025-09-28_00-48-38.pth
Best checkpoint updated at ../results/efficientnetv2-s-meta/best_checkpoint_2025-09-28_00-48-38.pth
----- Epoch 6 -----


  0%|          | 0/696 [00:00<?, ?it/s]

Last output: tensor([6.5565e-05, 4.2677e-05, 6.2525e-05, 5.1022e-05, 8.2850e-05, 4.3333e-05,
        5.5611e-05, 8.8215e-05, 7.0870e-05, 9.1732e-05, 3.7909e-05, 4.2677e-05,
        6.6042e-05], device='cuda:0', dtype=torch.float16,
       grad_fn=<SelectBackward0>)
Mean Train Loss: 0.6932


  0%|          | 0/174 [00:00<?, ?it/s]

Mean Validation Loss: 0.6932
Elapsed time: 34.394538056850436
Checkpoint saved at ../results/efficientnetv2-s-meta/checkpoint_2025-09-28_00-48-38.pth
Best checkpoint updated at ../results/efficientnetv2-s-meta/best_checkpoint_2025-09-28_00-48-38.pth
----- Epoch 7 -----


  0%|          | 0/696 [00:00<?, ?it/s]

Last output: tensor([5.0664e-06, 7.6294e-06, 2.3723e-05, 7.2122e-06, 8.6427e-06, 5.8413e-06,
        6.5565e-06, 6.0201e-06, 1.0788e-05, 9.6560e-06, 8.3447e-06, 5.9009e-06,
        8.8215e-06], device='cuda:0', dtype=torch.float16,
       grad_fn=<SelectBackward0>)
Mean Train Loss: 0.6932


  0%|          | 0/174 [00:00<?, ?it/s]

Mean Validation Loss: 0.6932
Elapsed time: 34.46577685674031
Checkpoint saved at ../results/efficientnetv2-s-meta/checkpoint_2025-09-28_00-48-38.pth
Best checkpoint updated at ../results/efficientnetv2-s-meta/best_checkpoint_2025-09-28_00-48-38.pth
----- Epoch 8 -----


  0%|          | 0/696 [00:00<?, ?it/s]

Last output: tensor([2.1935e-05, 1.2934e-05, 1.5795e-05, 1.2994e-05, 1.0133e-05, 1.4305e-05,
        1.1563e-05, 2.0325e-05, 7.9870e-06, 1.7762e-05, 1.4722e-05, 1.7107e-05,
        1.2219e-05], device='cuda:0', dtype=torch.float16,
       grad_fn=<SelectBackward0>)
Mean Train Loss: 0.6932


  0%|          | 0/174 [00:00<?, ?it/s]

Mean Validation Loss: 0.6932
Elapsed time: 34.564495559533434
Checkpoint saved at ../results/efficientnetv2-s-meta/checkpoint_2025-09-28_00-48-38.pth
----- Epoch 9 -----


  0%|          | 0/696 [00:00<?, ?it/s]

Last output: tensor([8.3447e-07, 8.3447e-07, 1.0133e-06, 1.0133e-06, 2.0266e-06, 9.5367e-07,
        7.1526e-07, 1.1325e-06, 1.0133e-06, 5.3644e-07, 2.1458e-06, 1.0729e-06,
        1.0133e-06], device='cuda:0', dtype=torch.float16,
       grad_fn=<SelectBackward0>)
Mean Train Loss: 0.6932


  0%|          | 0/174 [00:00<?, ?it/s]

Mean Validation Loss: 0.6932
Elapsed time: 34.52343866030375
Checkpoint saved at ../results/efficientnetv2-s-meta/checkpoint_2025-09-28_00-48-38.pth
Best checkpoint updated at ../results/efficientnetv2-s-meta/best_checkpoint_2025-09-28_00-48-38.pth
----- Epoch 10 -----


  0%|          | 0/696 [00:00<?, ?it/s]

Last output: tensor([5.9605e-08, 0.0000e+00, 5.9605e-08, 5.9605e-08, 5.9605e-08, 5.9605e-08,
        5.9605e-08, 5.9605e-08, 0.0000e+00, 5.9605e-08, 5.9605e-08, 5.9605e-08,
        5.9605e-08], device='cuda:0', dtype=torch.float16,
       grad_fn=<SelectBackward0>)
Mean Train Loss: 0.6931


  0%|          | 0/174 [00:00<?, ?it/s]

Mean Validation Loss: 0.6932
Elapsed time: 35.494934769471485
Checkpoint saved at ../results/efficientnetv2-s-meta/checkpoint_2025-09-28_00-48-38.pth
----- Epoch 11 -----


  0%|          | 0/696 [00:00<?, ?it/s]

Last output: tensor([2.4498e-05, 3.2723e-05, 4.4703e-05, 4.0054e-05, 4.5419e-05, 2.2113e-05,
        1.7226e-05, 2.6047e-05, 2.1636e-05, 6.4552e-05, 2.3901e-05, 5.2691e-05,
        5.2691e-05], device='cuda:0', dtype=torch.float16,
       grad_fn=<SelectBackward0>)
Mean Train Loss: 0.6931


  0%|          | 0/174 [00:00<?, ?it/s]

Mean Validation Loss: 0.6932
Elapsed time: 34.46856401761373
Checkpoint saved at ../results/efficientnetv2-s-meta/checkpoint_2025-09-28_00-48-38.pth
----- Epoch 12 -----


  0%|          | 0/696 [00:00<?, ?it/s]

Last output: tensor([2.2650e-06, 3.3379e-06, 6.1989e-06, 5.6624e-06, 5.6624e-06, 3.7551e-06,
        5.0664e-06, 3.6359e-06, 3.4571e-06, 5.6624e-06, 2.5630e-06, 1.7881e-06,
        3.8147e-06], device='cuda:0', dtype=torch.float16,
       grad_fn=<SelectBackward0>)
Mean Train Loss: 0.6931


  0%|          | 0/174 [00:00<?, ?it/s]

Mean Validation Loss: 0.6931
Elapsed time: 34.5385444800059
Checkpoint saved at ../results/efficientnetv2-s-meta/checkpoint_2025-09-28_00-48-38.pth
Best checkpoint updated at ../results/efficientnetv2-s-meta/best_checkpoint_2025-09-28_00-48-38.pth
----- Epoch 13 -----


  0%|          | 0/696 [00:00<?, ?it/s]

Last output: tensor([4.1723e-06, 2.2054e-06, 4.2319e-06, 1.7285e-06, 3.1590e-06, 6.8545e-06,
        4.2319e-06, 3.0398e-06, 3.4571e-06, 6.3181e-06, 6.4373e-06, 3.4571e-06,
        2.9802e-06], device='cuda:0', dtype=torch.float16,
       grad_fn=<SelectBackward0>)
Mean Train Loss: 0.6931


  0%|          | 0/174 [00:00<?, ?it/s]

Mean Validation Loss: 0.6931
Elapsed time: 34.44883987903595
Checkpoint saved at ../results/efficientnetv2-s-meta/checkpoint_2025-09-28_00-48-38.pth
----- Epoch 14 -----


  0%|          | 0/696 [00:00<?, ?it/s]

Last output: tensor([2.4855e-05, 3.4273e-05, 4.2677e-05, 4.1008e-05, 2.9802e-05, 3.5346e-05,
        3.3736e-05, 4.9889e-05, 6.0141e-05, 3.0935e-05, 3.8207e-05, 2.4676e-05,
        5.1022e-05], device='cuda:0', dtype=torch.float16,
       grad_fn=<SelectBackward0>)
Mean Train Loss: 0.6931


  0%|          | 0/174 [00:00<?, ?it/s]

Mean Validation Loss: 0.6931
Elapsed time: 34.1305549621582
Checkpoint saved at ../results/efficientnetv2-s-meta/checkpoint_2025-09-28_00-48-38.pth
----- Epoch 15 -----


  0%|          | 0/696 [00:00<?, ?it/s]

Last output: tensor([5.9605e-08, 5.9605e-08, 5.9605e-08, 5.9605e-08, 5.9605e-08, 5.9605e-08,
        5.9605e-08, 5.9605e-08, 5.9605e-08, 0.0000e+00, 5.9605e-08, 5.9605e-08,
        5.9605e-08], device='cuda:0', dtype=torch.float16,
       grad_fn=<SelectBackward0>)
Mean Train Loss: 0.6931


  0%|          | 0/174 [00:00<?, ?it/s]

Mean Validation Loss: 0.6931
Elapsed time: 34.49714771509171
Checkpoint saved at ../results/efficientnetv2-s-meta/checkpoint_2025-09-28_00-48-38.pth
Best checkpoint updated at ../results/efficientnetv2-s-meta/best_checkpoint_2025-09-28_00-48-38.pth
----- Epoch 16 -----


  0%|          | 0/696 [00:00<?, ?it/s]

Last output: tensor([0.0000e+00, 5.9605e-08, 5.9605e-08, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        5.9605e-08, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00], device='cuda:0', dtype=torch.float16,
       grad_fn=<SelectBackward0>)
Mean Train Loss: 0.6931


  0%|          | 0/174 [00:00<?, ?it/s]

Mean Validation Loss: 0.6931
Elapsed time: 34.311351068814595
Checkpoint saved at ../results/efficientnetv2-s-meta/checkpoint_2025-09-28_00-48-38.pth
Best checkpoint updated at ../results/efficientnetv2-s-meta/best_checkpoint_2025-09-28_00-48-38.pth
----- Epoch 17 -----


  0%|          | 0/696 [00:00<?, ?it/s]

Last output: tensor([3.5763e-07, 2.9802e-07, 5.9605e-07, 4.1723e-07, 2.9802e-07, 2.9802e-07,
        2.3842e-07, 2.9802e-07, 3.5763e-07, 4.7684e-07, 2.9802e-07, 2.9802e-07,
        3.5763e-07], device='cuda:0', dtype=torch.float16,
       grad_fn=<SelectBackward0>)
Mean Train Loss: 0.6931


  0%|          | 0/174 [00:00<?, ?it/s]

Mean Validation Loss: 0.6931
Elapsed time: 34.54236826499303
Checkpoint saved at ../results/efficientnetv2-s-meta/checkpoint_2025-09-28_00-48-38.pth
Best checkpoint updated at ../results/efficientnetv2-s-meta/best_checkpoint_2025-09-28_00-48-38.pth
----- Epoch 18 -----


  0%|          | 0/696 [00:00<?, ?it/s]

Last output: tensor([5.9605e-08, 5.9605e-08, 5.9605e-08, 5.9605e-08, 5.9605e-08, 5.9605e-08,
        5.9605e-08, 5.9605e-08, 5.9605e-08, 0.0000e+00, 0.0000e+00, 5.9605e-08,
        5.9605e-08], device='cuda:0', dtype=torch.float16,
       grad_fn=<SelectBackward0>)
Mean Train Loss: 0.6931


  0%|          | 0/174 [00:00<?, ?it/s]

Mean Validation Loss: 0.6931
Elapsed time: 34.50058249632517
Checkpoint saved at ../results/efficientnetv2-s-meta/checkpoint_2025-09-28_00-48-38.pth
Best checkpoint updated at ../results/efficientnetv2-s-meta/best_checkpoint_2025-09-28_00-48-38.pth
----- Epoch 19 -----


  0%|          | 0/696 [00:00<?, ?it/s]

Last output: tensor([2.2650e-06, 1.9073e-06, 3.0398e-06, 3.6359e-06, 3.0994e-06, 2.1458e-06,
        2.0862e-06, 2.8610e-06, 3.6955e-06, 4.4107e-06, 3.5763e-06, 1.8477e-06,
        2.4438e-06], device='cuda:0', dtype=torch.float16,
       grad_fn=<SelectBackward0>)
Mean Train Loss: 0.6931


  0%|          | 0/174 [00:00<?, ?it/s]

Mean Validation Loss: 0.6931
Elapsed time: 34.542909328142805
Checkpoint saved at ../results/efficientnetv2-s-meta/checkpoint_2025-09-28_00-48-38.pth
----- Epoch 20 -----


  0%|          | 0/696 [00:00<?, ?it/s]

Last output: tensor([7.7486e-07, 6.5565e-07, 7.1526e-07, 9.5367e-07, 5.9605e-07, 1.0729e-06,
        7.1526e-07, 1.0133e-06, 6.5565e-07, 8.3447e-07, 7.1526e-07, 6.5565e-07,
        3.5763e-07], device='cuda:0', dtype=torch.float16,
       grad_fn=<SelectBackward0>)
Mean Train Loss: 0.6931


  0%|          | 0/174 [00:00<?, ?it/s]

Mean Validation Loss: 0.6931
Elapsed time: 34.49734893242518
Checkpoint saved at ../results/efficientnetv2-s-meta/checkpoint_2025-09-28_00-48-38.pth
Best checkpoint updated at ../results/efficientnetv2-s-meta/best_checkpoint_2025-09-28_00-48-38.pth
----- Epoch 21 -----


  0%|          | 0/696 [00:00<?, ?it/s]

Last output: tensor([1.1921e-07, 1.1921e-07, 1.7881e-07, 1.1921e-07, 2.3842e-07, 1.7881e-07,
        2.3842e-07, 1.1921e-07, 1.7881e-07, 3.5763e-07, 1.1921e-07, 2.3842e-07,
        1.7881e-07], device='cuda:0', dtype=torch.float16,
       grad_fn=<SelectBackward0>)
Mean Train Loss: 0.6931


  0%|          | 0/174 [00:00<?, ?it/s]

Mean Validation Loss: 0.6931
Elapsed time: 34.64737315177918
Checkpoint saved at ../results/efficientnetv2-s-meta/checkpoint_2025-09-28_00-48-38.pth
----- Epoch 22 -----


  0%|          | 0/696 [00:00<?, ?it/s]

Last output: tensor([5.9605e-08, 5.9605e-08, 1.1921e-07, 1.1921e-07, 1.1921e-07, 5.9605e-08,
        5.9605e-08, 1.1921e-07, 5.9605e-08, 1.1921e-07, 5.9605e-08, 5.9605e-08,
        1.1921e-07], device='cuda:0', dtype=torch.float16,
       grad_fn=<SelectBackward0>)
Mean Train Loss: 0.6931


  0%|          | 0/174 [00:00<?, ?it/s]

Mean Validation Loss: 0.6931
Elapsed time: 34.54990657170614
Checkpoint saved at ../results/efficientnetv2-s-meta/checkpoint_2025-09-28_00-48-38.pth
Best checkpoint updated at ../results/efficientnetv2-s-meta/best_checkpoint_2025-09-28_00-48-38.pth
----- Epoch 23 -----


  0%|          | 0/696 [00:00<?, ?it/s]

Last output: tensor([1.7881e-07, 1.7881e-07, 7.7486e-07, 2.9802e-07, 5.3644e-07, 2.9802e-07,
        4.7684e-07, 2.3842e-07, 3.5763e-07, 2.9802e-07, 4.1723e-07, 2.3842e-07,
        2.3842e-07], device='cuda:0', dtype=torch.float16,
       grad_fn=<SelectBackward0>)
Mean Train Loss: 0.6931


  0%|          | 0/174 [00:00<?, ?it/s]

Mean Validation Loss: 0.6931
Elapsed time: 34.653025265534716
Checkpoint saved at ../results/efficientnetv2-s-meta/checkpoint_2025-09-28_00-48-38.pth
Best checkpoint updated at ../results/efficientnetv2-s-meta/best_checkpoint_2025-09-28_00-48-38.pth
----- Epoch 24 -----


  0%|          | 0/696 [00:00<?, ?it/s]

Last output: tensor([8.3447e-07, 1.9670e-06, 8.9407e-07, 8.9407e-07, 1.4305e-06, 7.7486e-07,
        1.0729e-06, 1.6093e-06, 6.5565e-07, 8.3447e-07, 1.3709e-06, 8.3447e-07,
        1.2517e-06], device='cuda:0', dtype=torch.float16,
       grad_fn=<SelectBackward0>)
Mean Train Loss: 0.6931


  0%|          | 0/174 [00:00<?, ?it/s]

Mean Validation Loss: 0.6931
Elapsed time: 34.727806190649666
Checkpoint saved at ../results/efficientnetv2-s-meta/checkpoint_2025-09-28_00-48-38.pth
----- Epoch 25 -----


  0%|          | 0/696 [00:00<?, ?it/s]

Last output: tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], device='cuda:0',
       dtype=torch.float16, grad_fn=<SelectBackward0>)
Mean Train Loss: 0.6931


  0%|          | 0/174 [00:00<?, ?it/s]

Mean Validation Loss: 0.6931
Elapsed time: 34.73173448642095
Checkpoint saved at ../results/efficientnetv2-s-meta/checkpoint_2025-09-28_00-48-38.pth
Artifact was logged to W&B


In [29]:
if CFG.use_wandb:
    run.finish()


[34m[1mwandb[0m: [32m[41mERROR[0m The nbformat package was not found. It is required to save notebook history.


0,1
time,█▄▁▁▁▁▂▂▂▃▂▂▂▁▂▁▂▂▂▂▂▂▂▂▂

0,1
time,34.73173
