このNotebookは、モデルを学習させるために作られたものである。

# 1. Configuration

In [1]:
# Run Configuration
RUN_NAME = "swin-s-meta"
SAVE_DIR = "../results/" + RUN_NAME
TEST_RUN = False
SEED = 42
DEVICE = "cuda"

# Input Data Configuration
IMAGE_SIZE = 256
NUM_SLICES = 32
BATCH_SIZE = 5
LABEL_NAMES = [
    # 13 classes
    'Left Infraclinoid Internal Carotid Artery',
    'Right Infraclinoid Internal Carotid Artery',
    'Left Supraclinoid Internal Carotid Artery',
    'Right Supraclinoid Internal Carotid Artery',
    'Left Middle Cerebral Artery',
    'Right Middle Cerebral Artery',
    'Anterior Communicating Artery',
    'Left Anterior Cerebral Artery',
    'Right Anterior Cerebral Artery',
    'Left Posterior Communicating Artery',
    'Right Posterior Communicating Artery',
    'Basilar Tip',
    'Other Posterior Circulation',
    # 'Aneurysm Present',
]
NUM_LABELS = len(LABEL_NAMES)

# Training Configuration
NUM_EPOCHS = 20
PATIENCE = 5


In [2]:
RUN_NAME = RUN_NAME + f'-{IMAGE_SIZE}-{NUM_SLICES}'

# Weights & Biases Configuration
if TEST_RUN:
    USE_WANDB = False
else:
    USE_WANDB = True
WANDB_INIT = {
    'project': 'RSNA-IAD',
    'group': 'Image Classification',
    'job_type': 'training_model',
    'save_code': True,
}
ARTIFACT = {
    'name': RUN_NAME,
    'type': 'model, optimizer, scheduler',
}


In [3]:
class Configuration:
    
    # Run
    run_name = RUN_NAME
    save_dir = SAVE_DIR
    test_run = TEST_RUN
    seed = SEED
    device = DEVICE
    
    # Input Data
    image_size = IMAGE_SIZE
    num_slices = NUM_SLICES
    batch_size = BATCH_SIZE
    label_names = LABEL_NAMES
    num_labels = NUM_LABELS
    
    # Training
    num_epochs = NUM_EPOCHS
    patience = PATIENCE
    
    # Weights & Biases
    use_wandb = USE_WANDB
    wandb_init = WANDB_INIT
    artifact = ARTIFACT

CFG = Configuration


# 2. Import

In [4]:
import os
import random
import warnings
warnings.filterwarnings('ignore')
from pathlib import Path
from collections import defaultdict
from typing import List, Dict, Optional, Tuple
from IPython.display import display
import datetime
import time
from tqdm.notebook import tqdm

# Data handling
import numpy as np
import polars as pl
import pandas as pd
from sklearn.model_selection import StratifiedShuffleSplit, StratifiedKFold
from skmultilearn.model_selection import iterative_train_test_split

# Medical imaging
import pydicom
import cv2

# Machine Lerning 
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.amp import autocast
import torchvision
import timm

# Transformations
import albumentations as A
from albumentations.pytorch import ToTensorV2

# Visualization
import matplotlib.pyplot as plt
import seaborn as sns
import PIL.Image as Image

# Experiment Management
import wandb

# Competition API
# import kaggle_evaluation.rsna_inference_server


In [None]:
# datetime for unique checkpoint filenames
date_time = datetime.datetime.now()
date_time = date_time.strftime('%Y-%m-%d_%H-%M-%S')


Using device: cuda


In [6]:
def set_random_seeds(seed=CFG.seed, deterministic=False):
    """
    Set random seed.
    
    Args:
        seed (int): Seed to be used.
        deterministic (bool): Whether to set the deterministic option for
            CUDNN backend, i.e., set `torch.backends.cudnn.deterministic`
            to True and `torch.backends.cudnn.benchmark` to False.
            Default: False.
    """
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    
    if deterministic:
        torch.backends.cudnn.benchmark = True

set_random_seeds(seed=CFG.seed, deterministic=True)


# 3. Weights & Biases

In [7]:
if CFG.use_wandb:
    os.environ['WANDB_NOTEBOOK_NAME'] = CFG.run_name
    wandb.login()
    run = wandb.init(**CFG.wandb_init)
    artifact = wandb.Artifact(**CFG.artifact)
else:
    run = None
    artifact = None


[34m[1mwandb[0m: Currently logged in as: [33mataracsia[0m to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


In [8]:
def alert_by_wandb(title='', text=''):
    wandb.alert(title, text)


# 4. Model

In [9]:
class SwinWithMetaModel(nn.Module):
    def __init__(self, model_name, pretrained=False,
                 num_classes=CFG.num_labels, drop_rate=0.3,
                 drop_path_rate=0.2):
        super().__init__()
        self.model_name = model_name
        
        if model_name == 'swin_s':
            self.backbone = timm.create_model(
                'swin_small_patch4_window7_224',
                pretrained=pretrained,
                img_size=CFG.image_size,
                drop_rate=drop_rate,
                drop_path_rate=drop_path_rate,
                global_poopling='',
                num_classes=0)
            
            # input layer modification: 3 channels -> CFG.num_slices channels
            self.backbone.patch_embed.proj = nn.Conv2d(
                in_channels=CFG.num_slices,
                out_channels=96,
                kernel_size=4,
                stride=4,
            )
        else:
            raise ValueError(f"Model {model_name} is not supported.")
        
        self.meta_features = nn.Sequential(
            nn.Linear(2, 16),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(16, 32),
            nn.ReLU()
        )
        
        # According to "LB #1"
        self.classifier = nn.Sequential(
            nn.Linear(768 + 32, 512),
            nn.BatchNorm1d(512),
            nn.ReLU(),
            nn.Dropout(drop_rate),
            nn.Linear(512, 256),
            nn.BatchNorm1d(256),
            nn.ReLU(),
            nn.Dropout(drop_rate),
            nn.Linear(256, num_classes)
        )
        
    def forward(self, images, meta): 
        image_features = self.backbone(images)
        meta_fieatures = self.meta_features(meta)
        x = torch.cat([image_features, meta_fieatures], dim=1)
        x = self.classifier(x)
        x = torch.nn.Sigmoid()(x)
        return x

model = SwinWithMetaModel(model_name='swin_s', pretrained=False)
model


SwinWithMetaModel(
  (backbone): SwinTransformer(
    (patch_embed): PatchEmbed(
      (proj): Conv2d(32, 96, kernel_size=(4, 4), stride=(4, 4))
      (norm): LayerNorm((96,), eps=1e-05, elementwise_affine=True)
    )
    (layers): Sequential(
      (0): SwinTransformerStage(
        (downsample): Identity()
        (blocks): Sequential(
          (0): SwinTransformerBlock(
            (norm1): LayerNorm((96,), eps=1e-05, elementwise_affine=True)
            (attn): WindowAttention(
              (qkv): Linear(in_features=96, out_features=288, bias=True)
              (attn_drop): Dropout(p=0.0, inplace=False)
              (proj): Linear(in_features=96, out_features=96, bias=True)
              (proj_drop): Dropout(p=0.0, inplace=False)
              (softmax): Softmax(dim=-1)
            )
            (drop_path1): Identity()
            (norm2): LayerNorm((96,), eps=1e-05, elementwise_affine=True)
            (mlp): Mlp(
              (fc1): Linear(in_features=96, out_features=384, 

-> timm.createmodel(num_classes=0)とすると、最後のnn.Linear()がnn.Identity()になる。

In [10]:
# Model
# https://docs.pytorch.org/vision/main/models/generated/torchvision.models.swin_s.html#torchvision.models.Swin_S_Weights

# model = torchvision.models.swin_s(pretrained=False, num_classes=13)

# model.features[0][0] = nn.Conv2d(
#     in_channels=CFG.num_slices,
#     out_channels=96,
#     kernel_size=4,
#     stride=4,
#     padding=0
# )
# model.head = nn.Sequential(
#     nn.Linear(in_features=768, out_features=512),
#     nn.BatchNorm1d(512),
#     nn.ReLU(),
#     nn.Dropout(0.3),
#     nn.Linear(512, 256),
#     nn.BatchNorm1d(256),
#     nn.ReLU(),
#     nn.Dropout(0.3),
#     nn.Linear(256, len(CFG.label_names))
# )


In [11]:
model.to(device)

is_in_cuda_list = []

for name, parameter in model.named_parameters():
    # determination of cuda and its storage
    is_in_cuda_list.append(parameter.is_cuda)
    
if all(is_in_cuda_list):
    print('All parameters is in cuda')
        
else:
    print('One of the parameters is not in the cuda.')


All parameters is in cuda


In [12]:
# Optimizer
optimizer = torch.optim.AdamW(model.parameters())

# Loss Function
criterion = nn.BCEWithLogitsLoss()

# Schedulers
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
    optimizer,
    T_max=CFG.num_epochs,
    eta_min=1e-6
)


# 5. Dataset

In [13]:
# SeriesInstanceUID list
series_list = os.listdir(f'../series_npy/{CFG.image_size}')

# .npy path DataFrame
image_path_df = pd.read_csv(f'../npy_path/image_{CFG.image_size}_path_df.csv')

# Meta DataFrame
meta_df = pd.read_csv('../meta_data/meta.csv')

# Label DataFrame
label_df = pd.read_csv(f'../train.csv')
label_df = label_df[['SeriesInstanceUID'] + CFG.label_names]


In [14]:
meta_df.loc[meta_df['SeriesInstanceUID'] == '1.2.826.0.1.3680043.8.498.98697915765488213704603518081182644986']


Unnamed: 0,SeriesInstanceUID,modalisy,age,sex
4296,1.2.826.0.1.3680043.8.498.98697915765488213704...,MR,60,1


In [15]:
def minmax_scaling(images: torch.Tensor) -> torch.Tensor:
    if torch.max(images) > 1.0:
        images = images / 255
    return images


In [16]:
# for training
train_transform = A.Compose(
    [
        # # Elastic Transform <- あとで試したい
        # A.ElasticTransform( p=0.5),
        
        # Rotation
        A.Rotate(limit=(-3, 3), p=0.5, border_mode=cv2.BORDER_WRAP,  # cv2.BORDER_WRAP,
                 seed=CFG.seed
        ),
        
        # Normalization
        A.Normalize(normalization='min_max'),
        
        # ToTensor
        ToTensorV2(),
    ]
)

# for inference
inference_transform = A.Compose(
    [
        A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ToTensorV2()
    ]
)
    
# for TTA
tta_transform = A.Compose(
    [
        A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
            
        # Horizontal flip
        A.HorizontalFlip(p=1.0),
        A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        
        # Vertical flip
        A.VerticalFlip(p=1.0),
        A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        
        # 90 degree rotation
        A.RandomRotate90(p=1.0),
        A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        
        # ↓ Original
        # Sharpen
        A.Sharpen(alpha=(0, 1.0), p=1.0),
        A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        
        ToTensorV2(),
    ]
)


In [17]:
class BaseDataset(torch.utils.data.Dataset):
    '''
    Datasetの__getitem__()は、num_slicesの枚数分だけ画像を出力する。
    
    Arguments:
    - series_list: 画像のSeriesInstanceUIDのリスト
    - image_path_df: 画像のパスを含むDataFrame
    - meta_df: 患者のメタデータが入ったDataFrame
    - label_df: ラベルが入ったDataFrame
    - num_slices: 1つのシリーズから抽出するスライス数
    - transforms: 画像変換のためのAlbumentationsのComposeオブジェクト
    '''
    def __init__(self,
                 series_list: list,
                 image_path_df=image_path_df,
                 meta_df=meta_df,
                 label_df=label_df,
                 transforms=None
        ):
        self.series_list = series_list
        self.image_path_df = image_path_df
        self.meta_df = meta_df
        self.label_df = label_df
        self.transforms = transforms
        self.num_slices = CFG.num_slices

    def __len__(self):
        return len(self.series_list)

    def __getitem__(self, index):
        # Index to SeriesInstanceUID
        series_id = self.series_list[index]
        
        # SeriesInstanceUID to Image Path
        image_path_df = self.image_path_df[
            self.image_path_df['series_id'] == series_id
        ].reset_index(drop=True)
        
        # Load Images
        indices = np.linspace(0,
                              len(image_path_df) - 1,
                              self.num_slices).astype(np.int32)
        # Stack images to (H, W, CFG.num_slices)
        images = []
        for i in indices:
            image_path = image_path_df.loc[i, 'npy_path']
            image = np.load(image_path).astype(np.uint8)
            images.append(image)
        images = np.stack(images, axis=-1)
        
        # Transform
        if self.transforms:
            # ToTensorV2はnumpy.ndarrayをtorch.Tensorに変換する
            augmented = self.transforms(image=images)
            images = augmented['image']
        else:
            images = torch.tensor(images, dtype=torch.float32)
            images = torch.permute(images, (2, 0, 1))
            # Normlization
            if torch.max(images) > 1.0:
                images = images / 255
                
        # Meta data
        meta = self.meta_df.loc[
            self.meta_df['SeriesInstanceUID'] == series_id, ['age', 'sex']
        ]
        age = min(meta['age'].values[0], 100)
        age = age / 100
        sex = meta['sex'].values[0]
        meta = torch.tensor([age, sex], dtype=torch.float32)

        # Labels
        labels = self.label_df.loc[
            self.label_df['SeriesInstanceUID']==series_id, \
                CFG.label_names].values
        labels = torch.tensor(labels, dtype=torch.float32)
        labels = torch.squeeze(labels, dim=0)
        
        return (images, meta, labels)


# 6. DataLoader

In [18]:
def build_dataloaders():

    series = label_df[["SeriesInstanceUID"]].values
    labels = label_df[CFG.label_names].values

    if CFG.test_run:
        # As the absolute number of data points cannot be specified,
        # split is executed in two stages.
        train_series, train_labels, val_series, _ = iterative_train_test_split(
            series, labels, test_size=(1/len(series)) \
                * 2
        )
        _, _, train_series, train_labels = iterative_train_test_split(
            train_series, train_labels, test_size=(1/len(series)) \
                * 2
        )
        
    else:
        train_series, _, val_series, _ = \
            iterative_train_test_split(
                series, labels, test_size=0.2
            )

    # 2 dimensions -> 1 dimension
    train_series, val_series = train_series.flatten(), val_series.flatten()
    # train_series = [
        # '1.2.826.0.1.3680043.8.498.10034081836061566510187499603024895557',
        # '1.2.826.0.1.3680043.8.498.10034081836061566510187499603024895557'
    # ]
    print(f"Train size: {len(train_series)}, Val size: {len(val_series)}")

    # train_image_path_df = \
    #     image_path_df[image_path_df['series_id'] \
    #     .isin(train_series)].reset_index(drop=True)
    # val_image_path_df = \
    #     image_path_df[image_path_df['series_id'] \
    #     .isin(val_series)].reset_index(drop=True)
        
    # train_label_df = label_df[label_df['SeriesInstanceUID']
    #     .isin(train_series)].set_index('SeriesInstanceUID')
    # val_label_df = label_df[label_df['SeriesInstanceUID']
    #     .isin(val_series)].set_index('SeriesInstanceUID')

    train_dataset = BaseDataset(
        series_list=train_series,
        transforms=train_transform
    )
    val_dataset = BaseDataset(
        series_list=val_series,
        transforms=train_transform # tta_transform
    )
    train_dataloader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=CFG.batch_size,
        shuffle=True,
        num_workers=0
    )
    val_dataloader = torch.utils.data.DataLoader(
        val_dataset,
        batch_size=CFG.batch_size,
        shuffle=False,
        num_workers=0
    )

    return train_dataset, train_dataloader, val_dataloader


In [19]:
def build_dataloaders():

    series = label_df[["SeriesInstanceUID"]].values
    labels = label_df[CFG.label_names].values

    if CFG.test_run:
        # As the absolute number of data points cannot be specified,
        # split is executed in two stages.
        train_series, train_labels, val_series, _ = iterative_train_test_split(
            series, labels, test_size=(1/len(series)) \
                * 2
        )
        _, _, train_series, train_labels = iterative_train_test_split(
            train_series, train_labels, test_size=(1/len(series)) \
                * 2
        )
        
    else:
        train_series, _, val_series, _ = iterative_train_test_split(
            series, labels, test_size=0.2
        )

    # 2 dimensions -> 1 dimension
    train_series, val_series = train_series.flatten(), val_series.flatten()
    # train_series = [
        # '1.2.826.0.1.3680043.8.498.10034081836061566510187499603024895557',
        # '1.2.826.0.1.3680043.8.498.10034081836061566510187499603024895557'
    # ]
    print(f"Train size: {len(train_series)}, Val size: {len(val_series)}")

    # train_image_path_df = \
    #     image_path_df[image_path_df['series_id'] \
    #     .isin(train_series)].reset_index(drop=True)
    # val_image_path_df = \
    #     image_path_df[image_path_df['series_id'] \
    #     .isin(val_series)].reset_index(drop=True)
        
    # train_label_df = label_df[label_df['SeriesInstanceUID']
    #     .isin(train_series)].set_index('SeriesInstanceUID')
    # val_label_df = label_df[label_df['SeriesInstanceUID']
    #     .isin(val_series)].set_index('SeriesInstanceUID')

    # Datasets
    train_dataset = BaseDataset(
        series_list=train_series,
        transforms=train_transform
    )
    val_dataset = BaseDataset(
        series_list=val_series,
        transforms=train_transform # or tta_transform
    )
    
    # Dataloaders
    train_dataloader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=CFG.batch_size,
        shuffle=True,
        num_workers=0
    )
    val_dataloader = torch.utils.data.DataLoader(
        val_dataset,
        batch_size=CFG.batch_size,
        shuffle=False,
        num_workers=0
    )

    return  train_dataloader, val_dataloader


In [20]:
sample = torch.tensor([0, 1, 2, 0.5, 0.22, 0.4, 0.99, 0.000001, -1, -100, 10000])
sample = torch.clip(sample, min=0, max=1).to(torch.float16)
sample


tensor([0.0000e+00, 1.0000e+00, 1.0000e+00, 5.0000e-01, 2.1997e-01, 3.9990e-01,
        9.9023e-01, 1.0133e-06, 0.0000e+00, 0.0000e+00, 1.0000e+00],
       dtype=torch.float16)

In [21]:
train_dataloader, val_dataloader = build_dataloaders()


Train size: 3478, Val size: 870


In [22]:
# _, ax = plt.subplots(1, 2, figsize=(12, 6))

# # 元の画像とどのくらい違いがあるかを確認

# # 元の画像(.npy)
# src = np.load(f'../series_npy/{CFG.image_size}/1.2.826.0.1.3680043.8.498.10034081836061566510187499603024895557/00012.npy')
# print(np.unique(src))
# ax[0].imshow(src)

# # Datasetから取り出した画像
# images, _ = train_dataset[0]
# image = images[8].numpy()  # shape: [H, W]

# # 0-1のfloatなら0-255に変換
# if image.max() <= 1.0:
#     image = (image * 255).astype(np.uint8)
# else:
#     image = image.astype(np.uint8)

# ax[1].imshow(image)


In [23]:
# pil_image = Image.fromarray(image)
# display(pil_image)


# 7. Functions

In [24]:
# count execution time for one epoch
def count_time(start:float) -> float:
    
    elapsed_time = time.time() - start
    elapsed_time /= 60
    
    return elapsed_time


In [25]:
# to save model, optimizer, scheduler
def save_checkpoint(model, optimizer, scheduler, path=""):
    
    path = path.replace('\\', '/')
    
    checkpoint = {
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
        'scheduler_state_dict': scheduler.state_dict() if scheduler is not None else None,
    }
    torch.save(checkpoint, path)

# to load model, optimizer, scheduler
def load_checkpoint(model, optimizer, scheduler, path=""):
    
    path = path.replace('\\', '/')
    
    checkpoint = torch.load(path)
    
    model.load_state_dict(checkpoint['model_state_dict'])
    optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
    if scheduler is not None and checkpoint['scheduler_state_dict'] is not None:
        scheduler.load_state_dict(checkpoint['scheduler_state_dict'])
    
    return model, optimizer, scheduler


In [26]:
# to log losses to W&B
def log_by_wandb(time, losses):
    epoch_data = {
        'time': time,
        'loss': losses,
    }
    wandb.log(epoch_data)


In [27]:
# to log checkpoint
def log_artifact(run=run, artifact=artifact, checkpoint_path=""):
    artifact.add_file(checkpoint_path)
    run.log_artifact(artifact)
    print('Artifact was logged to W&B')


# 8. Training

In [28]:
def train_one_epoch(epoch: int) -> Tuple[float, float]:
    
    print(f'----- Epoch {epoch + 1} -----')
    
    # Training
    train_losses = []
    model.train()
    
    for images, meta, labels in tqdm(train_dataloader):    
        images = images.to(device)
        meta = meta.to(device)
        labels = labels.to(device)
        optimizer.zero_grad()
        with autocast(device_type=CFG.device):
            outputs = model(images, meta)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            train_losses.append(loss.item())
    
    mean_train_loss = np.mean(train_losses)
    print(f'Last output:', outputs[0])
    print(f'Mean Train Loss: {mean_train_loss:.4f}')
    
    # Validation
    val_losses = []
    model.eval()
    
    with torch.no_grad():
        for images, meta, labels in tqdm(val_dataloader):
            images = images.to(device)
            meta = meta.to(device)
            labels = labels.to(device)
            with autocast(device_type=CFG.device):
                outputs = model(images, meta)
                loss = criterion(outputs, labels)
                val_losses.append(loss.item())
        
    mean_val_loss = np.mean(val_losses)
    print(f'Mean Validation Loss: {mean_val_loss:.4f}')
        
    scheduler.step()
        
    return mean_train_loss, mean_val_loss


In [29]:
def main():
    
    os.makedirs(CFG.save_dir, exist_ok=True)
    best_val_loss = np.inf
    
    for epoch in range(CFG.num_epochs):
        
        # Train & Validation
        start_time = time.time()
        train_loss, val_loss = train_one_epoch(epoch)
        elapsed_time = count_time(start_time)
        print(f'Elapsed time: {elapsed_time}')
        
        # Log to W&B
        if CFG.use_wandb:
            losses = {
                'train_loss': train_loss,
                'val_loss': val_loss
            }
            log_by_wandb(elapsed_time, losses)
        
        # Save all checkpoints
        checkpoint_path = os.path.join(
            CFG.save_dir,
            f'checkpoint_{date_time}.pth'
        )
        checkpoint_path = checkpoint_path.replace('\\', '/')
        save_checkpoint(model, optimizer, scheduler, checkpoint_path)
        print(f'Checkpoint saved at {checkpoint_path}')
        
        # Save best checkpoint
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            best_checkpoint_path = os.path.join(
                CFG.save_dir,
                f'best_checkpoint_{date_time}.pth'
            )
            best_checkpoint_path = best_checkpoint_path.replace('\\', '/')
            save_checkpoint(model, optimizer, scheduler, best_checkpoint_path)
            print(f'Best checkpoint updated at {best_checkpoint_path}')
    
    # Log artifact to W&B
    if CFG.use_wandb:
        log_artifact(run, artifact, best_checkpoint_path)


In [30]:
main()


----- Epoch 1 -----


  0%|          | 0/696 [00:00<?, ?it/s]

Last output: tensor([1.5843e-04, 2.3055e-04, 1.7536e-04, 1.4997e-04, 2.5129e-04, 3.5548e-04,
        2.6941e-04, 3.4738e-04, 3.3021e-04, 2.1148e-04, 1.9872e-04, 2.1315e-04,
        8.8871e-05], device='cuda:0', dtype=torch.float16,
       grad_fn=<SelectBackward0>)
Mean Train Loss: 0.7012


  0%|          | 0/174 [00:00<?, ?it/s]

Mean Validation Loss: 0.6932
Elapsed time: 36.93630487918854
Checkpoint saved at ../results/swin-s-meta/checkpoint_2025-09-27_00-20-44.pth
Best checkpoint updated at ../results/swin-s-meta/best_checkpoint_2025-09-27_00-20-44.pth
----- Epoch 2 -----


  0%|          | 0/696 [00:00<?, ?it/s]

Last output: tensor([7.8440e-05, 2.2113e-05, 1.0639e-04, 4.2319e-05, 5.1856e-05, 6.8665e-05,
        1.1593e-04, 3.8505e-05, 6.8128e-05, 4.3631e-05, 7.7844e-05, 5.6922e-05,
        2.9564e-05], device='cuda:0', dtype=torch.float16,
       grad_fn=<SelectBackward0>)
Mean Train Loss: 0.6933


  0%|          | 0/174 [00:00<?, ?it/s]

Mean Validation Loss: 0.6932
Elapsed time: 35.02141271034876
Checkpoint saved at ../results/swin-s-meta/checkpoint_2025-09-27_00-20-44.pth
Best checkpoint updated at ../results/swin-s-meta/best_checkpoint_2025-09-27_00-20-44.pth
----- Epoch 3 -----


  0%|          | 0/696 [00:00<?, ?it/s]

Last output: tensor([4.1664e-05, 9.3579e-06, 1.4961e-05, 2.1458e-05, 2.5690e-05, 1.2696e-05,
        1.1921e-05, 1.0610e-05, 2.2650e-05, 2.3544e-05, 3.7372e-05, 8.4043e-06,
        7.8678e-06], device='cuda:0', dtype=torch.float16,
       grad_fn=<SelectBackward0>)
Mean Train Loss: 0.6932


  0%|          | 0/174 [00:00<?, ?it/s]

Mean Validation Loss: 0.6932
Elapsed time: 35.43645524183909
Checkpoint saved at ../results/swin-s-meta/checkpoint_2025-09-27_00-20-44.pth
Best checkpoint updated at ../results/swin-s-meta/best_checkpoint_2025-09-27_00-20-44.pth
----- Epoch 4 -----


  0%|          | 0/696 [00:00<?, ?it/s]

Last output: tensor([7.3314e-06, 1.8775e-05, 5.9605e-06, 1.2100e-05, 1.5080e-05, 2.8431e-05,
        6.9737e-06, 2.4319e-05, 1.8954e-05, 1.2696e-05, 1.7107e-05, 2.7299e-05,
        6.0797e-06], device='cuda:0', dtype=torch.float16,
       grad_fn=<SelectBackward0>)
Mean Train Loss: 0.6932


  0%|          | 0/174 [00:00<?, ?it/s]

Mean Validation Loss: 0.6932
Elapsed time: 36.61913451353709
Checkpoint saved at ../results/swin-s-meta/checkpoint_2025-09-27_00-20-44.pth
Best checkpoint updated at ../results/swin-s-meta/best_checkpoint_2025-09-27_00-20-44.pth
----- Epoch 5 -----


  0%|          | 0/696 [00:00<?, ?it/s]

Last output: tensor([9.5963e-06, 9.0599e-06, 5.3048e-06, 1.3709e-05, 2.3246e-06, 2.9802e-06,
        1.0073e-05, 6.3181e-06, 6.8545e-06, 8.3447e-06, 1.1027e-05, 5.8413e-06,
        3.5763e-06], device='cuda:0', dtype=torch.float16,
       grad_fn=<SelectBackward0>)
Mean Train Loss: 0.6932


  0%|          | 0/174 [00:00<?, ?it/s]

Mean Validation Loss: 0.6932
Elapsed time: 37.09324551820755
Checkpoint saved at ../results/swin-s-meta/checkpoint_2025-09-27_00-20-44.pth
----- Epoch 6 -----


  0%|          | 0/696 [00:00<?, ?it/s]

Last output: tensor([1.6689e-06, 2.6226e-06, 1.4901e-06, 1.6689e-06, 3.0398e-06, 2.3246e-06,
        2.4438e-06, 3.0398e-06, 3.1590e-06, 3.1590e-06, 2.6226e-06, 2.5630e-06,
        3.0398e-06], device='cuda:0', dtype=torch.float16,
       grad_fn=<SelectBackward0>)
Mean Train Loss: 0.6932


  0%|          | 0/174 [00:00<?, ?it/s]

Mean Validation Loss: 0.6931
Elapsed time: 37.11773673693339
Checkpoint saved at ../results/swin-s-meta/checkpoint_2025-09-27_00-20-44.pth
Best checkpoint updated at ../results/swin-s-meta/best_checkpoint_2025-09-27_00-20-44.pth
----- Epoch 7 -----


  0%|          | 0/696 [00:00<?, ?it/s]

Last output: tensor([1.2815e-05, 1.7643e-05, 2.0444e-05, 9.8348e-06, 2.6703e-05, 2.6047e-05,
        1.7762e-05, 1.8179e-05, 5.8770e-05, 6.0201e-06, 1.7107e-05, 2.0802e-05,
        1.8775e-05], device='cuda:0', dtype=torch.float16,
       grad_fn=<SelectBackward0>)
Mean Train Loss: 0.6932


  0%|          | 0/174 [00:00<?, ?it/s]

Mean Validation Loss: 0.6932
Elapsed time: 36.26028167804082
Checkpoint saved at ../results/swin-s-meta/checkpoint_2025-09-27_00-20-44.pth
----- Epoch 8 -----


  0%|          | 0/696 [00:00<?, ?it/s]

Last output: tensor([7.6890e-06, 5.9605e-06, 5.0664e-06, 4.7088e-06, 9.9540e-06, 4.8876e-06,
        3.5167e-06, 4.2319e-06, 1.7881e-06, 7.7486e-06, 5.3644e-06, 5.4836e-06,
        5.7817e-06], device='cuda:0', dtype=torch.float16,
       grad_fn=<SelectBackward0>)
Mean Train Loss: 0.6932


  0%|          | 0/174 [00:00<?, ?it/s]

Mean Validation Loss: 0.6931
Elapsed time: 35.64291685024897
Checkpoint saved at ../results/swin-s-meta/checkpoint_2025-09-27_00-20-44.pth
Best checkpoint updated at ../results/swin-s-meta/best_checkpoint_2025-09-27_00-20-44.pth
----- Epoch 9 -----


  0%|          | 0/696 [00:00<?, ?it/s]

Last output: tensor([1.0729e-06, 7.7486e-07, 1.0729e-06, 5.9605e-07, 5.3644e-07, 1.1325e-06,
        4.1127e-06, 1.6093e-06, 2.0862e-06, 1.7881e-06, 1.1921e-06, 9.5367e-07,
        2.5630e-06], device='cuda:0', dtype=torch.float16,
       grad_fn=<SelectBackward0>)
Mean Train Loss: 0.6932


  0%|          | 0/174 [00:00<?, ?it/s]

Mean Validation Loss: 0.6931
Elapsed time: 36.210670407613115
Checkpoint saved at ../results/swin-s-meta/checkpoint_2025-09-27_00-20-44.pth
----- Epoch 10 -----


  0%|          | 0/696 [00:00<?, ?it/s]

Last output: tensor([1.2517e-06, 2.7418e-06, 1.3709e-06, 3.0398e-06, 7.4506e-06, 2.6226e-06,
        2.3246e-06, 2.9802e-06, 1.5497e-06, 2.0266e-06, 1.6093e-06, 2.8610e-06,
        1.3113e-06], device='cuda:0', dtype=torch.float16,
       grad_fn=<SelectBackward0>)
Mean Train Loss: 0.6931


  0%|          | 0/174 [00:00<?, ?it/s]

Mean Validation Loss: 0.6931
Elapsed time: 36.23891297181447
Checkpoint saved at ../results/swin-s-meta/checkpoint_2025-09-27_00-20-44.pth
----- Epoch 11 -----


  0%|          | 0/696 [00:00<?, ?it/s]

Last output: tensor([2.6822e-06, 2.3842e-06, 2.1458e-06, 2.9802e-06, 1.9073e-06, 1.6689e-06,
        2.2650e-06, 2.0862e-06, 7.7486e-07, 2.1458e-06, 1.0729e-06, 4.8876e-06,
        2.0862e-06], device='cuda:0', dtype=torch.float16,
       grad_fn=<SelectBackward0>)
Mean Train Loss: 0.6931


  0%|          | 0/174 [00:00<?, ?it/s]

Mean Validation Loss: 0.6931
Elapsed time: 36.710558672746025
Checkpoint saved at ../results/swin-s-meta/checkpoint_2025-09-27_00-20-44.pth
Best checkpoint updated at ../results/swin-s-meta/best_checkpoint_2025-09-27_00-20-44.pth
----- Epoch 12 -----


  0%|          | 0/696 [00:00<?, ?it/s]

Last output: tensor([1.7881e-07, 5.9605e-08, 3.5763e-07, 3.5763e-07, 1.7881e-07, 1.7881e-07,
        2.9802e-07, 1.1921e-07, 1.1921e-07, 5.9605e-08, 1.1921e-07, 5.9605e-08,
        1.7881e-07], device='cuda:0', dtype=torch.float16,
       grad_fn=<SelectBackward0>)
Mean Train Loss: 0.6931


  0%|          | 0/174 [00:00<?, ?it/s]

Mean Validation Loss: 0.6931
Elapsed time: 36.18319597641627
Checkpoint saved at ../results/swin-s-meta/checkpoint_2025-09-27_00-20-44.pth
Best checkpoint updated at ../results/swin-s-meta/best_checkpoint_2025-09-27_00-20-44.pth
----- Epoch 13 -----


  0%|          | 0/696 [00:00<?, ?it/s]

Last output: tensor([1.0729e-06, 5.3644e-07, 1.4305e-06, 7.1526e-07, 1.3113e-06, 5.9605e-07,
        1.4305e-06, 1.3709e-06, 8.3447e-07, 4.1723e-07, 3.5763e-07, 5.9605e-07,
        1.1325e-06], device='cuda:0', dtype=torch.float16,
       grad_fn=<SelectBackward0>)
Mean Train Loss: 0.6931


  0%|          | 0/174 [00:00<?, ?it/s]

Mean Validation Loss: 0.6931
Elapsed time: 36.303755009174346
Checkpoint saved at ../results/swin-s-meta/checkpoint_2025-09-27_00-20-44.pth
Best checkpoint updated at ../results/swin-s-meta/best_checkpoint_2025-09-27_00-20-44.pth
----- Epoch 14 -----


  0%|          | 0/696 [00:00<?, ?it/s]

Last output: tensor([7.7486e-07, 9.5367e-07, 1.4901e-06, 1.6093e-06, 7.1526e-07, 1.1921e-06,
        7.1526e-07, 9.5367e-07, 8.9407e-07, 1.0729e-06, 2.9802e-07, 5.9605e-07,
        8.3447e-07], device='cuda:0', dtype=torch.float16,
       grad_fn=<SelectBackward0>)
Mean Train Loss: 0.6931


  0%|          | 0/174 [00:00<?, ?it/s]

Mean Validation Loss: 0.6931
Elapsed time: 36.68641512393951
Checkpoint saved at ../results/swin-s-meta/checkpoint_2025-09-27_00-20-44.pth
Best checkpoint updated at ../results/swin-s-meta/best_checkpoint_2025-09-27_00-20-44.pth
----- Epoch 15 -----


  0%|          | 0/696 [00:00<?, ?it/s]

Last output: tensor([1.6093e-06, 1.9670e-06, 1.3709e-06, 1.6689e-06, 1.1325e-06, 2.3842e-06,
        7.7486e-07, 7.7486e-07, 1.0729e-06, 1.3113e-06, 1.3709e-06, 1.4305e-06,
        1.5497e-06], device='cuda:0', dtype=torch.float16,
       grad_fn=<SelectBackward0>)
Mean Train Loss: 0.6931


  0%|          | 0/174 [00:00<?, ?it/s]

Mean Validation Loss: 0.6931
Elapsed time: 36.07976760069529
Checkpoint saved at ../results/swin-s-meta/checkpoint_2025-09-27_00-20-44.pth
----- Epoch 16 -----


  0%|          | 0/696 [00:00<?, ?it/s]

Last output: tensor([1.7881e-07, 2.3842e-07, 4.7684e-07, 2.3842e-07, 1.7881e-07, 7.1526e-07,
        2.9802e-07, 2.3842e-07, 1.7881e-07, 2.3842e-07, 1.7881e-07, 1.7881e-07,
        1.1921e-07], device='cuda:0', dtype=torch.float16,
       grad_fn=<SelectBackward0>)
Mean Train Loss: 0.6931


  0%|          | 0/174 [00:00<?, ?it/s]

Mean Validation Loss: 0.6931
Elapsed time: 35.92095851500829
Checkpoint saved at ../results/swin-s-meta/checkpoint_2025-09-27_00-20-44.pth
----- Epoch 17 -----


  0%|          | 0/696 [00:00<?, ?it/s]

Last output: tensor([4.7684e-07, 4.1723e-07, 2.3842e-07, 3.5763e-07, 1.1921e-07, 5.9605e-07,
        2.3842e-07, 2.3842e-07, 4.1723e-07, 5.3644e-07, 2.3842e-07, 2.9802e-07,
        3.5763e-07], device='cuda:0', dtype=torch.float16,
       grad_fn=<SelectBackward0>)
Mean Train Loss: 0.6931


  0%|          | 0/174 [00:00<?, ?it/s]

Mean Validation Loss: 0.6931
Elapsed time: 36.07723657687505
Checkpoint saved at ../results/swin-s-meta/checkpoint_2025-09-27_00-20-44.pth
----- Epoch 18 -----


  0%|          | 0/696 [00:00<?, ?it/s]

Last output: tensor([2.9206e-06, 3.6359e-06, 2.5630e-06, 2.4438e-06, 4.2319e-06, 1.1921e-06,
        2.6822e-06, 1.9073e-06, 2.5034e-06, 2.2054e-06, 5.0664e-06, 2.5034e-06,
        1.9670e-06], device='cuda:0', dtype=torch.float16,
       grad_fn=<SelectBackward0>)
Mean Train Loss: 0.6931


  0%|          | 0/174 [00:00<?, ?it/s]

Mean Validation Loss: 0.6931
Elapsed time: 36.087802962462106
Checkpoint saved at ../results/swin-s-meta/checkpoint_2025-09-27_00-20-44.pth
----- Epoch 19 -----


  0%|          | 0/696 [00:00<?, ?it/s]

Last output: tensor([5.9605e-08, 1.1921e-07, 0.0000e+00, 5.9605e-08, 5.9605e-08, 5.9605e-08,
        5.9605e-08, 5.9605e-08, 5.9605e-08, 5.9605e-08, 5.9605e-08, 1.1921e-07,
        5.9605e-08], device='cuda:0', dtype=torch.float16,
       grad_fn=<SelectBackward0>)
Mean Train Loss: 0.6931


  0%|          | 0/174 [00:00<?, ?it/s]

Mean Validation Loss: 0.6931
Elapsed time: 36.0509335398674
Checkpoint saved at ../results/swin-s-meta/checkpoint_2025-09-27_00-20-44.pth
----- Epoch 20 -----


  0%|          | 0/696 [00:00<?, ?it/s]

Last output: tensor([1.3113e-06, 2.9802e-07, 7.7486e-07, 8.3447e-07, 1.4901e-06, 1.2517e-06,
        2.0862e-06, 7.1526e-07, 8.3447e-07, 1.6093e-06, 1.1921e-06, 1.1921e-06,
        1.2517e-06], device='cuda:0', dtype=torch.float16,
       grad_fn=<SelectBackward0>)
Mean Train Loss: 0.6931


  0%|          | 0/174 [00:00<?, ?it/s]

Mean Validation Loss: 0.6931
Elapsed time: 34.32629758516948
Checkpoint saved at ../results/swin-s-meta/checkpoint_2025-09-27_00-20-44.pth
Best checkpoint updated at ../results/swin-s-meta/best_checkpoint_2025-09-27_00-20-44.pth
Artifact was logged to W&B


In [31]:
if CFG.use_wandb:
    run.finish()


[34m[1mwandb[0m: [32m[41mERROR[0m The nbformat package was not found. It is required to save notebook history.


0,1
time,█▃▄▇██▆▄▆▆▇▆▆▇▅▅▅▅▅▁

0,1
time,34.3263
