In [38]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/dogs-vs-cats/test1.zip
/kaggle/input/dogs-vs-cats/train.zip
/kaggle/input/dogs-vs-cats/sampleSubmission.csv


/kaggle/input/dogs-vs-cats/test1.zip
/kaggle/input/dogs-vs-cats/train.zip
/kaggle/input/dogs-vs-cats/sampleSubmission.csv

In [39]:
import torch
print(torch.cuda.device_count())
for i in range(torch.cuda.device_count()):
    print(i, torch.cuda.get_device_name(i))

1
0 Tesla P100-PCIE-16GB


1
0 Tesla P100-PCIE-16GB

In [40]:
import os, zipfile, random, shutil, glob
from pathlib import Path

random.seed(42)

# Paths
train_zip = "/kaggle/input/dogs-vs-cats/train.zip"
test_zip  = "/kaggle/input/dogs-vs-cats/test1.zip"
work_dir  = "/kaggle/working/dogs_cats"

raw_train = f"{work_dir}/raw_train"
raw_test  = f"{work_dir}/raw_test"

os.makedirs(raw_train, exist_ok=True)
os.makedirs(raw_test, exist_ok=True)

# Unzip
with zipfile.ZipFile(train_zip, 'r') as z:
    z.extractall(raw_train)

with zipfile.ZipFile(test_zip, 'r') as z:
    z.extractall(raw_test)

# The Kaggle dataset usually extracts to raw_train/train/*.jpg
train_imgs = sorted(glob.glob(f"{raw_train}/train/*.jpg"))

# Build folder structure for ImageFolder-like loading
split_root = f"{work_dir}/split"
train_out = f"{split_root}/train"
val_out   = f"{split_root}/val"

for c in ["cat", "dog"]:
    os.makedirs(f"{train_out}/{c}", exist_ok=True)
    os.makedirs(f"{val_out}/{c}", exist_ok=True)

# Separate by label
cats = [p for p in train_imgs if Path(p).name.startswith("cat.")]
dogs = [p for p in train_imgs if Path(p).name.startswith("dog.")]

# Shuffle
random.shuffle(cats)
random.shuffle(dogs)

# 80/20 split
def split_list(lst, val_frac=0.2):
    n_val = int(len(lst) * val_frac)
    return lst[n_val:], lst[:n_val]

cats_train, cats_val = split_list(cats, 0.2)
dogs_train, dogs_val = split_list(dogs, 0.2)

# Copy files
def copy_files(files, dest):
    for p in files:
        shutil.copy2(p, dest)

copy_files(cats_train, f"{train_out}/cat")
copy_files(dogs_train, f"{train_out}/dog")
copy_files(cats_val,   f"{val_out}/cat")
copy_files(dogs_val,   f"{val_out}/dog")

print("Train cats:", len(cats_train), "Train dogs:", len(dogs_train))
print("Val cats:", len(cats_val), "Val dogs:", len(dogs_val))
print("Split dir:", split_root)

Train cats: 10000 Train dogs: 10000
Val cats: 2500 Val dogs: 2500
Split dir: /kaggle/working/dogs_cats/split


Train cats: 10000 Train dogs: 10000
Val cats: 2500 Val dogs: 2500
Split dir: /kaggle/working/dogs_cats/split

In [41]:
!pip -q install timm albumentations opencv-python

In [42]:
import os, zipfile, random, shutil, glob, math, time
from dataclasses import dataclass
from pathlib import Path
from typing import List, Tuple

import cv2
import numpy as np

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader

import albumentations as A
from albumentations.pytorch import ToTensorV2

import timm
from timm.data import Mixup
from timm.loss import LabelSmoothingCrossEntropy, SoftTargetCrossEntropy
from timm.utils import ModelEmaV2
from timm.scheduler import CosineLRScheduler

print("CUDA:", torch.cuda.is_available())
if torch.cuda.is_available():
    print("GPU:", torch.cuda.get_device_name(0))

CUDA: True
GPU: Tesla P100-PCIE-16GB


CUDA: True
GPU: Tesla P100-PCIE-16GB

In [43]:
# =========================
# Cell 3 - Config
# =========================
@dataclass
class CFG:
    # Kaggle input zips
    train_zip: str = "/kaggle/input/dogs-vs-cats/train.zip"
    test_zip: str  = "/kaggle/input/dogs-vs-cats/test1.zip"

    # Working dirs
    work_dir: str = "/kaggle/working/dogs_cats"
    split_root: str = "/kaggle/working/dogs_cats/split"

    # Data
    img_size: int = 224
    num_classes: int = 2

    # Model: try "convnext_tiny", "convnext_small", "convnext_base"
    model_name: str = "convnext_small"
    drop_path_rate: float = 0.05

    # Training
    epochs: int = 100
    batch_size: int = 64   # good старт for small; reduce if OOM
    num_workers: int = 6

    # Optim
    lr: float = 3e-4
    weight_decay: float = 0.05

    # Scheduler
    warmup_epochs: int = 5
    min_lr: float = 1e-6

    # Mixup / CutMix (moderate)
    mixup_alpha: float = 0.1
    cutmix_alpha: float = 0.2
    mix_prob: float = 1.0
    switch_prob: float = 0.5
    mix_mode: str = "batch"
    
    # Delay Mixup/CutMix to improve peak accuracy
    mix_start_epoch: int = 5
    
    # Label smoothing (used also by Mixup helper for targets)
    label_smoothing: float = 0.05

    # EMA
    use_ema: bool = False
    ema_decay: float = 0.9999
    
    # AMP
    amp: bool = True

    seed: int = 42
    device: str = "cuda" if torch.cuda.is_available() else "cpu"

    log_every: int = 50
    save_path: str = "/kaggle/working/best_ema.pt"

cfg = CFG()
cfg


CFG(train_zip='/kaggle/input/dogs-vs-cats/train.zip', test_zip='/kaggle/input/dogs-vs-cats/test1.zip', work_dir='/kaggle/working/dogs_cats', split_root='/kaggle/working/dogs_cats/split', img_size=224, num_classes=2, model_name='convnext_small', drop_path_rate=0.05, epochs=100, batch_size=64, num_workers=6, lr=0.0003, weight_decay=0.05, warmup_epochs=5, min_lr=1e-06, mixup_alpha=0.1, cutmix_alpha=0.2, mix_prob=1.0, switch_prob=0.5, mix_mode='batch', mix_start_epoch=5, label_smoothing=0.05, use_ema=False, ema_decay=0.9999, amp=True, seed=42, device='cuda', log_every=50, save_path='/kaggle/working/best_ema.pt')

CFG(train_zip='/kaggle/input/dogs-vs-cats/train.zip', test_zip='/kaggle/input/dogs-vs-cats/test1.zip', work_dir='/kaggle/working/dogs_cats', split_root='/kaggle/working/dogs_cats/split', img_size=224, num_classes=2, model_name='convnext_small', drop_path_rate=0.05, epochs=100, batch_size=64, num_workers=6, lr=0.0003, weight_decay=0.05, warmup_epochs=5, min_lr=1e-06, mixup_alpha=0.1, cutmix_alpha=0.2, mix_prob=1.0, switch_prob=0.5, mix_mode='batch', mix_start_epoch=5, label_smoothing=0.05, use_ema=False, ema_decay=0.9999, amp=True, seed=42, device='cuda', log_every=50, save_path='/kaggle/working/best_ema.pt')

In [44]:
# =========================
# Cell 4 - Repro + speed
# =========================
def set_seed(seed: int = 42):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)

set_seed(cfg.seed)
torch.backends.cudnn.benchmark = True


In [45]:
# =========================
# Cell 5 - Prepare data from zips + make your exact split
#         (10000 train + 2500 val per class)
# =========================
def prepare_split_12500_per_class(cfg: CFG, rebuild: bool = False):
    raw_train = f"{cfg.work_dir}/raw_train"
    raw_test  = f"{cfg.work_dir}/raw_test"

    train_out = f"{cfg.split_root}/train"
    val_out   = f"{cfg.split_root}/val"

    # If already prepared, skip unless rebuild=True
    if (not rebuild) and os.path.isdir(cfg.split_root) and \
       os.path.isdir(f"{train_out}/cat") and os.path.isdir(f"{val_out}/dog"):
        # quick count sanity
        train_c = len(glob.glob(f"{train_out}/cat/*"))
        train_d = len(glob.glob(f"{train_out}/dog/*"))
        val_c   = len(glob.glob(f"{val_out}/cat/*"))
        val_d   = len(glob.glob(f"{val_out}/dog/*"))
        print("Split already exists.")
        print("Train cats:", train_c, "Train dogs:", train_d)
        print("Val cats:", val_c, "Val dogs:", val_d)
        return cfg.split_root

    # Clean old
    if os.path.isdir(cfg.work_dir):
        shutil.rmtree(cfg.work_dir)
    os.makedirs(raw_train, exist_ok=True)
    os.makedirs(raw_test, exist_ok=True)

    # Unzip train/test
    with zipfile.ZipFile(cfg.train_zip, 'r') as z:
        z.extractall(raw_train)

    with zipfile.ZipFile(cfg.test_zip, 'r') as z:
        z.extractall(raw_test)

    # Train images path in this dataset
    train_imgs = sorted(glob.glob(f"{raw_train}/train/*.jpg"))
    if len(train_imgs) == 0:
        raise RuntimeError("No train images found after unzip. Check zip structure.")

    # Create folders
    for c in ["cat", "dog"]:
        os.makedirs(f"{train_out}/{c}", exist_ok=True)
        os.makedirs(f"{val_out}/{c}", exist_ok=True)

    # Separate by label
    cats = [p for p in train_imgs if Path(p).name.startswith("cat.")]
    dogs = [p for p in train_imgs if Path(p).name.startswith("dog.")]

    random.shuffle(cats)
    random.shuffle(dogs)

    # Ensure we have at least 12,500 per class
    need = 12500
    if len(cats) < need or len(dogs) < need:
        raise RuntimeError(f"Not enough images per class for 12,500 cap. "
                           f"Cats={len(cats)}, Dogs={len(dogs)}")

    cats = cats[:need]
    dogs = dogs[:need]

    # 10,000 train + 2,500 val
    cats_train, cats_val = cats[:10000], cats[10000:]
    dogs_train, dogs_val = dogs[:10000], dogs[10000:]

    def copy_files(files, dest):
        for p in files:
            shutil.copy2(p, dest)

    copy_files(cats_train, f"{train_out}/cat")
    copy_files(dogs_train, f"{train_out}/dog")
    copy_files(cats_val,   f"{val_out}/cat")
    copy_files(dogs_val,   f"{val_out}/dog")

    print("Train cats:", len(cats_train), "Train dogs:", len(dogs_train))
    print("Val cats:", len(cats_val), "Val dogs:", len(dogs_val))
    print("Split dir:", cfg.split_root)

    return cfg.split_root

prepare_split_12500_per_class(cfg, rebuild=False)


Split already exists.
Train cats: 10000 Train dogs: 10000
Val cats: 2500 Val dogs: 2500


'/kaggle/working/dogs_cats/split'

Split already exists.
Train cats: 10000 Train dogs: 10000
Val cats: 2500 Val dogs: 2500
'/kaggle/working/dogs_cats/split'

In [46]:
# =========================
# Cell 6 - Dataset + Albumentations
# =========================
class CatsDogsDataset(Dataset):
    def __init__(self, root: str, split: str, transform=None):
        self.transform = transform
        split_dir = os.path.join(root, split)
        class_names = ["cat", "dog"]

        self.samples: List[Tuple[str, int]] = []
        for idx, cname in enumerate(class_names):
            cdir = os.path.join(split_dir, cname)
            if not os.path.isdir(cdir):
                continue
            for fname in os.listdir(cdir):
                fpath = os.path.join(cdir, fname)
                if os.path.isfile(fpath):
                    self.samples.append((fpath, idx))

        if len(self.samples) == 0:
            raise RuntimeError(
                f"No images found in {split_dir}. Expected 'cat' and 'dog' subfolders."
            )

    def __len__(self):
        return len(self.samples)

    def __getitem__(self, i):
        path, label = self.samples[i]
        img = cv2.imread(path)
        if img is None:
            img = np.zeros((cfg.img_size, cfg.img_size, 3), dtype=np.uint8)
        else:
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        if self.transform:
            img = self.transform(image=img)["image"]

        return img, label


def build_transforms(img_size: int):
    mean = (0.485, 0.456, 0.406)
    std  = (0.229, 0.224, 0.225)

    train_tf = A.Compose([
        A.RandomResizedCrop(
            size=(img_size, img_size),
            scale=(0.7, 1.0),
            ratio=(0.75, 1.33),
            p=1.0
        ),
        A.HorizontalFlip(p=0.5),
        A.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1, p=0.5),
        A.RandomBrightnessContrast(p=0.3),
        A.HueSaturationValue(p=0.3),
        A.GaussianBlur(blur_limit=(3, 5), p=0.1),
        A.CoarseDropout(
            num_holes_range=(1, 8),
            hole_height_range=(0.05, 0.12),  # fraction of image height
            hole_width_range=(0.05, 0.12),   # fraction of image width
            fill=0,
            p=0.2
        ),
        A.Normalize(mean=mean, std=std),
            ToTensorV2(),
    ])

    val_tf = A.Compose([
        A.Resize(height=img_size + 32, width=img_size + 32),
        A.CenterCrop(height=img_size, width=img_size),
        A.Normalize(mean=mean, std=std),
        ToTensorV2(),
    ])

    return train_tf, val_tf



train_tf, val_tf = build_transforms(cfg.img_size)

train_ds = CatsDogsDataset(cfg.split_root, "train", transform=train_tf)
val_ds   = CatsDogsDataset(cfg.split_root, "val",   transform=val_tf)

train_loader = DataLoader(
    train_ds, batch_size=cfg.batch_size, shuffle=True,
    num_workers=cfg.num_workers, pin_memory=True, drop_last=True
)
val_loader = DataLoader(
    val_ds, batch_size=cfg.batch_size * 2, shuffle=False,
    num_workers=cfg.num_workers, pin_memory=True
)

print("Train size:", len(train_ds), "Val size:", len(val_ds))
x, y = next(iter(train_loader))
print("Batch:", x.shape, y[:8].tolist())


Train size: 20000 Val size: 5000
Batch: torch.Size([64, 3, 224, 224]) [0, 0, 1, 0, 1, 0, 0, 1]


Train size: 20000 Val size: 5000
Batch: torch.Size([64, 3, 224, 224]) [0, 0, 1, 0, 1, 0, 0, 1]

In [47]:
# =========================
# Cell 7 - Model, Mixup/CutMix, Loss, Optim, Scheduler, EMA
# =========================
model = timm.create_model(
    cfg.model_name,
    pretrained=False,
    num_classes=cfg.num_classes,
    drop_path_rate=cfg.drop_path_rate
).to(cfg.device)

model = model.to(memory_format=torch.channels_last)
  
optimizer = torch.optim.AdamW(
    model.parameters(),
    lr=cfg.lr,
    weight_decay=cfg.weight_decay
)

# Mixup/CutMix helper
mixup_fn = None
if cfg.mixup_alpha > 0 or cfg.cutmix_alpha > 0:
    mixup_fn = Mixup(
        mixup_alpha=cfg.mixup_alpha,
        cutmix_alpha=cfg.cutmix_alpha,
        prob=cfg.mix_prob,
        switch_prob=cfg.switch_prob,
        mode=cfg.mix_mode,
        label_smoothing=cfg.label_smoothing,
        num_classes=cfg.num_classes
    )

# Training loss
train_loss_fn = SoftTargetCrossEntropy() if mixup_fn is not None \
                else LabelSmoothingCrossEntropy(smoothing=cfg.label_smoothing)

# Validation loss (hard labels)
val_loss_fn = nn.CrossEntropyLoss()

# EMA
ema = ModelEmaV2(model, decay=cfg.ema_decay) if cfg.use_ema else None

# Scheduler: warmup + cosine over updates
steps_per_epoch = len(train_loader)
total_updates = cfg.epochs * steps_per_epoch
warmup_updates = cfg.warmup_epochs * steps_per_epoch

scheduler = CosineLRScheduler(
    optimizer,
    t_initial=total_updates,
    lr_min=cfg.min_lr,
    warmup_t=warmup_updates,
    warmup_lr_init=cfg.min_lr,
    cycle_limit=1,
    t_in_epochs=False
)

hard_train_loss_fn = LabelSmoothingCrossEntropy(smoothing=cfg.label_smoothing)
soft_train_loss_fn = SoftTargetCrossEntropy()

scaler = torch.amp.GradScaler("cuda", enabled=(cfg.amp and cfg.device.startswith("cuda")))

print("Model:", cfg.model_name)
print("Params (M):", sum(p.numel() for p in model.parameters()) / 1e6)
print("Mixup/CutMix:", "ON" if mixup_fn is not None else "OFF")
print("Total updates:", total_updates, "Warmup updates:", warmup_updates)


Model: convnext_small
Params (M): 49.456226
Mixup/CutMix: ON
Total updates: 31200 Warmup updates: 1560


Model: convnext_small
Params (M): 49.456226
Mixup/CutMix: ON
Total updates: 31200 Warmup updates: 1560

In [48]:
# =========================
# Cell 8 - Train / Eval loops
# =========================
def accuracy(logits: torch.Tensor, targets: torch.Tensor) -> float:
    preds = torch.argmax(logits, dim=1)
    return (preds == targets).float().mean().item()


def train_one_epoch(epoch: int):
    model.train()
    running_loss, running_acc, n = 0.0, 0.0, 0

    active_mixup = mixup_fn if (mixup_fn is not None and epoch >= cfg.mix_start_epoch) else None

    for step, (images, targets) in enumerate(train_loader):
        images = images.to(cfg.device, non_blocking=True)
        targets = targets.to(cfg.device, non_blocking=True)

        if active_mixup is not None:
            images, targets = active_mixup(images, targets)

        optimizer.zero_grad(set_to_none=True)

        with torch.amp.autocast("cuda", enabled=scaler.is_enabled()):
            logits = model(images)
            loss = soft_train_loss_fn(logits, targets) if active_mixup is not None \
                   else hard_train_loss_fn(logits, targets)

        if scaler.is_enabled():
            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()
        else:
            loss.backward()
            optimizer.step()

        if ema is not None:
            ema.update(model)

        global_update = epoch * steps_per_epoch + step + 1
        scheduler.step_update(global_update)

        bs = images.size(0)
        running_loss += loss.item() * bs
        n += bs

        # only meaningful without mixup
        if active_mixup is None:
            running_acc += (logits.argmax(1) == targets).float().sum().item()

        if (step + 1) % cfg.log_every == 0:
            avg_loss = running_loss / n
            if active_mixup is None:
                avg_acc = running_acc / n
                print(f"  step {step+1}/{steps_per_epoch} - loss {avg_loss:.4f} - acc {avg_acc:.4f}")
            else:
                print(f"  step {step+1}/{steps_per_epoch} - loss {avg_loss:.4f}")

    epoch_loss = running_loss / n
    epoch_acc = (running_acc / n) if active_mixup is None else float("nan")
    return epoch_loss, epoch_acc

@torch.no_grad()
def evaluate(eval_model):
    eval_model.eval()
    running_loss = 0.0
    running_acc = 0.0
    n = 0

    for images, targets in val_loader:
        images = images.to(cfg.device, non_blocking=True).to(memory_format=torch.channels_last)

        targets = targets.to(cfg.device, non_blocking=True)

        logits = eval_model(images)
        loss = val_loss_fn(logits, targets)

        bs = images.size(0)
        running_loss += loss.item() * bs
        running_acc += accuracy(logits, targets) * bs
        n += bs

    return running_loss / n, running_acc / n

In [49]:
# =========================
# Cell 9 - Training run
# =========================
best_acc = 0.0

print("Device:", cfg.device)
print(f"Train images: {len(train_ds)} | Val images: {len(val_ds)}")
print("")

for epoch in range(cfg.epochs):
    print({
        "model": cfg.model_name,
        "lr": cfg.lr,
        "batch": cfg.batch_size,
        "drop_path": cfg.drop_path_rate,
        "label_smoothing": cfg.label_smoothing,
        "mixup_alpha": cfg.mixup_alpha,
        "cutmix_alpha": cfg.cutmix_alpha,
    })
    print(f"Epoch {epoch+1}/{cfg.epochs}")

    t0 = time.time()
    train_loss, train_acc = train_one_epoch(epoch)

    eval_model = ema.module if ema is not None else model
    val_loss, val_acc = evaluate(eval_model)

    dt = time.time() - t0

    if math.isnan(train_acc):
        print(f"  train loss {train_loss:.4f} | val loss {val_loss:.4f} | val acc {val_acc:.4f} | {dt:.1f}s")
    else:
        print(f"  train loss {train_loss:.4f} | train acc {train_acc:.4f} | val loss {val_loss:.4f} | val acc {val_acc:.4f} | {dt:.1f}s")

    if val_acc > best_acc:
        best_acc = val_acc
        torch.save({
            "model_name": cfg.model_name,
            "model_state": eval_model.state_dict(),
            "best_acc": best_acc,
            "cfg": cfg.__dict__
        }, cfg.save_path)
        print(f"  Saved best EMA checkpoint to {cfg.save_path} (acc={best_acc:.4f})")
    print("LR end of epoch:", optimizer.param_groups[0]["lr"])
    print("")

print("Best val acc:", best_acc)


Device: cuda
Train images: 20000 | Val images: 5000

{'model': 'convnext_small', 'lr': 0.0003, 'batch': 64, 'drop_path': 0.05, 'label_smoothing': 0.05, 'mixup_alpha': 0.1, 'cutmix_alpha': 0.2}
Epoch 1/100
  step 50/312 - loss 0.7011 - acc 0.5041
  step 100/312 - loss 0.7029 - acc 0.4983
  step 150/312 - loss 0.7053 - acc 0.5018
  step 200/312 - loss 0.7042 - acc 0.5081
  step 250/312 - loss 0.7020 - acc 0.5134
  step 300/312 - loss 0.7004 - acc 0.5193
  train loss 0.6998 | train acc 0.5208 | val loss 0.6710 | val acc 0.5774 | 345.8s
  Saved best EMA checkpoint to /kaggle/working/best_ema.pt (acc=0.5774)
LR end of epoch: 6.079999999999998e-05

{'model': 'convnext_small', 'lr': 0.0003, 'batch': 64, 'drop_path': 0.05, 'label_smoothing': 0.05, 'mixup_alpha': 0.1, 'cutmix_alpha': 0.2}
Epoch 2/100
  step 50/312 - loss 0.6814 - acc 0.5816
  step 100/312 - loss 0.6852 - acc 0.5708
  step 150/312 - loss 0.6852 - acc 0.5674
  step 200/312 - loss 0.6821 - acc 0.5730
  step 250/312 - loss 0.6798 -

Device: cuda
Train images: 20000 | Val images: 5000

{'model': 'convnext_small', 'lr': 0.0003, 'batch': 64, 'drop_path': 0.05, 'label_smoothing': 0.05, 'mixup_alpha': 0.1, 'cutmix_alpha': 0.2}
Epoch 1/100
  step 50/312 - loss 0.7011 - acc 0.5041
  step 100/312 - loss 0.7029 - acc 0.4983
  step 150/312 - loss 0.7053 - acc 0.5018
  step 200/312 - loss 0.7042 - acc 0.5081
  step 250/312 - loss 0.7020 - acc 0.5134
  step 300/312 - loss 0.7004 - acc 0.5193
  train loss 0.6998 | train acc 0.5208 | val loss 0.6710 | val acc 0.5774 | 345.8s
  Saved best EMA checkpoint to /kaggle/working/best_ema.pt (acc=0.5774)
LR end of epoch: 6.079999999999998e-05

{'model': 'convnext_small', 'lr': 0.0003, 'batch': 64, 'drop_path': 0.05, 'label_smoothing': 0.05, 'mixup_alpha': 0.1, 'cutmix_alpha': 0.2}
Epoch 2/100
  step 50/312 - loss 0.6814 - acc 0.5816
  step 100/312 - loss 0.6852 - acc 0.5708
  step 150/312 - loss 0.6852 - acc 0.5674
  step 200/312 - loss 0.6821 - acc 0.5730
  step 250/312 - loss 0.6798 - acc 0.5760
  step 300/312 - loss 0.6773 - acc 0.5790
  train loss 0.6768 | train acc 0.5793 | val loss 0.6516 | val acc 0.6018 | 345.5s
  Saved best EMA checkpoint to /kaggle/working/best_ema.pt (acc=0.6018)
LR end of epoch: 0.00012059999999999996

{'model': 'convnext_small', 'lr': 0.0003, 'batch': 64, 'drop_path': 0.05, 'label_smoothing': 0.05, 'mixup_alpha': 0.1, 'cutmix_alpha': 0.2}
Epoch 3/100
  step 50/312 - loss 0.6725 - acc 0.5928
  step 100/312 - loss 0.6680 - acc 0.5945
  step 150/312 - loss 0.6667 - acc 0.5986
  step 200/312 - loss 0.6677 - acc 0.5956
  step 250/312 - loss 0.6643 - acc 0.6018
  step 300/312 - loss 0.6607 - acc 0.6066
  train loss 0.6596 | train acc 0.6080 | val loss 0.6312 | val acc 0.6298 | 345.7s
  Saved best EMA checkpoint to /kaggle/working/best_ema.pt (acc=0.6298)
LR end of epoch: 0.00018039999999999997

{'model': 'convnext_small', 'lr': 0.0003, 'batch': 64, 'drop_path': 0.05, 'label_smoothing': 0.05, 'mixup_alpha': 0.1, 'cutmix_alpha': 0.2}
Epoch 4/100
  step 50/312 - loss 0.6389 - acc 0.6409
  step 100/312 - loss 0.6392 - acc 0.6427
  step 150/312 - loss 0.6386 - acc 0.6405
  step 200/312 - loss 0.6356 - acc 0.6449
  step 250/312 - loss 0.6303 - acc 0.6519
  step 300/312 - loss 0.6288 - acc 0.6550
  train loss 0.6292 | train acc 0.6543 | val loss 0.6126 | val acc 0.6548 | 345.3s
  Saved best EMA checkpoint to /kaggle/working/best_ema.pt (acc=0.6548)
LR end of epoch: 0.00024019999999999993

{'model': 'convnext_small', 'lr': 0.0003, 'batch': 64, 'drop_path': 0.05, 'label_smoothing': 0.05, 'mixup_alpha': 0.1, 'cutmix_alpha': 0.2}
Epoch 5/100
  step 50/312 - loss 0.6087 - acc 0.6778
  step 100/312 - loss 0.6098 - acc 0.6763
  step 150/312 - loss 0.6079 - acc 0.6773
  step 200/312 - loss 0.6060 - acc 0.6791
  step 250/312 - loss 0.6061 - acc 0.6789
  step 300/312 - loss 0.6038 - acc 0.6821
  train loss 0.6034 | train acc 0.6830 | val loss 0.5581 | val acc 0.7044 | 345.5s
  Saved best EMA checkpoint to /kaggle/working/best_ema.pt (acc=0.7044)
LR end of epoch: 0.00029815940691897306

{'model': 'convnext_small', 'lr': 0.0003, 'batch': 64, 'drop_path': 0.05, 'label_smoothing': 0.05, 'mixup_alpha': 0.1, 'cutmix_alpha': 0.2}
Epoch 6/100
  step 50/312 - loss 0.6294
  step 100/312 - loss 0.6345
  step 150/312 - loss 0.6253
  step 200/312 - loss 0.6236
  step 250/312 - loss 0.6223
  step 300/312 - loss 0.6209
  train loss 0.6198 | val loss 0.5408 | val acc 0.7200 | 346.2s
  Saved best EMA checkpoint to /kaggle/working/best_ema.pt (acc=0.7200)
LR end of epoch: 0.00029735194398393893

{'model': 'convnext_small', 'lr': 0.0003, 'batch': 64, 'drop_path': 0.05, 'label_smoothing': 0.05, 'mixup_alpha': 0.1, 'cutmix_alpha': 0.2}
Epoch 7/100
  step 50/312 - loss 0.6175
  step 100/312 - loss 0.6143
  step 150/312 - loss 0.6106
  step 200/312 - loss 0.6100
  step 250/312 - loss 0.6095
  step 300/312 - loss 0.6091
  train loss 0.6091 | val loss 0.5486 | val acc 0.7290 | 345.4s
  Saved best EMA checkpoint to /kaggle/working/best_ema.pt (acc=0.7290)
LR end of epoch: 0.0002963995559098427

{'model': 'convnext_small', 'lr': 0.0003, 'batch': 64, 'drop_path': 0.05, 'label_smoothing': 0.05, 'mixup_alpha': 0.1, 'cutmix_alpha': 0.2}
Epoch 8/100
  step 50/312 - loss 0.6070
  step 100/312 - loss 0.6024
  step 150/312 - loss 0.6050
  step 200/312 - loss 0.6006
  step 250/312 - loss 0.5999
  step 300/312 - loss 0.5997
  train loss 0.5989 | val loss 0.5187 | val acc 0.7398 | 345.7s
  Saved best EMA checkpoint to /kaggle/working/best_ema.pt (acc=0.7398)
LR end of epoch: 0.00029530318258873034

{'model': 'convnext_small', 'lr': 0.0003, 'batch': 64, 'drop_path': 0.05, 'label_smoothing': 0.05, 'mixup_alpha': 0.1, 'cutmix_alpha': 0.2}
Epoch 9/100
  step 50/312 - loss 0.5810
  step 100/312 - loss 0.5839
  step 150/312 - loss 0.5810
  step 200/312 - loss 0.5853
  step 250/312 - loss 0.5879
  step 300/312 - loss 0.5872
  train loss 0.5876 | val loss 0.4951 | val acc 0.7626 | 345.4s
  Saved best EMA checkpoint to /kaggle/working/best_ema.pt (acc=0.7626)
LR end of epoch: 0.00029406390600870296

{'model': 'convnext_small', 'lr': 0.0003, 'batch': 64, 'drop_path': 0.05, 'label_smoothing': 0.05, 'mixup_alpha': 0.1, 'cutmix_alpha': 0.2}
Epoch 10/100
  step 50/312 - loss 0.5791
  step 100/312 - loss 0.5770
  step 150/312 - loss 0.5786
  step 200/312 - loss 0.5746
  step 250/312 - loss 0.5741
  step 300/312 - loss 0.5751
  train loss 0.5750 | val loss 0.4838 | val acc 0.7764 | 345.6s
  Saved best EMA checkpoint to /kaggle/working/best_ema.pt (acc=0.7764)
LR end of epoch: 0.0002926829491861254

{'model': 'convnext_small', 'lr': 0.0003, 'batch': 64, 'drop_path': 0.05, 'label_smoothing': 0.05, 'mixup_alpha': 0.1, 'cutmix_alpha': 0.2}
Epoch 11/100
  step 50/312 - loss 0.5487
  step 100/312 - loss 0.5603
  step 150/312 - loss 0.5678
  step 200/312 - loss 0.5675
  step 250/312 - loss 0.5635
  step 300/312 - loss 0.5604
  train loss 0.5610 | val loss 0.4754 | val acc 0.7814 | 345.5s
  Saved best EMA checkpoint to /kaggle/working/best_ema.pt (acc=0.7814)
LR end of epoch: 0.0002911616749586567

{'model': 'convnext_small', 'lr': 0.0003, 'batch': 64, 'drop_path': 0.05, 'label_smoothing': 0.05, 'mixup_alpha': 0.1, 'cutmix_alpha': 0.2}
Epoch 12/100
  step 50/312 - loss 0.5760
  step 100/312 - loss 0.5639
  step 150/312 - loss 0.5616
  step 200/312 - loss 0.5612
  step 250/312 - loss 0.5612
  step 300/312 - loss 0.5618
  train loss 0.5622 | val loss 0.4590 | val acc 0.7882 | 345.5s
  Saved best EMA checkpoint to /kaggle/working/best_ema.pt (acc=0.7882)
LR end of epoch: 0.00028950158464029354

{'model': 'convnext_small', 'lr': 0.0003, 'batch': 64, 'drop_path': 0.05, 'label_smoothing': 0.05, 'mixup_alpha': 0.1, 'cutmix_alpha': 0.2}
Epoch 13/100
  step 50/312 - loss 0.5363
  step 100/312 - loss 0.5427
  step 150/312 - loss 0.5505
  step 200/312 - loss 0.5527
  step 250/312 - loss 0.5508
  step 300/312 - loss 0.5481
  train loss 0.5491 | val loss 0.4381 | val acc 0.8144 | 345.2s
  Saved best EMA checkpoint to /kaggle/working/best_ema.pt (acc=0.8144)
LR end of epoch: 0.00028770431653975514

{'model': 'convnext_small', 'lr': 0.0003, 'batch': 64, 'drop_path': 0.05, 'label_smoothing': 0.05, 'mixup_alpha': 0.1, 'cutmix_alpha': 0.2}
Epoch 14/100
  step 50/312 - loss 0.5292
  step 100/312 - loss 0.5346
  step 150/312 - loss 0.5318
  step 200/312 - loss 0.5318
  step 250/312 - loss 0.5329
  step 300/312 - loss 0.5328
  train loss 0.5307 | val loss 0.4054 | val acc 0.8142 | 345.9s
LR end of epoch: 0.0002857716443436699

{'model': 'convnext_small', 'lr': 0.0003, 'batch': 64, 'drop_path': 0.05, 'label_smoothing': 0.05, 'mixup_alpha': 0.1, 'cutmix_alpha': 0.2}
Epoch 15/100
  step 50/312 - loss 0.5251
  step 100/312 - loss 0.5260
  step 150/312 - loss 0.5283
  step 200/312 - loss 0.5286
  step 250/312 - loss 0.5295
  step 300/312 - loss 0.5320
  train loss 0.5318 | val loss 0.3795 | val acc 0.8394 | 345.1s
  Saved best EMA checkpoint to /kaggle/working/best_ema.pt (acc=0.8394)
LR end of epoch: 0.00028370547536616097

{'model': 'convnext_small', 'lr': 0.0003, 'batch': 64, 'drop_path': 0.05, 'label_smoothing': 0.05, 'mixup_alpha': 0.1, 'cutmix_alpha': 0.2}
Epoch 16/100
  step 50/312 - loss 0.5167
  step 100/312 - loss 0.5213
  step 150/312 - loss 0.5203
  step 200/312 - loss 0.5147
  step 250/312 - loss 0.5155
  step 300/312 - loss 0.5167
  train loss 0.5166 | val loss 0.3836 | val acc 0.8386 | 346.0s
LR end of epoch: 0.00028150784866655756

{'model': 'convnext_small', 'lr': 0.0003, 'batch': 64, 'drop_path': 0.05, 'label_smoothing': 0.05, 'mixup_alpha': 0.1, 'cutmix_alpha': 0.2}
Epoch 17/100
  step 50/312 - loss 0.5131
  step 100/312 - loss 0.5249
  step 150/312 - loss 0.5148
  step 200/312 - loss 0.5096
  step 250/312 - loss 0.5063
  step 300/312 - loss 0.5065
  train loss 0.5066 | val loss 0.3782 | val acc 0.8368 | 345.5s
LR end of epoch: 0.00027918093303708956

{'model': 'convnext_small', 'lr': 0.0003, 'batch': 64, 'drop_path': 0.05, 'label_smoothing': 0.05, 'mixup_alpha': 0.1, 'cutmix_alpha': 0.2}
Epoch 18/100
  step 50/312 - loss 0.4860
  step 100/312 - loss 0.4884
  step 150/312 - loss 0.4869
  step 200/312 - loss 0.4857
  step 250/312 - loss 0.4880
  step 300/312 - loss 0.4903
  train loss 0.4905 | val loss 0.3670 | val acc 0.8464 | 345.6s
  Saved best EMA checkpoint to /kaggle/working/best_ema.pt (acc=0.8464)
LR end of epoch: 0.00027672702486255125

{'model': 'convnext_small', 'lr': 0.0003, 'batch': 64, 'drop_path': 0.05, 'label_smoothing': 0.05, 'mixup_alpha': 0.1, 'cutmix_alpha': 0.2}
Epoch 19/100
  step 50/312 - loss 0.5028
  step 100/312 - loss 0.5082
  step 150/312 - loss 0.5009
  step 200/312 - loss 0.4999
  step 250/312 - loss 0.4960
  step 300/312 - loss 0.4937
  train loss 0.4935 | val loss 0.3492 | val acc 0.8524 | 345.5s
  Saved best EMA checkpoint to /kaggle/working/best_ema.pt (acc=0.8524)
LR end of epoch: 0.00027414854585404696

{'model': 'convnext_small', 'lr': 0.0003, 'batch': 64, 'drop_path': 0.05, 'label_smoothing': 0.05, 'mixup_alpha': 0.1, 'cutmix_alpha': 0.2}
Epoch 20/100
  step 50/312 - loss 0.5009
  step 100/312 - loss 0.5007
  step 150/312 - loss 0.4956
  step 200/312 - loss 0.4918
  step 250/312 - loss 0.4890
  step 300/312 - loss 0.4819
  train loss 0.4807 | val loss 0.3250 | val acc 0.8588 | 346.5s
  Saved best EMA checkpoint to /kaggle/working/best_ema.pt (acc=0.8588)
LR end of epoch: 0.0002714480406590546

{'model': 'convnext_small', 'lr': 0.0003, 'batch': 64, 'drop_path': 0.05, 'label_smoothing': 0.05, 'mixup_alpha': 0.1, 'cutmix_alpha': 0.2}
Epoch 21/100
  step 50/312 - loss 0.4629
  step 100/312 - loss 0.4682
  step 150/312 - loss 0.4720
  step 200/312 - loss 0.4771
  step 250/312 - loss 0.4771
  step 300/312 - loss 0.4728
  train loss 0.4729 | val loss 0.3101 | val acc 0.8762 | 346.1s
  Saved best EMA checkpoint to /kaggle/working/best_ema.pt (acc=0.8762)
LR end of epoch: 0.0002686281743501657

{'model': 'convnext_small', 'lr': 0.0003, 'batch': 64, 'drop_path': 0.05, 'label_smoothing': 0.05, 'mixup_alpha': 0.1, 'cutmix_alpha': 0.2}
Epoch 22/100
  step 50/312 - loss 0.4863
  step 100/312 - loss 0.4869
  step 150/312 - loss 0.4784
  step 200/312 - loss 0.4734
  step 250/312 - loss 0.4692
  step 300/312 - loss 0.4669
  train loss 0.4659 | val loss 0.2996 | val acc 0.8782 | 346.6s
  Saved best EMA checkpoint to /kaggle/working/best_ema.pt (acc=0.8782)
LR end of epoch: 0.00026569172979498044

{'model': 'convnext_small', 'lr': 0.0003, 'batch': 64, 'drop_path': 0.05, 'label_smoothing': 0.05, 'mixup_alpha': 0.1, 'cutmix_alpha': 0.2}
Epoch 23/100
  step 50/312 - loss 0.4712
  step 100/312 - loss 0.4584
  step 150/312 - loss 0.4620
  step 200/312 - loss 0.4650
  step 250/312 - loss 0.4721
  step 300/312 - loss 0.4731
  train loss 0.4724 | val loss 0.3156 | val acc 0.8782 | 346.0s
LR end of epoch: 0.00026264160490975367

{'model': 'convnext_small', 'lr': 0.0003, 'batch': 64, 'drop_path': 0.05, 'label_smoothing': 0.05, 'mixup_alpha': 0.1, 'cutmix_alpha': 0.2}
Epoch 24/100
  step 50/312 - loss 0.4277
  step 100/312 - loss 0.4312
  step 150/312 - loss 0.4425
  step 200/312 - loss 0.4329
  step 250/312 - loss 0.4338
  step 300/312 - loss 0.4346
  train loss 0.4310 | val loss 0.2743 | val acc 0.8844 | 345.6s
  Saved best EMA checkpoint to /kaggle/working/best_ema.pt (acc=0.8844)
LR end of epoch: 0.00025948080979950103

{'model': 'convnext_small', 'lr': 0.0003, 'batch': 64, 'drop_path': 0.05, 'label_smoothing': 0.05, 'mixup_alpha': 0.1, 'cutmix_alpha': 0.2}
Epoch 25/100
  step 50/312 - loss 0.4628
  step 100/312 - loss 0.4471
  step 150/312 - loss 0.4488
  step 200/312 - loss 0.4456
  step 250/312 - loss 0.4498
  step 300/312 - loss 0.4517
  train loss 0.4529 | val loss 0.2885 | val acc 0.8874 | 345.0s
  Saved best EMA checkpoint to /kaggle/working/best_ema.pt (acc=0.8874)
LR end of epoch: 0.00025621246378738883

{'model': 'convnext_small', 'lr': 0.0003, 'batch': 64, 'drop_path': 0.05, 'label_smoothing': 0.05, 'mixup_alpha': 0.1, 'cutmix_alpha': 0.2}
Epoch 26/100
  step 50/312 - loss 0.4264
  step 100/312 - loss 0.4309
  step 150/312 - loss 0.4430
  step 200/312 - loss 0.4359
  step 250/312 - loss 0.4368
  step 300/312 - loss 0.4327
  train loss 0.4320 | val loss 0.2716 | val acc 0.8948 | 346.3s
  Saved best EMA checkpoint to /kaggle/working/best_ema.pt (acc=0.8948)
LR end of epoch: 0.00025283979233633894

{'model': 'convnext_small', 'lr': 0.0003, 'batch': 64, 'drop_path': 0.05, 'label_smoothing': 0.05, 'mixup_alpha': 0.1, 'cutmix_alpha': 0.2}
Epoch 27/100
  step 50/312 - loss 0.4492
  step 100/312 - loss 0.4252
  step 150/312 - loss 0.4238
  step 200/312 - loss 0.4232
  step 250/312 - loss 0.4268
  step 300/312 - loss 0.4282
  train loss 0.4286 | val loss 0.2910 | val acc 0.8888 | 345.3s
LR end of epoch: 0.0002493661238658859

{'model': 'convnext_small', 'lr': 0.0003, 'batch': 64, 'drop_path': 0.05, 'label_smoothing': 0.05, 'mixup_alpha': 0.1, 'cutmix_alpha': 0.2}
Epoch 28/100
  step 50/312 - loss 0.4045
  step 100/312 - loss 0.4247
  step 150/312 - loss 0.4139
  step 200/312 - loss 0.4102
  step 250/312 - loss 0.4087
  step 300/312 - loss 0.4121
  train loss 0.4099 | val loss 0.2486 | val acc 0.9096 | 345.6s
  Saved best EMA checkpoint to /kaggle/working/best_ema.pt (acc=0.9096)
LR end of epoch: 0.0002457948864674291

{'model': 'convnext_small', 'lr': 0.0003, 'batch': 64, 'drop_path': 0.05, 'label_smoothing': 0.05, 'mixup_alpha': 0.1, 'cutmix_alpha': 0.2}
Epoch 29/100
  step 50/312 - loss 0.4183
  step 100/312 - loss 0.4148
  step 150/312 - loss 0.4164
  step 200/312 - loss 0.4174
  step 250/312 - loss 0.4206
  step 300/312 - loss 0.4255
  train loss 0.4241 | val loss 0.2440 | val acc 0.9076 | 346.2s
LR end of epoch: 0.00024212960452111996

{'model': 'convnext_small', 'lr': 0.0003, 'batch': 64, 'drop_path': 0.05, 'label_smoothing': 0.05, 'mixup_alpha': 0.1, 'cutmix_alpha': 0.2}
Epoch 30/100
  step 50/312 - loss 0.3939
  step 100/312 - loss 0.4042
  step 150/312 - loss 0.4014
  step 200/312 - loss 0.4067
  step 250/312 - loss 0.4096
  step 300/312 - loss 0.4103
  train loss 0.4106 | val loss 0.2395 | val acc 0.9158 | 346.1s
  Saved best EMA checkpoint to /kaggle/working/best_ema.pt (acc=0.9158)
LR end of epoch: 0.0002383738952177247

{'model': 'convnext_small', 'lr': 0.0003, 'batch': 64, 'drop_path': 0.05, 'label_smoothing': 0.05, 'mixup_alpha': 0.1, 'cutmix_alpha': 0.2}
Epoch 31/100
  step 50/312 - loss 0.4266
  step 100/312 - loss 0.4132
  step 150/312 - loss 0.4040
  step 200/312 - loss 0.4067
  step 250/312 - loss 0.4080
  step 300/312 - loss 0.4072
  train loss 0.4081 | val loss 0.2431 | val acc 0.9118 | 345.9s
LR end of epoch: 0.00023453146498889348

{'model': 'convnext_small', 'lr': 0.0003, 'batch': 64, 'drop_path': 0.05, 'label_smoothing': 0.05, 'mixup_alpha': 0.1, 'cutmix_alpha': 0.2}
Epoch 32/100
  step 50/312 - loss 0.3986
  step 100/312 - loss 0.4087
  step 150/312 - loss 0.3906
  step 200/312 - loss 0.3893
  step 250/312 - loss 0.3909
  step 300/312 - loss 0.3918
  train loss 0.3915 | val loss 0.2279 | val acc 0.9198 | 345.2s
  Saved best EMA checkpoint to /kaggle/working/best_ema.pt (acc=0.9198)
LR end of epoch: 0.00023060610584935996

{'model': 'convnext_small', 'lr': 0.0003, 'batch': 64, 'drop_path': 0.05, 'label_smoothing': 0.05, 'mixup_alpha': 0.1, 'cutmix_alpha': 0.2}
Epoch 33/100
  step 50/312 - loss 0.3726
  step 100/312 - loss 0.4144
  step 150/312 - loss 0.3979
  step 200/312 - loss 0.3877
  step 250/312 - loss 0.3907
  step 300/312 - loss 0.3938
  train loss 0.3924 | val loss 0.2069 | val acc 0.9214 | 345.2s
  Saved best EMA checkpoint to /kaggle/working/best_ema.pt (acc=0.9214)
LR end of epoch: 0.00022660169165468046

{'model': 'convnext_small', 'lr': 0.0003, 'batch': 64, 'drop_path': 0.05, 'label_smoothing': 0.05, 'mixup_alpha': 0.1, 'cutmix_alpha': 0.2}
Epoch 34/100
  step 50/312 - loss 0.3742
  step 100/312 - loss 0.3914
  step 150/312 - loss 0.3948
  step 200/312 - loss 0.3935
  step 250/312 - loss 0.3934
  step 300/312 - loss 0.3953
  train loss 0.3939 | val loss 0.2108 | val acc 0.9166 | 345.4s
LR end of epoch: 0.00022252217427820638

{'model': 'convnext_small', 'lr': 0.0003, 'batch': 64, 'drop_path': 0.05, 'label_smoothing': 0.05, 'mixup_alpha': 0.1, 'cutmix_alpha': 0.2}
Epoch 35/100
  step 50/312 - loss 0.3772
  step 100/312 - loss 0.3718
  step 150/312 - loss 0.3777
  step 200/312 - loss 0.3774
  step 250/312 - loss 0.3722
  step 300/312 - loss 0.3748
  train loss 0.3734 | val loss 0.2064 | val acc 0.9266 | 345.9s
  Saved best EMA checkpoint to /kaggle/working/best_ema.pt (acc=0.9266)
LR end of epoch: 0.0002183715797110622

{'model': 'convnext_small', 'lr': 0.0003, 'batch': 64, 'drop_path': 0.05, 'label_smoothing': 0.05, 'mixup_alpha': 0.1, 'cutmix_alpha': 0.2}
Epoch 36/100
  step 50/312 - loss 0.3523
  step 100/312 - loss 0.3615
  step 150/312 - loss 0.3721
  step 200/312 - loss 0.3758
  step 250/312 - loss 0.3755
  step 300/312 - loss 0.3718
  train loss 0.3710 | val loss 0.2158 | val acc 0.9204 | 345.7s
LR end of epoch: 0.00021415400408897832

{'model': 'convnext_small', 'lr': 0.0003, 'batch': 64, 'drop_path': 0.05, 'label_smoothing': 0.05, 'mixup_alpha': 0.1, 'cutmix_alpha': 0.2}
Epoch 37/100
  step 50/312 - loss 0.3398
  step 100/312 - loss 0.3676
  step 150/312 - loss 0.3705
  step 200/312 - loss 0.3724
  step 250/312 - loss 0.3772
  step 300/312 - loss 0.3776
  train loss 0.3783 | val loss 0.2242 | val acc 0.9258 | 345.8s
LR end of epoch: 0.00020987360964989964

{'model': 'convnext_small', 'lr': 0.0003, 'batch': 64, 'drop_path': 0.05, 'label_smoothing': 0.05, 'mixup_alpha': 0.1, 'cutmix_alpha': 0.2}
Epoch 38/100
  step 50/312 - loss 0.3561
  step 100/312 - loss 0.3710
  step 150/312 - loss 0.3736
  step 200/312 - loss 0.3844
  step 250/312 - loss 0.3754
  step 300/312 - loss 0.3720
  train loss 0.3719 | val loss 0.2174 | val acc 0.9258 | 346.2s
LR end of epoch: 0.0002055346206263593

{'model': 'convnext_small', 'lr': 0.0003, 'batch': 64, 'drop_path': 0.05, 'label_smoothing': 0.05, 'mixup_alpha': 0.1, 'cutmix_alpha': 0.2}
Epoch 39/100
  step 50/312 - loss 0.3524
  step 100/312 - loss 0.3571
  step 150/312 - loss 0.3668
  step 200/312 - loss 0.3682
  step 250/312 - loss 0.3652
  step 300/312 - loss 0.3570
  train loss 0.3586 | val loss 0.1923 | val acc 0.9322 | 346.1s
  Saved best EMA checkpoint to /kaggle/working/best_ema.pt (acc=0.9322)
LR end of epoch: 0.00020114131907667107

{'model': 'convnext_small', 'lr': 0.0003, 'batch': 64, 'drop_path': 0.05, 'label_smoothing': 0.05, 'mixup_alpha': 0.1, 'cutmix_alpha': 0.2}
Epoch 40/100
  step 50/312 - loss 0.3368
  step 100/312 - loss 0.3618
  step 150/312 - loss 0.3554
  step 200/312 - loss 0.3606
  step 250/312 - loss 0.3628
  step 300/312 - loss 0.3587
  train loss 0.3608 | val loss 0.2117 | val acc 0.9296 | 345.8s
LR end of epoch: 0.0001966980406590546

{'model': 'convnext_small', 'lr': 0.0003, 'batch': 64, 'drop_path': 0.05, 'label_smoothing': 0.05, 'mixup_alpha': 0.1, 'cutmix_alpha': 0.2}
Epoch 41/100
  step 50/312 - loss 0.3372
  step 100/312 - loss 0.3483
  step 150/312 - loss 0.3538
  step 200/312 - loss 0.3591
  step 250/312 - loss 0.3559
  step 300/312 - loss 0.3551
  train loss 0.3557 | val loss 0.2047 | val acc 0.9304 | 344.8s
LR end of epoch: 0.00019220917035286475

{'model': 'convnext_small', 'lr': 0.0003, 'batch': 64, 'drop_path': 0.05, 'label_smoothing': 0.05, 'mixup_alpha': 0.1, 'cutmix_alpha': 0.2}
Epoch 42/100
  step 50/312 - loss 0.3521
  step 100/312 - loss 0.3497
  step 150/312 - loss 0.3422
  step 200/312 - loss 0.3529
  step 250/312 - loss 0.3566
  step 300/312 - loss 0.3580
  train loss 0.3573 | val loss 0.1884 | val acc 0.9376 | 345.7s
  Saved best EMA checkpoint to /kaggle/working/best_ema.pt (acc=0.9376)
LR end of epoch: 0.00018767913813114574

{'model': 'convnext_small', 'lr': 0.0003, 'batch': 64, 'drop_path': 0.05, 'label_smoothing': 0.05, 'mixup_alpha': 0.1, 'cutmix_alpha': 0.2}
Epoch 43/100
  step 50/312 - loss 0.3162
  step 100/312 - loss 0.3412
  step 150/312 - loss 0.3425
  step 200/312 - loss 0.3473
  step 250/312 - loss 0.3447
  step 300/312 - loss 0.3478
  train loss 0.3466 | val loss 0.2066 | val acc 0.9280 | 345.5s
LR end of epoch: 0.00018311241458878307

{'model': 'convnext_small', 'lr': 0.0003, 'batch': 64, 'drop_path': 0.05, 'label_smoothing': 0.05, 'mixup_alpha': 0.1, 'cutmix_alpha': 0.2}
Epoch 44/100
  step 50/312 - loss 0.3406
  step 100/312 - loss 0.3546
  step 150/312 - loss 0.3547
  step 200/312 - loss 0.3566
  step 250/312 - loss 0.3554
  step 300/312 - loss 0.3509
  train loss 0.3490 | val loss 0.1781 | val acc 0.9342 | 345.9s
LR end of epoch: 0.0001785135065305658

{'model': 'convnext_small', 'lr': 0.0003, 'batch': 64, 'drop_path': 0.05, 'label_smoothing': 0.05, 'mixup_alpha': 0.1, 'cutmix_alpha': 0.2}
Epoch 45/100
  step 50/312 - loss 0.3345
  step 100/312 - loss 0.3523
  step 150/312 - loss 0.3562
  step 200/312 - loss 0.3607
  step 250/312 - loss 0.3660
  step 300/312 - loss 0.3595
  train loss 0.3605 | val loss 0.2097 | val acc 0.9288 | 345.1s
LR end of epoch: 0.00017388695252351449

{'model': 'convnext_small', 'lr': 0.0003, 'batch': 64, 'drop_path': 0.05, 'label_smoothing': 0.05, 'mixup_alpha': 0.1, 'cutmix_alpha': 0.2}
Epoch 46/100
  step 50/312 - loss 0.3409
  step 100/312 - loss 0.3257
  step 150/312 - loss 0.3325
  step 200/312 - loss 0.3344
  step 250/312 - loss 0.3467
  step 300/312 - loss 0.3489
  train loss 0.3499 | val loss 0.1986 | val acc 0.9376 | 346.2s
LR end of epoch: 0.00016923731841786345

{'model': 'convnext_small', 'lr': 0.0003, 'batch': 64, 'drop_path': 0.05, 'label_smoothing': 0.05, 'mixup_alpha': 0.1, 'cutmix_alpha': 0.2}
Epoch 47/100
  step 50/312 - loss 0.3298
  step 100/312 - loss 0.3383
  step 150/312 - loss 0.3474
  step 200/312 - loss 0.3440
  step 250/312 - loss 0.3450
  step 300/312 - loss 0.3499
  train loss 0.3492 | val loss 0.1799 | val acc 0.9342 | 345.9s
LR end of epoch: 0.0001645691928411179

{'model': 'convnext_small', 'lr': 0.0003, 'batch': 64, 'drop_path': 0.05, 'label_smoothing': 0.05, 'mixup_alpha': 0.1, 'cutmix_alpha': 0.2}
Epoch 48/100
  step 50/312 - loss 0.3051
  step 100/312 - loss 0.3186
  step 150/312 - loss 0.3258
  step 200/312 - loss 0.3345
  step 250/312 - loss 0.3391
  step 300/312 - loss 0.3346
  train loss 0.3328 | val loss 0.1851 | val acc 0.9354 | 346.1s
LR end of epoch: 0.00015988718266963235

{'model': 'convnext_small', 'lr': 0.0003, 'batch': 64, 'drop_path': 0.05, 'label_smoothing': 0.05, 'mixup_alpha': 0.1, 'cutmix_alpha': 0.2}
Epoch 49/100
  step 50/312 - loss 0.2935
  step 100/312 - loss 0.3184
  step 150/312 - loss 0.3241
  step 200/312 - loss 0.3320
  step 250/312 - loss 0.3374
  step 300/312 - loss 0.3323
  train loss 0.3341 | val loss 0.1861 | val acc 0.9394 | 345.5s
  Saved best EMA checkpoint to /kaggle/working/best_ema.pt (acc=0.9394)
LR end of epoch: 0.00015519590848218014

{'model': 'convnext_small', 'lr': 0.0003, 'batch': 64, 'drop_path': 0.05, 'label_smoothing': 0.05, 'mixup_alpha': 0.1, 'cutmix_alpha': 0.2}
Epoch 50/100
  step 50/312 - loss 0.3216
  step 100/312 - loss 0.3340
  step 150/312 - loss 0.3340
  step 200/312 - loss 0.3396
  step 250/312 - loss 0.3392
  step 300/312 - loss 0.3351
  train loss 0.3368 | val loss 0.2013 | val acc 0.9342 | 345.7s
LR end of epoch: 0.00015049999999999997

{'model': 'convnext_small', 'lr': 0.0003, 'batch': 64, 'drop_path': 0.05, 'label_smoothing': 0.05, 'mixup_alpha': 0.1, 'cutmix_alpha': 0.2}
Epoch 51/100
  step 50/312 - loss 0.3464
  step 100/312 - loss 0.3393
  step 150/312 - loss 0.3353
  step 200/312 - loss 0.3369
  step 250/312 - loss 0.3355
  step 300/312 - loss 0.3487
  train loss 0.3486 | val loss 0.1921 | val acc 0.9368 | 345.7s
LR end of epoch: 0.00014580409151781983

{'model': 'convnext_small', 'lr': 0.0003, 'batch': 64, 'drop_path': 0.05, 'label_smoothing': 0.05, 'mixup_alpha': 0.1, 'cutmix_alpha': 0.2}
Epoch 52/100
  step 50/312 - loss 0.3946
  step 100/312 - loss 0.3767
  step 150/312 - loss 0.3791
  step 200/312 - loss 0.3730
  step 250/312 - loss 0.3691
  step 300/312 - loss 0.3585
  train loss 0.3580 | val loss 0.1995 | val acc 0.9344 | 346.0s
LR end of epoch: 0.00014111281733036765

{'model': 'convnext_small', 'lr': 0.0003, 'batch': 64, 'drop_path': 0.05, 'label_smoothing': 0.05, 'mixup_alpha': 0.1, 'cutmix_alpha': 0.2}
Epoch 53/100
  step 50/312 - loss 0.3376
  step 100/312 - loss 0.3472
  step 150/312 - loss 0.3343
  step 200/312 - loss 0.3286
  step 250/312 - loss 0.3286
  step 300/312 - loss 0.3232
  train loss 0.3238 | val loss 0.1814 | val acc 0.9366 | 345.4s
LR end of epoch: 0.00013643080715888212

{'model': 'convnext_small', 'lr': 0.0003, 'batch': 64, 'drop_path': 0.05, 'label_smoothing': 0.05, 'mixup_alpha': 0.1, 'cutmix_alpha': 0.2}
Epoch 54/100
  step 50/312 - loss 0.3423
  step 100/312 - loss 0.3465
  step 150/312 - loss 0.3207
  step 200/312 - loss 0.3298
  step 250/312 - loss 0.3256
  step 300/312 - loss 0.3259
  train loss 0.3250 | val loss 0.1777 | val acc 0.9430 | 346.2s
  Saved best EMA checkpoint to /kaggle/working/best_ema.pt (acc=0.9430)
LR end of epoch: 0.00013176268158213652

{'model': 'convnext_small', 'lr': 0.0003, 'batch': 64, 'drop_path': 0.05, 'label_smoothing': 0.05, 'mixup_alpha': 0.1, 'cutmix_alpha': 0.2}
Epoch 55/100
  step 50/312 - loss 0.3803
  step 100/312 - loss 0.3491
  step 150/312 - loss 0.3442
  step 200/312 - loss 0.3380
  step 250/312 - loss 0.3354
  step 300/312 - loss 0.3354
  train loss 0.3362 | val loss 0.1857 | val acc 0.9420 | 345.5s
LR end of epoch: 0.00012711304747648546

{'model': 'convnext_small', 'lr': 0.0003, 'batch': 64, 'drop_path': 0.05, 'label_smoothing': 0.05, 'mixup_alpha': 0.1, 'cutmix_alpha': 0.2}
Epoch 56/100
  step 50/312 - loss 0.3689
  step 100/312 - loss 0.3595
  step 150/312 - loss 0.3529
  step 200/312 - loss 0.3360
  step 250/312 - loss 0.3327
  step 300/312 - loss 0.3267
  train loss 0.3265 | val loss 0.1713 | val acc 0.9408 | 345.6s
LR end of epoch: 0.00012248649346943414

{'model': 'convnext_small', 'lr': 0.0003, 'batch': 64, 'drop_path': 0.05, 'label_smoothing': 0.05, 'mixup_alpha': 0.1, 'cutmix_alpha': 0.2}
Epoch 57/100
  step 50/312 - loss 0.3069
  step 100/312 - loss 0.3204
  step 150/312 - loss 0.3339
  step 200/312 - loss 0.3427
  step 250/312 - loss 0.3479
  step 300/312 - loss 0.3500
  train loss 0.3473 | val loss 0.1741 | val acc 0.9430 | 345.5s
LR end of epoch: 0.0001178875854112169

{'model': 'convnext_small', 'lr': 0.0003, 'batch': 64, 'drop_path': 0.05, 'label_smoothing': 0.05, 'mixup_alpha': 0.1, 'cutmix_alpha': 0.2}
Epoch 58/100
  step 50/312 - loss 0.3442
  step 100/312 - loss 0.3329
  step 150/312 - loss 0.3291
  step 200/312 - loss 0.3267
  step 250/312 - loss 0.3251
  step 300/312 - loss 0.3192
  train loss 0.3178 | val loss 0.1716 | val acc 0.9424 | 345.6s
LR end of epoch: 0.00011332086186885422

{'model': 'convnext_small', 'lr': 0.0003, 'batch': 64, 'drop_path': 0.05, 'label_smoothing': 0.05, 'mixup_alpha': 0.1, 'cutmix_alpha': 0.2}
Epoch 59/100
  step 50/312 - loss 0.3495
  step 100/312 - loss 0.3154
  step 150/312 - loss 0.3198
  step 200/312 - loss 0.3254
  step 250/312 - loss 0.3253
  step 300/312 - loss 0.3232
  train loss 0.3216 | val loss 0.1793 | val acc 0.9382 | 345.8s
LR end of epoch: 0.00010879082964713522

{'model': 'convnext_small', 'lr': 0.0003, 'batch': 64, 'drop_path': 0.05, 'label_smoothing': 0.05, 'mixup_alpha': 0.1, 'cutmix_alpha': 0.2}
Epoch 60/100
  step 50/312 - loss 0.2933
  step 100/312 - loss 0.3033
  step 150/312 - loss 0.3192
  step 200/312 - loss 0.3285
  step 250/312 - loss 0.3306
  step 300/312 - loss 0.3259
  train loss 0.3279 | val loss 0.1720 | val acc 0.9418 | 346.2s
LR end of epoch: 0.00010430195934094535

{'model': 'convnext_small', 'lr': 0.0003, 'batch': 64, 'drop_path': 0.05, 'label_smoothing': 0.05, 'mixup_alpha': 0.1, 'cutmix_alpha': 0.2}
Epoch 61/100
  step 50/312 - loss 0.2791
  step 100/312 - loss 0.3080
  step 150/312 - loss 0.3051
  step 200/312 - loss 0.3119
  step 250/312 - loss 0.3117
  step 300/312 - loss 0.3140
  train loss 0.3116 | val loss 0.1675 | val acc 0.9454 | 345.6s
  Saved best EMA checkpoint to /kaggle/working/best_ema.pt (acc=0.9454)
LR end of epoch: 9.985868092332892e-05

{'model': 'convnext_small', 'lr': 0.0003, 'batch': 64, 'drop_path': 0.05, 'label_smoothing': 0.05, 'mixup_alpha': 0.1, 'cutmix_alpha': 0.2}
Epoch 62/100
  step 50/312 - loss 0.2584
  step 100/312 - loss 0.2874
  step 150/312 - loss 0.2955
  step 200/312 - loss 0.3020
  step 250/312 - loss 0.3090
  step 300/312 - loss 0.3073
  train loss 0.3091 | val loss 0.1651 | val acc 0.9454 | 346.1s
LR end of epoch: 9.546537937364064e-05

{'model': 'convnext_small', 'lr': 0.0003, 'batch': 64, 'drop_path': 0.05, 'label_smoothing': 0.05, 'mixup_alpha': 0.1, 'cutmix_alpha': 0.2}
Epoch 63/100
  step 50/312 - loss 0.3186
  step 100/312 - loss 0.3227
  step 150/312 - loss 0.3126
  step 200/312 - loss 0.3338
  step 250/312 - loss 0.3309
  step 300/312 - loss 0.3247
  train loss 0.3293 | val loss 0.1779 | val acc 0.9452 | 345.6s
LR end of epoch: 9.11263903501003e-05

{'model': 'convnext_small', 'lr': 0.0003, 'batch': 64, 'drop_path': 0.05, 'label_smoothing': 0.05, 'mixup_alpha': 0.1, 'cutmix_alpha': 0.2}
Epoch 64/100
  step 50/312 - loss 0.3432
  step 100/312 - loss 0.3264
  step 150/312 - loss 0.3136
  step 200/312 - loss 0.3160
  step 250/312 - loss 0.3156
  step 300/312 - loss 0.3141
  train loss 0.3143 | val loss 0.1579 | val acc 0.9470 | 345.7s
  Saved best EMA checkpoint to /kaggle/working/best_ema.pt (acc=0.9470)
LR end of epoch: 8.684599591102167e-05

{'model': 'convnext_small', 'lr': 0.0003, 'batch': 64, 'drop_path': 0.05, 'label_smoothing': 0.05, 'mixup_alpha': 0.1, 'cutmix_alpha': 0.2}
Epoch 65/100
  step 50/312 - loss 0.2691
  step 100/312 - loss 0.3118
  step 150/312 - loss 0.3015
  step 200/312 - loss 0.3007
  step 250/312 - loss 0.2975
  step 300/312 - loss 0.2968
  train loss 0.2989 | val loss 0.1727 | val acc 0.9446 | 345.6s
LR end of epoch: 8.262842028893775e-05

{'model': 'convnext_small', 'lr': 0.0003, 'batch': 64, 'drop_path': 0.05, 'label_smoothing': 0.05, 'mixup_alpha': 0.1, 'cutmix_alpha': 0.2}
Epoch 66/100
  step 50/312 - loss 0.3495
  step 100/312 - loss 0.3441
  step 150/312 - loss 0.3330
  step 200/312 - loss 0.3178
  step 250/312 - loss 0.3184
  step 300/312 - loss 0.3124
  train loss 0.3095 | val loss 0.1570 | val acc 0.9442 | 345.5s
LR end of epoch: 7.84778257217936e-05

{'model': 'convnext_small', 'lr': 0.0003, 'batch': 64, 'drop_path': 0.05, 'label_smoothing': 0.05, 'mixup_alpha': 0.1, 'cutmix_alpha': 0.2}
Epoch 67/100
  step 50/312 - loss 0.2986
  step 100/312 - loss 0.3086
  step 150/312 - loss 0.3109
  step 200/312 - loss 0.3121
  step 250/312 - loss 0.3076
  step 300/312 - loss 0.3080
  train loss 0.3119 | val loss 0.1665 | val acc 0.9484 | 345.0s
  Saved best EMA checkpoint to /kaggle/working/best_ema.pt (acc=0.9484)
LR end of epoch: 7.439830834531953e-05

{'model': 'convnext_small', 'lr': 0.0003, 'batch': 64, 'drop_path': 0.05, 'label_smoothing': 0.05, 'mixup_alpha': 0.1, 'cutmix_alpha': 0.2}
Epoch 68/100
  step 50/312 - loss 0.3663
  step 100/312 - loss 0.3257
  step 150/312 - loss 0.3194
  step 200/312 - loss 0.3085
  step 250/312 - loss 0.3120
  step 300/312 - loss 0.3153
  train loss 0.3144 | val loss 0.1637 | val acc 0.9500 | 346.3s
  Saved best EMA checkpoint to /kaggle/working/best_ema.pt (acc=0.9500)
LR end of epoch: 7.039389415064002e-05

{'model': 'convnext_small', 'lr': 0.0003, 'batch': 64, 'drop_path': 0.05, 'label_smoothing': 0.05, 'mixup_alpha': 0.1, 'cutmix_alpha': 0.2}
Epoch 69/100
  step 50/312 - loss 0.3380
  step 100/312 - loss 0.3121
  step 150/312 - loss 0.3195
  step 200/312 - loss 0.3182
  step 250/312 - loss 0.3133
  step 300/312 - loss 0.3139
  train loss 0.3149 | val loss 0.1600 | val acc 0.9472 | 346.0s
LR end of epoch: 6.646853501110649e-05

{'model': 'convnext_small', 'lr': 0.0003, 'batch': 64, 'drop_path': 0.05, 'label_smoothing': 0.05, 'mixup_alpha': 0.1, 'cutmix_alpha': 0.2}
Epoch 70/100
  step 50/312 - loss 0.3114
  step 100/312 - loss 0.3189
  step 150/312 - loss 0.3213
  step 200/312 - loss 0.3238
  step 250/312 - loss 0.3277
  step 300/312 - loss 0.3279
  train loss 0.3224 | val loss 0.1489 | val acc 0.9496 | 345.9s
LR end of epoch: 6.262610478227527e-05

{'model': 'convnext_small', 'lr': 0.0003, 'batch': 64, 'drop_path': 0.05, 'label_smoothing': 0.05, 'mixup_alpha': 0.1, 'cutmix_alpha': 0.2}
Epoch 71/100
  step 50/312 - loss 0.3095
  step 100/312 - loss 0.2875
  step 150/312 - loss 0.3044
  step 200/312 - loss 0.2983
  step 250/312 - loss 0.3025
  step 300/312 - loss 0.3010
  train loss 0.2992 | val loss 0.1550 | val acc 0.9502 | 345.0s
  Saved best EMA checkpoint to /kaggle/working/best_ema.pt (acc=0.9502)
LR end of epoch: 5.8870395478880035e-05

{'model': 'convnext_small', 'lr': 0.0003, 'batch': 64, 'drop_path': 0.05, 'label_smoothing': 0.05, 'mixup_alpha': 0.1, 'cutmix_alpha': 0.2}
Epoch 72/100
  step 50/312 - loss 0.2792
  step 100/312 - loss 0.3184
  step 150/312 - loss 0.3278
  step 200/312 - loss 0.3318
  step 250/312 - loss 0.3284
  step 300/312 - loss 0.3268
  train loss 0.3297 | val loss 0.1733 | val acc 0.9484 | 345.6s
LR end of epoch: 5.520511353257087e-05

{'model': 'convnext_small', 'lr': 0.0003, 'batch': 64, 'drop_path': 0.05, 'label_smoothing': 0.05, 'mixup_alpha': 0.1, 'cutmix_alpha': 0.2}
Epoch 73/100
  step 50/312 - loss 0.3186
  step 100/312 - loss 0.3035
  step 150/312 - loss 0.3140
  step 200/312 - loss 0.3191
  step 250/312 - loss 0.3216
  step 300/312 - loss 0.3190
  train loss 0.3188 | val loss 0.1660 | val acc 0.9510 | 345.3s
  Saved best EMA checkpoint to /kaggle/working/best_ema.pt (acc=0.9510)
LR end of epoch: 5.1633876134114055e-05

{'model': 'convnext_small', 'lr': 0.0003, 'batch': 64, 'drop_path': 0.05, 'label_smoothing': 0.05, 'mixup_alpha': 0.1, 'cutmix_alpha': 0.2}
Epoch 74/100
  step 50/312 - loss 0.2983
  step 100/312 - loss 0.3163
  step 150/312 - loss 0.3108
  step 200/312 - loss 0.3166
  step 250/312 - loss 0.3186
  step 300/312 - loss 0.3176
  train loss 0.3184 | val loss 0.1590 | val acc 0.9484 | 346.5s
LR end of epoch: 4.816020766366103e-05

{'model': 'convnext_small', 'lr': 0.0003, 'batch': 64, 'drop_path': 0.05, 'label_smoothing': 0.05, 'mixup_alpha': 0.1, 'cutmix_alpha': 0.2}
Epoch 75/100
  step 50/312 - loss 0.3122
  step 100/312 - loss 0.2904
  step 150/312 - loss 0.2958
  step 200/312 - loss 0.2950
  step 250/312 - loss 0.3025
  step 300/312 - loss 0.2970
  train loss 0.2955 | val loss 0.1466 | val acc 0.9510 | 345.7s
LR end of epoch: 4.478753621261114e-05

{'model': 'convnext_small', 'lr': 0.0003, 'batch': 64, 'drop_path': 0.05, 'label_smoothing': 0.05, 'mixup_alpha': 0.1, 'cutmix_alpha': 0.2}
Epoch 76/100
  step 50/312 - loss 0.3129
  step 100/312 - loss 0.3099
  step 150/312 - loss 0.3122
  step 200/312 - loss 0.3117
  step 250/312 - loss 0.3001
  step 300/312 - loss 0.3078
  train loss 0.3066 | val loss 0.1529 | val acc 0.9512 | 345.9s
  Saved best EMA checkpoint to /kaggle/working/best_ema.pt (acc=0.9512)
LR end of epoch: 4.1519190200498946e-05

{'model': 'convnext_small', 'lr': 0.0003, 'batch': 64, 'drop_path': 0.05, 'label_smoothing': 0.05, 'mixup_alpha': 0.1, 'cutmix_alpha': 0.2}
Epoch 77/100
  step 50/312 - loss 0.3234
  step 100/312 - loss 0.2983
  step 150/312 - loss 0.2982
  step 200/312 - loss 0.3094
  step 250/312 - loss 0.3062
  step 300/312 - loss 0.3017
  train loss 0.3016 | val loss 0.1512 | val acc 0.9526 | 345.8s
  Saved best EMA checkpoint to /kaggle/working/best_ema.pt (acc=0.9526)
LR end of epoch: 3.835839509024628e-05

{'model': 'convnext_small', 'lr': 0.0003, 'batch': 64, 'drop_path': 0.05, 'label_smoothing': 0.05, 'mixup_alpha': 0.1, 'cutmix_alpha': 0.2}
Epoch 78/100
  step 50/312 - loss 0.3103
  step 100/312 - loss 0.3183
  step 150/312 - loss 0.3026
  step 200/312 - loss 0.2983
  step 250/312 - loss 0.2956
  step 300/312 - loss 0.3023
  train loss 0.3006 | val loss 0.1585 | val acc 0.9530 | 346.5s
  Saved best EMA checkpoint to /kaggle/working/best_ema.pt (acc=0.9530)
LR end of epoch: 3.5308270205019514e-05

{'model': 'convnext_small', 'lr': 0.0003, 'batch': 64, 'drop_path': 0.05, 'label_smoothing': 0.05, 'mixup_alpha': 0.1, 'cutmix_alpha': 0.2}
Epoch 79/100
  step 50/312 - loss 0.2952
  step 100/312 - loss 0.2910
  step 150/312 - loss 0.2878
  step 200/312 - loss 0.3038
  step 250/312 - loss 0.2992
  step 300/312 - loss 0.2966
  train loss 0.2959 | val loss 0.1526 | val acc 0.9524 | 345.6s
LR end of epoch: 3.237182564983431e-05

{'model': 'convnext_small', 'lr': 0.0003, 'batch': 64, 'drop_path': 0.05, 'label_smoothing': 0.05, 'mixup_alpha': 0.1, 'cutmix_alpha': 0.2}
Epoch 80/100
  step 50/312 - loss 0.2692
  step 100/312 - loss 0.2974
  step 150/312 - loss 0.2888
  step 200/312 - loss 0.2919
  step 250/312 - loss 0.2947
  step 300/312 - loss 0.2956
  train loss 0.3001 | val loss 0.1561 | val acc 0.9538 | 346.2s
  Saved best EMA checkpoint to /kaggle/working/best_ema.pt (acc=0.9538)
LR end of epoch: 2.955195934094537e-05

{'model': 'convnext_small', 'lr': 0.0003, 'batch': 64, 'drop_path': 0.05, 'label_smoothing': 0.05, 'mixup_alpha': 0.1, 'cutmix_alpha': 0.2}
Epoch 81/100
  step 50/312 - loss 0.2700
  step 100/312 - loss 0.2580
  step 150/312 - loss 0.2784
  step 200/312 - loss 0.2823
  step 250/312 - loss 0.2847
  step 300/312 - loss 0.2827
  train loss 0.2848 | val loss 0.1537 | val acc 0.9530 | 345.5s
LR end of epoch: 2.685145414595302e-05

{'model': 'convnext_small', 'lr': 0.0003, 'batch': 64, 'drop_path': 0.05, 'label_smoothing': 0.05, 'mixup_alpha': 0.1, 'cutmix_alpha': 0.2}
Epoch 82/100
  step 50/312 - loss 0.2816
  step 100/312 - loss 0.3081
  step 150/312 - loss 0.3117
  step 200/312 - loss 0.3076
  step 250/312 - loss 0.3101
  step 300/312 - loss 0.3003
  train loss 0.2964 | val loss 0.1500 | val acc 0.9536 | 346.1s
LR end of epoch: 2.4272975137448742e-05

{'model': 'convnext_small', 'lr': 0.0003, 'batch': 64, 'drop_path': 0.05, 'label_smoothing': 0.05, 'mixup_alpha': 0.1, 'cutmix_alpha': 0.2}
Epoch 83/100
  step 50/312 - loss 0.2856
  step 100/312 - loss 0.2785
  step 150/312 - loss 0.2886
  step 200/312 - loss 0.2871
  step 250/312 - loss 0.2881
  step 300/312 - loss 0.2833
  train loss 0.2830 | val loss 0.1521 | val acc 0.9538 | 346.2s
LR end of epoch: 2.181906696291044e-05

{'model': 'convnext_small', 'lr': 0.0003, 'batch': 64, 'drop_path': 0.05, 'label_smoothing': 0.05, 'mixup_alpha': 0.1, 'cutmix_alpha': 0.2}
Epoch 84/100
  step 50/312 - loss 0.3408
  step 100/312 - loss 0.3203
  step 150/312 - loss 0.3187
  step 200/312 - loss 0.3108
  step 250/312 - loss 0.3053
  step 300/312 - loss 0.3064
  train loss 0.3050 | val loss 0.1557 | val acc 0.9532 | 346.1s
LR end of epoch: 1.9492151333442392e-05

{'model': 'convnext_small', 'lr': 0.0003, 'batch': 64, 'drop_path': 0.05, 'label_smoothing': 0.05, 'mixup_alpha': 0.1, 'cutmix_alpha': 0.2}
Epoch 85/100
  step 50/312 - loss 0.2787
  step 100/312 - loss 0.2796
  step 150/312 - loss 0.2762
  step 200/312 - loss 0.2801
  step 250/312 - loss 0.2769
  step 300/312 - loss 0.2830
  train loss 0.2796 | val loss 0.1517 | val acc 0.9534 | 346.3s
LR end of epoch: 1.7294524633839014e-05

{'model': 'convnext_small', 'lr': 0.0003, 'batch': 64, 'drop_path': 0.05, 'label_smoothing': 0.05, 'mixup_alpha': 0.1, 'cutmix_alpha': 0.2}
Epoch 86/100
  step 50/312 - loss 0.2972
  step 100/312 - loss 0.2869
  step 150/312 - loss 0.2836
  step 200/312 - loss 0.2841
  step 250/312 - loss 0.2885
  step 300/312 - loss 0.2778
  train loss 0.2800 | val loss 0.1490 | val acc 0.9520 | 345.8s
LR end of epoch: 1.522835565633007e-05

{'model': 'convnext_small', 'lr': 0.0003, 'batch': 64, 'drop_path': 0.05, 'label_smoothing': 0.05, 'mixup_alpha': 0.1, 'cutmix_alpha': 0.2}
Epoch 87/100
  step 50/312 - loss 0.3020
  step 100/312 - loss 0.2950
  step 150/312 - loss 0.2916
  step 200/312 - loss 0.2859
  step 250/312 - loss 0.2792
  step 300/312 - loss 0.2854
  train loss 0.2839 | val loss 0.1536 | val acc 0.9544 | 345.6s
  Saved best EMA checkpoint to /kaggle/working/best_ema.pt (acc=0.9544)
LR end of epoch: 1.3295683460244817e-05

{'model': 'convnext_small', 'lr': 0.0003, 'batch': 64, 'drop_path': 0.05, 'label_smoothing': 0.05, 'mixup_alpha': 0.1, 'cutmix_alpha': 0.2}
Epoch 88/100
  step 50/312 - loss 0.3231
  step 100/312 - loss 0.3176
  step 150/312 - loss 0.2962
  step 200/312 - loss 0.3028
  step 250/312 - loss 0.3031
  step 300/312 - loss 0.2962
  train loss 0.2988 | val loss 0.1532 | val acc 0.9532 | 346.3s
LR end of epoch: 1.1498415359706406e-05

{'model': 'convnext_small', 'lr': 0.0003, 'batch': 64, 'drop_path': 0.05, 'label_smoothing': 0.05, 'mixup_alpha': 0.1, 'cutmix_alpha': 0.2}
Epoch 89/100
  step 50/312 - loss 0.3154
  step 100/312 - loss 0.3090
  step 150/312 - loss 0.3271
  step 200/312 - loss 0.3192
  step 250/312 - loss 0.3172
  step 300/312 - loss 0.3137
  train loss 0.3149 | val loss 0.1524 | val acc 0.9526 | 345.7s
LR end of epoch: 9.838325041343294e-06

{'model': 'convnext_small', 'lr': 0.0003, 'batch': 64, 'drop_path': 0.05, 'label_smoothing': 0.05, 'mixup_alpha': 0.1, 'cutmix_alpha': 0.2}
Epoch 90/100
  step 50/312 - loss 0.3012
  step 100/312 - loss 0.2869
  step 150/312 - loss 0.2900
  step 200/312 - loss 0.2895
  step 250/312 - loss 0.2837
  step 300/312 - loss 0.2805
  train loss 0.2780 | val loss 0.1481 | val acc 0.9536 | 346.1s
LR end of epoch: 8.317050813874547e-06

{'model': 'convnext_small', 'lr': 0.0003, 'batch': 64, 'drop_path': 0.05, 'label_smoothing': 0.05, 'mixup_alpha': 0.1, 'cutmix_alpha': 0.2}
Epoch 91/100
  step 50/312 - loss 0.2750
  step 100/312 - loss 0.2919
  step 150/312 - loss 0.2765
  step 200/312 - loss 0.2806
  step 250/312 - loss 0.2906
  step 300/312 - loss 0.2916
  train loss 0.2902 | val loss 0.1508 | val acc 0.9534 | 346.2s
LR end of epoch: 6.936093991297027e-06

{'model': 'convnext_small', 'lr': 0.0003, 'batch': 64, 'drop_path': 0.05, 'label_smoothing': 0.05, 'mixup_alpha': 0.1, 'cutmix_alpha': 0.2}
Epoch 92/100
  step 50/312 - loss 0.2897
  step 100/312 - loss 0.3018
  step 150/312 - loss 0.3140
  step 200/312 - loss 0.3053
  step 250/312 - loss 0.3037
  step 300/312 - loss 0.3068
  train loss 0.3064 | val loss 0.1524 | val acc 0.9536 | 346.0s
LR end of epoch: 5.696817411269653e-06

{'model': 'convnext_small', 'lr': 0.0003, 'batch': 64, 'drop_path': 0.05, 'label_smoothing': 0.05, 'mixup_alpha': 0.1, 'cutmix_alpha': 0.2}
Epoch 93/100
  step 50/312 - loss 0.3389
  step 100/312 - loss 0.2897
  step 150/312 - loss 0.2925
  step 200/312 - loss 0.2880
  step 250/312 - loss 0.2974
  step 300/312 - loss 0.2977
  train loss 0.3011 | val loss 0.1525 | val acc 0.9538 | 346.1s
LR end of epoch: 4.600444090157274e-06

{'model': 'convnext_small', 'lr': 0.0003, 'batch': 64, 'drop_path': 0.05, 'label_smoothing': 0.05, 'mixup_alpha': 0.1, 'cutmix_alpha': 0.2}
Epoch 94/100
  step 50/312 - loss 0.3247
  step 100/312 - loss 0.3255
  step 150/312 - loss 0.3155
  step 200/312 - loss 0.3065
  step 250/312 - loss 0.3042
  step 300/312 - loss 0.3001
  train loss 0.2994 | val loss 0.1508 | val acc 0.9544 | 346.6s
LR end of epoch: 3.648056016061052e-06

{'model': 'convnext_small', 'lr': 0.0003, 'batch': 64, 'drop_path': 0.05, 'label_smoothing': 0.05, 'mixup_alpha': 0.1, 'cutmix_alpha': 0.2}
Epoch 95/100
  step 50/312 - loss 0.3265
  step 100/312 - loss 0.3037
  step 150/312 - loss 0.2926
  step 200/312 - loss 0.2972
  step 250/312 - loss 0.2990
  step 300/312 - loss 0.2979
  train loss 0.3016 | val loss 0.1516 | val acc 0.9532 | 346.6s
LR end of epoch: 2.8405930810269197e-06

{'model': 'convnext_small', 'lr': 0.0003, 'batch': 64, 'drop_path': 0.05, 'label_smoothing': 0.05, 'mixup_alpha': 0.1, 'cutmix_alpha': 0.2}
Epoch 96/100
  step 50/312 - loss 0.3180
  step 100/312 - loss 0.3181
  step 150/312 - loss 0.3087
  step 200/312 - loss 0.3046
  step 250/312 - loss 0.3070
  step 300/312 - loss 0.3084
  train loss 0.3088 | val loss 0.1530 | val acc 0.9528 | 346.0s
LR end of epoch: 2.178852153485574e-06

{'model': 'convnext_small', 'lr': 0.0003, 'batch': 64, 'drop_path': 0.05, 'label_smoothing': 0.05, 'mixup_alpha': 0.1, 'cutmix_alpha': 0.2}
Epoch 97/100
  step 50/312 - loss 0.2861
  step 100/312 - loss 0.2909
  step 150/312 - loss 0.2829
  step 200/312 - loss 0.2819
  step 250/312 - loss 0.2889
  step 300/312 - loss 0.2852
  train loss 0.2822 | val loss 0.1507 | val acc 0.9538 | 346.4s
LR end of epoch: 1.6634862918395403e-06

{'model': 'convnext_small', 'lr': 0.0003, 'batch': 64, 'drop_path': 0.05, 'label_smoothing': 0.05, 'mixup_alpha': 0.1, 'cutmix_alpha': 0.2}
Epoch 98/100
  step 50/312 - loss 0.2318
  step 100/312 - loss 0.2430
  step 150/312 - loss 0.2571
  step 200/312 - loss 0.2727
  step 250/312 - loss 0.2777
  step 300/312 - loss 0.2788
  train loss 0.2817 | val loss 0.1497 | val acc 0.9540 | 346.2s
LR end of epoch: 1.2950040999734018e-06

{'model': 'convnext_small', 'lr': 0.0003, 'batch': 64, 'drop_path': 0.05, 'label_smoothing': 0.05, 'mixup_alpha': 0.1, 'cutmix_alpha': 0.2}
Epoch 99/100
  step 50/312 - loss 0.2710
  step 100/312 - loss 0.2926
  step 150/312 - loss 0.2816
  step 200/312 - loss 0.2707
  step 250/312 - loss 0.2755
  step 300/312 - loss 0.2777
  train loss 0.2798 | val loss 0.1495 | val acc 0.9540 | 345.9s
LR end of epoch: 1.0737692253231258e-06

{'model': 'convnext_small', 'lr': 0.0003, 'batch': 64, 'drop_path': 0.05, 'label_smoothing': 0.05, 'mixup_alpha': 0.1, 'cutmix_alpha': 0.2}
Epoch 100/100
  step 50/312 - loss 0.3048
  step 100/312 - loss 0.3020
  step 150/312 - loss 0.3032
  step 200/312 - loss 0.3006
  step 250/312 - loss 0.2975
  step 300/312 - loss 0.2954
  train loss 0.2955 | val loss 0.1505 | val acc 0.9540 | 346.4s
LR end of epoch: 1e-06

Best val acc: 0.9544

In [50]:
# =========================
# Cell 10 - Load best checkpoint (optional)
# =========================
ckpt = torch.load(cfg.save_path, map_location="cpu")
print("Best acc in file:", ckpt["best_acc"])
print("Model:", ckpt["model_name"])

best_model = timm.create_model(
    ckpt["model_name"],
    pretrained=False,
    num_classes=cfg.num_classes
)
best_model.load_state_dict(ckpt["model_state"])
best_model = best_model.to(cfg.device)

val_loss, val_acc = evaluate(best_model)
print("Recomputed val acc:", val_acc)


Best acc in file: 0.9544
Model: convnext_small
Recomputed val acc: 0.9544


Best acc in file: 0.9544
Model: convnext_small
Recomputed val acc: 0.9544

In [51]:
# =========================
# Cell 11 - Quick knobs to try
# =========================
# 1) If you want to test ConvNeXt-Base from scratch:
# cfg.model_name = "convnext_base"
# cfg.batch_size = 32  # maybe 24 if OOM
#
# 2) If val stalls:
# cfg.mixup_alpha = 0.1
# cfg.cutmix_alpha = 0.2
# cfg.drop_path_rate = 0.05
# cfg.lr = 2e-4
#
# Then re-run Cells 6-9.
