In [1]:
import timm
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, Subset
from torchvision import transforms
from torch.optim import SGD
from torchvision.transforms import InterpolationMode
from tqdm import tqdm
import numpy as np
import os
import pandas as pd
from PIL import Image
from sklearn.model_selection import train_test_split
from sklearn.utils.class_weight import compute_class_weight
from torch.utils.data import Dataset
from timm.data import Mixup
from torch.optim.lr_scheduler import OneCycleLR
from timm.data.auto_augment import rand_augment_transform
from torch.optim.lr_scheduler import CosineAnnealingLR


In [2]:
torch.manual_seed(0)
np.random.seed(0)

In [3]:
pip install wandb


Note: you may need to restart the kernel to use updated packages.


In [4]:
# wandb login


In [3]:
class CustomImageDataset(Dataset):
    def __init__(self, csv_file, image_dir, transform=None):
        self.labels_df = pd.read_csv(csv_file)
        self.image_dir = image_dir
        self.transform = transform
        self.classes = sorted(self.labels_df.iloc[:, 1].unique())
        self.class_to_idx = {cls_name: idx for idx, cls_name in enumerate(self.classes)}

    def __len__(self):
        return len(self.labels_df)

    def __getitem__(self, idx):
        img_id = self.labels_df.iloc[idx, 0] + ".jpg"
        label_str = self.labels_df.iloc[idx, 1]
        img_path = os.path.join(self.image_dir, img_id)
        image = Image.open(img_path).convert("RGB")
        if self.transform:
            image = self.transform(image)
        label = self.class_to_idx[label_str]
        return image, label


# # 2. Transforms
# transform_train = transforms.Compose([
#     transforms.Resize((224, 224), interpolation=InterpolationMode.LANCZOS),
#     transforms.RandomHorizontalFlip(),
#     transforms.RandomRotation(15),
#     transforms.RandAugment(),
#     transforms.ToTensor(),
#     transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
# ])

transform_train = transforms.Compose([
    transforms.Resize((256, 256), interpolation=InterpolationMode.LANCZOS),  # Больший размер для RandomResizedCrop
    # transforms.RandomResizedCrop(256, scale=(0.7, 1.0), ratio=(0.8, 1.2)),
    transforms.RandomHorizontalFlip(),
    # transforms.RandomVerticalFlip(p=0.3),
    transforms.RandomRotation(30),
    # transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2),
    transforms.RandomGrayscale(p=0.1),
    transforms.RandomAffine(degrees=0, translate=(0.1, 0.1)),
    # transforms.GaussianBlur(kernel_size=(3, 3), sigma=(0.1, 0.5)),
    # rand_augment_transform(config_str='rand-m9-mstd0.5', hparams={}),  # Timm's RandAugment
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    transforms.RandomErasing(p=0.5, scale=(0.02, 0.2), ratio=(0.3, 3.3)),
])

transform_test = transforms.Compose([
    transforms.Resize((256, 256), interpolation=InterpolationMode.LANCZOS),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
])


In [4]:
# 3. Dataset & Split
csv_path = '/kaggle/input/dog-breed-identification/labels.csv'
image_dir = '/kaggle/input/dog-breed-identification/train'
dataset_full = CustomImageDataset(csv_file=csv_path, image_dir=image_dir, transform=transform_train)
df = pd.read_csv(csv_path)

train_idx, val_idx = train_test_split(
    np.arange(len(dataset_full)),
    test_size=0.2,
    stratify=df['breed'],
    random_state=42
)

train_dataset = Subset(dataset_full, train_idx)
val_dataset = Subset(dataset_full, val_idx)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True, num_workers=2)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False, num_workers=2)

In [5]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = timm.create_model('convnext_base', pretrained=True, num_classes=120).to(device)


# 5. Loss & optimizer
labels = df.iloc[train_idx, 1].map(dataset_full.class_to_idx).values
class_weights = compute_class_weight('balanced', classes=np.arange(120), y=labels)
class_weights = torch.tensor(class_weights, dtype=torch.float).to(device)

print(device)

model.safetensors:   0%|          | 0.00/354M [00:00<?, ?B/s]

cuda


In [6]:
loss_fn = nn.CrossEntropyLoss(weight=class_weights, label_smoothing=0.1)
# optimizer = AdamW(model.parameters(), lr=1e-4, weight_decay=5e-3)
optimizer = SGD(model.parameters(), lr=1e-3 , momentum=0.9)
# scheduler = OneCycleLR(optimizer, max_lr=1e-4, steps_per_epoch=len(train_loader), epochs=25)

scheduler = CosineAnnealingLR(optimizer, T_max=25*len(train_loader), eta_min=1e-6)


In [7]:
# 6. Training functions
def train_epoch(model, loader):
    model.train()
    loss_log, acc_log = [], []
    for data, target in tqdm(loader):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        logits = model(data)
        loss = loss_fn(logits, target)
        loss.backward()
        optimizer.step()
        scheduler.step()
        
        acc = (logits.argmax(dim=1) == target).float().mean().item()
        loss_log.append(loss.item())
        acc_log.append(acc)
    return np.mean(loss_log), np.mean(acc_log)


def validate(model, loader):
    model.eval()
    loss_log, acc_log = [], []
    with torch.no_grad():
        for data, target in tqdm(loader):
            data, target = data.to(device), target.to(device)
            logits = model(data)
            loss = loss_fn(logits, target)
            acc = (logits.argmax(dim=1) == target).float().mean().item()
            loss_log.append(loss.item())
            acc_log.append(acc)
    return np.mean(loss_log), np.mean(acc_log)



def train(model, train_loader, val_loader, epochs=25, patience=5):
    best_val_acc = 0
    wait = 0
    best_model_state = None
    for epoch in range(epochs):
        print(f"\nEpoch {epoch+1}/{epochs}")
        train_loss, train_acc = train_epoch(model, train_loader)
        val_loss, val_acc = validate(model, val_loader)

        print(f"Train Loss: {train_loss:.4f}, Acc: {train_acc:.4f}")
        print(f"Val   Loss: {val_loss:.4f}, Acc: {val_acc:.4f}")

        if val_acc > best_val_acc:
            best_val_acc = val_acc
            best_model_state = model.state_dict()
            wait = 0
        else:
            wait += 1
            if wait >= patience:
                print("Early stopping.")
                break

    if best_model_state:
        model.load_state_dict(best_model_state)

    return model


In [8]:
model = train(model, train_loader, val_loader)
torch.save(model.state_dict(), "best_model.pth")


Epoch 1/25


100%|██████████| 256/256 [08:21<00:00,  1.96s/it]
100%|██████████| 64/64 [00:36<00:00,  1.74it/s]


Train Loss: 2.5973, Acc: 0.5968
Val   Loss: 1.3742, Acc: 0.8527

Epoch 2/25


100%|██████████| 256/256 [08:52<00:00,  2.08s/it]
100%|██████████| 64/64 [00:36<00:00,  1.75it/s]


Train Loss: 1.2998, Acc: 0.8527
Val   Loss: 1.2631, Acc: 0.8631

Epoch 3/25


100%|██████████| 256/256 [08:52<00:00,  2.08s/it]
100%|██████████| 64/64 [00:36<00:00,  1.75it/s]


Train Loss: 1.2166, Acc: 0.8744
Val   Loss: 1.2694, Acc: 0.8514

Epoch 4/25


100%|██████████| 256/256 [08:50<00:00,  2.07s/it]
100%|██████████| 64/64 [00:36<00:00,  1.75it/s]


Train Loss: 1.1810, Acc: 0.8867
Val   Loss: 1.2236, Acc: 0.8642

Epoch 5/25


100%|██████████| 256/256 [08:51<00:00,  2.08s/it]
100%|██████████| 64/64 [00:36<00:00,  1.75it/s]


Train Loss: 1.1545, Acc: 0.8977
Val   Loss: 1.2125, Acc: 0.8777

Epoch 6/25


100%|██████████| 256/256 [08:51<00:00,  2.07s/it]
100%|██████████| 64/64 [00:36<00:00,  1.75it/s]


Train Loss: 1.1204, Acc: 0.9100
Val   Loss: 1.2032, Acc: 0.8748

Epoch 7/25


100%|██████████| 256/256 [08:51<00:00,  2.08s/it]
100%|██████████| 64/64 [00:36<00:00,  1.75it/s]


Train Loss: 1.1004, Acc: 0.9162
Val   Loss: 1.2036, Acc: 0.8723

Epoch 8/25


100%|██████████| 256/256 [08:49<00:00,  2.07s/it]
100%|██████████| 64/64 [00:36<00:00,  1.75it/s]


Train Loss: 1.0805, Acc: 0.9261
Val   Loss: 1.2022, Acc: 0.8719

Epoch 9/25


100%|██████████| 256/256 [08:50<00:00,  2.07s/it]
100%|██████████| 64/64 [00:36<00:00,  1.75it/s]


Train Loss: 1.0685, Acc: 0.9258
Val   Loss: 1.1802, Acc: 0.8865

Epoch 10/25


100%|██████████| 256/256 [08:51<00:00,  2.08s/it]
100%|██████████| 64/64 [00:36<00:00,  1.75it/s]


Train Loss: 1.0462, Acc: 0.9369
Val   Loss: 1.1814, Acc: 0.8763

Epoch 11/25


100%|██████████| 256/256 [08:48<00:00,  2.07s/it]
100%|██████████| 64/64 [00:36<00:00,  1.75it/s]


Train Loss: 1.0396, Acc: 0.9385
Val   Loss: 1.1810, Acc: 0.8772

Epoch 12/25


100%|██████████| 256/256 [08:51<00:00,  2.08s/it]
100%|██████████| 64/64 [00:36<00:00,  1.75it/s]


Train Loss: 1.0351, Acc: 0.9380
Val   Loss: 1.1713, Acc: 0.8895

Epoch 13/25


100%|██████████| 256/256 [08:50<00:00,  2.07s/it]
100%|██████████| 64/64 [00:36<00:00,  1.75it/s]


Train Loss: 1.0172, Acc: 0.9464
Val   Loss: 1.1593, Acc: 0.8827

Epoch 14/25


100%|██████████| 256/256 [08:50<00:00,  2.07s/it]
100%|██████████| 64/64 [00:36<00:00,  1.75it/s]


Train Loss: 1.0097, Acc: 0.9510
Val   Loss: 1.1470, Acc: 0.8958

Epoch 15/25


100%|██████████| 256/256 [08:52<00:00,  2.08s/it]
100%|██████████| 64/64 [00:36<00:00,  1.75it/s]


Train Loss: 1.0030, Acc: 0.9502
Val   Loss: 1.1681, Acc: 0.8866

Epoch 16/25


100%|██████████| 256/256 [08:51<00:00,  2.08s/it]
100%|██████████| 64/64 [00:36<00:00,  1.75it/s]


Train Loss: 0.9962, Acc: 0.9556
Val   Loss: 1.1617, Acc: 0.8889

Epoch 17/25


100%|██████████| 256/256 [08:51<00:00,  2.08s/it]
100%|██████████| 64/64 [00:36<00:00,  1.75it/s]


Train Loss: 0.9920, Acc: 0.9580
Val   Loss: 1.1618, Acc: 0.8948

Epoch 18/25


100%|██████████| 256/256 [08:52<00:00,  2.08s/it]
100%|██████████| 64/64 [00:36<00:00,  1.75it/s]


Train Loss: 0.9901, Acc: 0.9600
Val   Loss: 1.1843, Acc: 0.8791

Epoch 19/25


100%|██████████| 256/256 [08:52<00:00,  2.08s/it]
100%|██████████| 64/64 [00:36<00:00,  1.75it/s]


Train Loss: 0.9843, Acc: 0.9607
Val   Loss: 1.1614, Acc: 0.8886
Early stopping.


In [10]:
val_loss, val_acc = validate(model, val_loader)
val_loss, val_acc 

100%|██████████| 64/64 [00:30<00:00,  2.10it/s]


(1.1720886696130037, 0.8893958777189255)

In [11]:
import glob

class CustomTestDataset(Dataset):
    def __init__(self, image_dir, transform=None):
        # Получаем список всех файлов изображений
        self.image_paths = sorted(glob.glob(os.path.join(image_dir, "*")))
        self.transform = transform

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        img_path = self.image_paths[idx]
        image = Image.open(img_path).convert("RGB")

        if self.transform:
            image = self.transform(image)

        return image, os.path.basename(img_path)  # вернём и имя файла


test_dataset = CustomTestDataset(image_dir='/kaggle/input/dog-breed-identification/test', transform=transform_test)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)




In [12]:
model.state_dict()
model.eval()
all_predictions = []
all_filenames = []

with torch.no_grad():
    for images, filenames in tqdm(test_loader):
        images = images.to(device)

        outputs = model(images)  # (batch_size, n_classes)
        probs = F.softmax(outputs, dim=1).cpu().numpy()  # Преобразуем в вероятности

        all_predictions.extend(probs)
        all_filenames.extend([os.path.splitext(f)[0] for f in filenames])  # убираем .jpg


100%|██████████| 162/162 [05:35<00:00,  2.07s/it]


NameError: name 'train_dataset_full' is not defined

In [13]:

# Получаем список классов в том же порядке, как в train
class_names = dataset_full.classes  # отсортированный список классов

# Создаём DataFrame
submission_df = pd.DataFrame(all_predictions, columns=class_names)
submission_df.insert(0, "id", all_filenames)

In [15]:
submission_df.to_csv("submission1.csv", index=False)

# Получаем список классов в том же порядке, как в train
class_names = dataset_full.classes  # отсортированный список классов



In [None]:
transform_train = transforms.Compose([
    transforms.Resize((256, 256), interpolation=InterpolationMode.LANCZOS),  # Больший размер для RandomResizedCrop
    # transforms.RandomResizedCrop(256, scale=(0.7, 1.0), ratio=(0.8, 1.2)),
    transforms.RandomHorizontalFlip(),
    # transforms.RandomVerticalFlip(p=0.3),
    transforms.RandomRotation(30),
    # transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2),
    transforms.RandomGrayscale(p=0.1),
    transforms.RandomAffine(degrees=0, translate=(0.1, 0.1)),
    # transforms.GaussianBlur(kernel_size=(3, 3), sigma=(0.1, 0.5)),
    # rand_augment_transform(config_str='rand-m9-mstd0.5', hparams={}),  # Timm's RandAugment
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    transforms.RandomErasing(p=0.5, scale=(0.02, 0.2), ratio=(0.3, 3.3)),
])

transform_test = transforms.Compose([
    transforms.Resize((256, 256), interpolation=InterpolationMode.LANCZOS),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
])
