In [3]:
!pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu124  # adjust CUDA version
!pip install pillow fastapi uvicorn[standard]

Looking in indexes: https://download.pytorch.org/whl/cu124
Collecting fastapi
  Downloading fastapi-0.121.3-py3-none-any.whl.metadata (30 kB)
Collecting uvicorn[standard]
  Downloading uvicorn-0.38.0-py3-none-any.whl.metadata (6.8 kB)
Collecting starlette<0.51.0,>=0.40.0 (from fastapi)
  Downloading starlette-0.50.0-py3-none-any.whl.metadata (6.3 kB)
Collecting pydantic!=1.8,!=1.8.1,!=2.0.0,!=2.0.1,!=2.1.0,<3.0.0,>=1.7.4 (from fastapi)
  Downloading pydantic-2.12.4-py3-none-any.whl.metadata (89 kB)
Collecting annotated-doc>=0.0.2 (from fastapi)
  Downloading annotated_doc-0.0.4-py3-none-any.whl.metadata (6.6 kB)
Collecting annotated-types>=0.6.0 (from pydantic!=1.8,!=1.8.1,!=2.0.0,!=2.0.1,!=2.1.0,<3.0.0,>=1.7.4->fastapi)
  Downloading annotated_types-0.7.0-py3-none-any.whl.metadata (15 kB)
Collecting pydantic-core==2.41.5 (from pydantic!=1.8,!=1.8.1,!=2.0.0,!=2.0.1,!=2.1.0,<3.0.0,>=1.7.4->fastapi)
  Downloading pydantic_core-2.41.5-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64

In [1]:
import os
import shutil
import random
from pathlib import Path

random.seed(42)

DATA_ROOT = Path("/workspace/dog/Mini_Dog_Breed_Data")   # original folder with 5 subfolders
OUT_ROOT = Path("data_split")           # will be created: train/val/test/...

SPLIT_RATIOS = {
    "train": 0.7,
    "val": 0.15,
    "test": 0.15,
}

def main():
    assert abs(sum(SPLIT_RATIOS.values()) - 1.0) < 1e-6

    classes = [d for d in DATA_ROOT.iterdir() if d.is_dir()]
    print("Classes:", [c.name for c in classes])

    for split in SPLIT_RATIOS.keys():
        for cls in classes:
            (OUT_ROOT / split / cls.name).mkdir(parents=True, exist_ok=True)

    for cls in classes:
        images = [p for p in cls.iterdir() if p.is_file()]
        random.shuffle(images)

        n = len(images)
        n_train = int(n * SPLIT_RATIOS["train"])
        n_val   = int(n * SPLIT_RATIOS["val"])
        # rest go to test
        n_test  = n - n_train - n_val

        split_map = {
            "train": images[:n_train],
            "val":   images[n_train:n_train + n_val],
            "test":  images[n_train + n_val:],
        }

        for split, imgs in split_map.items():
            for img in imgs:
                dst = OUT_ROOT / split / cls.name / img.name
                shutil.copy2(img, dst)

        print(cls.name, "->", n_train, "train,", n_val, "val,", n_test, "test")

if __name__ == "__main__":
    main()

Classes: ['siberian_husky', 'silky_terrier', 'scottish_deerhound', 'saint_bernard', 'yorkshire_terrier']
siberian_husky -> 66 train, 14 val, 15 test
silky_terrier -> 62 train, 13 val, 15 test
scottish_deerhound -> 88 train, 18 val, 20 test
saint_bernard -> 58 train, 12 val, 14 test
yorkshire_terrier -> 57 train, 12 val, 13 test


In [4]:
import os
from pathlib import Path
import time
import copy

import torch
import torch.nn as nn
from torch.utils.data import DataLoader
from torchvision import datasets, transforms, models

DATA_ROOT = Path("data_split")
BATCH_SIZE = 16
NUM_EPOCHS = 20
LR = 1e-4
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
MODEL_PATH = "dog_breed_resnet50.pth"

def get_dataloaders():
    # Standard ImageNet transforms + augmentations for train
    data_transforms = {
        "train": transforms.Compose([
            transforms.RandomResizedCrop(224),
            transforms.RandomHorizontalFlip(),
            transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2),
            transforms.RandomRotation(15),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406],
                                 [0.229, 0.224, 0.225])
        ]),
        "val": transforms.Compose([
            transforms.Resize(256),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406],
                                 [0.229, 0.224, 0.225])
        ]),
        "test": transforms.Compose([
            transforms.Resize(256),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406],
                                 [0.229, 0.224, 0.225])
        ]),
    }

    image_datasets = {
        split: datasets.ImageFolder(DATA_ROOT / split, transform=data_transforms[split])
        for split in ["train", "val", "test"]
    }

    dataloaders = {
        split: DataLoader(image_datasets[split],
                          batch_size=BATCH_SIZE,
                          shuffle=(split == "train"),
                          num_workers=4)
        for split in ["train", "val", "test"]
    }

    dataset_sizes = {split: len(image_datasets[split]) for split in ["train", "val", "test"]}
    class_names = image_datasets["train"].classes

    return dataloaders, dataset_sizes, class_names

def build_model(num_classes):
    model = models.resnet50(weights=models.ResNet50_Weights.IMAGENET1K_V2)
    # Freeze all layers first
    for param in model.parameters():
        param.requires_grad = False

    # Replace final layer
    num_ftrs = model.fc.in_features
    model.fc = nn.Sequential(
        nn.Linear(num_ftrs, 256),
        nn.ReLU(),
        nn.Dropout(0.4),
        nn.Linear(256, num_classes)
    )

    # Optionally unfreeze last few layers of backbone
    for name, param in model.named_parameters():
        if "layer4" in name or "fc" in name:
            param.requires_grad = True

    return model

def train_model(model, dataloaders, dataset_sizes, num_epochs):
    criterion = nn.CrossEntropyLoss()

    # Only train parameters that require grad
    optimizer = torch.optim.Adam(
        filter(lambda p: p.requires_grad, model.parameters()),
        lr=LR
    )
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer, mode='max', factor=0.1, patience=3
    )

    model = model.to(DEVICE)
    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    for epoch in range(num_epochs):
        print(f"Epoch {epoch+1}/{num_epochs}")
        print("-" * 20)

        for phase in ["train", "val"]:
            if phase == "train":
                model.train()
            else:
                model.eval()

            running_loss = 0.0
            running_corrects = 0

            for inputs, labels in dataloaders[phase]:
                inputs = inputs.to(DEVICE)
                labels = labels.to(DEVICE)

                optimizer.zero_grad()

                with torch.set_grad_enabled(phase == "train"):
                    outputs = model(inputs)
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, labels)

                    if phase == "train":
                        loss.backward()
                        optimizer.step()

                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)

            epoch_loss = running_loss / dataset_sizes[phase]
            epoch_acc = running_corrects.double().item() / dataset_sizes[phase]

            print(f"{phase} Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}")

            if phase == "val":
                scheduler.step(epoch_acc)
                if epoch_acc > best_acc:
                    best_acc = epoch_acc
                    best_model_wts = copy.deepcopy(model.state_dict())
                    torch.save({
                        "model_state_dict": best_model_wts,
                        "class_names": dataloaders["train"].dataset.classes
                    }, MODEL_PATH)
                    print(f"** New best model saved (val acc = {best_acc:.4f}) **")

        print()

    print(f"Training complete. Best val acc: {best_acc:.4f}")
    model.load_state_dict(best_model_wts)
    return model

def evaluate(model, dataloader, dataset_size):
    model.eval()
    model.to(DEVICE)
    running_corrects = 0

    with torch.no_grad():
        for inputs, labels in dataloader:
            inputs = inputs.to(DEVICE)
            labels = labels.to(DEVICE)
            outputs = model(inputs)
            _, preds = torch.max(outputs, 1)
            running_corrects += torch.sum(preds == labels.data)

    acc = running_corrects.double().item() / dataset_size
    print(f"Test accuracy: {acc:.4f}")

if __name__ == "__main__":
    dataloaders, dataset_sizes, class_names = get_dataloaders()
    model = build_model(num_classes=len(class_names))
    model = train_model(model, dataloaders, dataset_sizes, NUM_EPOCHS)
    evaluate(model, dataloaders["test"], dataset_sizes["test"])

2.0%

Downloading: "https://download.pytorch.org/models/resnet50-11ad3fa6.pth" to /root/.cache/torch/hub/checkpoints/resnet50-11ad3fa6.pth


100.0%


Epoch 1/20
--------------------
train Loss: 1.4944 Acc: 0.4743
val Loss: 1.1694 Acc: 0.7536
** New best model saved (val acc = 0.7536) **

Epoch 2/20
--------------------
train Loss: 1.0580 Acc: 0.7100
val Loss: 0.4916 Acc: 0.8841
** New best model saved (val acc = 0.8841) **

Epoch 3/20
--------------------
train Loss: 0.6512 Acc: 0.8036
val Loss: 0.2058 Acc: 0.9275
** New best model saved (val acc = 0.9275) **

Epoch 4/20
--------------------
train Loss: 0.4141 Acc: 0.8731
val Loss: 0.1671 Acc: 0.9420
** New best model saved (val acc = 0.9420) **

Epoch 5/20
--------------------
train Loss: 0.4058 Acc: 0.8580
val Loss: 0.1843 Acc: 0.9130

Epoch 6/20
--------------------
train Loss: 0.4467 Acc: 0.8097
val Loss: 0.1391 Acc: 0.9420

Epoch 7/20
--------------------
train Loss: 0.3237 Acc: 0.8852
val Loss: 0.1333 Acc: 0.9710
** New best model saved (val acc = 0.9710) **

Epoch 8/20
--------------------
train Loss: 0.3433 Acc: 0.8792
val Loss: 0.1897 Acc: 0.8986

Epoch 9/20
---------------