In [1]:
# ==============================
# 0) Setup and Global Config
# ==============================
import os, random, numpy as np, torch

# Reproducibility: seed Python, NumPy and PyTorch (CPU + every CUDA device)
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.cuda.manual_seed_all(SEED)
# Disable cuDNN autotuning and request deterministic kernels
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True

# All paths are derived from the current working directory
PROJECT_DIR = os.getcwd()
DATA_DIR = os.path.join(PROJECT_DIR, "datasets")
TRAIN_DIR, VAL_DIR, TEST_DIR = (
    os.path.join(DATA_DIR, split) for split in ("train", "val", "test")
)
MODEL_DIR = os.path.join(PROJECT_DIR, "models")
WEIGHT_PTH = os.path.join(MODEL_DIR, "efficientnet_b3_imagenet_pretrained.pth")

# Use the GPU when one is visible to PyTorch, otherwise fall back to the CPU
if torch.cuda.is_available():
    DEVICE = "cuda"
else:
    DEVICE = "cpu"
print("Running on:", DEVICE)


Running on: cuda


In [2]:
# ==============================
# 1) Data Preprocessing & Augmentation
# ==============================
from torchvision import transforms
from timm.data import resolve_data_config
from timm.data.transforms_factory import create_transform
import timm

# Build a weight-less EfficientNet-B3 only to query timm's data config
# (input size plus the mean/std used for normalization below).
tmp_model = timm.create_model('efficientnet_b3', pretrained=False)
cfg = resolve_data_config({}, model=tmp_model)
img_size = cfg['input_size'][-1]  # last entry of input_size is used as the square crop side

# Training pipeline: random crop / flip / colour jitter / small rotation,
# then tensor conversion and normalization.
train_transform = transforms.Compose([
    transforms.RandomResizedCrop(size=img_size, scale=(0.7, 1.0)),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.ColorJitter(brightness=0.1, contrast=0.1, saturation=0.1, hue=0.05),
    transforms.RandomRotation(degrees=10),
    transforms.ToTensor(),
    transforms.Normalize(mean=cfg['mean'], std=cfg['std']),
])

# Evaluation pipeline (validation and test): deterministic resize to 1.15x the
# target side followed by a center crop; no random augmentation.
eval_transform = transforms.Compose([
    transforms.Resize(size=int(img_size * 1.15)),
    transforms.CenterCrop(size=img_size),
    transforms.ToTensor(),
    transforms.Normalize(mean=cfg['mean'], std=cfg['std']),
])


In [4]:
# ==============================
# 2) Dataset and DataLoader
# ==============================
from torchvision import datasets
from torch.utils.data import DataLoader
from collections import Counter
from torchvision.datasets.folder import default_loader
from torch.utils.data import Dataset

# DataLoader settings used by the loaders defined further down
NUM_WORKERS = 4
BATCH_SIZE = 32

# train/ and val/ contain one sub-folder per class, so ImageFolder can be used directly
train_set = datasets.ImageFolder(root=TRAIN_DIR, transform=train_transform)
val_set = datasets.ImageFolder(root=VAL_DIR, transform=eval_transform)

# The test split is a flat folder of unlabeled image files, so it needs a custom Dataset
class TestImageFolder(Dataset):
    """Dataset over a flat directory of unlabeled test images.

    Every item is an ``(image, filename)`` pair, so a prediction can be matched
    back to the file it was computed from.

    Args:
        root: Directory that directly contains the image files.
        transform: Optional callable applied to each loaded image. Defaults to
            ``None``, in which case the loaded image is returned unchanged.
    """
    def __init__(self, root, transform=None):
        self.root = root
        self.transform = transform
        img_exts = ('.jpg', '.jpeg', '.png', '.bmp', '.webp')
        # Sorted listing gives a deterministic sample order. Only regular files
        # are kept: a sub-directory whose name ends in an image extension would
        # otherwise be collected here and fail later when it is opened as an image.
        self.paths = [
            os.path.join(root, f)
            for f in sorted(os.listdir(root))
            if f.lower().endswith(img_exts) and os.path.isfile(os.path.join(root, f))
        ]

    def __len__(self):
        """Return the number of image files found under ``root``."""
        return len(self.paths)

    def __getitem__(self, idx):
        """Load image ``idx`` and return ``(image, basename_of_file)``."""
        path = self.paths[idx]
        img = default_loader(path)
        if self.transform is not None:
            img = self.transform(img)
        return img, os.path.basename(path)

test_set = TestImageFolder(TEST_DIR, eval_transform)

# DataLoaders: only the training loader shuffles; evaluation order stays fixed.
train_loader = DataLoader(train_set, batch_size=BATCH_SIZE, shuffle=True,  num_workers=NUM_WORKERS, pin_memory=True)
val_loader   = DataLoader(val_set,   batch_size=BATCH_SIZE, shuffle=False, num_workers=NUM_WORKERS, pin_memory=True)
# NOTE(review): TestImageFolder is declared inside this notebook. Worker
# processes started with the "spawn" method (the default on Windows) have to
# re-import the dataset class and generally cannot do so for classes defined in
# an interactive session, so this loader runs in the main process. The
# inference cell of this notebook already uses num_workers=0 for the same dataset.
test_loader  = DataLoader(test_set,  batch_size=BATCH_SIZE, shuffle=False, num_workers=0, pin_memory=True)

# Dataset statistics (for reporting): count samples per class index
cls_names = train_set.classes
train_stats = Counter(label for _, label in train_set.samples)
val_stats   = Counter(label for _, label in val_set.samples)

print("Classes:", cls_names)
print("Train per-class:", {cls_names[k]: v for k, v in train_stats.items()}, " | Total:", len(train_set))
print("Val per-class:",   {cls_names[k]: v for k, v in val_stats.items()},   " | Total:", len(val_set))
print("Test total:", len(test_set))


Classes: ['cat', 'dog']
Train per-class: {'cat': 10000, 'dog': 10000}  | Total: 20000
Val per-class: {'cat': 2500, 'dog': 2500}  | Total: 5000
Test total: 500


In [5]:
# ==============================
# 3) Build Model and Load Pretrained Weights
# ==============================
import torch.nn as nn
from timm import create_model

# Two output classes (the dataset cell reports 'cat' and 'dog')
num_classes = 2
model = create_model('efficientnet_b3', pretrained=False, num_classes=num_classes)

# Load the locally stored checkpoint.
# weights_only=True restricts unpickling to tensors and plain containers, which
# is all a state dict needs; it avoids executing arbitrary pickled code and
# silences the FutureWarning that torch.load emits when the flag is omitted.
state_dict = torch.load(WEIGHT_PTH, map_location='cpu', weights_only=True)
# strict=False tolerates key mismatches instead of raising; the counts are
# printed so that a partial load does not go unnoticed.
missing, unexpected = model.load_state_dict(state_dict, strict=False)
print(f"Loaded pretrained weights. Missing keys: {len(missing)}, Unexpected: {len(unexpected)}")

model = model.to(DEVICE)

# Define loss, optimizer, scheduler
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-3, weight_decay=1e-4)
# T_max=10 is the cosine period in scheduler steps; the training cell steps the
# scheduler once per epoch, so keep this equal to the number of epochs.
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=10)


  state_dict = torch.load(WEIGHT_PTH, map_location='cpu')


Loaded pretrained weights. Missing keys: 0, Unexpected: 0


In [6]:
# ==============================
# 4) Training and Validation Loop
# ==============================
from tqdm import tqdm

def evaluate(model, loader, loss_fn=None, device=None):
    """Compute the mean loss and accuracy of ``model`` over ``loader``.

    Args:
        model: Module mapping an input batch to per-class logits.
        loader: Iterable yielding ``(inputs, targets)`` tensor batches.
        loss_fn: Callable ``loss_fn(logits, targets)`` returning a scalar
            tensor. Defaults to the notebook-level ``criterion``.
        device: Device to run on (string or ``torch.device``). Defaults to the
            notebook-level ``DEVICE``.

    Returns:
        ``(mean_loss, accuracy)``. Each batch loss is weighted by its batch
        size before averaging, which gives a per-sample mean when ``loss_fn``
        uses mean reduction.

    Raises:
        ZeroDivisionError: If ``loader`` yields no samples.
    """
    if loss_fn is None:
        loss_fn = criterion
    if device is None:
        device = DEVICE
    # autocast wants the bare device type ("cuda"/"cpu"), not e.g. "cuda:0"
    device_type = torch.device(device).type

    model.eval()
    correct, total, loss_sum = 0, 0, 0.0
    with torch.no_grad():
        for x, y in loader:
            x, y = x.to(device), y.to(device)
            # torch.autocast replaces the deprecated torch.cuda.amp.autocast;
            # mixed precision is only enabled on CUDA, as before.
            with torch.autocast(device_type=device_type, enabled=(device_type == 'cuda')):
                logits = model(x)
                loss = loss_fn(logits, y)
            loss_sum += loss.item() * x.size(0)
            correct += (logits.argmax(1) == y).sum().item()
            total += x.size(0)
    return loss_sum / total, correct / total

# Number of passes over the training set. The scheduler is stepped once per
# epoch below, so this should match the T_max it was created with.
EPOCHS = 10
best_acc = 0.0
os.makedirs(MODEL_DIR, exist_ok=True)
best_path = os.path.join(MODEL_DIR, "efficientnet_b3_finetuned_best.pt")

# Mixed precision is only used on CUDA. torch.amp.GradScaler / torch.autocast
# replace the deprecated torch.cuda.amp.* entry points (which emit FutureWarning).
use_amp = (DEVICE == 'cuda')
scaler = torch.amp.GradScaler('cuda', enabled=use_amp)

for epoch in range(1, EPOCHS + 1):
    model.train()
    pbar = tqdm(train_loader, desc=f"Epoch {epoch}/{EPOCHS}")
    for x, y in pbar:
        x, y = x.to(DEVICE), y.to(DEVICE)
        optimizer.zero_grad(set_to_none=True)
        with torch.autocast(device_type=DEVICE, enabled=use_amp):
            logits = model(x)
            loss = criterion(logits, y)
        # Scale the loss before backward; the scaler then performs the
        # optimizer step and updates its scale factor for the next iteration.
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()
        pbar.set_postfix(loss=float(loss.item()))
    # Learning-rate schedule advances once per epoch
    scheduler.step()

    val_loss, val_acc = evaluate(model, val_loader)
    print(f"[Val] loss={val_loss:.4f} acc={val_acc*100:.2f}%")
    # Keep only the checkpoint with the strictly highest validation accuracy
    if val_acc > best_acc:
        best_acc = val_acc
        torch.save(model.state_dict(), best_path)
        print("==> Saved new best:", best_path)

print("Best Val Acc:", best_acc)


  scaler = torch.cuda.amp.GradScaler(enabled=(DEVICE=='cuda'))
  with torch.cuda.amp.autocast(enabled=(DEVICE=='cuda')):
Epoch 1/10: 100%|██████████| 625/625 [02:46<00:00,  3.75it/s, loss=0.0505]  
  with torch.cuda.amp.autocast(enabled=(DEVICE=='cuda')):


[Val] loss=0.0445 acc=98.38%
==> Saved new best: d:\A\IE4483_project\models\efficientnet_b3_finetuned_best.pt


Epoch 2/10: 100%|██████████| 625/625 [02:43<00:00,  3.83it/s, loss=0.0933]  


[Val] loss=0.0490 acc=97.94%


Epoch 3/10: 100%|██████████| 625/625 [02:43<00:00,  3.82it/s, loss=0.00166] 


[Val] loss=0.0528 acc=98.12%


Epoch 4/10: 100%|██████████| 625/625 [02:43<00:00,  3.83it/s, loss=0.00661] 


[Val] loss=0.0367 acc=98.62%
==> Saved new best: d:\A\IE4483_project\models\efficientnet_b3_finetuned_best.pt


Epoch 5/10: 100%|██████████| 625/625 [02:42<00:00,  3.84it/s, loss=0.0423]  


[Val] loss=0.0361 acc=98.62%


Epoch 6/10: 100%|██████████| 625/625 [02:43<00:00,  3.83it/s, loss=0.00203] 


[Val] loss=0.0520 acc=98.18%


Epoch 7/10: 100%|██████████| 625/625 [02:43<00:00,  3.82it/s, loss=2.06e-5] 


[Val] loss=0.0567 acc=98.60%


Epoch 8/10: 100%|██████████| 625/625 [02:42<00:00,  3.84it/s, loss=0.00272] 


[Val] loss=0.0449 acc=98.96%
==> Saved new best: d:\A\IE4483_project\models\efficientnet_b3_finetuned_best.pt


Epoch 9/10: 100%|██████████| 625/625 [02:42<00:00,  3.84it/s, loss=1.2e-5]  


[Val] loss=0.0419 acc=99.04%
==> Saved new best: d:\A\IE4483_project\models\efficientnet_b3_finetuned_best.pt


Epoch 10/10: 100%|██████████| 625/625 [02:42<00:00,  3.85it/s, loss=4.68e-6] 


[Val] loss=0.0435 acc=99.10%
==> Saved new best: d:\A\IE4483_project\models\efficientnet_b3_finetuned_best.pt
Best Val Acc: 0.991


In [7]:
# ==============================
# 5) Validation Result (Final Accuracy)
# ==============================
# Rebuild the architecture and restore the best checkpoint written by the training loop
best_model = create_model('efficientnet_b3', pretrained=False, num_classes=num_classes).to(DEVICE)
# weights_only=True limits unpickling to tensors/plain containers (sufficient
# for a state dict) and avoids the FutureWarning torch.load raises without it.
best_model.load_state_dict(torch.load(best_path, map_location=DEVICE, weights_only=True))
val_loss, val_acc = evaluate(best_model, val_loader)
print(f"Final Val Acc = {val_acc*100:.2f}%")


  best_model.load_state_dict(torch.load(best_path, map_location=DEVICE))
  with torch.cuda.amp.autocast(enabled=(DEVICE=='cuda')):


Final Val Acc = 99.10%


In [9]:
# ==============================
# 6) Inference on Test Set and Generate submission.csv
# ==============================
import pandas as pd
from torchvision.datasets.folder import default_loader
from torch.utils.data import Dataset

class TestImageFolder(Dataset):
    """Custom dataset to keep original filenames for test set.

    Iterates over the image files of a flat directory and yields
    ``(image, filename)`` pairs. This re-declares the class of the same name
    from the Dataset/DataLoader cell of this notebook.

    Args:
        root: Directory that directly contains the image files.
        transform: Optional callable applied to each loaded image. Defaults to
            ``None``, in which case the loaded image is returned unchanged.
    """
    def __init__(self, root, transform=None):
        self.root = root
        self.transform = transform
        img_exts = ('.jpg', '.jpeg', '.png', '.bmp', '.webp')
        # Sorted listing gives a deterministic sample order. Only regular files
        # are kept: a sub-directory whose name ends in an image extension would
        # otherwise be collected here and fail later when it is opened as an image.
        self.paths = [
            os.path.join(root, f)
            for f in sorted(os.listdir(root))
            if f.lower().endswith(img_exts) and os.path.isfile(os.path.join(root, f))
        ]

    def __len__(self):
        """Return the number of image files found under ``root``."""
        return len(self.paths)

    def __getitem__(self, idx):
        """Load image ``idx`` and return ``(image, basename_of_file)``."""
        path = self.paths[idx]
        img = default_loader(path)
        if self.transform is not None:
            img = self.transform(img)
        return img, os.path.basename(path)

# Separate dataset/loader for inference; batches are loaded in the main process (num_workers=0)
test_ds2 = TestImageFolder(TEST_DIR, eval_transform)
test_loader2 = DataLoader(
    test_ds2,
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=0,
    pin_memory=True,
)

# Predict a class index per image and record (filename, class name) pairs
best_model.eval()
pred_rows = []
with torch.no_grad():
    for batch_imgs, batch_names in tqdm(test_loader2, desc="Inference (Test)"):
        batch_logits = best_model(batch_imgs.to(DEVICE))
        batch_ids = batch_logits.argmax(dim=1).cpu().tolist()
        pred_rows.extend(
            (fname, cls_names[class_id])
            for fname, class_id in zip(batch_names, batch_ids)
        )

# Write the predictions next to the notebook as submission.csv
save_csv = os.path.join(PROJECT_DIR, "submission.csv")
sub_df = pd.DataFrame(pred_rows, columns=["filename", "label"])
sub_df.to_csv(save_csv, index=False)
print("Saved submission file:", save_csv)


Inference (Test): 100%|██████████| 16/16 [00:07<00:00,  2.27it/s]

Saved submission file: d:\A\IE4483_project\submission.csv





In [15]:
# ==============================
# 7) Visualization of Correct/Incorrect Samples
# ==============================
import matplotlib.pyplot as plt
import torchvision.utils as vutils
import random

def collect_preds(model, loader):
    """Run ``model`` over ``loader`` and gather everything on the CPU.

    Args:
        model: Module mapping an input batch to per-class logits.
        loader: Iterable yielding ``(inputs, targets)`` tensor batches.

    Returns:
        ``(X, Y, P)``: the concatenated input batches, the concatenated
        targets, and the concatenated argmax predictions, all as CPU tensors.
    """
    model.eval()
    inputs, targets, preds = [], [], []
    with torch.no_grad():
        for batch_x, batch_y in loader:
            batch_x = batch_x.to(DEVICE)
            batch_y = batch_y.to(DEVICE)
            batch_pred = model(batch_x).argmax(1)
            # Move results back to the CPU before storing them
            inputs.append(batch_x.cpu())
            targets.append(batch_y.cpu())
            preds.append(batch_pred.cpu())
    return torch.cat(inputs), torch.cat(targets), torch.cat(preds)

# Collect validation inputs/labels/predictions, then split sample indices by outcome
X, Y, P = collect_preds(best_model, val_loader)
is_correct = (Y == P)
correct_idx = torch.nonzero(is_correct, as_tuple=False).squeeze(1).tolist()
wrong_idx = torch.nonzero(~is_correct, as_tuple=False).squeeze(1).tolist()

def save_grid(indices, title, path):
    """Save a 4-column image grid of up to 16 randomly chosen samples.

    Args:
        indices: List of row indices into the notebook-level tensor ``X``.
        title: Figure title; also used in the message printed when ``indices``
            is empty.
        path: Destination file for the rendered figure.

    Returns:
        None. Prints the saved path, or a notice when there is nothing to plot.
    """
    if len(indices) == 0:
        print(f"No samples for {title}")
        return

    n_show = min(16, len(indices))
    chosen = random.sample(indices, n_show)
    # normalize=True rescales pixel values into a displayable range
    grid = vutils.make_grid(X[chosen], nrow=4, padding=2, normalize=True)

    fig, ax = plt.subplots(figsize=(8, 8))
    ax.axis('off')
    ax.set_title(title)
    # make_grid returns channels-first; imshow expects channels-last
    ax.imshow(grid.permute(1, 2, 0))
    fig.savefig(path, bbox_inches='tight')
    plt.close(fig)
    print("Saved:", path)

# Write one grid of correct and one grid of incorrect validation samples under analysis/
analysis_dir = os.path.join(PROJECT_DIR, "analysis")
os.makedirs(analysis_dir, exist_ok=True)
save_grid(correct_idx, "Correctly Classified (Val)", os.path.join(analysis_dir, "correct_val.png"))
save_grid(wrong_idx, "Incorrectly Classified (Val)", os.path.join(analysis_dir, "wrong_val.png"))


Saved: d:\A\IE4483_project\analysis\correct_val.png
Saved: d:\A\IE4483_project\analysis\wrong_val.png
