In [1]:
# Cell 1 - install & imports
!pip install timm -q

import os
from pathlib import Path
import random
import time
from glob import glob

import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
%matplotlib inline

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms as T
import timm

from sklearn.metrics import accuracy_score, precision_score, recall_score, confusion_matrix, classification_report


[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m363.4/363.4 MB[0m [31m4.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.8/13.8 MB[0m [31m98.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m24.6/24.6 MB[0m [31m77.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m883.7/883.7 kB[0m [31m43.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m664.8/664.8 MB[0m [31m2.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m211.5/211.5 MB[0m [31m6.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m56.3/56.3 MB[0m [31m26.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m127.9/127.9 MB[0m [31m12.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━



In [2]:
ROOT = Path("/kaggle/input/dataset/anomaly_detection_test_data")
GOOD = ROOT / "good"
BAD  = ROOT / "bad"

good_imgs = sorted(GOOD.glob("*"))
bad_imgs  = sorted(BAD.glob("*"))

print("Good:", len(good_imgs))
print("Bad :", len(bad_imgs))


Good: 4157
Bad : 1023


In [3]:
from sklearn.model_selection import train_test_split

test_frac = 0.10
val_frac  = 0.10

g_train, g_test = train_test_split(good_imgs, test_size=test_frac, random_state=42)
g_train, g_val  = train_test_split(g_train, test_size=val_frac/(1-test_frac), random_state=42)

b_train, b_test = train_test_split(bad_imgs, test_size=test_frac, random_state=42)
b_train, b_val  = train_test_split(b_train, test_size=val_frac/(1-test_frac), random_state=42)

train_files = list(g_train) + list(b_train)
train_labels = [0]*len(g_train) + [1]*len(b_train)

val_files = list(g_val) + list(b_val)
val_labels = [0]*len(g_val) + [1]*len(b_val)

test_files = list(g_test) + list(b_test)
test_labels = [0]*len(g_test) + [1]*len(b_test)

print("Train:", len(train_files), "Val:", len(val_files), "Test:", len(test_files))


Train: 4142 Val: 519 Test: 519


In [4]:
IMAGE_SIZE = 224

IMAGE_SIZE = 224

train_transform = T.Compose([

    T.RandomResizedCrop(IMAGE_SIZE, scale=(0.8, 1.0), ratio=(0.9, 1.1)),
    T.RandomHorizontalFlip(p=0.5),
    T.RandomVerticalFlip(p=0.05),  # small chance (rare but helps)
    T.RandomRotation(degrees=5),
    T.RandomPerspective(distortion_scale=0.06, p=0.25),
    

    T.ColorJitter(
        brightness=0.25,
        contrast=0.25,
        saturation=0.20,
        hue=0.05
    ),
    
   
    T.RandomAdjustSharpness(sharpness_factor=2, p=0.3),
    T.GaussianBlur(kernel_size=(3, 3), sigma=(0.1, 2.0)),
    

  
    T.RandomAffine(
        degrees=0,
        translate=(0.03, 0.03),   
        scale=(0.95, 1.05)
    ),
    
    
    T.ToTensor(),
    
  
    T.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225]
    ),
    

    T.RandomErasing(
        p=0.25,
        scale=(0.01, 0.05),  
        ratio=(0.3, 3.3),
        value='random'
    ),
])


val_transform = T.Compose([
    T.Resize((IMAGE_SIZE, IMAGE_SIZE)),
    T.ToTensor(),
    T.Normalize(
        mean=[0.485,0.456,0.406],
        std=[0.229,0.224,0.225]
    ),
])



In [5]:
class GoodBadDataset(Dataset):
    def __init__(self, imgs, labels, transform):
        self.imgs = imgs
        self.labels = labels
        self.transform = transform

    def __len__(self):
        return len(self.imgs)

    def __getitem__(self, idx):
        p = self.imgs[idx]
        img = Image.open(p).convert("RGB")
        img = self.transform(img)
        return img, int(self.labels[idx])


In [6]:
train_ds = GoodBadDataset(train_files, train_labels, train_transform)
val_ds   = GoodBadDataset(val_files, val_labels, val_transform)
test_ds  = GoodBadDataset(test_files, test_labels, val_transform)

train_loader = DataLoader(train_ds, batch_size=16, shuffle=True, num_workers=2)
val_loader   = DataLoader(val_ds,   batch_size=16, shuffle=False, num_workers=2)
test_loader  = DataLoader(test_ds,  batch_size=16, shuffle=False, num_workers=2)

print("Loaders ready.")


Loaders ready.


In [7]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)

MODEL_NAME = "vit_base_patch16_224"  
model = timm.create_model(MODEL_NAME, pretrained=True, num_classes=2)
model = model.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.AdamW(model.parameters(), lr=2e-5)


Using device: cuda


model.safetensors:   0%|          | 0.00/346M [00:00<?, ?B/s]

In [8]:
def train_one_epoch():
    model.train()
    total, correct = 0, 0
    for imgs, labels in train_loader:
        imgs = imgs.to(device)
        labels = labels.to(device)

        logits = model(imgs)
        loss = criterion(logits, labels)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        total += labels.size(0)
        correct += (logits.argmax(1) == labels).sum().item()

    return loss.item(), correct/total


@torch.no_grad()
def eval_epoch(loader):
    model.eval()
    total, correct = 0, 0
    preds, gts = [], []

    for imgs, labels in loader:
        imgs = imgs.to(device)
        labels = labels.to(device)

        logits = model(imgs)
        total += labels.size(0)
        correct += (logits.argmax(1) == labels).sum().item()

        preds += logits.argmax(1).cpu().tolist()
        gts += labels.cpu().tolist()

    acc = correct / total
    prec = precision_score(gts, preds, zero_division=0)
    rec = recall_score(gts, preds, zero_division=0)

    return acc, prec, rec


In [9]:
best_val_acc = 0
EPOCHS = 8

for ep in range(1, EPOCHS+1):
    train_loss, train_acc = train_one_epoch()
    val_acc, val_prec, val_rec = eval_epoch(val_loader)

    print(f"Epoch {ep}/{EPOCHS} | Train Loss={train_loss:.4f} | Train Acc={train_acc:.4f} | Val Acc={val_acc:.4f} | Val Prec={val_prec:.4f} | Val Rec={val_rec:.4f}")

    if val_acc > best_val_acc:
        best_val_acc = val_acc
        torch.save(model.state_dict(), "best_vit_classifier.pth")
        print("Saved best model")


Epoch 1/8 | Train Loss=0.3620 | Train Acc=0.9116 | Val Acc=0.9865 | Val Prec=0.9444 | Val Rec=0.9903
Saved best model
Epoch 2/8 | Train Loss=0.3017 | Train Acc=0.9469 | Val Acc=0.9480 | Val Prec=0.8016 | Val Rec=0.9806
Epoch 3/8 | Train Loss=0.0521 | Train Acc=0.9442 | Val Acc=0.9403 | Val Prec=0.7727 | Val Rec=0.9903
Epoch 4/8 | Train Loss=0.0812 | Train Acc=0.9623 | Val Acc=0.9403 | Val Prec=0.7857 | Val Rec=0.9612
Epoch 5/8 | Train Loss=0.4300 | Train Acc=0.9672 | Val Acc=0.8671 | Val Prec=0.6000 | Val Rec=0.9903
Epoch 6/8 | Train Loss=0.0298 | Train Acc=0.9689 | Val Acc=0.9904 | Val Prec=0.9537 | Val Rec=1.0000
Saved best model
Epoch 7/8 | Train Loss=0.0331 | Train Acc=0.9684 | Val Acc=0.9827 | Val Prec=0.9796 | Val Rec=0.9320
Epoch 8/8 | Train Loss=0.0558 | Train Acc=0.9775 | Val Acc=0.9807 | Val Prec=1.0000 | Val Rec=0.9029


In [10]:
model.load_state_dict(torch.load("best_vit_classifier.pth"))
test_acc, test_prec, test_rec = eval_epoch(test_loader)

print("Test Accuracy:", test_acc)
print("Test Precision:", test_prec)
print("Test Recall (bad):", test_rec)


Test Accuracy: 0.9826589595375722
Test Precision: 0.9433962264150944
Test Recall (bad): 0.970873786407767
