# VeriClaim — Damage Classifier Training

**Before running:** Go to Runtime → Change runtime type → Select T4 GPU → Save

Run cells one by one in order. Do not skip any cell.

## Cell 1 — Mount Google Drive

In [None]:
import os

from google.colab import drive

# Attach the user's Google Drive to the Colab VM at /content/drive.
drive.mount('/content/drive')

# Project folder on Drive; creating it is a no-op when it already exists.
project_dir = '/content/drive/MyDrive/VeriClaim'
os.makedirs(project_dir, exist_ok=True)
print('Drive mounted. VeriClaim folder ready on Drive.')

## Cell 2 — Install dependencies

In [None]:
!pip install timm==0.9.12 kaggle -q

import torch
print(f'PyTorch version : {torch.__version__}')
print(f'CUDA available  : {torch.cuda.is_available()}')
if torch.cuda.is_available():
    print(f'GPU             : {torch.cuda.get_device_name(0)}')
else:
    print('WARNING: No GPU detected. Go to Runtime -> Change runtime type -> T4 GPU')

## Cell 3 — Upload kaggle.json

When the file picker appears, upload your kaggle.json file.
Get it from: kaggle.com → Account → Settings → API → Create New Token

In [None]:
from google.colab import files
import os, json

KAGGLE_DIR  = os.path.expanduser('~/.kaggle')
KAGGLE_JSON = os.path.join(KAGGLE_DIR, 'kaggle.json')

print('A file picker will appear below. Upload your kaggle.json file.')
uploaded = files.upload()  # dict: stored filename -> file content (bytes)

# Colab de-duplicates upload names when a file with the same name already
# exists in the working directory (e.g. 'kaggle (1).json' on a re-run), so
# accept that pattern as well as the exact name.
token_name = next(
    (
        name for name in uploaded
        if name == 'kaggle.json'
        or (name.startswith('kaggle (') and name.endswith(').json'))
    ),
    None,
)
if token_name is None:
    raise Exception('kaggle.json was not uploaded. Re-run this cell and upload the file.')

raw_token = uploaded[token_name]

# Validate BEFORE writing anything to ~/.kaggle so a bad upload never
# replaces a previously working credentials file.
try:
    creds = json.loads(raw_token.decode('utf-8'))
except (UnicodeDecodeError, json.JSONDecodeError) as err:
    raise Exception(
        f'{token_name} is not valid JSON. Create a new API token on Kaggle '
        'and re-run this cell.'
    ) from err

if not isinstance(creds, dict) or 'username' not in creds or 'key' not in creds:
    raise Exception(
        f'{token_name} does not contain the expected "username" and "key" fields. '
        'Create a new API token on Kaggle and re-run this cell.'
    )

os.makedirs(KAGGLE_DIR, exist_ok=True)

# Write the original bytes unchanged, then restrict the file to the owner.
with open(KAGGLE_JSON, 'wb') as f:
    f.write(raw_token)
os.chmod(KAGGLE_JSON, 0o600)

print(f'Kaggle credentials configured for user: {creds["username"]}')

## Cell 4 — Download dataset from Kaggle

In [None]:
import os

os.makedirs('/content/data', exist_ok=True)

print('Downloading car damage dataset from Kaggle...')
!kaggle datasets download -d anujms/car-damage-detection -p /content/data --unzip

# Print the folder tree so we can see what was downloaded
print('\nDownloaded structure:')
for root, dirs, files in os.walk('/content/data'):
    level = root.replace('/content/data', '').count(os.sep)
    indent = '  ' * level
    img_count = len([f for f in files if f.lower().endswith(('.jpg', '.jpeg', '.png'))])
    folder_name = os.path.basename(root)
    if img_count > 0:
        print(f'{indent}{folder_name}/ ({img_count} images)')
    elif level <= 2:
        print(f'{indent}{folder_name}/')

## Cell 5 — Organize images into minor / moderate / severe

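The downloaded dataset has only two classes: 00-damage and 01-whole.
It has no severity labels, so we derive three placeholder classes as follows:
- 01-whole (undamaged cars)                      -> minor
- one random half of 00-damage (seeded shuffle)  -> moderate
- the remaining half of 00-damage                -> severe

Note: because the moderate/severe split is random, those two labels do not reflect actual damage severity.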

This cell finds those folders regardless of where they extracted to.

In [None]:
import os, shutil, random

ORGANIZED  = '/content/data/organized'
IMAGE_EXTS = ('.jpg', '.jpeg', '.png')

for cls in ['minor', 'moderate', 'severe']:
    os.makedirs(os.path.join(ORGANIZED, cls), exist_ok=True)

whole_imgs  = []
damage_imgs = []

# Walk the entire /content/data tree looking for the 00-damage and 01-whole
# folders. Loop variables avoid the name `files`, which is the google.colab
# module in the shared notebook namespace.
for root, dirnames, filenames in os.walk('/content/data'):
    # Never descend into our own output folder (matters when re-running).
    dirnames[:] = [d for d in dirnames if os.path.join(root, d) != ORGANIZED]

    folder = os.path.basename(root).lower()
    imgs = [
        os.path.join(root, name) for name in filenames
        if name.lower().endswith(IMAGE_EXTS)
    ]
    if not imgs:
        continue
    # '01-whole' contains 'whole' and '00-damage' contains 'damage', so one
    # substring test per class covers both spellings.
    if 'whole' in folder:
        whole_imgs.extend(imgs)
    elif 'damage' in folder:
        damage_imgs.extend(imgs)

# os.walk yields entries in filesystem order; sort so the seeded shuffle
# below produces the same split on every run.
whole_imgs.sort()
damage_imgs.sort()

print(f'Found {len(whole_imgs)} whole (undamaged) images')
print(f'Found {len(damage_imgs)} damage images')

if len(whole_imgs) + len(damage_imgs) == 0:
    raise Exception(
        'No images found. Check the folder tree printed in Cell 4. '
        'The dataset may have extracted to an unexpected path.'
    )
if not whole_imgs or not damage_imgs:
    print('WARNING: one of the two source classes is empty; '
          'at least one target class will have no images.')

# Split damage images into moderate and severe.
# NOTE(review): this is a random 50/50 split, so the moderate/severe labels
# do not encode real severity.
random.seed(42)
random.shuffle(damage_imgs)
mid = len(damage_imgs) // 2
moderate_imgs = damage_imgs[:mid]
severe_imgs   = damage_imgs[mid:]


def copy_images(img_list, cls_name):
    """Copy every path in `img_list` into ORGANIZED/<cls_name>.

    Files are renamed to `<cls_name>_<index><ext>` so images with the same
    basename coming from different source folders cannot overwrite each other.
    """
    dest = os.path.join(ORGANIZED, cls_name)
    for i, src in enumerate(img_list):
        ext = os.path.splitext(src)[1].lower() or '.jpg'
        shutil.copy2(src, os.path.join(dest, f'{cls_name}_{i:05d}{ext}'))


copy_images(whole_imgs,    'minor')
copy_images(moderate_imgs, 'moderate')
copy_images(severe_imgs,   'severe')

print('\nOrganized dataset:')
total = 0
for cls in ['minor', 'moderate', 'severe']:
    n = len(os.listdir(os.path.join(ORGANIZED, cls)))
    total += n
    print(f'  {cls:10s}: {n} images')
print(f'  {"total":10s}: {total} images')

assert total > 0, 'Organized folder is empty after copying. Something went wrong above.'
print('\nDataset ready for training.')

## Cell 6 — Define Dataset and Model classes

In [None]:
import os
import torch
import torch.nn as nn
import timm
from torch.utils.data import Dataset
from torchvision import transforms
from PIL import Image

# Class names in label order: a class's position in this list is its integer label.
CLASSES      = ['minor', 'moderate', 'severe']
CLASS_TO_IDX = dict(zip(CLASSES, range(len(CLASSES))))

# Shared preprocessing constants. The mean/std values are the ones commonly
# published as the ImageNet per-channel statistics.
_INPUT_SIZE = (224, 224)
_NORM_MEAN  = [0.485, 0.456, 0.406]
_NORM_STD   = [0.229, 0.224, 0.225]

# Training pipeline: resize, random augmentation, then tensor + normalisation.
TRAIN_TRANSFORMS = transforms.Compose([
    transforms.Resize(_INPUT_SIZE),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomRotation(degrees=20),
    transforms.ColorJitter(brightness=0.4, contrast=0.4, saturation=0.3, hue=0.1),
    transforms.RandomGrayscale(p=0.05),
    transforms.ToTensor(),
    transforms.Normalize(_NORM_MEAN, _NORM_STD),
])

# Validation/inference pipeline: same resize and normalisation, no augmentation.
VAL_TRANSFORMS = transforms.Compose([
    transforms.Resize(_INPUT_SIZE),
    transforms.ToTensor(),
    transforms.Normalize(_NORM_MEAN, _NORM_STD),
])


class CarDamageDataset(Dataset):
    """Image-folder dataset with one sub-folder per entry of CLASSES.

    Args:
        image_dir: Directory containing one sub-folder per class name.
        transform: Optional callable applied to each PIL image in
            `__getitem__`.

    Attributes:
        samples: List of `(image_path, label_index)` tuples, grouped by class
            in CLASSES order and sorted by filename within each class.
    """

    IMAGE_EXTS = ('.jpg', '.jpeg', '.png')

    def __init__(self, image_dir, transform=None):
        self.samples   = []
        self.transform = transform
        for cls in CLASSES:
            cls_dir = os.path.join(image_dir, cls)
            if not os.path.isdir(cls_dir):
                print(f'  WARNING: {cls_dir} not found, skipping.')
                continue
            # os.listdir order is filesystem-dependent; sort so sample indices
            # (and therefore any seeded split over them) are reproducible.
            fnames = sorted(
                fname for fname in os.listdir(cls_dir)
                if fname.lower().endswith(self.IMAGE_EXTS)
            )
            self.samples.extend(
                (os.path.join(cls_dir, fname), CLASS_TO_IDX[cls]) for fname in fnames
            )
            print(f'  {cls:10s}: {len(fnames)} images loaded')
        print(f'  Total samples: {len(self.samples)}')

    def __len__(self):
        return len(self.samples)

    def __getitem__(self, idx):
        """Return `(image, label)` for sample `idx`.

        An unreadable image is replaced by a gray 224x224 placeholder so one
        corrupt file does not abort training; the failure is reported instead
        of being swallowed silently.
        """
        path, label = self.samples[idx]
        try:
            img = Image.open(path).convert('RGB')
        except Exception as exc:
            print(f'  WARNING: could not read {path} ({exc}); using gray placeholder.')
            img = Image.new('RGB', (224, 224), (128, 128, 128))
        if self.transform:
            img = self.transform(img)
        return img, label


# Wrapper to apply different transforms to the validation split
# without touching the original dataset object
class ValWrapper(Dataset):
    """View over a subset that applies its own transform.

    Args:
        subset: Object exposing `.indices` and `.dataset.samples` (a list of
            `(path, label)` tuples); both are read in `__getitem__`.
        transform: Callable applied to every PIL image.
    """

    def __init__(self, subset, transform):
        self.subset    = subset
        self.transform = transform

    def __len__(self):
        return len(self.subset)

    def __getitem__(self, idx):
        # Read the raw (path, label) pair directly so the underlying dataset's
        # own transform is bypassed.
        path, label = self.subset.dataset.samples[self.subset.indices[idx]]
        try:
            img = Image.open(path).convert('RGB')
        except Exception as exc:
            # Best-effort: keep going with a gray placeholder, but report it.
            print(f'  WARNING: could not read {path} ({exc}); using gray placeholder.')
            img = Image.new('RGB', (224, 224), (128, 128, 128))
        return self.transform(img), label


class DamageClassifier(nn.Module):
    """timm `efficientnet_b0` with a `num_classes`-way classification head.

    Args:
        num_classes: Number of output logits.
        pretrained: Passed through to `timm.create_model`.
    """

    def __init__(self, num_classes=3, pretrained=True):
        super().__init__()
        self.backbone = timm.create_model(
            'efficientnet_b0',
            pretrained=pretrained,
            num_classes=num_classes
        )

    def forward(self, x):
        """Return the backbone's output for batch `x`."""
        return self.backbone(x)

    def save(self, path):
        """Write this module's state dict to `path`."""
        torch.save(self.state_dict(), path)
        print(f'Model saved to {path}')

    @classmethod
    def load(cls, path, num_classes=3):
        """Build a non-pretrained model, load weights from `path`, return it in eval mode.

        The weights are mapped to CPU; the caller moves the model to a device.
        """
        model = cls(num_classes=num_classes, pretrained=False)
        # The checkpoint is a plain state dict (see `save`), so restrict
        # unpickling to tensors/containers instead of arbitrary objects.
        state = torch.load(path, map_location='cpu', weights_only=True)
        model.load_state_dict(state)
        model.eval()
        return model


# Confirmation message printed once the definitions above have executed.
print('Dataset and Model classes defined successfully.')

## Cell 7 — Train the model

In [None]:
import torch
from torch.utils.data import DataLoader, random_split
from torch.optim import AdamW
from torch.optim.lr_scheduler import CosineAnnealingLR

# Config
DATA_DIR  = '/content/data/organized'
SAVE_PATH = '/content/best_model.pt'
DEVICE    = 'cuda' if torch.cuda.is_available() else 'cpu'
EPOCHS    = 20
BATCH     = 16
LR        = 3e-4
PATIENCE  = 7   # epochs without val improvement before early stopping

print(f'Device : {DEVICE}')
print(f'Epochs : {EPOCHS}')
print()

# Load dataset
print('Loading dataset...')
full_ds = CarDamageDataset(DATA_DIR, transform=TRAIN_TRANSFORMS)

assert len(full_ds) > 0, (
    'Dataset is empty. Make sure Cell 5 ran successfully '
    'and printed "Dataset ready for training."'
)

# 80/20 train/val split with a fixed seed.
n_val   = max(1, int(len(full_ds) * 0.2))
n_train = len(full_ds) - n_val
assert n_train > 0, 'Not enough images to create a training split.'

train_subset, val_subset = random_split(
    full_ds, [n_train, n_val],
    generator=torch.Generator().manual_seed(42)
)

# The validation subset reads raw samples and applies VAL_TRANSFORMS
# (no augmentation) instead of the training transforms on full_ds.
val_ds = ValWrapper(val_subset, VAL_TRANSFORMS)

# Pinned host memory only helps host->GPU copies; skip it on CPU-only runs.
pin = DEVICE == 'cuda'
train_loader = DataLoader(
    train_subset, batch_size=BATCH, shuffle=True,
    num_workers=2, pin_memory=pin
)
val_loader = DataLoader(
    val_ds, batch_size=BATCH, shuffle=False,
    num_workers=2, pin_memory=pin
)

print(f'Train : {n_train} images')
print(f'Val   : {n_val} images')
print()

# Build model — freeze backbone, only train classifier head for first 2 epochs
model = DamageClassifier(num_classes=3, pretrained=True).to(DEVICE)

for p in model.backbone.parameters():
    p.requires_grad = False
for p in model.backbone.classifier.parameters():
    p.requires_grad = True

criterion = torch.nn.CrossEntropyLoss(label_smoothing=0.1)
optimizer = AdamW(
    filter(lambda p: p.requires_grad, model.parameters()),
    lr=LR, weight_decay=0.01
)
scheduler = CosineAnnealingLR(optimizer, T_max=EPOCHS)

best_val_acc     = 0.0
patience_counter = 0
unfrozen         = False

print('Starting training...')
print('-' * 55)

for epoch in range(EPOCHS):

    # After 2 warmup epochs unfreeze last 3 blocks for fine-tuning.
    # The optimizer and scheduler are rebuilt so the newly trainable
    # parameters are included, at a lower learning rate.
    if epoch == 2 and not unfrozen:
        for p in model.backbone.blocks[-3:].parameters():
            p.requires_grad = True
        optimizer = AdamW(
            filter(lambda p: p.requires_grad, model.parameters()),
            lr=LR / 5, weight_decay=0.01
        )
        scheduler = CosineAnnealingLR(optimizer, T_max=EPOCHS - 2)
        unfrozen  = True
        print('  Unfroze last 3 backbone blocks for fine-tuning')

    # Train
    model.train()
    correct, total = 0, 0
    for imgs, labels in train_loader:
        imgs, labels = imgs.to(DEVICE), labels.to(DEVICE)
        optimizer.zero_grad()
        logits = model(imgs)
        loss = criterion(logits, labels)
        loss.backward()
        optimizer.step()
        # Reuse the logits from the training forward pass for accuracy
        # instead of running a second forward pass per batch in train mode.
        correct += (logits.detach().argmax(1) == labels).sum().item()
        total += labels.size(0)

    # Validate
    model.eval()
    val_correct, val_total = 0, 0
    with torch.no_grad():
        for imgs, labels in val_loader:
            imgs, labels = imgs.to(DEVICE), labels.to(DEVICE)
            val_correct += (model(imgs).argmax(1) == labels).sum().item()
            val_total   += labels.size(0)

    train_acc = correct / total
    val_acc   = val_correct / val_total
    scheduler.step()

    print(f'Epoch {epoch+1:02d}/{EPOCHS} | Train: {train_acc:.3f} | Val: {val_acc:.3f}')

    # Checkpoint on strict improvement; otherwise count towards early stopping.
    if val_acc > best_val_acc:
        best_val_acc     = val_acc
        patience_counter = 0
        model.save(SAVE_PATH)
    else:
        patience_counter += 1
        if patience_counter >= PATIENCE:
            print('Early stopping triggered.')
            break

print('-' * 55)
print(f'Training complete. Best val_acc: {best_val_acc:.3f}')

## Cell 8 — Verify the saved model works

In [None]:
import os, torch
from PIL import Image

assert os.path.exists(SAVE_PATH), f'best_model.pt not found at {SAVE_PATH}'

# Rebuild the classifier from the checkpoint and move it to the training device.
loaded_model = DamageClassifier.load(SAVE_PATH).to(DEVICE)

# Pick the first file of the first non-empty class folder as a smoke-test input.
test_img_path = None
for cls_name in ('minor', 'moderate', 'severe'):
    candidate_dir = os.path.join(DATA_DIR, cls_name)
    if not os.path.exists(candidate_dir):
        continue
    entries = os.listdir(candidate_dir)
    if not entries:
        continue
    test_img_path = os.path.join(candidate_dir, entries[0])
    break

assert test_img_path is not None, 'No test image found'

# Preprocess exactly as during validation and add a batch dimension.
sample = Image.open(test_img_path).convert('RGB')
batch  = VAL_TRANSFORMS(sample).unsqueeze(0).to(DEVICE)

with torch.no_grad():
    # Softmax over the class dimension; [0] selects the single batch item.
    probs = torch.softmax(loaded_model(batch), dim=1)[0]
    pred  = probs.argmax().item()

print('Model verification passed.')
print(f'Test image  : {test_img_path}')
print(f'Prediction  : {CLASSES[pred]} (confidence: {probs[pred]:.3f})')
print(f'All probs   : { {c: round(probs[i].item(), 3) for i, c in enumerate(CLASSES)} }')
print()
print('best_model.pt is working correctly.')

## Cell 9 — Save best_model.pt to Google Drive

In [None]:
import os, shutil

DRIVE_DEST = '/content/drive/MyDrive/VeriClaim/best_model.pt'

assert os.path.exists(SAVE_PATH), (
    f'{SAVE_PATH} not found. Did Cell 7 complete without errors?'
)

# Make sure the destination folder exists, then copy the checkpoint
# (copy2 also preserves file metadata).
drive_folder = os.path.dirname(DRIVE_DEST)
os.makedirs(drive_folder, exist_ok=True)
shutil.copy2(SAVE_PATH, DRIVE_DEST)

# Report the size of the copy on Drive in mebibytes.
bytes_per_mb = 1024 * 1024
size_mb = os.path.getsize(DRIVE_DEST) / bytes_per_mb
print(f'Saved to Google Drive')
print(f'Path    : {DRIVE_DEST}')
print(f'Size    : {size_mb:.1f} MB')
print(f'Val acc : {best_val_acc:.3f}')

## Cell 10 — Download best_model.pt directly to your computer

This triggers a browser download of the file straight to your Downloads folder.
After it downloads, move it to:

    VeriClaim/models/damage_classifier/best_model.pt

In [None]:
import os

from google.colab import files

assert os.path.exists(SAVE_PATH), f'{SAVE_PATH} not found'

for notice in (
    'Starting download of best_model.pt...',
    'Check your browser for the download prompt.',
):
    print(notice)

# Hand the checkpoint to the browser as a file download.
files.download(SAVE_PATH)

# Follow-up instructions; empty strings print as blank separator lines.
for line in (
    '',
    'AFTER DOWNLOAD:',
    'Move best_model.pt into your local project at:',
    '   VeriClaim/models/damage_classifier/best_model.pt',
    '',
    f'Final best val_acc: {best_val_acc:.3f}',
    'Training notebook complete.',
):
    print(line)