## Cell 1 — Mount Google Drive

In [None]:
# Mount Google Drive at /content/drive; a later cell copies the trained
# model to /content/drive/MyDrive/claimlens/.
from google.colab import drive
drive.mount('/content/drive')

## Cell 2 — Install dependencies (timm, kaggle)

In [None]:
!pip install timm kaggle

## Cell 3 — Upload kaggle.json

In [None]:
from google.colab import files
import os

print('Please upload your kaggle.json')
uploaded = files.upload()  # dict: uploaded file name -> file contents (bytes)

if 'kaggle.json' in uploaded:
    # Write the credentials straight from the upload payload with the standard
    # library rather than shelling out to mkdir/cp/chmod, so the result does
    # not depend on the current working directory and any failure raises here.
    kaggle_dir = os.path.expanduser('~/.kaggle')
    os.makedirs(kaggle_dir, exist_ok=True)
    cred_path = os.path.join(kaggle_dir, 'kaggle.json')
    with open(cred_path, 'wb') as cred_file:
        cred_file.write(uploaded['kaggle.json'])
    os.chmod(cred_path, 0o600)  # owner read/write only, same as `chmod 600`
    print('Kaggle credentials configured.')
else:
    # Best-effort: only report the problem; the download cell will fail later.
    print('kaggle.json not found.')

## Cell 4 — Download dataset using kaggle CLI

In [None]:
# Create the target folder, then download the Kaggle dataset into it and unzip it.
# Presumably relies on the credentials file placed under ~/.kaggle by the upload cell.
!mkdir -p /content/data/raw/damage_images
!kaggle datasets download -d anujms/car-damage-detection -p /content/data/raw/damage_images --unzip
# NOTE: this message is printed unconditionally; it does not check whether the
# download command above actually succeeded.
print('Dataset downloaded.')

## Cell 5 — Organize images into minor/moderate/severe subfolders

In [None]:
import os
import shutil
import random

base_dir = '/content/data/raw/damage_images/data1a'
dest_dir = '/content/data/raw/damage_images'


def organize_damage_images(base_dir, dest_dir, seed=42):
    """Copy the damage/whole dataset into minor/moderate/severe sub-folders.

    Images found in ``<base_dir>/<split>/01-whole`` (for the ``training`` and
    ``validation`` splits) are copied to ``<dest_dir>/minor``. Images found in
    ``<base_dir>/<split>/00-damage`` are shuffled and split in half between
    ``<dest_dir>/moderate`` and ``<dest_dir>/severe``.

    NOTE(review): the moderate/severe assignment is a random half/half split,
    not something derived from the image content, and "whole" images are
    stored under "minor". Confirm this labelling is what the model should learn.

    Args:
        base_dir: Folder containing the ``training`` / ``validation`` splits.
        dest_dir: Folder in which the three label folders are created.
        seed: Seed for the shuffle. Together with the sorted directory listing
            it makes the moderate/severe assignment repeatable across runs.

    Returns:
        dict mapping each label to the number of images copied for it.
    """
    image_exts = ('.png', '.jpg', '.jpeg')

    def list_images(folder):
        # Sorted so the result does not depend on the filesystem's listing order.
        if not os.path.exists(folder):
            return []
        return [
            os.path.join(folder, name)
            for name in sorted(os.listdir(folder))
            if name.lower().endswith(image_exts)
        ]

    whole_images, damage_images = [], []
    for split in ('training', 'validation'):
        whole_images += list_images(os.path.join(base_dir, split, '01-whole'))
        damage_images += list_images(os.path.join(base_dir, split, '00-damage'))

    # A private Random instance keeps the split reproducible without touching
    # the state of the global `random` generator.
    random.Random(seed).shuffle(damage_images)
    mid = len(damage_images) // 2
    groups = {
        'minor': whole_images,
        'moderate': damage_images[:mid],
        'severe': damage_images[mid:],
    }

    for label, paths in groups.items():
        label_dir = os.path.join(dest_dir, label)
        os.makedirs(label_dir, exist_ok=True)
        for i, src in enumerate(paths):
            # Copies are always named *.jpg, whatever the source extension was.
            shutil.copy(src, os.path.join(label_dir, f'{label}_{i}.jpg'))

    return {label: len(paths) for label, paths in groups.items()}


if os.path.exists(base_dir):
    print('Dataset uses "00-damage" and "01-whole" folders. Reorganizing...')
    counts = organize_damage_images(base_dir, dest_dir)
    print(f'Organized: {counts["minor"]} minor, {counts["moderate"]} moderate, {counts["severe"]} severe.')
else:
    print('Assuming dataset already has severity labels (minor/moderate/severe).')

## Cell 6 — Training code

In [None]:
import os
import torch
import torch.nn as nn
import timm
from torch.utils.data import DataLoader, random_split, Dataset
from torch.optim import AdamW
from torch.optim.lr_scheduler import CosineAnnealingLR
from torchvision import transforms
from PIL import Image

# Severity labels; a label's position in this list is its integer class id.
CLASSES = ['minor', 'moderate', 'severe']
CLASS_TO_IDX = dict(zip(CLASSES, range(len(CLASSES))))

# Values shared by both pipelines, named once so they cannot drift apart.
_INPUT_SIZE = (224, 224)
_NORM_MEAN = [0.485, 0.456, 0.406]
_NORM_STD = [0.229, 0.224, 0.225]

# Training pipeline: resize, random flip / rotation / colour jitter, then
# tensor conversion and per-channel normalisation.
TRAIN_TRANSFORMS = transforms.Compose([
    transforms.Resize(_INPUT_SIZE),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomRotation(degrees=15),
    transforms.ColorJitter(brightness=0.3, contrast=0.3, saturation=0.2),
    transforms.ToTensor(),
    transforms.Normalize(_NORM_MEAN, _NORM_STD)
])

# Validation pipeline: the same resize and normalisation, no random augmentation.
VAL_TRANSFORMS = transforms.Compose([
    transforms.Resize(_INPUT_SIZE),
    transforms.ToTensor(),
    transforms.Normalize(_NORM_MEAN, _NORM_STD)
])

class CarDamageDataset(Dataset):
    """Image-folder dataset with one sub-folder per severity class.

    ``samples`` is a list of ``(image_path, class_index)`` tuples built by
    scanning ``<image_dir>/<class_name>`` for .jpg/.jpeg/.png files
    (case-insensitive). Class folders that do not exist are skipped silently.

    Args:
        image_dir: Root folder containing the class sub-folders.
        transform: Optional callable applied to the RGB PIL image in
            ``__getitem__``.
        classes: Optional sequence of class folder names; the index of a name
            in the sequence is its label. Defaults to the module-level
            ``CLASSES``.
    """

    IMAGE_EXTENSIONS = ('.jpg', '.jpeg', '.png')

    def __init__(self, image_dir, transform=None, classes=None):
        self.samples = []
        self.transform = transform
        class_names = CLASSES if classes is None else list(classes)
        for label, cls in enumerate(class_names):
            cls_dir = os.path.join(image_dir, cls)
            if not os.path.isdir(cls_dir):
                continue
            # Sorted so that sample indices (and therefore any index-based
            # train/validation split) do not depend on filesystem listing order.
            for fname in sorted(os.listdir(cls_dir)):
                if fname.lower().endswith(self.IMAGE_EXTENSIONS):
                    self.samples.append((os.path.join(cls_dir, fname), label))

    def __len__(self):
        """Return the number of (path, label) samples found."""
        return len(self.samples)

    def __getitem__(self, idx):
        """Load sample ``idx`` as RGB, apply the transform if set, return (image, label)."""
        path, label = self.samples[idx]
        # Context manager closes the underlying file once the pixels are converted.
        with Image.open(path) as raw:
            img = raw.convert('RGB')
        if self.transform:
            img = self.transform(img)
        return img, label

class DamageClassifier(nn.Module):
    """Thin wrapper around a timm ``efficientnet_b0`` used as the classifier.

    The timm model is created with ``num_classes`` outputs and stored as
    ``self.backbone``; ``forward`` simply delegates to it.
    """

    def __init__(self, num_classes=3, pretrained=True):
        super().__init__()
        backbone_kwargs = dict(pretrained=pretrained, num_classes=num_classes)
        self.backbone = timm.create_model('efficientnet_b0', **backbone_kwargs)

    def forward(self, x):
        """Return the backbone's output for the input batch ``x``."""
        scores = self.backbone(x)
        return scores

    def save(self, path):
        """Write this module's ``state_dict`` to ``path``."""
        weights = self.state_dict()
        torch.save(weights, path)

    @classmethod
    def load(cls, path, num_classes=3):
        """Build a non-pretrained model, load weights from ``path`` onto the CPU
        and return it in eval mode."""
        state = torch.load(path, map_location='cpu')
        restored = cls(num_classes=num_classes, pretrained=False)
        restored.load_state_dict(state)
        restored.eval()
        return restored

# Training configuration read by train().
# Use the GPU when torch reports CUDA as available, otherwise the CPU.
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
# Maximum number of epochs; also used as T_max of the cosine LR schedule.
EPOCHS = 20
# Batch size for both the training and the validation DataLoader.
BATCH_SIZE = 32
# Initial learning rate passed to AdamW.
LR = 1e-4
# Number of consecutive epochs without a validation-accuracy improvement
# after which training stops early.
PATIENCE = 5
# Root folder holding the minor/moderate/severe image sub-folders.
DATA_DIR = '/content/data/raw/damage_images'
# Where the best-scoring model weights are written during training.
SAVE_PATH = '/content/best_model.pt'

def train(seed=42):
    """Fine-tune DamageClassifier on DATA_DIR and save the best weights.

    The images are split 80/20 into training and validation subsets. The
    model is trained with AdamW and a cosine learning-rate schedule for up to
    EPOCHS epochs. Whenever validation accuracy improves, the weights are
    written to SAVE_PATH; training stops early after PATIENCE consecutive
    epochs without improvement.

    Args:
        seed: Seed passed to ``torch.manual_seed`` so the train/validation
            split is repeatable.

    Side effects:
        Sets the module-level global ``best_val_acc`` (read by a later cell)
        and writes SAVE_PATH.

    Raises:
        RuntimeError: if DATA_DIR holds too few images to form a non-empty
            validation split.
    """
    import copy  # local import: only needed to clone the dataset view below

    global best_val_acc
    best_val_acc = 0.0
    torch.manual_seed(seed)

    full_ds = CarDamageDataset(DATA_DIR, transform=TRAIN_TRANSFORMS)
    n_val = int(len(full_ds) * 0.2)
    if n_val == 0:
        # Without this guard an empty validation set ends in a division by zero.
        raise RuntimeError(
            f'Found {len(full_ds)} images under {DATA_DIR}; '
            'at least 5 are needed for an 80/20 train/validation split.'
        )
    train_ds, val_split = random_split(full_ds, [len(full_ds) - n_val, n_val])

    # Both subsets returned by random_split reference the SAME dataset object,
    # so assigning a transform through one of them would change the other too.
    # Give validation its own shallow copy (sharing the sample list) that
    # carries the validation transforms, and index it with the split's indices.
    eval_view = copy.copy(full_ds)
    eval_view.transform = VAL_TRANSFORMS
    val_ds = torch.utils.data.Subset(eval_view, val_split.indices)

    train_loader = DataLoader(train_ds, batch_size=BATCH_SIZE, shuffle=True, num_workers=2)
    val_loader   = DataLoader(val_ds,   batch_size=BATCH_SIZE, shuffle=False, num_workers=2)

    model = DamageClassifier(num_classes=3).to(DEVICE)
    criterion = nn.CrossEntropyLoss()
    optimizer = AdamW(model.parameters(), lr=LR, weight_decay=1e-4)
    scheduler = CosineAnnealingLR(optimizer, T_max=EPOCHS)

    patience_counter = 0

    for epoch in range(EPOCHS):
        # ---- training pass ----
        model.train()
        correct, total = 0, 0
        for imgs, labels in train_loader:
            imgs, labels = imgs.to(DEVICE), labels.to(DEVICE)
            optimizer.zero_grad()
            outputs = model(imgs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            correct += (outputs.argmax(1) == labels).sum().item()
            total += labels.size(0)

        # ---- validation pass (no gradients, eval mode) ----
        model.eval()
        val_correct, val_total = 0, 0
        with torch.no_grad():
            for imgs, labels in val_loader:
                imgs, labels = imgs.to(DEVICE), labels.to(DEVICE)
                outputs = model(imgs)
                val_correct += (outputs.argmax(1) == labels).sum().item()
                val_total += labels.size(0)

        train_acc = correct / total
        val_acc   = val_correct / val_total
        scheduler.step()  # one scheduler step per epoch
        print(f'Epoch {epoch+1}/{EPOCHS} | Train Acc: {train_acc:.3f} | Val Acc: {val_acc:.3f}')

        if val_acc > best_val_acc:
            # New best: checkpoint and reset the early-stopping counter.
            best_val_acc = val_acc
            patience_counter = 0
            model.save(SAVE_PATH)
            print(f'  Saved best model (val_acc={val_acc:.3f})')
        else:
            patience_counter += 1
            if patience_counter >= PATIENCE:
                print('Early stopping triggered.')
                break

# Run training now. Besides writing SAVE_PATH, this sets the global
# `best_val_acc` that the final confirmation cell prints.
train()

## Cell 7 — Save best_model.pt to Google Drive

In [None]:
# Copy the checkpoint written by training (/content/best_model.pt) into the
# mounted Google Drive, creating the claimlens folder if needed.
!mkdir -p /content/drive/MyDrive/claimlens/
!cp /content/best_model.pt /content/drive/MyDrive/claimlens/best_model.pt
# NOTE: this message is printed unconditionally; it does not check whether the
# copy above succeeded (e.g. Drive not mounted or no checkpoint written).
print('Model successfully saved to Google Drive at /content/drive/MyDrive/claimlens/best_model.pt')

## Cell 8 — Print confirmation

In [None]:
# `best_val_acc` is the global assigned inside train(); this cell raises
# NameError if train() has not been run in the current kernel.
print(f'Phase 2 training complete in Colab. Best validation accuracy achieved: {best_val_acc:.3f}')