In [None]:
import torch
from PIL import Image
import torch.nn as nn
import torch.optim as optim
import torch.utils.data as data
from torch.utils.data import random_split
from torchvision import datasets, transforms
import timm 
from torch.optim.lr_scheduler import ReduceLROnPlateau
from torch.utils.data import DataLoader
import numpy as np
from torch.optim.lr_scheduler import CosineAnnealingLR
from sklearn.model_selection import train_test_split
from collections import Counter
import pandas as pd
import random
import os
from collections import defaultdict
import re
from torch.utils.data import Subset
from torchvision.models import vit_l_16, ViT_L_16_Weights
from torch.cuda.amp import autocast, GradScaler

In [2]:
#%pip install timm

In [None]:
seed = 42
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(seed)

In [None]:
#train_dir = "/ucsc-cse-144-winter-2025-final-project/train/train"
train_dir = "ucsc-cse-144-winter-2025-final-project/train/train"
folder_names = os.listdir(train_dir)
print("Unsorted folder names:", folder_names)

sorted_folder_names = sorted(folder_names, key=lambda x: int(x))
print("Sorted folder names:", sorted_folder_names)


Unsorted folder names: ['38', '59', '85', '35', '5', '58', '46', '3', '71', '10', '54', '48', '17', '84', '41', '75', '88', '1', '22', '81', '52', '93', '8', '83', '37', '9', '0', '43', '51', '94', '55', '32', '19', '89', '53', '78', '63', '74', '29', '33', '70', '72', '45', '49', '15', '64', '66', '95', '23', '4', '14', '97', '65', '77', '50', '61', '62', '21', '80', '47', '24', '96', '60', '30', '57', '13', '42', '82', '87', '92', '79', '68', '12', '27', '18', '31', '11', '34', '26', '98', '25', '39', '2', '16', '6', '86', '76', '44', '56', '7', '69', '73', '28', '91', '90', '40', '67', '36', '99', '20']
Sorted folder names: ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12', '13', '14', '15', '16', '17', '18', '19', '20', '21', '22', '23', '24', '25', '26', '27', '28', '29', '30', '31', '32', '33', '34', '35', '36', '37', '38', '39', '40', '41', '42', '43', '44', '45', '46', '47', '48', '49', '50', '51', '52', '53', '54', '55', '56', '57', '58', '59', '60', '61', '6

In [None]:
class CustomImageFolder(datasets.ImageFolder):
    def __init__(self, root, transform=None, target_transform=None):
        super().__init__(root, transform=transform, target_transform=target_transform)
    
    def find_classes(self, directory):
        classes = sorted(os.listdir(directory))
        classes = [cls for cls in classes if cls.isdigit()]
        class_to_idx = {cls_name: int(cls_name) for cls_name in classes}
        return classes, class_to_idx

In [None]:
train_transforms = transforms.Compose([
    transforms.Resize(512),
    transforms.RandomCrop(512),
    transforms.RandomHorizontalFlip(p=0.4),
    transforms.RandAugment(num_ops=2, magnitude=7),
    transforms.RandomRotation(10),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.5, hue=0.2),
    transforms.RandomAffine(degrees=15, translate=(0.1, 0.1)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225])
])

val_transforms = transforms.Compose([
    transforms.Resize(512),
    transforms.CenterCrop(512),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225])
])

train_dir = "ucsc-cse-144-winter-2025-final-project/train/train"

train_dataset = CustomImageFolder(root=train_dir, transform=train_transforms)

train_size = int(0.8 * len(train_dataset))
val_size = len(train_dataset) - train_size
train_subset, val_subset = random_split(train_dataset, [train_size, val_size])

val_subset.dataset.transform = val_transforms

bs = 32
train_loader = DataLoader(train_subset, batch_size=bs, shuffle=True, num_workers=0)
val_loader = DataLoader(val_subset, batch_size=bs, shuffle=False, num_workers=0)

In [7]:
def unfreeze_layers(model, epoch, freeze_after_epoch = 5):
    if epoch >= freeze_after_epoch:
        for i, block in enumerate(model.blocks):
            if i <= epoch - freeze_after_epoch:
                for param in block.parameters():
                    param.requires_grad = True
        print(f"Epoch {epoch + 1}: Unfreezing block {epoch - freeze_after_epoch + 1}")
    return model

In [None]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
num_classes = 100

weights = ViT_L_16_Weights.IMAGENET1K_SWAG_E2E_V1
model = vit_l_16(weights=weights)

model.heads[-1] = nn.Linear(model.heads[-1].in_features, num_classes)

model.to(device)

for param in model.parameters():
    param.requires_grad = False
for param in model.heads.parameters():
    param.requires_grad = True


In [9]:
# Cross-entropy loss
criterion = nn.CrossEntropyLoss()

# AdamW optimizer
optimizer = optim.AdamW(model.parameters(), lr=5e-3, weight_decay=1e-4)

# Reduced learning rate based on validation accuracy
scheduler = CosineAnnealingLR(optimizer, T_max= 6)


In [None]:
def train_one_epoch(model, train_loader, criterion, optimizer, device, scaler, epoch):
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0
    
    for imgs, lbls in train_loader:
        imgs, lbls = imgs.to(device), lbls.to(device)

        optimizer.zero_grad()

        # Mixed precision training
        with autocast():
            outputs = model(imgs)
            loss = criterion(outputs, lbls)

        # Backpropagate loss
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()

        # Track statistics
        running_loss += loss.item()
        _, preds = outputs.max(1)
        correct += (preds == lbls).sum().item()
        total += lbls.size(0)

    epoch_loss = running_loss / len(train_loader)
    epoch_acc = correct / total * 100
    print(f"Train Epoch {epoch+1}: Loss: {epoch_loss:.4f}, Accuracy: {epoch_acc:.2f}%")
    return epoch_loss, epoch_acc


In [11]:
def validate_one_epoch(model, val_loader, criterion, device, epoch):
    model.eval()
    running_loss = 0.0
    correct = 0
    total = 0

    with torch.no_grad():
        for imgs, lbls in val_loader:
            imgs, lbls = imgs.to(device), lbls.to(device)

            # Forward pass
            outputs = model(imgs)
            loss = criterion(outputs, lbls)

            # Track statistics
            running_loss += loss.item()
            _, preds = outputs.max(1)
            correct += (preds == lbls).sum().item()
            total += lbls.size(0)

    epoch_loss = running_loss / len(val_loader)
    epoch_acc = correct / total * 100
    print(f"Validation Epoch {epoch+1}: Loss: {epoch_loss:.4f}, Accuracy: {epoch_acc:.2f}%")
    return epoch_loss, epoch_acc


In [12]:
def train_model(model, train_loader, val_loader, criterion, optimizer, scheduler, device, epochs=10):
    scaler = GradScaler(enabled=(device.type == 'cuda'))
    best_val_acc = 0

    for epoch in range(epochs):
        # No unfreezing; backbone remains frozen.
        train_loss, train_acc = train_one_epoch(model, train_loader, criterion, optimizer, device, scaler, epoch)
        val_loss, val_acc = validate_one_epoch(model, val_loader, criterion, device, epoch)
        scheduler.step(val_acc)

        if val_acc > best_val_acc:
            best_val_acc = val_acc
            torch.save(model.state_dict(), "best_vit_model.pth")
            print(f"Model saved with accuracy: {best_val_acc:.2f}%")


In [13]:
def evaluate_model(model, test_loader, criterion, device):
    model.load_state_dict(torch.load("best_vit_model.pth"))
    model.to(device)
    model.eval()

    correct = 0
    total = 0

    with torch.no_grad():
        for imgs, lbls in test_loader:
            imgs, lbls = imgs.to(device), lbls.to(device)
            outputs = model(imgs)
            _, preds = outputs.max(1)
            correct += (preds == lbls).sum().item()
            total += lbls.size(0)

    accuracy = 100 * correct / total
    print(f"Test Accuracy: {accuracy:.2f}%")

In [14]:
numepochs = 10

train_model(model, train_loader, val_loader, criterion, optimizer, scheduler, device, epochs=numepochs)

evaluate_model(model, val_loader, criterion, device)

  scaler = GradScaler(enabled=(device.type == 'cuda'))
  with autocast():


Train Epoch 1: Loss: 3.4641, Accuracy: 40.50%
Validation Epoch 1: Loss: 1.7485, Accuracy: 57.50%




Model saved with accuracy: 57.50%
Train Epoch 2: Loss: 0.7472, Accuracy: 79.75%
Validation Epoch 2: Loss: 1.3202, Accuracy: 64.50%
Model saved with accuracy: 64.50%
Train Epoch 3: Loss: 0.3116, Accuracy: 90.38%
Validation Epoch 3: Loss: 1.0068, Accuracy: 69.50%
Model saved with accuracy: 69.50%
Train Epoch 4: Loss: 0.2822, Accuracy: 93.25%
Validation Epoch 4: Loss: 1.0348, Accuracy: 69.00%
Train Epoch 5: Loss: 0.1864, Accuracy: 94.75%
Validation Epoch 5: Loss: 1.0077, Accuracy: 72.50%
Model saved with accuracy: 72.50%
Train Epoch 6: Loss: 0.1916, Accuracy: 94.12%
Validation Epoch 6: Loss: 1.2756, Accuracy: 69.50%
Train Epoch 7: Loss: 0.1620, Accuracy: 95.38%
Validation Epoch 7: Loss: 1.1525, Accuracy: 76.50%
Model saved with accuracy: 76.50%
Train Epoch 8: Loss: 0.0996, Accuracy: 97.38%
Validation Epoch 8: Loss: 0.8572, Accuracy: 80.50%
Model saved with accuracy: 80.50%
Train Epoch 9: Loss: 0.0535, Accuracy: 99.12%
Validation Epoch 9: Loss: 0.9339, Accuracy: 74.50%
Train Epoch 10: Loss

In [None]:
# Test Accuracy at 80% and Kaggle accuracy at 80%

In [None]:
num_classes = 100

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

def natural_sort_key(f):
    return [int(s) if s.isdigit() else s for s in re.split(r'(\d+)', f)]

direct_transform = transforms.Compose([
    transforms.Resize((512, 512)),  
    transforms.ToTensor(),          
    transforms.Normalize([0.485, 0.456, 0.406], 
                         [0.229, 0.224, 0.225])  
])

test_dir = "ucsc-cse-144-winter-2025-final-project/test/test"

test_files = sorted([f for f in os.listdir(test_dir) if f.endswith('.jpg')],
                    key=natural_sort_key)

model.eval()


def generate_predictions(test_dir, test_files, model, device, transform):
    results = []
    with torch.no_grad():
        for file in test_files:
            img_path = os.path.join(test_dir, file)
            img = Image.open(img_path).convert("RGB")
            
            # Apply transformation to the image
            img_tensor = transform(img).unsqueeze(0).to(device)  # Add batch dimension
            
            # Get model prediction
            outputs = model(img_tensor)
            _, pred = outputs.max(1)
            
            image_id = int(re.search(r'(\d+)', file).group())
            results.append({
                "ID": image_id,
                "Filename": file,
                "Label": pred.item()
            })
    return results

predictions = generate_predictions(test_dir, test_files, model, device, direct_transform)

df = pd.DataFrame(predictions).sort_values("ID")
df['ID'] = df['ID'].astype(str) + ".jpg"
df[['ID', 'Label']].to_csv("submission_test7.csv", index=False)
print("CSV 'submission_test4.csv' generated.")

CSV 'submission_test4.csv' generated.
