# Experiment 002: ResNet50 with Fine-Tuning and TTA

**Objective**: Upgrade the architecture to ResNet50 and add progressive fine-tuning and test-time augmentation (TTA) to close the log-loss gap to the gold threshold.

**Expected improvements:**
- ResNet50 (2x parameters vs ResNet18): ~10-15% gain
- Progressive fine-tuning: ~15-20% gain  
- TTA (5 augmentations): ~5-10% gain
- **Combined target**: log loss of 0.044-0.050 (a 30-40% improvement over the baseline of 0.0736)

In [1]:
import os
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from torchvision import models, transforms
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import log_loss
from PIL import Image
import warnings
warnings.filterwarnings('ignore')

# Report whether CUDA is usable and, if so, the name and memory of device 0.
_cuda_ready = torch.cuda.is_available()
print(f"GPU available: {_cuda_ready}")
if _cuda_ready:
    _gpu_name = torch.cuda.get_device_name(0)
    print(f"GPU: {_gpu_name}")
    # total_memory is reported in bytes; show it in decimal gigabytes.
    _gpu_bytes = torch.cuda.get_device_properties(0).total_memory
    print(f"Memory: {_gpu_bytes / 1e9:.1f} GB")

# Set random seeds for reproducibility
def set_seeds(seed=42):
    """Seed every random number generator this notebook can draw from.

    Seeds Python's ``random`` module, NumPy's global generator and PyTorch
    (the CPU generator, plus all CUDA devices when CUDA is available).

    Args:
        seed: Integer seed applied to all generators. Defaults to 42.
    """
    # Local import: the notebook's import cell does not bring in ``random``.
    import random

    # Seed the stdlib generator too, so any code relying on it is repeatable.
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        # manual_seed_all seeds every CUDA device, the current one included.
        torch.cuda.manual_seed_all(seed)

set_seeds(42)

GPU available: True
GPU: NVIDIA A100-SXM4-80GB
Memory: 85.1 GB


In [2]:
# Data paths
train_dir = '/home/data/train'
test_dir = '/home/data/test'

# Get training data.
# os.listdir returns names in arbitrary order; sort them so the seeded
# cross-validation split further down sees the same sequence on every run.
train_files = sorted(f for f in os.listdir(train_dir) if f.endswith(('.jpg', '.jpeg', '.png')))
if not train_files:
    # Fail with a clear message instead of a ZeroDivisionError in the prints below.
    raise RuntimeError(f"No training images found in {train_dir}")

# The label is read off the file name: any name containing "dog" is a dog,
# everything else is treated as a cat.
train_labels = [1 if 'dog' in f.lower() else 0 for f in train_files]  # 1=dog, 0=cat

n_train_images = len(train_files)
n_dogs = sum(train_labels)
n_cats = n_train_images - n_dogs

print(f"Training images: {n_train_images}")
print(f"Dogs: {n_dogs} ({n_dogs/n_train_images:.1%})")
print(f"Cats: {n_cats} ({1-n_dogs/n_train_images:.1%})")

Training images: 22500
Dogs: 11258 (50.0%)
Cats: 11242 (50.0%)


In [3]:
# Custom Dataset with advanced augmentations
class PetDataset(Dataset):
    """Map-style dataset yielding ``(image, label)`` pairs.

    File names are resolved against the notebook-level ``train_dir``.
    """

    def __init__(self, file_paths, labels, transform=None):
        """Keep the file names, their labels and an optional image transform."""
        self.transform = transform
        self.labels = labels
        self.file_paths = file_paths

    def __len__(self):
        """Number of files held by the dataset."""
        return len(self.file_paths)

    def __getitem__(self, idx):
        """Load item ``idx`` as RGB and return it with its float32 label tensor."""
        full_path = os.path.join(train_dir, self.file_paths[idx])
        sample = Image.open(full_path).convert('RGB')
        target = self.labels[idx]

        # The transform is optional; without one the PIL image is returned as-is.
        sample = self.transform(sample) if self.transform else sample

        return sample, torch.tensor(target, dtype=torch.float32)

# Augmentation strategies
def get_train_transforms():
    """Build the training pipeline.

    Resizes to 256x256, then applies a random 224 crop (80-100% of the area),
    horizontal flip, rotation of up to 15 degrees and colour jitter before
    converting to a tensor and normalising each channel.
    """
    geometric = [
        transforms.Resize((256, 256)),
        transforms.RandomResizedCrop(224, scale=(0.8, 1.0)),
        transforms.RandomHorizontalFlip(p=0.5),
        transforms.RandomRotation(15),
    ]
    photometric = [
        transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),
    ]
    to_model_input = [
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ]
    return transforms.Compose(geometric + photometric + to_model_input)

def get_val_transforms():
    """Build the deterministic validation pipeline.

    Resizes to 256x256, takes the central 224 crop, converts to a tensor and
    normalises each channel. No random operations are involved.
    """
    steps = []
    steps.append(transforms.Resize((256, 256)))
    steps.append(transforms.CenterCrop(224))
    steps.append(transforms.ToTensor())
    steps.append(transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]))
    return transforms.Compose(steps)

def get_tta_transforms():
    """Test-time augmentation transforms.

    A milder version of the training augmentations: random 224 crop covering
    90-100% of the resized image, random horizontal flip and a light colour
    jitter, followed by tensor conversion and per-channel normalisation.
    """
    random_view = [
        transforms.Resize((256, 256)),
        transforms.RandomResizedCrop(224, scale=(0.9, 1.0)),
        transforms.RandomHorizontalFlip(p=0.5),
        transforms.ColorJitter(brightness=0.1, contrast=0.1, saturation=0.1, hue=0.05),
    ]
    to_model_input = [
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ]
    return transforms.Compose(random_view + to_model_input)

In [4]:
# Model definition with progressive unfreezing capability
class PetClassifier(nn.Module):
    """ResNet backbone whose classifier is replaced by a single-logit head.

    The head is ``Dropout(0.3)`` followed by ``Linear(in_features, 1)``, so
    ``forward`` returns raw logits; no sigmoid is applied here.

    Args:
        backbone: Name of the torchvision ResNet to build, ``'resnet50'``
            or ``'resnet18'``.
        pretrained: Forwarded to the torchvision constructor.

    Raises:
        ValueError: If ``backbone`` is not one of the supported names.
    """

    # Constructor names looked up on ``torchvision.models``.
    _SUPPORTED_BACKBONES = ('resnet50', 'resnet18')

    # Residual stages from the output end of the network towards the input.
    _STAGES_LAST_TO_FIRST = ('layer4', 'layer3', 'layer2', 'layer1')

    def __init__(self, backbone='resnet50', pretrained=True):
        super(PetClassifier, self).__init__()

        if backbone not in self._SUPPORTED_BACKBONES:
            raise ValueError(f"Unsupported backbone: {backbone}")

        self.backbone = getattr(models, backbone)(pretrained=pretrained)
        in_features = self.backbone.fc.in_features

        # Replace final layer
        self.backbone.fc = nn.Sequential(
            nn.Dropout(0.3),
            nn.Linear(in_features, 1)
        )

    def forward(self, x):
        """Return the backbone's output (one logit per sample) for batch ``x``."""
        return self.backbone(x)

    def freeze_backbone(self):
        """Freeze all backbone layers, leaving only the new head trainable.

        NOTE(review): this only toggles ``requires_grad``; BatchNorm running
        statistics presumably still update while the model is in train mode.
        Confirm whether that is intended.
        """
        for param in self.backbone.parameters():
            param.requires_grad = False
        # Unfreeze final layer
        for param in self.backbone.fc.parameters():
            param.requires_grad = True

    def unfreeze_last_blocks(self, num_blocks=2):
        """Unfreeze the last ``num_blocks`` residual stages.

        Stages are taken from the end of the network: 1 -> layer4,
        2 -> layer4 + layer3, 3 -> ... + layer2, 4 or more -> ... + layer1.
        Values below 1 unfreeze nothing. Stages that are already trainable
        are left unchanged.

        Args:
            num_blocks: How many stages, counted from the output side, to
                make trainable.
        """
        for depth, stage_name in enumerate(self._STAGES_LAST_TO_FIRST, start=1):
            if num_blocks < depth:
                break
            for param in getattr(self.backbone, stage_name).parameters():
                param.requires_grad = True

def train_epoch(model, dataloader, criterion, optimizer, device):
    """Run one optimisation pass over ``dataloader`` and return the mean loss.

    Args:
        model: Module producing one logit per sample.
        dataloader: Iterable of ``(images, labels)`` batches.
        criterion: Loss called as ``criterion(outputs, labels)``.
        optimizer: Optimiser stepped once per batch.
        device: Device the batches are moved to before the forward pass.

    Returns:
        float: Batch losses weighted by batch size, divided by the number of
        samples actually processed.
    """
    model.train()
    running_loss = 0.0
    n_seen = 0

    for images, labels in dataloader:
        images, labels = images.to(device), labels.to(device)
        batch_n = images.size(0)

        optimizer.zero_grad()
        # Flatten to shape (batch,). A bare squeeze() would also remove the
        # batch dimension of a single-sample batch, leaving a 0-d tensor that
        # no longer matches the labels.
        outputs = model(images).reshape(-1)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item() * batch_n
        n_seen += batch_n

    # Divide by what was really seen, so the mean stays correct even when the
    # loader does not yield every sample of its dataset.
    return running_loss / n_seen

def validate(model, dataloader, criterion, device):
    """Evaluate ``model`` on ``dataloader`` without updating any weights.

    Args:
        model: Module producing one logit per sample.
        dataloader: Iterable of ``(images, labels)`` batches.
        criterion: Loss called as ``criterion(outputs, labels)``.
        device: Device the batches are moved to before the forward pass.

    Returns:
        tuple: ``(val_loss, val_log_loss, preds, labels)`` where ``val_loss``
        is the sample-weighted mean of ``criterion``, ``val_log_loss`` is
        ``log_loss`` of the sigmoid probabilities against the labels, and
        ``preds`` / ``labels`` are NumPy arrays in dataloader order.
    """
    model.eval()
    running_loss = 0.0
    all_preds = []
    all_labels = []

    with torch.no_grad():
        for images, labels in dataloader:
            images, labels = images.to(device), labels.to(device)
            # Flatten to shape (batch,). A bare squeeze() would turn a
            # single-sample batch into a 0-d tensor, which breaks both the
            # criterion and the extend() calls below.
            outputs = model(images).reshape(-1)
            loss = criterion(outputs, labels)

            running_loss += loss.item() * images.size(0)
            all_preds.extend(torch.sigmoid(outputs).cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

    # One label is collected per evaluated sample, so this is the true count.
    val_loss = running_loss / len(all_labels)
    val_log_loss = log_loss(all_labels, all_preds)

    return val_loss, val_log_loss, np.array(all_preds), np.array(all_labels)

In [None]:
# Cross-validation setup: hyperparameters, device, fold splitter and result stores.

# Training parameters
batch_size, num_workers = 32, 4
early_stop_patience = 3
max_epochs_phase1 = 3  # Train head only
max_epochs_phase2 = 8  # Fine-tune with early stopping

# Training configuration: use the GPU when one is available, else the CPU
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(f"Using device: {device}")

# Stratified, shuffled folds with a fixed seed
n_splits = 5
skf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=42)

# Store results (filled in by the training loop)
cv_scores, fold_predictions, fold_models = [], [], []

print(f"\nStarting {n_splits}-fold CV training...")
print(f"Phase 1: Train head only ({max_epochs_phase1} epochs)")
print(f"Phase 2: Fine-tune backbone ({max_epochs_phase2} epochs max)")

In [None]:
# Loss function (with label smoothing). Declared once, ahead of the fold loop,
# so the class is not re-created on every fold.
class LabelSmoothingBCE(nn.Module):
    """Binary cross-entropy on logits with symmetric label smoothing."""

    def __init__(self, smoothing=0.1):
        super().__init__()
        self.smoothing = smoothing
        self.bce = nn.BCEWithLogitsLoss()

    def forward(self, outputs, targets):
        # Pull hard targets towards 0.5: 0 -> smoothing/2, 1 -> 1 - smoothing/2.
        targets = targets * (1 - self.smoothing) + 0.5 * self.smoothing
        return self.bce(outputs, targets)


# Main training loop: for every fold, train the head on a frozen backbone
# (phase 1), then fine-tune the last two residual stages with early stopping
# (phase 2), restore the best weights and record the fold's log loss.
for fold, (train_idx, val_idx) in enumerate(skf.split(train_files, train_labels)):
    print(f"\n{'='*60}")
    print(f"FOLD {fold + 1}/{n_splits}")
    print(f"{'='*60}")

    # Split data
    train_files_fold = [train_files[i] for i in train_idx]
    train_labels_fold = [train_labels[i] for i in train_idx]
    val_files_fold = [train_files[i] for i in val_idx]
    val_labels_fold = [train_labels[i] for i in val_idx]

    print(f"Train: {len(train_files_fold)} images")
    print(f"Val: {len(val_files_fold)} images")

    # Create datasets
    train_dataset = PetDataset(train_files_fold, train_labels_fold, transform=get_train_transforms())
    val_dataset = PetDataset(val_files_fold, val_labels_fold, transform=get_val_transforms())

    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=num_workers)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=num_workers)

    # Create model
    model = PetClassifier(backbone='resnet50', pretrained=True)
    model.to(device)

    criterion = LabelSmoothingBCE(smoothing=0.1)

    # === PHASE 1: Train head only (frozen backbone) ===
    print(f"\nPhase 1: Training head only...")
    model.freeze_backbone()

    # Only optimize the final layer
    head_params = [p for p in model.parameters() if p.requires_grad]
    optimizer = optim.AdamW(head_params, lr=0.001, weight_decay=0.01)
    scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=max_epochs_phase1)

    for epoch in range(max_epochs_phase1):
        train_loss = train_epoch(model, train_loader, criterion, optimizer, device)
        val_loss, val_log_loss, _, _ = validate(model, val_loader, criterion, device)
        scheduler.step()

        print(f"  Epoch {epoch+1}/{max_epochs_phase1} - Train Loss: {train_loss:.4f}, Val Log Loss: {val_log_loss:.4f}")

    # === PHASE 2: Fine-tune backbone ===
    print(f"\nPhase 2: Fine-tuning backbone...")
    model.unfreeze_last_blocks(num_blocks=2)  # Unfreeze last 2 blocks

    # Different learning rates for backbone and head
    backbone_params = [p for n, p in model.named_parameters() if 'fc' not in n and p.requires_grad]
    head_params = [p for n, p in model.named_parameters() if 'fc' in n and p.requires_grad]

    optimizer = optim.AdamW([
        {'params': backbone_params, 'lr': 0.0001},
        {'params': head_params, 'lr': 0.001}
    ], weight_decay=0.01)

    # NOTE(review): `verbose` is deprecated in recent PyTorch releases; confirm
    # against the installed version.
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=2, verbose=True)

    best_val_log_loss = float('inf')
    patience_counter = 0
    best_model_state = None

    for epoch in range(max_epochs_phase2):
        train_loss = train_epoch(model, train_loader, criterion, optimizer, device)
        val_loss, val_log_loss, _, _ = validate(model, val_loader, criterion, device)
        scheduler.step(val_log_loss)

        print(f"  Epoch {epoch+1}/{max_epochs_phase2} - Train Loss: {train_loss:.4f}, Val Log Loss: {val_log_loss:.4f}")

        # Early stopping
        if val_log_loss < best_val_log_loss:
            best_val_log_loss = val_log_loss
            patience_counter = 0
            # Clone every tensor: state_dict() hands back references to the
            # live weights and dict.copy() is shallow, so an un-cloned
            # snapshot would keep changing as training continues.
            best_model_state = {
                name: tensor.detach().clone()
                for name, tensor in model.state_dict().items()
            }
            print(f"  → New best: {val_log_loss:.4f}")
        else:
            patience_counter += 1
            if patience_counter >= early_stop_patience:
                print(f"  → Early stopping triggered after epoch {epoch+1}")
                break

    # Load best model (no snapshot exists if phase 2 never improved on inf,
    # e.g. with zero phase-2 epochs; keep the current weights in that case)
    if best_model_state is not None:
        model.load_state_dict(best_model_state)

    # Final validation
    _, final_val_log_loss, val_preds, val_labels = validate(model, val_loader, criterion, device)
    print(f"\nFold {fold + 1} Final Val Log Loss: {final_val_log_loss:.4f}")

    cv_scores.append(final_val_log_loss)
    fold_models.append(model)

    # Clean up
    del train_dataset, val_dataset, train_loader, val_loader
    torch.cuda.empty_cache()

print(f"\n{'='*60}")
print(f"CROSS-VALIDATION COMPLETE")
print(f"{'='*60}")
print(f"Mean CV Log Loss: {np.mean(cv_scores):.4f} ± {np.std(cv_scores):.4f}")
print(f"Individual folds: {[f'{score:.4f}' for score in cv_scores]}")

In [None]:
# Test-time augmentation (TTA) predictions
print(f"\nGenerating TTA predictions on test set...")

# Get test files
test_files = [f for f in os.listdir(test_dir) if f.endswith(('.jpg', '.jpeg', '.png'))]
print(f"Test images: {len(test_files)}")

class TestDataset(Dataset):
    """Unlabelled dataset yielding ``(image, file_name)`` pairs from ``test_dir``."""

    def __init__(self, file_paths, transform=None):
        self.file_paths = file_paths
        self.transform = transform

    def __len__(self):
        return len(self.file_paths)

    def __getitem__(self, idx):
        img_path = os.path.join(test_dir, self.file_paths[idx])
        image = Image.open(img_path).convert('RGB')

        if self.transform:
            image = self.transform(image)

        return image, self.file_paths[idx]

# Number of TTA augmentations per image
n_tta = 5
tta_predictions = []

# Without trained models the averages below would silently become NaN and end
# up in the submission, so stop here instead.
if not fold_models:
    raise RuntimeError("fold_models is empty - run the training cell before generating predictions")

for fold, model in enumerate(fold_models):
    print(f"\nProcessing fold {fold + 1}/{len(fold_models)}...")
    model.eval()

    # One column per augmented pass over the test set
    fold_tta_preds = np.zeros((len(test_files), n_tta))

    for tta_idx in range(n_tta):
        print(f"  TTA augmentation {tta_idx + 1}/{n_tta}")

        # Create dataset with TTA transforms
        test_dataset = TestDataset(test_files, transform=get_tta_transforms())
        test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=num_workers)

        fold_preds = []
        with torch.no_grad():
            for images, _ in test_loader:
                images = images.to(device)
                # Flatten to shape (batch,). A bare squeeze() would turn a
                # single-image final batch into a 0-d tensor that extend()
                # cannot iterate over.
                outputs = model(images).reshape(-1)
                probs = torch.sigmoid(outputs).cpu().numpy()
                fold_preds.extend(probs)

        fold_tta_preds[:, tta_idx] = fold_preds

        # Clean up
        del test_dataset, test_loader
        torch.cuda.empty_cache()

    # Average TTA predictions for this fold
    fold_avg_preds = np.mean(fold_tta_preds, axis=1)
    tta_predictions.append(fold_avg_preds)

# Average predictions across all folds
final_predictions = np.mean(tta_predictions, axis=0)
print(f"\nFinal predictions shape: {final_predictions.shape}")
print(f"Prediction range: [{final_predictions.min():.4f}, {final_predictions.max():.4f}]")

In [None]:
# Create submission
print(f"\nCreating submission file...")

# Load sample submission (when present) purely to report the expected format
sample_submission_path = '/home/data/sample_submission.csv'
if os.path.exists(sample_submission_path):
    sample_submission = pd.read_csv(sample_submission_path)
    print(f"Sample submission columns: {sample_submission.columns.tolist()}")
    print(f"Sample submission shape: {sample_submission.shape}")

# The frame is the same whether or not a sample submission exists, so build it
# once. splitext drops only the final extension, so an id that itself contains
# a dot is not truncated.
submission = pd.DataFrame({
    'id': [os.path.splitext(f)[0] for f in test_files],
    'label': final_predictions
})

# Ensure correct column order
submission = submission[['id', 'label']]

# Save submission
submission_path = '/home/submission/submission_002.csv'
os.makedirs('/home/submission', exist_ok=True)
submission.to_csv(submission_path, index=False)

print(f"Submission saved to: {submission_path}")
print(f"Submission shape: {submission.shape}")
print(f"\nFirst 5 predictions:")
print(submission.head())

# Reference scores used in the summary: the baseline log loss this experiment
# is compared against, and the score reported below as "gold".
baseline_log_loss = 0.0736
gold_log_loss = 0.0388
mean_cv_log_loss = np.mean(cv_scores)

# Final summary
print(f"\n{'='*60}")
print(f"EXPERIMENT 002 SUMMARY")
print(f"{'='*60}")
print(f"Model: ResNet50 with progressive fine-tuning")
print(f"Augmentations: RandomResizedCrop, Flip, Rotation, ColorJitter")
print(f"Training: {max_epochs_phase1} epochs (head) + {max_epochs_phase2} epochs (fine-tune)")
print(f"TTA: {n_tta} augmentations per image")
print(f"CV Score: {mean_cv_log_loss:.4f} ± {np.std(cv_scores):.4f}")
print(f"Submission: {submission_path}")
print(f"Improvement from baseline: {((baseline_log_loss - mean_cv_log_loss) / baseline_log_loss * 100):.1f}%")
print(f"Remaining gap to gold: {mean_cv_log_loss - gold_log_loss:.4f}")