# Baseline Model - ResNet-18

This notebook implements a baseline model for the Aerial Cactus Identification competition.

**Strategy:**
- ResNet-18 architecture with the stem adapted for 32x32 images (3x3 stride-1 first convolution, no max-pooling)
- Stratified 5-fold cross-validation
- Binary cross-entropy loss on logits (baseline before trying AUC-specific losses)
- Heavy data augmentation for small images
- Class imbalance handling via loss weighting
- Test-time augmentation for inference

In [1]:
import copy
import os
import random
import warnings
import zipfile

import cv2
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import StratifiedKFold
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, models
from tqdm import tqdm
# Suppress all library warnings for the rest of the notebook.
warnings.filterwarnings('ignore')

# Report whether CUDA is usable and, if it is, which device and how much memory it has.
cuda_ok = torch.cuda.is_available()
print(f"GPU available: {cuda_ok}")
if cuda_ok:
    gpu_props = torch.cuda.get_device_properties(0)
    print(f"GPU: {torch.cuda.get_device_name(0)}")
    print(f"GPU Memory: {gpu_props.total_memory / 1e9:.1f} GB")

# Set random seeds for reproducibility
def set_seed(seed=42):
    """Seed the Python, NumPy and PyTorch random number generators.

    Args:
        seed: Integer seed applied to every generator.

    Note:
        This only fixes the generators' starting state; it does not switch
        cuDNN into deterministic mode.
    """
    # Python's own generator was previously left unseeded.
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Seed every visible CUDA device, not only the current one.
    torch.cuda.manual_seed_all(seed)


set_seed(42)

GPU available: True
GPU: NVIDIA A100-SXM4-80GB
GPU Memory: 85.1 GB


## Data Loading and Exploration

In [2]:
# Load the training labels and the sample submission (its `id` column lists the test images)
train_df = pd.read_csv('/home/data/train.csv')
test_df = pd.read_csv('/home/data/sample_submission.csv')

print(f"Training samples: {len(train_df)}")
print(f"Test samples: {len(test_df)}")
print(f"\nClass distribution:")
print(train_df['has_cactus'].value_counts(normalize=True))

# Check image dimensions by decoding one image from the training archive
with zipfile.ZipFile('/home/data/train.zip', 'r') as zip_ref:
    # Take the first entry that is an actual file. Indexing namelist()[1] assumed
    # a leading directory entry, which would skip a real image (or fail on a
    # single-entry archive) when the images sit in the archive root.
    first_file = next((name for name in zip_ref.namelist() if not name.endswith('/')), None)
    if first_file is None:
        raise ValueError("No files found in /home/data/train.zip")

    nparr = np.frombuffer(zip_ref.read(first_file), np.uint8)
    img = cv2.imdecode(nparr, cv2.IMREAD_COLOR)
    print(f"\nImage shape: {img.shape}")
    print(f"Image dtype: {img.dtype}")

Training samples: 14175
Test samples: 3325

Class distribution:
has_cactus
1    0.749771
0    0.250229
Name: proportion, dtype: float64



Image shape: (32, 32, 3)
Image dtype: uint8


## Custom Dataset Class

In [4]:
class CactusDataset(Dataset):
    """Dataset that decodes images directly out of a zip archive.

    Args:
        df: DataFrame with an ``id`` column holding the archive member name of
            each image and, unless ``is_test`` is set, a ``has_cactus`` column.
        zip_path: Path to the zip archive containing the images.
        transform: Optional callable applied to the decoded RGB image array.
        is_test: When True, ``__getitem__`` returns only the image.
    """

    def __init__(self, df, zip_path, transform=None, is_test=False):
        self.df = df
        self.zip_path = zip_path
        self.transform = transform
        self.is_test = is_test
        # Opened on the first __getitem__ call rather than here.
        self.zip_file = None

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        """Return the transformed image at ``idx`` (plus its float32 label unless ``is_test``).

        Raises:
            ValueError: If the archive member cannot be decoded as an image.
        """
        if self.zip_file is None:
            self.zip_file = zipfile.ZipFile(self.zip_path, 'r')

        # Look the row up once and reuse it for both the id and the label.
        row = self.df.iloc[idx]

        # Images are in root of zip, not in subdirectories
        img_path = row['id']

        img_data = self.zip_file.read(img_path)
        nparr = np.frombuffer(img_data, np.uint8)
        img = cv2.imdecode(nparr, cv2.IMREAD_COLOR)
        if img is None:
            # cv2.imdecode signals failure by returning None; fail with a clear
            # message instead of an opaque cvtColor error.
            raise ValueError(f"Could not decode image '{img_path}' from {self.zip_path}")
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        if self.transform:
            img = self.transform(img)

        if self.is_test:
            return img
        return img, torch.tensor(row['has_cactus'], dtype=torch.float32)

    def __getstate__(self):
        """Return picklable state with the open archive handle dropped.

        An open ``ZipFile`` cannot be pickled, so a dataset that has already
        served an item could not otherwise be sent to a worker process. The
        copy reopens the archive on its first ``__getitem__`` call.
        """
        state = self.__dict__.copy()
        state['zip_file'] = None
        return state

    def __del__(self):
        # getattr guards against a partially constructed instance.
        zip_file = getattr(self, 'zip_file', None)
        if zip_file is not None:
            zip_file.close()

## Data Augmentation

In [5]:
# Per-channel normalization statistics shared by the training and validation pipelines.
_CHANNEL_MEAN = [0.485, 0.456, 0.406]
_CHANNEL_STD = [0.229, 0.224, 0.225]

# Training pipeline: random flips, a small rotation and colour jitter on the PIL
# image, followed by tensor conversion and normalization.
_train_steps = [
    transforms.ToPILImage(),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomVerticalFlip(p=0.5),
    transforms.RandomRotation(15),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),
    transforms.ToTensor(),
    transforms.Normalize(mean=_CHANNEL_MEAN, std=_CHANNEL_STD),
]
train_transform = transforms.Compose(_train_steps)

# Validation pipeline: no augmentation, only tensor conversion and normalization.
_val_steps = [
    transforms.ToPILImage(),
    transforms.ToTensor(),
    transforms.Normalize(mean=_CHANNEL_MEAN, std=_CHANNEL_STD),
]
val_transform = transforms.Compose(_val_steps)

## Model Definition

In [6]:
class CactusClassifier(nn.Module):
    """ResNet-18 with a small-image stem and a single-logit output head.

    Args:
        pretrained: Forwarded to ``models.resnet18``. The first convolution and
            the final layer are replaced with freshly created modules either way.
    """

    def __init__(self, pretrained=True):
        super().__init__()
        resnet = models.resnet18(pretrained=pretrained)

        # Stem for 32x32 inputs: 3x3 stride-1 convolution and no max-pooling.
        resnet.conv1 = nn.Conv2d(
            in_channels=3,
            out_channels=64,
            kernel_size=3,
            stride=1,
            padding=1,
            bias=False,
        )
        resnet.maxpool = nn.Identity()

        # Swap the final layer for dropout followed by one output unit.
        head_inputs = resnet.fc.in_features
        resnet.fc = nn.Sequential(
            nn.Dropout(p=0.5),
            nn.Linear(head_inputs, 1),
        )

        self.backbone = resnet

    def forward(self, x):
        """Return the backbone's output for batch ``x``."""
        logits = self.backbone(x)
        return logits

def _run_epoch(model, loader, criterion, device, desc, optimizer=None):
    """Run one pass over ``loader`` and return ``(mean_loss, auc)``.

    The pass trains the model when ``optimizer`` is given and evaluates it
    (eval mode, gradients disabled) otherwise.
    """
    training = optimizer is not None
    model.train(training)

    total_loss = 0
    preds = []
    targets = []

    with torch.set_grad_enabled(training):
        for images, labels in tqdm(loader, desc=desc):
            images, labels = images.to(device), labels.to(device)

            if training:
                optimizer.zero_grad()

            # reshape(-1) keeps the batch dimension even for a batch of one;
            # a bare squeeze() would collapse it to a 0-d tensor and break
            # both the loss and the extend() calls below.
            outputs = model(images).reshape(-1)
            loss = criterion(outputs, labels)

            if training:
                loss.backward()
                optimizer.step()

            total_loss += loss.item()
            preds.extend(torch.sigmoid(outputs).detach().cpu().numpy())
            targets.extend(labels.detach().cpu().numpy())

    return total_loss / len(loader), roc_auc_score(targets, preds)


def train_model(model, train_loader, val_loader, criterion, optimizer, scheduler, device, epochs=10):
    """Train ``model`` and keep the weights from the epoch with the best validation AUC.

    Args:
        model: Network to train; called on image batches and expected to
            produce one logit per sample.
        train_loader: Iterable of ``(images, labels)`` training batches.
        val_loader: Iterable of ``(images, labels)`` validation batches.
        criterion: Loss applied to ``(logits, labels)``.
        optimizer: Optimizer stepped once per training batch.
        scheduler: Learning-rate scheduler stepped once per epoch.
        device: Device the batches are moved to.
        epochs: Number of epochs to run.

    Returns:
        Tuple ``(best_model_state, best_val_auc)``: a copy of the state dict
        from the best epoch (``None`` if no epoch ran) and its validation AUC.
    """
    best_val_auc = 0
    best_model_state = None

    for epoch in range(epochs):
        train_loss, train_auc = _run_epoch(
            model, train_loader, criterion, device,
            desc=f'Epoch {epoch+1}/{epochs} - Train', optimizer=optimizer)
        val_loss, val_auc = _run_epoch(
            model, val_loader, criterion, device,
            desc=f'Epoch {epoch+1}/{epochs} - Val')

        print(f'Epoch {epoch+1}/{epochs}:')
        print(f'  Train Loss: {train_loss:.4f}, Train AUC: {train_auc:.4f}')
        print(f'  Val Loss: {val_loss:.4f}, Val AUC: {val_auc:.4f}')

        # Save best model
        if best_model_state is None or val_auc > best_val_auc:
            best_val_auc = val_auc
            # state_dict() returns references to the live parameter tensors and
            # dict.copy() is shallow, so a deep copy is required; otherwise
            # later epochs overwrite the "best" weights in place.
            best_model_state = copy.deepcopy(model.state_dict())
            print(f'  New best validation AUC: {val_auc:.4f}')

        scheduler.step()

    return best_model_state, best_val_auc

## Cross-Validation Training

In [None]:
# Set up stratified k-fold
def run_cross_validation(n_folds=5, epochs=15, batch_size=64):
    """Train one model per stratified fold of ``train_df``.

    Args:
        n_folds: Number of stratified folds.
        epochs: Training epochs per fold.
        batch_size: Batch size for the training and validation loaders.

    Returns:
        Tuple ``(fold_models, fold_scores)``: the best state dict and the best
        validation AUC of each fold, in fold order.
    """
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print(f"Using device: {device}")

    # Prepare data
    skf = StratifiedKFold(n_splits=n_folds, shuffle=True, random_state=42)
    fold_scores = []
    fold_models = []

    # Calculate class weights for handling imbalance
    pos_class = (train_df['has_cactus'] == 1).sum()
    neg_class = (train_df['has_cactus'] == 0).sum()
    pos_weight = neg_class / pos_class
    print(f"Positive class weight: {pos_weight:.4f}")

    for fold, (train_idx, val_idx) in enumerate(skf.split(train_df, train_df['has_cactus'])):
        print(f"\n{'='*50}")
        print(f"Fold {fold + 1}/{n_folds}")
        print(f"{'='*50}")

        # Create fold datasets
        train_fold = train_df.iloc[train_idx].reset_index(drop=True)
        val_fold = train_df.iloc[val_idx].reset_index(drop=True)

        train_dataset = CactusDataset(train_fold, '/home/data/train.zip', transform=train_transform)
        val_dataset = CactusDataset(val_fold, '/home/data/train.zip', transform=val_transform)

        train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=2)
        val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=2)

        # Initialize model
        model = CactusClassifier(pretrained=True).to(device)

        # Loss with class weighting for imbalance. pos_weight is a numpy
        # float64 scalar, so the tensor is built explicitly as float32 to
        # match the dtype of the labels.
        criterion = nn.BCEWithLogitsLoss(
            pos_weight=torch.tensor(pos_weight, dtype=torch.float32, device=device))

        # Optimizer and scheduler
        optimizer = optim.AdamW(model.parameters(), lr=1e-3, weight_decay=0.01)
        scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=epochs)

        # Train model
        best_state, best_auc = train_model(model, train_loader, val_loader, criterion,
                                           optimizer, scheduler, device, epochs=epochs)

        fold_scores.append(best_auc)
        fold_models.append(best_state)

        print(f"Fold {fold + 1} Best Validation AUC: {best_auc:.4f}")

    print(f"\n{'='*50}")
    print(f"Cross-Validation Results")
    print(f"{'='*50}")
    # The plus-minus sign was previously mis-encoded as "Â±".
    print(f"Mean AUC: {np.mean(fold_scores):.4f} ± {np.std(fold_scores):.4f}")
    print(f"Individual folds: {fold_scores}")

    return fold_models, fold_scores

# Run cross-validation
fold_models, fold_scores = run_cross_validation(n_folds=5, epochs=15, batch_size=64)

## Test-Time Augmentation and Prediction

In [None]:
def predict_with_tta(models, test_loader, device, n_tta=5):
    """Predict with test-time augmentation, averaged over views and models.

    Each batch is scored under up to five deterministic views (identity,
    horizontal flip, vertical flip, 90-degree rotation, 180-degree rotation).
    The sigmoid probabilities are averaged first across views and then across
    models.

    The previous version built a list of torchvision transforms but never
    applied them, so every "TTA" pass scored the same unaugmented batch.

    Args:
        models: Iterable of trained models, each producing one logit per
            sample. (This parameter shadows the torchvision ``models`` module
            inside the function; the module is not needed here.)
        test_loader: Iterable of image batches. Assumes each batch is a tensor
            shaped (N, C, H, W) - confirm against the dataset's transform.
        device: Device the batches are moved to.
        n_tta: Number of views to use; values above five are capped at five.

    Returns:
        1-D numpy array with one averaged probability per test sample.

    Raises:
        ValueError: If ``models`` is empty or ``n_tta`` is less than 1.
    """
    # Deterministic views applied directly to the tensor batch, where dim 2 is
    # height and dim 3 is width.
    tta_views = [
        lambda x: x,                                   # original
        lambda x: torch.flip(x, dims=[3]),             # horizontal flip
        lambda x: torch.flip(x, dims=[2]),             # vertical flip
        lambda x: torch.rot90(x, k=1, dims=(2, 3)),    # rotate 90 degrees
        lambda x: torch.rot90(x, k=2, dims=(2, 3)),    # rotate 180 degrees
    ]

    models = list(models)
    if not models:
        raise ValueError("predict_with_tta needs at least one model")

    n_views = min(n_tta, len(tta_views))
    if n_views < 1:
        raise ValueError(f"n_tta must be at least 1, got {n_tta}")
    active_views = tta_views[:n_views]

    all_preds = []

    for model_idx, model in enumerate(models):
        model.eval()
        model_preds = []

        with torch.no_grad():
            # Load each batch once and score all views on it, rather than
            # re-reading the whole test set once per view.
            for batch in tqdm(test_loader, desc=f'Model {model_idx+1}/{len(models)} - TTA x{n_views}'):
                images = batch.to(device)
                # reshape(-1) keeps a length-1 vector for a batch of one.
                view_probs = [
                    torch.sigmoid(model(view(images)).reshape(-1))
                    for view in active_views
                ]
                model_preds.append(torch.stack(view_probs).mean(dim=0).cpu().numpy())

        # Average TTA predictions for this model (already averaged per batch)
        all_preds.append(np.concatenate(model_preds))

    # Average across all models (ensemble)
    final_preds = np.mean(all_preds, axis=0)
    return final_preds

# Create test dataset and loader
test_dataset = CactusDataset(test_df, '/home/data/test.zip', transform=val_transform, is_test=True)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False, num_workers=2)

# Rebuild one model per fold from the saved best state dicts
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
best_models = []

for i, state_dict in enumerate(fold_models):
    # pretrained=False: every weight is overwritten by load_state_dict below.
    model = CactusClassifier(pretrained=False).to(device)
    model.load_state_dict(state_dict)
    best_models.append(model)
    print(f"Loaded model from fold {i+1}")

# Generate predictions with TTA
print("\nGenerating predictions with test-time augmentation...")
test_predictions = predict_with_tta(best_models, test_loader, device, n_tta=5)

# The loader is not shuffled, so predictions line up with test_df row by row.
assert len(test_predictions) == len(test_df), "prediction count does not match number of test rows"

# Create submission
submission_df = test_df.copy()
submission_df['has_cactus'] = test_predictions

# Make sure the output directory exists; to_csv does not create it.
submission_path = '/home/submission/submission.csv'
os.makedirs(os.path.dirname(submission_path), exist_ok=True)
submission_df.to_csv(submission_path, index=False)

print(f"\nSubmission saved to {submission_path}")
print(f"Predictions shape: {test_predictions.shape}")
print(f"Prediction range: [{test_predictions.min():.4f}, {test_predictions.max():.4f}]")