# Baseline Experiment - iNaturalist 2019

This is a simple CNN baseline using ResNet50 pretrained on ImageNet.

## Strategy
- Use ResNet50 as feature extractor
- Fine-tune on iNaturalist 2019 dataset
- Use standard data augmentation
- 5-fold stratified cross-validation split (this baseline trains and evaluates on the first fold only, to keep it fast)
- Adam optimizer with learning rate scheduling

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import models, transforms
from PIL import Image
import json
import pandas as pd
import numpy as np
from sklearn.model_selection import StratifiedKFold
from tqdm import tqdm
import os
from collections import Counter
import warnings
warnings.filterwarnings('ignore')

# Report whether CUDA is usable and, if so, describe device 0
has_cuda = torch.cuda.is_available()
print(f"GPU available: {has_cuda}")
if has_cuda:
    gpu_name = torch.cuda.get_device_name(0)
    print(f"GPU: {gpu_name}")
    # total_memory is divided by 1e9 so the figure is printed in GB
    gpu_mem_gb = torch.cuda.get_device_properties(0).total_memory / 1e9
    print(f"GPU Memory: {gpu_mem_gb:.1f} GB")

In [None]:
# Load data
def _read_json(path):
    """Parse the JSON file at ``path`` and return the decoded object."""
    with open(path, 'r') as handle:
        return json.load(handle)


def _index_by_id(records):
    """Return a dict mapping each record's ``'id'`` value to the record itself."""
    return {record['id']: record for record in records}


def _labels_by_image(annotations):
    """Return a dict mapping ``'image_id'`` to ``'category_id'`` for each annotation."""
    return dict((ann['image_id'], ann['category_id']) for ann in annotations)


print("Loading training data...")
train_data = _read_json('/home/data/train2019.json')

print("Loading validation data...")
val_data = _read_json('/home/data/val2019.json')

print("Loading test data...")
test_data = _read_json('/home/data/test2019.json')

# Category lookup; the class count is taken from the training file only
categories = _index_by_id(train_data['categories'])
num_classes = len(categories)
print(f"Number of classes: {num_classes}")

# Image id -> image record, one table per split
train_images = _index_by_id(train_data['images'])
val_images = _index_by_id(val_data['images'])
test_images = _index_by_id(test_data['images'])

# Image id -> category id (the test split has no annotations table here)
train_annots = _labels_by_image(train_data['annotations'])
val_annots = _labels_by_image(val_data['annotations'])

print(f"Training images: {len(train_images)}")
print(f"Validation images: {len(val_images)}")
print(f"Test images: {len(test_images)}")

In [None]:
# Pool the train and val splits into one aligned (images, labels) pair for
# cross-validation. Train records come first, then val records, each in the
# iteration order of its id -> record dict.
# NOTE(review): labels are the raw category_id values and are later passed
# straight to the loss as class indices; this presumes they are integers in
# [0, num_classes) - confirm against the annotation files.
all_images = [*train_images.values(), *val_images.values()]
all_labels = [train_annots[image_id] for image_id in train_images]
all_labels += [val_annots[image_id] for image_id in val_images]

print(f"Total images for CV: {len(all_images)}")
most_frequent = Counter(all_labels).most_common(5)
print(f"Label distribution: {most_frequent}")

In [None]:
# Create custom dataset
class iNaturalistDataset(Dataset):
    """Labelled image dataset backed by a list of image-record dicts.

    Args:
        images: Sequence of dicts; each must contain a ``'file_name'`` key
            holding the image path relative to ``root``.
        labels: Sequence of labels aligned index-for-index with ``images``.
        transform: Optional callable applied to the loaded RGB PIL image.
        root: Directory prefix prepended to every ``file_name``. Defaults to
            the location the notebook originally hard-coded.

    Raises:
        ValueError: If ``images`` and ``labels`` have different lengths.
    """

    def __init__(self, images, labels, transform=None, root='/home/data'):
        # Fail at construction time rather than with an IndexError deep
        # inside a DataLoader worker.
        if len(images) != len(labels):
            raise ValueError(
                f"images and labels must have the same length "
                f"(got {len(images)} and {len(labels)})"
            )
        self.images = images
        self.labels = labels
        self.transform = transform
        self.root = root

    def __len__(self):
        """Return the number of samples."""
        return len(self.images)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for position ``idx``."""
        img_info = self.images[idx]
        image_path = f"{self.root}/{img_info['file_name']}"

        # Use a context manager so the underlying file handle is closed as
        # soon as the pixels have been decoded; convert() returns a new,
        # fully loaded image that outlives the handle.
        with Image.open(image_path) as raw_image:
            image = raw_image.convert('RGB')

        if self.transform is not None:
            image = self.transform(image)

        return image, self.labels[idx]

# Data transforms
# Per-channel normalisation shared by both pipelines (presumably the ImageNet
# statistics that go with the pretrained backbone - confirm if the backbone changes).
_normalize = transforms.Normalize(
    mean=[0.485, 0.456, 0.406],
    std=[0.229, 0.224, 0.225],
)

# Training pipeline: fixed 224x224 resize, then random flip / rotation /
# colour jitter, then tensor conversion and normalisation.
_train_steps = [
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomRotation(15),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2),
    transforms.ToTensor(),
    _normalize,
]
train_transform = transforms.Compose(_train_steps)

# Evaluation pipeline: same resize and normalisation, no random augmentation.
_val_steps = [
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    _normalize,
]
val_transform = transforms.Compose(_val_steps)

In [None]:
# Create model
class iNaturalistModel(nn.Module):
    """ResNet50 whose final fully connected layer is replaced by a
    ``num_classes``-way linear classifier.

    Args:
        num_classes: Output size of the replacement ``fc`` layer.
        pretrained: Forwarded to ``models.resnet50`` as ``pretrained``.
    """

    def __init__(self, num_classes, pretrained=True):
        super().__init__()
        resnet = models.resnet50(pretrained=pretrained)
        # Swap the stock head for one sized to this task; the input width is
        # read from the layer being replaced.
        resnet.fc = nn.Linear(resnet.fc.in_features, num_classes)
        self.backbone = resnet

    def forward(self, x):
        """Return the backbone's output for input batch ``x``."""
        return self.backbone(x)

model = iNaturalistModel(num_classes=num_classes)
param_count = sum(param.numel() for param in model.parameters())
print(f"Model parameters: {param_count:,}")

# Prefer CUDA when present, otherwise stay on the CPU
device_name = 'cuda' if torch.cuda.is_available() else 'cpu'
device = torch.device(device_name)
model = model.to(device)
print(f"Using device: {device}")

In [None]:
# Training function
def train_epoch(model, train_loader, criterion, optimizer, device):
    """Run one optimisation pass over ``train_loader``.

    Args:
        model: Module to train; switched to train mode on entry.
        train_loader: Iterable yielding ``(images, labels)`` batches.
        criterion: Loss callable invoked as ``criterion(outputs, labels)``.
            Assumed to return the batch-mean loss (the default reduction of
            the ``nn.CrossEntropyLoss`` used in this file).
        optimizer: Optimizer stepped once per batch.
        device: Device the batch tensors are moved to.

    Returns:
        Tuple ``(mean_loss, accuracy_percent)``: the sample-weighted mean
        loss over the epoch and the top-1 accuracy in percent. Both are
        ``0.0`` when the loader yields no samples.
    """
    model.train()
    loss_sum = 0.0  # running sum of per-sample losses
    correct = 0
    total = 0

    pbar = tqdm(train_loader, desc='Training')
    for images, labels in pbar:
        images = images.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        batch_size = labels.size(0)
        # Scale the batch mean back up by the batch size so that dividing by
        # the sample count below gives a true per-sample average (the
        # original divided a sum of batch means by the number of samples).
        loss_sum += loss.item() * batch_size
        _, predicted = outputs.max(1)
        total += batch_size
        correct += predicted.eq(labels).sum().item()

        pbar.set_postfix({
            'Loss': f'{loss_sum/total:.4f}',
            'Acc': f'{100.*correct/total:.2f}%'
        })

    if total == 0:
        # Empty loader: nothing to average, avoid a ZeroDivisionError.
        return 0.0, 0.0

    return loss_sum/total, 100.*correct/total

# Validation function
def validate(model, val_loader, criterion, device):
    """Evaluate ``model`` on ``val_loader`` without computing gradients.

    Args:
        model: Module to evaluate; switched to eval mode on entry.
        val_loader: Iterable yielding ``(images, labels)`` batches.
        criterion: Loss callable invoked as ``criterion(outputs, labels)``.
            Assumed to return the batch-mean loss (the default reduction of
            the ``nn.CrossEntropyLoss`` used in this file).
        device: Device the batch tensors are moved to.

    Returns:
        Tuple ``(mean_loss, accuracy_percent)``: the sample-weighted mean
        loss and the top-1 accuracy in percent. Both are ``0.0`` when the
        loader yields no samples.
    """
    model.eval()
    loss_sum = 0.0  # running sum of per-sample losses
    correct = 0
    total = 0

    with torch.no_grad():
        pbar = tqdm(val_loader, desc='Validation')
        for images, labels in pbar:
            images = images.to(device)
            labels = labels.to(device)

            outputs = model(images)
            loss = criterion(outputs, labels)

            batch_size = labels.size(0)
            # Weight the batch mean by the batch size so the running figure
            # and the returned value are both per-sample averages (the
            # original divided a sum of batch means by the sample count).
            loss_sum += loss.item() * batch_size
            _, predicted = outputs.max(1)
            total += batch_size
            correct += predicted.eq(labels).sum().item()

            pbar.set_postfix({
                'Loss': f'{loss_sum/total:.4f}',
                'Acc': f'{100.*correct/total:.2f}%'
            })

    if total == 0:
        # Empty loader: nothing to average, avoid a ZeroDivisionError.
        return 0.0, 0.0

    return loss_sum/total, 100.*correct/total

In [None]:
# Cross-validation setup: seeded, shuffled, 5-way stratified splitter
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
cv_scores = []

# This baseline only uses the first split the splitter produces, to keep it fast
fold = 0
train_idx, val_idx = next(iter(skf.split(all_images, all_labels)))

print(f"Fold {fold+1}")
print(f"Train samples: {len(train_idx)}")
print(f"Val samples: {len(val_idx)}")


def _make_subset(indices, transform):
    """Build an iNaturalistDataset over the given positions of the pooled lists."""
    subset_images = [all_images[i] for i in indices]
    subset_labels = [all_labels[i] for i in indices]
    return iNaturalistDataset(subset_images, subset_labels, transform=transform)


# Augmented pipeline for the training part, plain pipeline for the held-out part
train_dataset = _make_subset(train_idx, train_transform)
val_dataset = _make_subset(val_idx, val_transform)

# Loaders share batch size and worker count; only the training one shuffles
_loader_options = dict(batch_size=32, num_workers=2)
train_loader = DataLoader(train_dataset, shuffle=True, **_loader_options)
val_loader = DataLoader(val_dataset, shuffle=False, **_loader_options)

print(f"Train batches: {len(train_loader)}")
print(f"Val batches: {len(val_loader)}")

In [None]:
# Training setup
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
# Halve the learning rate when the validation loss has not improved for 2 epochs
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', patience=2, factor=0.5)

num_epochs = 5  # Keep it short for baseline
best_val_acc = 0.0

# Create the checkpoint directory up front so the first torch.save cannot
# fail on a missing folder after a full epoch of training.
checkpoint_path = '/home/code/experiments/001_baseline/best_model.pth'
os.makedirs(os.path.dirname(checkpoint_path), exist_ok=True)

print("Starting training...")
for epoch in range(num_epochs):
    print(f"\nEpoch {epoch+1}/{num_epochs}")

    train_loss, train_acc = train_epoch(model, train_loader, criterion, optimizer, device)
    val_loss, val_acc = validate(model, val_loader, criterion, device)

    # The scheduler monitors validation loss (mode='min')
    scheduler.step(val_loss)

    print(f"Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.2f}%")
    print(f"Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.2f}%")

    # Always checkpoint the first epoch so a checkpoint file exists even if
    # validation accuracy never rises above the initial 0.0.
    if epoch == 0 or val_acc > best_val_acc:
        best_val_acc = val_acc
        torch.save(model.state_dict(), checkpoint_path)
        print(f"Saved best model with val acc: {best_val_acc:.2f}%")

# Record one score per fold (the best epoch of this fold). Appending inside
# the epoch loop would make the mean/std below describe epochs, not folds.
cv_scores.append(best_val_acc)

print(f"\nCV Score: {np.mean(cv_scores):.2f}% ± {np.std(cv_scores):.2f}%")

In [None]:
# Load best model and predict on test set
print("Loading best model...")
# map_location remaps the saved tensors onto the current device, so a
# checkpoint written from a CUDA run still loads on a CPU-only machine.
best_state = torch.load(
    '/home/code/experiments/001_baseline/best_model.pth',
    map_location=device,
)
model.load_state_dict(best_state)
model.eval()

# Create test dataset
class TestDataset(Dataset):
    """Unlabelled image dataset that yields ``(image, image_id)`` pairs.

    Args:
        images: Sequence of dicts; each must contain ``'file_name'`` (path
            relative to ``root``) and ``'id'``.
        transform: Optional callable applied to the loaded RGB PIL image.
        root: Directory prefix prepended to every ``file_name``. Defaults to
            the location the notebook originally hard-coded.
    """

    def __init__(self, images, transform=None, root='/home/data'):
        self.images = images
        self.transform = transform
        self.root = root

    def __len__(self):
        """Return the number of samples."""
        return len(self.images)

    def __getitem__(self, idx):
        """Return ``(image, id)`` for position ``idx``."""
        img_info = self.images[idx]
        image_path = f"{self.root}/{img_info['file_name']}"

        # Use a context manager so the file handle is released as soon as
        # the pixels are decoded; convert() returns a new, fully loaded
        # image that outlives the handle.
        with Image.open(image_path) as raw_image:
            image = raw_image.convert('RGB')

        if self.transform is not None:
            image = self.transform(image)

        return image, img_info['id']

test_dataset = TestDataset(test_data['images'], transform=val_transform)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False, num_workers=2)

# Predict
print("Making predictions...")
test_predictions = []
test_ids = []

with torch.no_grad():
    for images, ids in tqdm(test_loader, desc='Testing'):
        images = images.to(device)
        outputs = model(images)
        # Index of the highest-scoring output per sample
        _, predicted = outputs.max(1)

        # tolist() yields plain Python ints rather than NumPy scalars
        test_predictions.extend(predicted.cpu().tolist())

        # When the dataset yields integer ids, the default collate function
        # batches them into a tensor; extending a list with that tensor
        # would store 0-d tensors and corrupt the 'id' column written to
        # CSV later. Convert to plain Python values; non-tensor batches
        # (e.g. string ids) are kept as they are.
        batch_ids = ids.tolist() if torch.is_tensor(ids) else list(ids)
        test_ids.extend(batch_ids)

print(f"Predictions made: {len(test_predictions)}")
print(f"Sample predictions: {test_predictions[:10]}")

In [None]:
# Create submission
submission = pd.DataFrame({
    'id': test_ids,
    'predicted': test_predictions
})

# Store predictions as strings (the column format allows several
# space-separated predictions per row; this baseline writes exactly one)
submission['predicted'] = submission['predicted'].astype(str)

print("Submission format:")
print(submission.head())
print(f"\nShape: {submission.shape}")

# Save submission; create the target folder first so to_csv cannot fail on a
# missing directory after inference has already completed.
submission_path = '/home/submission/submission.csv'
os.makedirs(os.path.dirname(submission_path), exist_ok=True)
submission.to_csv(submission_path, index=False)
print(f"Submission saved to: {submission_path}")

# Also save to experiment folder for reference
experiment_copy_path = '/home/code/experiments/001_baseline/submission.csv'
os.makedirs(os.path.dirname(experiment_copy_path), exist_ok=True)
submission.to_csv(experiment_copy_path, index=False)

# Calculate CV score for logging.
# NOTE: despite its name, cv_score holds an error rate in [0, 1]
# (1 - best validation accuracy / 100), so lower is better.
cv_score = 1 - (best_val_acc / 100)  # Convert accuracy to error rate
print(f"CV Error Rate: {cv_score:.4f}")