# Dogs vs Cats - Baseline Transfer Learning

This notebook implements a baseline using transfer learning with a pretrained CNN backbone.

## Approach
1. Load and explore the data
2. Use Stratified K-Fold validation
3. Train a new classification head on top of a frozen, ImageNet-pretrained ResNet-18 backbone
4. Generate predictions for submission

In [None]:
import os
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from torchvision import models, transforms
from sklearn.model_selection import StratifiedKFold
from PIL import Image
import matplotlib.pyplot as plt
from tqdm import tqdm
import warnings
# Silence every Python warning for the rest of the session
warnings.filterwarnings('ignore')

# Report whether CUDA is usable and, if it is, the name and total memory of device 0
cuda_ok = torch.cuda.is_available()
print(f"GPU available: {cuda_ok}")
if cuda_ok:
    gpu_name = torch.cuda.get_device_name(0)
    print(f"GPU: {gpu_name}")
    gpu_bytes = torch.cuda.get_device_properties(0).total_memory
    print(f"GPU Memory: {gpu_bytes / 1e9:.1f} GB")

In [None]:
# Load data information
train_dir = '/home/data/train'
test_dir = '/home/data/test'

# Extensions accepted as images; matched case-insensitively so that files such
# as 'IMG.JPG' are not silently skipped.
image_extensions = ('.jpg', '.jpeg', '.png')

# Get training images and labels.
# os.listdir() returns entries in arbitrary order, so the listing is sorted to
# keep the DataFrame row order -- and therefore the seeded K-fold splits that
# are built from it -- reproducible across runs and machines.
train_files = sorted(
    name for name in os.listdir(train_dir)
    if name.lower().endswith(image_extensions)
)
# Label: 1 for dog, 0 for cat (a filename containing 'dog', in any case, is a dog)
train_labels = [1 if 'dog' in name.lower() else 0 for name in train_files]

num_dogs = sum(train_labels)
print(f"Found {len(train_files)} training images")
print(f"Dog images: {num_dogs}")
print(f"Cat images: {len(train_labels) - num_dogs}")

# Get test images (sorted for the same reproducibility reason as above)
test_files = sorted(
    name for name in os.listdir(test_dir)
    if name.lower().endswith(image_extensions)
)
print(f"Found {len(test_files)} test images")

# Create DataFrames for easier handling
train_df = pd.DataFrame({
    'filename': train_files,
    'label': train_labels,
})

test_df = pd.DataFrame({
    'filename': test_files,
})

print("\nTraining data sample:")
print(train_df.head())

In [None]:
# Create custom dataset class
class DogsCatsDataset(Dataset):
    """Image dataset backed by a table of filenames (and, for training, labels).

    Args:
        dataframe: Table with a 'filename' column and, unless ``is_test`` is
            true, a 'label' column. Rows are addressed positionally via ``iloc``.
        directory: Directory that the filenames are joined onto.
        transform: Optional callable applied to each loaded RGB PIL image.
        is_test: When true, items are returned without a label.
    """

    def __init__(self, dataframe, directory, transform=None, is_test=False):
        self.dataframe = dataframe
        self.directory = directory
        self.transform = transform
        self.is_test = is_test

    def __len__(self):
        """Return the number of rows in the backing table."""
        return len(self.dataframe)

    def __getitem__(self, idx):
        """Load the image stored at positional index ``idx``.

        Returns:
            The (optionally transformed) image when ``is_test`` is true,
            otherwise an ``(image, label)`` pair whose label is a float32 tensor.
        """
        # Look the row up once and reuse it for both the filename and the label
        row = self.dataframe.iloc[idx]
        img_path = os.path.join(self.directory, row['filename'])

        # Image.open() is lazy and keeps the file handle; convert() forces the
        # pixel data to be read into a new image, and the context manager then
        # closes the handle deterministically instead of leaving it to the GC.
        with Image.open(img_path) as img:
            image = img.convert('RGB')

        if self.transform is not None:
            image = self.transform(image)

        if self.is_test:
            return image
        return image, torch.tensor(row['label'], dtype=torch.float32)

In [None]:
# Define data transforms
# Every pipeline resizes to the same fixed input size and applies the same
# per-channel normalization; only the training pipeline adds random augmentation.
input_size = (224, 224)
channel_mean = [0.485, 0.456, 0.406]
channel_std = [0.229, 0.224, 0.225]

# Random augmentations used during training only, applied after the resize
augmentations = [
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomRotation(15),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2),
]

train_transform = transforms.Compose(
    [transforms.Resize(input_size)]
    + augmentations
    + [
        transforms.ToTensor(),
        transforms.Normalize(mean=channel_mean, std=channel_std),
    ]
)

# Deterministic pipeline: resize, convert to tensor, normalize
val_transform = transforms.Compose(
    [
        transforms.Resize(input_size),
        transforms.ToTensor(),
        transforms.Normalize(mean=channel_mean, std=channel_std),
    ]
)

# Test images go through the very same pipeline object as validation images
test_transform = val_transform

In [None]:
# Define a model factory function using a pretrained ResNet-18
def create_model():
    """Build a ResNet-18 binary classifier on top of a frozen pretrained backbone.

    The network is loaded with the IMAGENET1K_V1 weights and every loaded
    parameter has ``requires_grad`` switched off. The original ``fc`` layer is
    then replaced by Dropout(0.5) -> Linear(in_features, 1) -> Sigmoid; the
    parameters of that new head are created afterwards and so stay trainable.

    Returns:
        The modified ResNet-18, whose forward pass ends in a sigmoid.
    """
    backbone = models.resnet18(weights=models.ResNet18_Weights.IMAGENET1K_V1)

    # Freeze first, swap the head second: the new head is not affected by the freeze
    for weight in backbone.parameters():
        weight.requires_grad_(False)

    head = nn.Sequential(
        nn.Dropout(0.5),
        nn.Linear(backbone.fc.in_features, 1),
        nn.Sigmoid(),
    )
    backbone.fc = head

    return backbone

In [None]:
# Training function
def _run_epoch(model, loader, criterion, device, desc, optimizer=None):
    """Run one pass over ``loader``: a training pass if ``optimizer`` is given, else evaluation.

    Returns:
        ``(mean_batch_loss, accuracy)`` where the loss is the mean of the
        per-batch losses and accuracy thresholds the model outputs at 0.5.
    """
    training = optimizer is not None
    model.train(training)  # train(False) is equivalent to eval()

    loss_sum = 0.0
    correct = 0
    seen = 0

    # Gradients are only tracked for the training pass
    with torch.set_grad_enabled(training):
        for images, labels in tqdm(loader, desc=desc):
            images = images.to(device)
            labels = labels.to(device)

            if training:
                optimizer.zero_grad()

            # reshape(-1) instead of squeeze(): squeeze() turns a batch of size 1
            # into a 0-d tensor, which no longer matches the (1,) label tensor.
            outputs = model(images).reshape(-1)
            loss = criterion(outputs, labels)

            if training:
                loss.backward()
                optimizer.step()

            loss_sum += loss.item()
            predicted = (outputs > 0.5).float()
            correct += (predicted == labels).sum().item()
            seen += labels.size(0)

    return loss_sum / len(loader), correct / seen


def train_model(model, train_loader, val_loader, criterion, optimizer, scheduler, device, epochs=5):
    """Train ``model`` for ``epochs`` epochs and restore the weights with the lowest validation loss.

    Args:
        model: Network to train; its outputs are thresholded at 0.5 for the
            accuracy figures, so it is expected to emit probabilities.
        train_loader: Iterable of ``(images, labels)`` batches used for optimization.
        val_loader: Iterable of ``(images, labels)`` batches used for model selection.
        criterion: Loss called as ``criterion(outputs, labels)``.
        optimizer: Optimizer stepped once per training batch.
        scheduler: Stepped once per epoch with the validation loss.
        device: Device the batches are moved to.
        epochs: Number of epochs to run.

    Returns:
        ``(model, best_val_loss)``. ``model`` holds the best-epoch weights; if no
        epoch produced a finite improvement (for example ``epochs=0``) the model
        is returned as-is and ``best_val_loss`` is ``inf``.
    """
    import copy  # local import: only needed for the best-weights snapshot

    best_val_loss = float('inf')
    best_model_state = None

    for epoch in range(epochs):
        train_loss, train_acc = _run_epoch(
            model, train_loader, criterion, device,
            desc=f'Epoch {epoch+1}/{epochs} - Training',
            optimizer=optimizer,
        )
        val_loss, val_acc = _run_epoch(
            model, val_loader, criterion, device,
            desc=f'Epoch {epoch+1}/{epochs} - Validation',
        )

        print(f'Epoch {epoch+1}/{epochs}:')
        print(f'  Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}')
        print(f'  Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.4f}')

        # Save best model.
        # state_dict() hands back references to the live tensors, so a shallow
        # dict.copy() would keep changing as training continues; deep-copy to
        # take a real snapshot of this epoch's weights.
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            best_model_state = copy.deepcopy(model.state_dict())

        scheduler.step(val_loss)

    # Load best model (nothing to restore if no epoch ever improved on inf)
    if best_model_state is not None:
        model.load_state_dict(best_model_state)
    return model, best_val_loss

In [None]:
# Main training loop with cross-validation
from sklearn.metrics import log_loss

# Single source of truth for the number of folds (used for the splitter, the
# progress banner and the final averaging of test predictions)
n_splits = 5
skf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=42)
fold_scores = []

# Convert to numpy for sklearn
X = train_df['filename'].values
y = train_df['label'].values

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")

# Running sum of every fold's test-set predictions; divided by n_splits after the loop
test_predictions = np.zeros(len(test_df))

# The test data does not depend on the fold, so build its loader once
test_dataset = DogsCatsDataset(test_df, test_dir, transform=test_transform, is_test=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False, num_workers=2)

for fold, (train_idx, val_idx) in enumerate(skf.split(X, y)):
    print(f"\n{'='*50}")
    print(f"FOLD {fold + 1}/{n_splits}")
    print(f"{'='*50}")

    # Create fold datasets
    train_fold_df = train_df.iloc[train_idx]
    val_fold_df = train_df.iloc[val_idx]

    train_dataset = DogsCatsDataset(train_fold_df, train_dir, transform=train_transform)
    val_dataset = DogsCatsDataset(val_fold_df, train_dir, transform=val_transform)

    train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True, num_workers=2)
    val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False, num_workers=2)

    # Create and setup a fresh model for this fold
    model = create_model()
    model = model.to(device)

    # Loss and optimizer (only the replacement head's parameters are optimized)
    criterion = nn.BCELoss()
    optimizer = optim.Adam(model.fc.parameters(), lr=0.001)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', patience=2, factor=0.5)

    # Train model
    model, val_loss = train_model(model, train_loader, val_loader, criterion, optimizer, scheduler, device, epochs=5)

    # Collect validation predictions for the fold log loss
    model.eval()
    val_preds = []
    val_true = []

    with torch.no_grad():
        for images, labels in val_loader:
            images = images.to(device)
            # reshape(-1) rather than squeeze(): a final batch of size 1 would
            # otherwise become a 0-d array, which list.extend() cannot iterate.
            outputs = model(images).reshape(-1).cpu().numpy()
            val_preds.extend(outputs)
            val_true.extend(labels.numpy())

    # Calculate log loss for this fold
    fold_log_loss = log_loss(val_true, val_preds)
    fold_scores.append(fold_log_loss)

    print(f"Fold {fold + 1} Log Loss: {fold_log_loss:.4f}")

    # Predict on test set
    fold_test_preds = []
    with torch.no_grad():
        for images in tqdm(test_loader, desc=f'Predicting on test set - Fold {fold + 1}'):
            images = images.to(device)
            outputs = model(images).reshape(-1).cpu().numpy()
            fold_test_preds.extend(outputs)

    # Add to ensemble predictions
    test_predictions += np.array(fold_test_preds)

# Average predictions across folds
test_predictions /= n_splits

# Calculate overall CV score
cv_score = np.mean(fold_scores)
cv_std = np.std(fold_scores)

print(f"\n{'='*50}")
print("CROSS-VALIDATION RESULTS")
print(f"{'='*50}")
print(f"Mean Log Loss: {cv_score:.4f} ± {cv_std:.4f}")
print(f"Individual folds: {fold_scores}")

In [None]:
# Create submission file
# The numeric id is the part of each test filename that precedes the first '.'
image_ids = [int(name.partition('.')[0]) for name in test_df['filename']]

# One row per test image (id, averaged prediction), sorted by id to match expected format
submission_df = pd.DataFrame({
    'id': image_ids,
    'label': test_predictions,
}).sort_values('id')

print("Submission file preview:")
print(submission_df.head())
print(f"\nTotal predictions: {len(submission_df)}")

# Save submission, creating the output directory first if it does not exist yet
os.makedirs('/home/submission', exist_ok=True)
submission_path = '/home/submission/submission.csv'
submission_df.to_csv(submission_path, index=False)

print(f"Submission saved to: {submission_path}")