# Baseline CNN Model - Dogs vs Cats

## Experiment 001: Transfer Learning with ResNet

This notebook implements a baseline CNN model using transfer learning for the Dogs vs Cats classification problem.

### Approach:
- Use pretrained ResNet50 model
- 5-fold stratified cross-validation
- Binary classification (dog vs cat)
- Evaluation metric: Log Loss
- Generate predictions for test set submission

In [1]:
import os
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from torchvision import models, transforms
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import log_loss
from PIL import Image
import warnings
warnings.filterwarnings('ignore')

# Set random seeds for reproducibility
np.random.seed(42)
torch.manual_seed(42)
torch.cuda.manual_seed(42)

# Check GPU availability
print(f"PyTorch version: {torch.__version__}")
print(f"CUDA available: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    print(f"GPU: {torch.cuda.get_device_name(0)}")
    print(f"GPU Memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.1f} GB")
else:
    print("WARNING: No GPU available, using CPU")

PyTorch version: 2.2.0+cu118
CUDA available: True
GPU: NVIDIA A100-SXM4-80GB
GPU Memory: 85.1 GB


In [2]:
# Data paths
DATA_DIR = '/home/data'
TRAIN_DIR = os.path.join(DATA_DIR, 'train')
TEST_DIR = os.path.join(DATA_DIR, 'test')

# Verify data exists
print(f"Train directory exists: {os.path.exists(TRAIN_DIR)}")
print(f"Test directory exists: {os.path.exists(TEST_DIR)}")

# List some training images
train_files = os.listdir(TRAIN_DIR)[:5]
print(f"Sample training files: {train_files}")

Train directory exists: True
Test directory exists: True
Sample training files: ['dog.5.jpg', 'cat.8112.jpg', 'cat.1197.jpg', 'dog.8491.jpg', 'dog.9129.jpg']


In [3]:
# Create dataset class
class DogsCatsDataset(Dataset):
    def __init__(self, file_paths, labels=None, transform=None):
        self.file_paths = file_paths
        self.labels = labels
        self.transform = transform
        
    def __len__(self):
        return len(self.file_paths)
    
    def __getitem__(self, idx):
        img_path = self.file_paths[idx]
        image = Image.open(img_path).convert('RGB')
        
        if self.transform:
            image = self.transform(image)
            
        if self.labels is not None:
            label = self.labels[idx]
            return image, label
        return image

# Data transforms
train_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomRotation(15),
    transforms.ColorJitter(brightness=0.1, contrast=0.1),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

val_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

test_transform = val_transform

In [4]:
# Prepare training data
print("Loading training data...")
train_files = [os.path.join(TRAIN_DIR, f) for f in os.listdir(TRAIN_DIR) if f.endswith('.jpg')]
train_labels = [1 if 'dog' in os.path.basename(f) else 0 for f in train_files]

print(f"Total training images: {len(train_files)}")
print(f"Dog images: {sum(train_labels)}")
print(f"Cat images: {len(train_labels) - sum(train_labels)}")

# Convert to numpy arrays
train_files = np.array(train_files)
train_labels = np.array(train_labels)

Loading training data...
Total training images: 22500
Dog images: 11258
Cat images: 11242


In [5]:
# Create model function
def create_model():
    # Load pretrained ResNet50
    model = models.resnet50(pretrained=True)
    
    # Freeze early layers
    for param in model.parameters():
        param.requires_grad = False
    
    # Replace final layer for binary classification
    num_features = model.fc.in_features
    model.fc = nn.Sequential(
        nn.Dropout(0.5),
        nn.Linear(num_features, 1),
        nn.Sigmoid()
    )
    
    return model

# Training function
def train_model(model, train_loader, val_loader, criterion, optimizer, scheduler, device, epochs=5):
    best_val_loss = float('inf')
    
    for epoch in range(epochs):
        # Training phase
        model.train()
        train_loss = 0
        
        for batch_idx, (data, target) in enumerate(train_loader):
            data, target = data.to(device), target.to(device).float()
            
            optimizer.zero_grad()
            output = model(data).squeeze()
            loss = criterion(output, target)
            loss.backward()
            optimizer.step()
            
            train_loss += loss.item()
            
            if batch_idx % 100 == 0:
                print(f'Epoch {epoch+1}, Batch {batch_idx}, Loss: {loss.item():.4f}')
        
        # Validation phase
        model.eval()
        val_loss = 0
        val_preds = []
        val_targets = []
        
        with torch.no_grad():
            for data, target in val_loader:
                data, target = data.to(device), target.to(device).float()
                output = model(data).squeeze()
                loss = criterion(output, target)
                val_loss += loss.item()
                
                val_preds.extend(output.cpu().numpy())
                val_targets.extend(target.cpu().numpy())
        
        # Calculate metrics
        train_loss /= len(train_loader)
        val_loss /= len(val_loader)
        val_log_loss = log_loss(val_targets, val_preds)
        
        print(f'Epoch {epoch+1}/{epochs}:')
        print(f'  Train Loss: {train_loss:.4f}')
        print(f'  Val Loss: {val_loss:.4f}')
        print(f'  Val Log Loss: {val_log_loss:.4f}')
        
        scheduler.step(val_loss)
        
        # Save best model
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            
    return model, val_log_loss

In [6]:
# Cross-validation setup
n_splits = 5
skf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=42)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")

# Store results
cv_scores = []
oof_predictions = np.zeros(len(train_files))

print(f"\nStarting {n_splits}-fold cross-validation...\n")

Using device: cuda

Starting 5-fold cross-validation...



In [7]:
# Run cross-validation
fold = 1
for train_idx, val_idx in skf.split(train_files, train_labels):
    print(f"\n{'='*50}")
    print(f"FOLD {fold}/{n_splits}")
    print(f"{'='*50}")
    
    # Split data
    X_train, X_val = train_files[train_idx], train_files[val_idx]
    y_train, y_val = train_labels[train_idx], train_labels[val_idx]
    
    print(f"Training samples: {len(X_train)}")
    print(f"Validation samples: {len(X_val)}")
    
    # Create datasets
    train_dataset = DogsCatsDataset(X_train, y_train, transform=train_transform)
    val_dataset = DogsCatsDataset(X_val, y_val, transform=val_transform)
    
    # Create data loaders
    train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True, num_workers=2)
    val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False, num_workers=2)
    
    # Create model
    model = create_model()
    model = model.to(device)
    
    # Loss and optimizer
    criterion = nn.BCELoss()
    optimizer = optim.Adam(model.fc.parameters(), lr=0.001)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', patience=2, factor=0.5)
    
    # Train model
    model, val_score = train_model(model, train_loader, val_loader, criterion, optimizer, scheduler, device, epochs=3)
    
    # Store predictions
    model.eval()
    val_preds = []
    with torch.no_grad():
        for data, _ in val_loader:
            data = data.to(device)
            output = model(data).squeeze()
            val_preds.extend(output.cpu().numpy())
    
    oof_predictions[val_idx] = val_preds
    cv_scores.append(val_score)
    
    print(f"Fold {fold} Log Loss: {val_score:.4f}")
    
    fold += 1

# Overall CV score
print(f"\n{'='*50}")
print(f"CROSS-VALIDATION RESULTS")
print(f"{'='*50}")
print(f"Mean Log Loss: {np.mean(cv_scores):.4f} ± {np.std(cv_scores):.4f}")
print(f"Individual folds: {cv_scores}")


FOLD 1/5
Training samples: 18000
Validation samples: 4500


Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to /home/nonroot/.cache/torch/hub/checkpoints/resnet50-0676ba61.pth


  0%|          | 0.00/97.8M [00:00<?, ?B/s]

  0%|          | 416k/97.8M [00:00<00:24, 4.22MB/s]

  5%|▍         | 4.66M/97.8M [00:00<00:03, 27.8MB/s]

 19%|█▉        | 18.9M/97.8M [00:00<00:00, 83.0MB/s]

 34%|███▍      | 33.1M/97.8M [00:00<00:00, 109MB/s] 

 48%|████▊     | 47.4M/97.8M [00:00<00:00, 124MB/s]

 63%|██████▎   | 61.8M/97.8M [00:00<00:00, 133MB/s]

 78%|███████▊  | 76.0M/97.8M [00:00<00:00, 138MB/s]

 92%|█████████▏| 90.4M/97.8M [00:00<00:00, 142MB/s]

100%|██████████| 97.8M/97.8M [00:00<00:00, 120MB/s]




Epoch 1, Batch 0, Loss: 0.7642


Epoch 1, Batch 100, Loss: 0.1376


Epoch 1, Batch 200, Loss: 0.1105


Epoch 1, Batch 300, Loss: 0.1599


Epoch 1, Batch 400, Loss: 0.1324


Epoch 1, Batch 500, Loss: 0.0403


Epoch 1/3:
  Train Loss: 0.1430
  Val Loss: 0.0531
  Val Log Loss: 0.0529


Epoch 2, Batch 0, Loss: 0.0281


Epoch 2, Batch 100, Loss: 0.1531


Epoch 2, Batch 200, Loss: 0.0600


Epoch 2, Batch 300, Loss: 0.0464


Epoch 2, Batch 400, Loss: 0.0064


Epoch 2, Batch 500, Loss: 0.0117


Epoch 2/3:
  Train Loss: 0.1037
  Val Loss: 0.0456
  Val Log Loss: 0.0454


Epoch 3, Batch 0, Loss: 0.0843


Epoch 3, Batch 100, Loss: 0.0847


Epoch 3, Batch 200, Loss: 0.0386


Epoch 3, Batch 300, Loss: 0.0298


Epoch 3, Batch 400, Loss: 0.0422


Epoch 3, Batch 500, Loss: 0.1193


Epoch 3/3:
  Train Loss: 0.0964
  Val Loss: 0.0443
  Val Log Loss: 0.0441


Fold 1 Log Loss: 0.0441

FOLD 2/5
Training samples: 18000
Validation samples: 4500


Epoch 1, Batch 0, Loss: 0.7212


Epoch 1, Batch 100, Loss: 0.1182


Epoch 1, Batch 200, Loss: 0.1397


Epoch 1, Batch 300, Loss: 0.1821


Epoch 1, Batch 400, Loss: 0.0552


Epoch 1, Batch 500, Loss: 0.1105


Epoch 1/3:
  Train Loss: 0.1453
  Val Loss: 0.0656
  Val Log Loss: 0.0657


Epoch 2, Batch 0, Loss: 0.0214


Epoch 2, Batch 100, Loss: 0.1022


Epoch 2, Batch 200, Loss: 0.0615


Epoch 2, Batch 300, Loss: 0.1944


Epoch 2, Batch 400, Loss: 0.0145


Epoch 2, Batch 500, Loss: 0.1361


Epoch 2/3:
  Train Loss: 0.0983
  Val Loss: 0.0571
  Val Log Loss: 0.0572


Epoch 3, Batch 0, Loss: 0.0401


Epoch 3, Batch 100, Loss: 0.0807


Epoch 3, Batch 200, Loss: 0.2837


Epoch 3, Batch 300, Loss: 0.0375


Epoch 3, Batch 400, Loss: 0.0692


Epoch 3, Batch 500, Loss: 0.0155


Epoch 3/3:
  Train Loss: 0.0933
  Val Loss: 0.0536
  Val Log Loss: 0.0537


Fold 2 Log Loss: 0.0537

FOLD 3/5
Training samples: 18000
Validation samples: 4500


Epoch 1, Batch 0, Loss: 0.7340


Epoch 1, Batch 100, Loss: 0.1772


Epoch 1, Batch 200, Loss: 0.1426


Epoch 1, Batch 300, Loss: 0.0751


Epoch 1, Batch 400, Loss: 0.0766


Epoch 1, Batch 500, Loss: 0.0537


Epoch 1/3:
  Train Loss: 0.1476
  Val Loss: 0.0635
  Val Log Loss: 0.0635


Epoch 2, Batch 0, Loss: 0.1746


Epoch 2, Batch 100, Loss: 0.0608


Epoch 2, Batch 200, Loss: 0.1166


Epoch 2, Batch 300, Loss: 0.1499


Epoch 2, Batch 400, Loss: 0.1142


Epoch 2, Batch 500, Loss: 0.0903


Epoch 2/3:
  Train Loss: 0.0949
  Val Loss: 0.0579
  Val Log Loss: 0.0578


Epoch 3, Batch 0, Loss: 0.1425


Epoch 3, Batch 100, Loss: 0.0610


Epoch 3, Batch 200, Loss: 0.0640


Epoch 3, Batch 300, Loss: 0.0371


Epoch 3, Batch 400, Loss: 0.0290


Epoch 3, Batch 500, Loss: 0.0359


Epoch 3/3:
  Train Loss: 0.0960
  Val Loss: 0.0521
  Val Log Loss: 0.0522


Fold 3 Log Loss: 0.0522

FOLD 4/5
Training samples: 18000
Validation samples: 4500


Epoch 1, Batch 0, Loss: 0.6881


Epoch 1, Batch 100, Loss: 0.1797


Epoch 1, Batch 200, Loss: 0.1541


Epoch 1, Batch 300, Loss: 0.1218


Epoch 1, Batch 400, Loss: 0.1180


Epoch 1, Batch 500, Loss: 0.0535


Epoch 1/3:
  Train Loss: 0.1465
  Val Loss: 0.0586
  Val Log Loss: 0.0587


Epoch 2, Batch 0, Loss: 0.1665


Epoch 2, Batch 100, Loss: 0.0707


Epoch 2, Batch 200, Loss: 0.0948


Epoch 2, Batch 300, Loss: 0.0298


Epoch 2, Batch 400, Loss: 0.0775


Epoch 2, Batch 500, Loss: 0.1356


Epoch 2/3:
  Train Loss: 0.0989
  Val Loss: 0.0461
  Val Log Loss: 0.0462


Epoch 3, Batch 0, Loss: 0.0524


Epoch 3, Batch 100, Loss: 0.0318


Epoch 3, Batch 200, Loss: 0.1573


Epoch 3, Batch 300, Loss: 0.1213


Epoch 3, Batch 400, Loss: 0.0328


Epoch 3, Batch 500, Loss: 0.0168


Epoch 3/3:
  Train Loss: 0.0975
  Val Loss: 0.0427
  Val Log Loss: 0.0428


Fold 4 Log Loss: 0.0428

FOLD 5/5
Training samples: 18000
Validation samples: 4500


Epoch 1, Batch 0, Loss: 0.7279


Epoch 1, Batch 100, Loss: 0.1403


Epoch 1, Batch 200, Loss: 0.1215


Epoch 1, Batch 300, Loss: 0.1296


Epoch 1, Batch 400, Loss: 0.1034


Epoch 1, Batch 500, Loss: 0.1105


Epoch 1/3:
  Train Loss: 0.1441
  Val Loss: 0.0601
  Val Log Loss: 0.0602


Epoch 2, Batch 0, Loss: 0.0566


Epoch 2, Batch 100, Loss: 0.1090


Epoch 2, Batch 200, Loss: 0.0496


Epoch 2, Batch 300, Loss: 0.0815


Epoch 2, Batch 400, Loss: 0.2870


Epoch 2, Batch 500, Loss: 0.0548


Epoch 2/3:
  Train Loss: 0.1022
  Val Loss: 0.0558
  Val Log Loss: 0.0558


Epoch 3, Batch 0, Loss: 0.0860


Epoch 3, Batch 100, Loss: 0.0224


Epoch 3, Batch 200, Loss: 0.0343


Epoch 3, Batch 300, Loss: 0.0903


Epoch 3, Batch 400, Loss: 0.0387


Epoch 3, Batch 500, Loss: 0.3858


Epoch 3/3:
  Train Loss: 0.0949
  Val Loss: 0.0467
  Val Log Loss: 0.0467


Fold 5 Log Loss: 0.0467

CROSS-VALIDATION RESULTS
Mean Log Loss: 0.0479 ± 0.0043
Individual folds: [0.044094325701767995, 0.05371179078450418, 0.05216084367308546, 0.04278605528187914, 0.04670264739165255]


In [8]:
# Generate predictions on test set
print("\nGenerating predictions on test set...")

# Load test files
test_files = [os.path.join(TEST_DIR, f) for f in sorted(os.listdir(TEST_DIR)) if f.endswith('.jpg')]
test_ids = [int(os.path.splitext(os.path.basename(f))[0]) for f in test_files]

print(f"Total test images: {len(test_files)}")

# Create test dataset and loader
test_dataset = DogsCatsDataset(test_files, transform=test_transform)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False, num_workers=2)

# Average predictions from all folds
test_predictions = np.zeros(len(test_files))

fold = 1
for train_idx, val_idx in skf.split(train_files, train_labels):
    print(f"Generating predictions from fold {fold}...")
    
    # Recreate and load model for this fold
    model = create_model()
    model = model.to(device)
    
    # Create temporary training split to train model
    X_train, X_val = train_files[train_idx], train_files[val_idx]
    y_train, y_val = train_labels[train_idx], train_labels[val_idx]
    
    train_dataset = DogsCatsDataset(X_train, y_train, transform=train_transform)
    val_dataset = DogsCatsDataset(X_val, y_val, transform=val_transform)
    
    train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True, num_workers=2)
    val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False, num_workers=2)
    
    criterion = nn.BCELoss()
    optimizer = optim.Adam(model.fc.parameters(), lr=0.001)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', patience=2, factor=0.5)
    
    # Train model
    model, _ = train_model(model, train_loader, val_loader, criterion, optimizer, scheduler, device, epochs=3)
    
    # Generate predictions
    model.eval()
    fold_preds = []
    with torch.no_grad():
        for data in test_loader:
            data = data.to(device)
            output = model(data).squeeze()
            fold_preds.extend(output.cpu().numpy())
    
    test_predictions += np.array(fold_preds)
    fold += 1

# Average predictions across folds
test_predictions /= n_splits

print(f"Test predictions shape: {test_predictions.shape}")
print(f"Test predictions range: [{test_predictions.min():.4f}, {test_predictions.max():.4f}]")


Generating predictions on test set...
Total test images: 2500
Generating predictions from fold 1...


Epoch 1, Batch 0, Loss: 0.7391


Epoch 1, Batch 100, Loss: 0.1232


Epoch 1, Batch 200, Loss: 0.0847


Epoch 1, Batch 300, Loss: 0.1244


Epoch 1, Batch 400, Loss: 0.0239


Epoch 1, Batch 500, Loss: 0.0558


Epoch 1/3:
  Train Loss: 0.1518
  Val Loss: 0.0809
  Val Log Loss: 0.0805


Epoch 2, Batch 0, Loss: 0.1805


Epoch 2, Batch 100, Loss: 0.1076


Epoch 2, Batch 200, Loss: 0.0332


Epoch 2, Batch 300, Loss: 0.2242


Epoch 2, Batch 400, Loss: 0.0485


Epoch 2, Batch 500, Loss: 0.0585


Epoch 2/3:
  Train Loss: 0.0968
  Val Loss: 0.0490
  Val Log Loss: 0.0487


Epoch 3, Batch 0, Loss: 0.0237


Epoch 3, Batch 100, Loss: 0.1422


Epoch 3, Batch 200, Loss: 0.0409


Epoch 3, Batch 300, Loss: 0.4402


Epoch 3, Batch 400, Loss: 0.0130


Epoch 3, Batch 500, Loss: 0.3523


Epoch 3/3:
  Train Loss: 0.1001
  Val Loss: 0.0457
  Val Log Loss: 0.0454


Generating predictions from fold 2...


Epoch 1, Batch 0, Loss: 0.6838


Epoch 1, Batch 100, Loss: 0.1813


Epoch 1, Batch 200, Loss: 0.1086


Epoch 1, Batch 300, Loss: 0.1419


Epoch 1, Batch 400, Loss: 0.0945


Epoch 1, Batch 500, Loss: 0.2978


Epoch 1/3:
  Train Loss: 0.1432
  Val Loss: 0.0620
  Val Log Loss: 0.0621


Epoch 2, Batch 0, Loss: 0.0653


Epoch 2, Batch 100, Loss: 0.0256


Epoch 2, Batch 200, Loss: 0.0602


Epoch 2, Batch 300, Loss: 0.0398


Epoch 2, Batch 400, Loss: 0.1704


Epoch 2, Batch 500, Loss: 0.0533


Epoch 2/3:
  Train Loss: 0.0965
  Val Loss: 0.0567
  Val Log Loss: 0.0568


Epoch 3, Batch 0, Loss: 0.0637


Epoch 3, Batch 100, Loss: 0.0874


Epoch 3, Batch 200, Loss: 0.2322


Epoch 3, Batch 300, Loss: 0.0836


Epoch 3, Batch 400, Loss: 0.0143


Epoch 3, Batch 500, Loss: 0.0979


Epoch 3/3:
  Train Loss: 0.0929
  Val Loss: 0.0529
  Val Log Loss: 0.0530


Generating predictions from fold 3...


Epoch 1, Batch 0, Loss: 0.7182


Epoch 1, Batch 100, Loss: 0.1747


Epoch 1, Batch 200, Loss: 0.2432


Epoch 1, Batch 300, Loss: 0.0570


Epoch 1, Batch 400, Loss: 0.0588


Epoch 1, Batch 500, Loss: 0.0266


Epoch 1/3:
  Train Loss: 0.1423
  Val Loss: 0.0625
  Val Log Loss: 0.0625


Epoch 2, Batch 0, Loss: 0.1120


Epoch 2, Batch 100, Loss: 0.0253


Epoch 2, Batch 200, Loss: 0.0433


Epoch 2, Batch 300, Loss: 0.0641


Epoch 2, Batch 400, Loss: 0.0374


Epoch 2, Batch 500, Loss: 0.3836


Epoch 2/3:
  Train Loss: 0.1014
  Val Loss: 0.0540
  Val Log Loss: 0.0540


Epoch 3, Batch 0, Loss: 0.1011


Epoch 3, Batch 100, Loss: 0.0379


Epoch 3, Batch 200, Loss: 0.0314


Epoch 3, Batch 300, Loss: 0.0090


Epoch 3, Batch 400, Loss: 0.1413


Epoch 3, Batch 500, Loss: 0.0299


Epoch 3/3:
  Train Loss: 0.0957
  Val Loss: 0.0584
  Val Log Loss: 0.0584


Generating predictions from fold 4...


Epoch 1, Batch 0, Loss: 0.7096


Epoch 1, Batch 100, Loss: 0.2547


Epoch 1, Batch 200, Loss: 0.1261


Epoch 1, Batch 300, Loss: 0.1596


Epoch 1, Batch 400, Loss: 0.0425


Epoch 1, Batch 500, Loss: 0.0404


Epoch 1/3:
  Train Loss: 0.1461
  Val Loss: 0.0580
  Val Log Loss: 0.0581


Epoch 2, Batch 0, Loss: 0.0811


Epoch 2, Batch 100, Loss: 0.0221


Epoch 2, Batch 200, Loss: 0.1249


Epoch 2, Batch 300, Loss: 0.1078


Epoch 2, Batch 400, Loss: 0.1639


Epoch 2, Batch 500, Loss: 0.1246


Epoch 2/3:
  Train Loss: 0.0943
  Val Loss: 0.0472
  Val Log Loss: 0.0473


Epoch 3, Batch 0, Loss: 0.0692


Epoch 3, Batch 100, Loss: 0.0267


Epoch 3, Batch 200, Loss: 0.0295


Epoch 3, Batch 300, Loss: 0.0518


Epoch 3, Batch 400, Loss: 0.0159


Epoch 3, Batch 500, Loss: 0.0327


Epoch 3/3:
  Train Loss: 0.0938
  Val Loss: 0.0419
  Val Log Loss: 0.0420


Generating predictions from fold 5...


Epoch 1, Batch 0, Loss: 0.7129


Epoch 1, Batch 100, Loss: 0.2356


Epoch 1, Batch 200, Loss: 0.1510


Epoch 1, Batch 300, Loss: 0.1131


Epoch 1, Batch 400, Loss: 0.1752


Epoch 1, Batch 500, Loss: 0.0674


Epoch 1/3:
  Train Loss: 0.1442
  Val Loss: 0.0669
  Val Log Loss: 0.0670


Epoch 2, Batch 0, Loss: 0.0695


Epoch 2, Batch 100, Loss: 0.1554


Epoch 2, Batch 200, Loss: 0.0349


Epoch 2, Batch 300, Loss: 0.0805


Epoch 2, Batch 400, Loss: 0.2175


Epoch 2, Batch 500, Loss: 0.0586


Epoch 2/3:
  Train Loss: 0.0956
  Val Loss: 0.0560
  Val Log Loss: 0.0561


Epoch 3, Batch 0, Loss: 0.0208


Epoch 3, Batch 100, Loss: 0.2035


Epoch 3, Batch 200, Loss: 0.0335


Epoch 3, Batch 300, Loss: 0.3463


Epoch 3, Batch 400, Loss: 0.0802


Epoch 3, Batch 500, Loss: 0.1305


Epoch 3/3:
  Train Loss: 0.0957
  Val Loss: 0.0456
  Val Log Loss: 0.0456


Test predictions shape: (2500,)
Test predictions range: [0.0000, 1.0000]


In [9]:
# Create submission file
print("\nCreating submission file...")

submission = pd.DataFrame({
    'id': test_ids,
    'label': test_predictions
})

# Sort by id
submission = submission.sort_values('id').reset_index(drop=True)

print(f"Submission shape: {submission.shape}")
print(f"Sample predictions:")
print(submission.head(10))

# Save submission
SUBMISSION_DIR = '/home/submission'
os.makedirs(SUBMISSION_DIR, exist_ok=True)
submission_path = os.path.join(SUBMISSION_DIR, 'submission.csv')
submission.to_csv(submission_path, index=False)

print(f"Submission saved to: {submission_path}")

# Verify submission format
sample_sub = pd.read_csv('/home/data/sample_submission.csv')
print(f"\nSubmission format matches sample: {list(submission.columns) == list(sample_sub.columns)}")
print(f"ID ranges match: {submission['id'].min() == sample_sub['id'].min() and submission['id'].max() == sample_sub['id'].max()}")


Creating submission file...
Submission shape: (2500, 2)
Sample predictions:
   id     label
0   1  0.999861
1   2  0.000488
2   3  0.856156
3   4  0.579628
4   5  0.000111
5   6  0.999985
6   7  0.996927
7   8  0.001029
8   9  0.000088
9  10  0.975234
Submission saved to: /home/submission/submission.csv

Submission format matches sample: True
ID ranges match: True
