# Baseline CNN for Dogs vs Cats

A simple three-convolution CNN trained on 128×128 images, used as a baseline to establish a starting validation log-loss for the Dogs vs. Cats task.

In [None]:
import os
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from PIL import Image
from sklearn.model_selection import train_test_split
from tqdm import tqdm
import matplotlib.pyplot as plt

# Report CUDA availability and, when a device is present, its name and memory
cuda_ready = torch.cuda.is_available()
print(f"GPU available: {cuda_ready}")
if cuda_ready:
    print(f"GPU: {torch.cuda.get_device_name(0)}")
    total_bytes = torch.cuda.get_device_properties(0).total_memory
    print(f"GPU Memory: {total_bytes / 1e9:.1f} GB")

In [None]:
# Set paths
train_dir = '/home/data/train'
test_dir = '/home/data/test'
sample_submission_path = '/home/data/sample_submission.csv'

# Get all training images.
# os.listdir() returns entries in arbitrary, filesystem-dependent order, so
# sort them: otherwise the seeded train/validation split would select
# different images from one machine to the next.
train_files = sorted(f for f in os.listdir(train_dir) if f.endswith('.jpg'))

print(f"Total training images: {len(train_files)}")

# Create labels from the filename prefix (1 for dog, 0 for cat).
# Files with any other prefix are skipped, which keeps train_paths and
# train_labels index-aligned.
train_labels = []
train_paths = []

for file in train_files:
    if file.startswith('dog'):
        label = 1
    elif file.startswith('cat'):
        label = 0
    else:
        continue
    train_labels.append(label)
    train_paths.append(os.path.join(train_dir, file))

num_dogs = sum(train_labels)
print(f"Dogs: {num_dogs}")
print(f"Cats: {len(train_labels) - num_dogs}")

In [None]:
# Hold out 20% of the labelled images for validation. Stratifying on the
# labels keeps the dog/cat ratio the same in both parts, and the fixed seed
# makes the split repeatable for a given input order.
X_train, X_val, y_train, y_val = train_test_split(
    train_paths,
    train_labels,
    stratify=train_labels,
    random_state=42,
    test_size=0.2,
)

# Sizes of each part, plus the dog count (labels are 0/1, so sum == #dogs)
print(f"Training samples: {len(X_train)}")
print(f"Validation samples: {len(X_val)}")
print(f"Training dogs: {sum(y_train)}")
print(f"Validation dogs: {sum(y_val)}")

In [None]:
# Define dataset class
class DogsCatsDataset(Dataset):
    """Dataset that loads images from disk and pairs them with labels.

    Args:
        image_paths: Sequence of image file paths.
        labels: Sequence of labels, index-aligned with ``image_paths``.
        transform: Optional callable applied to each loaded PIL image.
    """

    def __init__(self, image_paths, labels, transform=None):
        self.image_paths, self.labels = image_paths, labels
        self.transform = transform

    def __len__(self):
        """Return the number of image paths held by the dataset."""
        return len(self.image_paths)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for position ``idx``.

        The image is opened and converted to RGB; the transform, when one
        was given, is applied before the pair is returned.
        """
        picture = Image.open(self.image_paths[idx]).convert('RGB')
        sample = self.transform(picture) if self.transform else picture
        return sample, self.labels[idx]

# Define transforms
# Both pipelines resize to 128x128, convert to a tensor and normalize with the
# same per-channel mean/std; only the training pipeline adds a random
# horizontal flip as augmentation.
train_transform = transforms.Compose(
    [
        transforms.Resize(size=(128, 128)),
        transforms.RandomHorizontalFlip(0.5),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ]
)

val_transform = transforms.Compose(
    [
        transforms.Resize(size=(128, 128)),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ]
)

In [None]:
# Create datasets and dataloaders
# Each split gets its own dataset with the matching transform pipeline
train_dataset = DogsCatsDataset(image_paths=X_train, labels=y_train, transform=train_transform)
val_dataset = DogsCatsDataset(image_paths=X_val, labels=y_val, transform=val_transform)

# Only the training loader shuffles; validation keeps a fixed order
train_loader = DataLoader(dataset=train_dataset, shuffle=True, batch_size=32, num_workers=2)
val_loader = DataLoader(dataset=val_dataset, shuffle=False, batch_size=32, num_workers=2)

print(f"Training batches: {len(train_loader)}")
print(f"Validation batches: {len(val_loader)}")

In [None]:
# Define simple CNN model
class SimpleCNN(nn.Module):
    """Three conv + max-pool stages followed by a two-layer classifier head.

    Expects input of shape ``(N, 3, 128, 128)`` and returns a tensor of shape
    ``(N, 1)`` holding sigmoid probabilities in ``[0, 1]``.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.conv3 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        self.dropout = nn.Dropout(0.5)

        # Feature size after the three pooling steps:
        # 128x128 -> 64x64 -> 32x32 -> 16x16, with 128 channels
        self.fc1 = nn.Linear(128 * 16 * 16, 512)
        self.fc2 = nn.Linear(512, 1)

    def forward(self, x):
        """Return one probability per input image, shape ``(N, 1)``.

        Raises:
            RuntimeError: If the spatial size of ``x`` does not produce the
                ``128 * 16 * 16`` features per sample that ``fc1`` expects.
        """
        x = self.pool(torch.relu(self.conv1(x)))  # 128x128 -> 64x64
        x = self.pool(torch.relu(self.conv2(x)))  # 64x64 -> 32x32
        x = self.pool(torch.relu(self.conv3(x)))  # 32x32 -> 16x16
        # Flatten per sample instead of view(-1, C*H*W): with a wrong input
        # size, view(-1, ...) can silently fold several samples into one row
        # and change the batch dimension; this way fc1 raises a shape error.
        x = torch.flatten(x, 1)
        x = torch.relu(self.fc1(x))
        x = self.dropout(x)
        x = torch.sigmoid(self.fc2(x))
        return x

# Build the network and move it to the GPU when one is available
model = SimpleCNN()
model = model.cuda() if torch.cuda.is_available() else model

# Size estimate counts 4 bytes per parameter
param_count = sum(p.numel() for p in model.parameters())
print(f"Model parameters: {param_count:,}")
print(f"Model size (MB): {param_count * 4 / 1024 / 1024:.2f}")

In [None]:
# Define loss and optimizer
# BCELoss takes probabilities, which matches the sigmoid output of the model
criterion = nn.BCELoss()
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)

# Training function
def train_epoch(model, loader, criterion, optimizer, device):
    """Train ``model`` for one pass over ``loader``.

    Args:
        model: Network returning one probability per sample.
        loader: Iterable of ``(images, labels)`` batches that supports ``len()``.
        criterion: Loss called as ``criterion(probabilities, float_labels)``.
        optimizer: Optimizer stepping ``model``'s parameters.
        device: Device each batch is moved to.

    Returns:
        Tuple ``(mean_batch_loss, accuracy)``: the loss averaged over batches,
        and the fraction of samples whose prediction at a 0.5 threshold
        equals the label.
    """
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0

    for images, labels in tqdm(loader, desc="Training"):
        images = images.to(device)
        labels = labels.float().to(device)

        optimizer.zero_grad()
        # Flatten to 1-D explicitly. A bare squeeze() would also drop the
        # batch dimension of a single-sample batch, leaving a 0-d tensor
        # whose shape no longer matches the labels.
        outputs = model(images).reshape(-1)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        predicted = (outputs > 0.5).float()
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    return running_loss / len(loader), correct / total

# Validation function
def validate_epoch(model, loader, criterion, device):
    """Evaluate ``model`` on ``loader`` with gradients disabled.

    Args:
        model: Network returning one probability per sample.
        loader: Iterable of ``(images, labels)`` batches that supports ``len()``.
        criterion: Loss called as ``criterion(probabilities, float_labels)``.
        device: Device each batch is moved to.

    Returns:
        Tuple ``(mean_batch_loss, accuracy, all_probs, all_labels)``. The last
        two are flat lists with one predicted probability and one label per
        sample, in loader order.
    """
    model.eval()
    running_loss = 0.0
    correct = 0
    total = 0
    all_probs = []
    all_labels = []

    with torch.no_grad():
        for images, labels in tqdm(loader, desc="Validation"):
            images = images.to(device)
            labels = labels.float().to(device)

            # Flatten to 1-D explicitly. A bare squeeze() would turn a
            # single-sample batch into a 0-d tensor, which mismatches the
            # labels and cannot be iterated by extend() below.
            outputs = model(images).reshape(-1)
            loss = criterion(outputs, labels)

            running_loss += loss.item()
            predicted = (outputs > 0.5).float()
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

            all_probs.extend(outputs.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

    return running_loss / len(loader), correct / total, all_probs, all_labels

# Calculate log loss
def log_loss(probs, labels):
    """Return the mean binary cross-entropy of ``probs`` against ``labels``.

    Args:
        probs: Predicted probabilities of the positive class, one per sample.
        labels: Ground-truth labels (0 or 1), index-aligned with ``probs``.

    Returns:
        The average negative log-likelihood as a float64 scalar.
    """
    # Work in float64. With float32 inputs, 1 - 1e-15 rounds to exactly 1.0,
    # so the upper clip would do nothing and a saturated prediction of 1.0
    # would yield log(0) and an inf or nan result.
    probs = np.asarray(probs, dtype=np.float64)
    labels = np.asarray(labels, dtype=np.float64)
    eps = 1e-15
    probs = np.clip(probs, eps, 1 - eps)
    return -np.mean(labels * np.log(probs) + (1 - labels) * np.log(1 - probs))

In [None]:
# Train the model
num_epochs = 10
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = model.to(device)

# Create the checkpoint directory up front so the first save cannot fail
# after a full epoch of training just because the folder is missing.
best_model_path = '/home/code/models/baseline_cnn_best.pth'
os.makedirs(os.path.dirname(best_model_path), exist_ok=True)

best_val_loss = float('inf')
best_val_acc = 0
best_log_loss = float('inf')

# Per-epoch history, used for plotting afterwards
train_losses = []
val_losses = []
val_accs = []
val_log_losses = []

for epoch in range(num_epochs):
    print(f"\nEpoch {epoch+1}/{num_epochs}")

    train_loss, train_acc = train_epoch(model, train_loader, criterion, optimizer, device)
    val_loss, val_acc, val_probs, val_labels = validate_epoch(model, val_loader, criterion, device)
    val_logloss = log_loss(val_probs, val_labels)

    train_losses.append(train_loss)
    val_losses.append(val_loss)
    val_accs.append(val_acc)
    val_log_losses.append(val_logloss)

    # Track the best validation loss and accuracy seen so far
    best_val_loss = min(best_val_loss, val_loss)
    best_val_acc = max(best_val_acc, val_acc)

    print(f"Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}")
    print(f"Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.4f}, Val LogLoss: {val_logloss:.4f}")

    # Save best model; the checkpoint is selected on validation log-loss
    if val_logloss < best_log_loss:
        best_log_loss = val_logloss
        torch.save(model.state_dict(), best_model_path)
        print(f"New best model saved with LogLoss: {val_logloss:.4f}")

In [None]:
# Plot training curves: losses, validation accuracy and validation log-loss
fig, axes = plt.subplots(1, 3, figsize=(15, 5))

axes[0].plot(train_losses, label='Train Loss')
axes[0].plot(val_losses, label='Val Loss')
axes[0].set_title('Loss Curves')
axes[0].set_xlabel('Epoch')
axes[0].set_ylabel('Loss')
axes[0].legend()

axes[1].plot(val_accs, label='Val Accuracy')
axes[1].set_title('Validation Accuracy')
axes[1].set_xlabel('Epoch')
axes[1].set_ylabel('Accuracy')
axes[1].legend()

axes[2].plot(val_log_losses, label='Val LogLoss')
axes[2].set_title('Validation LogLoss')
axes[2].set_xlabel('Epoch')
axes[2].set_ylabel('LogLoss')
axes[2].legend()

plt.tight_layout()

# savefig() does not create missing folders, so make sure the target exists
figure_path = '/home/code/figures/baseline_training_curves.png'
os.makedirs(os.path.dirname(figure_path), exist_ok=True)
plt.savefig(figure_path)
plt.show()

print(f"Best validation LogLoss: {best_log_loss:.4f}")

In [None]:
# Load best model and make predictions on test set.
# map_location keeps the load working when the checkpoint was written from a
# different device than the one in use now (e.g. saved on GPU, loaded on CPU).
model.load_state_dict(
    torch.load('/home/code/models/baseline_cnn_best.pth', map_location=device)
)
model.eval()

# Get test files
test_files = os.listdir(test_dir)
test_files = [f for f in test_files if f.endswith('.jpg')]
test_files.sort(key=lambda x: int(x.split('.')[0]))  # Sort by numeric ID

print(f"Total test images: {len(test_files)}")

# Create test dataset (same preprocessing as validation: no augmentation)
test_transform = transforms.Compose([
    transforms.Resize((128, 128)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

test_dataset = DogsCatsDataset(
    [os.path.join(test_dir, f) for f in test_files],
    [0] * len(test_files),  # dummy labels
    transform=test_transform
)
# shuffle=False keeps predictions in the same order as test_files
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False, num_workers=2)

# Make predictions
all_test_probs = []
with torch.no_grad():
    for images, _ in tqdm(test_loader, desc="Testing"):
        images = images.to(device)
        # Flatten to 1-D explicitly. A bare squeeze() would turn a final
        # single-image batch into a 0-d tensor that extend() cannot iterate.
        outputs = model(images).reshape(-1)
        all_test_probs.extend(outputs.cpu().numpy())

print(f"Generated {len(all_test_probs)} predictions")
print(f"Sample predictions: {all_test_probs[:5]}")

In [None]:
# Create submission file: one row per test image, keyed by its numeric ID
submission_df = pd.DataFrame({
    'id': [int(f.split('.')[0]) for f in test_files],
    'label': all_test_probs
})

submission_df = submission_df.sort_values('id')

# to_csv() does not create missing folders, so make sure the target exists
submission_path = '/home/submission/submission.csv'
os.makedirs(os.path.dirname(submission_path), exist_ok=True)
submission_df.to_csv(submission_path, index=False)

print("Submission file created:")
print(submission_df.head())
print(f"\nSubmission shape: {submission_df.shape}")
print(f"Prediction range: [{submission_df['label'].min():.4f}, {submission_df['label'].max():.4f}]")