# Parking Lot Occupancy Estimation - Training

This notebook trains deep learning models for parking lot occupancy estimation.

**Author:** Aminu Yiwere  
**Date:** November 4, 2025  
**Environment:** Google Colab

---


## 1. Setup and Installation

Install the required packages and, when running on Colab, mount Google Drive.


In [None]:
# Detect the runtime: on Colab the `google.colab` package is already loaded
import sys

IN_COLAB = 'google.colab' in sys.modules

if not IN_COLAB:
    print("Running locally")
else:
    print("Running on Google Colab")
    # Expose Google Drive under /content/drive so data and outputs persist
    from google.colab import drive
    drive.mount('/content/drive')

In [None]:
# Install required packages
!pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118
!pip install albumentations
!pip install timm  # PyTorch Image Models
!pip install tqdm
!pip install matplotlib seaborn
!pip install scikit-learn
!pip install tensorboard

## 2. Import Libraries


In [None]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from PIL import Image
import cv2
from tqdm.auto import tqdm
import random
import warnings
warnings.filterwarnings('ignore')

# PyTorch imports
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torch.utils.tensorboard import SummaryWriter
import torchvision
from torchvision import transforms, models

# Additional imports
import albumentations as A
from albumentations.pytorch import ToTensorV2
from sklearn.metrics import accuracy_score, precision_recall_fscore_support, confusion_matrix
import timm

# Set random seeds for reproducibility
def set_seed(seed=42):
    """Seed every random number generator this notebook relies on.

    Applies `seed` to Python's `random`, NumPy, and PyTorch (CPU and all CUDA
    devices), then sets the cuDNN `deterministic` flag and clears `benchmark`.

    Args:
        seed: Integer seed shared by all generators (default: 42).
    """
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for apply_seed in seeders:
        apply_seed(seed)

    # cuDNN flags: fixed kernel selection, deterministic kernels
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True

# Seed all RNGs once, before any data loading or model construction
set_seed(42)

# Report the PyTorch build and whether a CUDA device is usable
print(f"PyTorch Version: {torch.__version__}")
print(f"CUDA Available: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    # GPU details are only queried when CUDA is actually available
    print(f"CUDA Version: {torch.version.cuda}")
    print(f"GPU Device: {torch.cuda.get_device_name(0)}")

## 3. Configuration and Hyperparameters


In [None]:
# Configuration
class Config:
    """Single place for every path and hyperparameter used by the notebook."""

    # ---- Paths ----
    # Colab runs use the mounted Google Drive (adjust the data path to your
    # own Drive layout); local runs use folders relative to the notebook.
    DATA_DIR, OUTPUT_DIR = (
        ('/content/drive/MyDrive/parking_lot_data', '/content/drive/MyDrive/parking_lot_output')
        if IN_COLAB
        else ('./data/processed', './output')
    )

    TRAIN_DIR = os.path.join(DATA_DIR, 'train')
    VAL_DIR = os.path.join(DATA_DIR, 'validation')
    CHECKPOINT_DIR = os.path.join(OUTPUT_DIR, 'checkpoints')
    LOG_DIR = os.path.join(OUTPUT_DIR, 'logs')

    # ---- Model ----
    # Options: resnet50, resnet101, vgg16, efficientnet_b0, efficientnet_b3
    MODEL_NAME = 'resnet50'
    NUM_CLASSES = 2  # occupied, vacant
    PRETRAINED = True

    # ---- Optimisation ----
    BATCH_SIZE = 32
    NUM_EPOCHS = 50
    LEARNING_RATE = 0.001
    WEIGHT_DECAY = 1e-4

    # ---- Input pipeline ----
    IMG_SIZE = 224
    NUM_WORKERS = 2 if IN_COLAB else 4

    # ---- Early stopping / LR schedule ----
    EARLY_STOPPING_PATIENCE = 10
    LR_SCHEDULER_PATIENCE = 5
    LR_SCHEDULER_FACTOR = 0.1

    # ---- Device: GPU when available, otherwise CPU ----
    DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

config = Config()

# Make sure the checkpoint and log folders exist before anything writes to them
for output_subdir in (config.CHECKPOINT_DIR, config.LOG_DIR):
    os.makedirs(output_subdir, exist_ok=True)

# Echo the settings that matter most for this run
print("Configuration:")
print(f"  Device: {config.DEVICE}")
print(f"  Model: {config.MODEL_NAME}")
print(f"  Batch Size: {config.BATCH_SIZE}")
print(f"  Learning Rate: {config.LEARNING_RATE}")
print(f"  Epochs: {config.NUM_EPOCHS}")

## 4. Dataset Class and Data Augmentation


In [None]:
# Custom Dataset Class
class ParkingLotDataset(Dataset):
    """Image-classification dataset read from one sub-directory per class.

    `data_dir` is expected to contain an 'occupied' and a 'vacant' folder;
    every .png/.jpg/.jpeg file inside a folder becomes one sample labelled
    with that folder's class index (occupied=0, vacant=1).
    """

    # File extensions (lower-case) that are treated as images
    IMAGE_EXTENSIONS = ('.png', '.jpg', '.jpeg')

    def __init__(self, data_dir, transform=None):
        """
        Args:
            data_dir: Directory with 'occupied' and 'vacant' subdirectories
            transform: Albumentations transform pipeline
        """
        self.data_dir = data_dir
        self.transform = transform
        self.classes = ['occupied', 'vacant']
        self.class_to_idx = {cls: idx for idx, cls in enumerate(self.classes)}

        # Load image paths and labels
        self.samples = []
        for class_name in self.classes:
            class_dir = os.path.join(data_dir, class_name)
            if not os.path.isdir(class_dir):
                # Keep going (best effort) but make the gap visible instead of
                # silently producing a one-class or empty dataset.
                print(f"Warning: class directory not found, skipping: {class_dir}")
                continue

            label = self.class_to_idx[class_name]
            # Sort so sample order does not depend on the file system's listing order
            for img_name in sorted(os.listdir(class_dir)):
                if img_name.lower().endswith(self.IMAGE_EXTENSIONS):
                    self.samples.append((os.path.join(class_dir, img_name), label))

        print(f"Found {len(self.samples)} images in {data_dir}")

    def __len__(self):
        """Return the number of (image path, label) samples."""
        return len(self.samples)

    def __getitem__(self, idx):
        """Load sample `idx` and return `(image, label)`.

        The image is read with OpenCV and converted from BGR to RGB; when a
        transform is set, its 'image' output is returned instead.

        Raises:
            OSError: If the image file cannot be read or decoded.
        """
        img_path, label = self.samples[idx]

        # cv2.imread signals failure by returning None rather than raising
        image = cv2.imread(img_path)
        if image is None:
            raise OSError(f"Failed to read image: {img_path}")
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        # Apply transforms
        if self.transform:
            augmented = self.transform(image=image)
            image = augmented['image']

        return image, label

    def get_class_distribution(self):
        """Return the per-class sample counts, indexed by class index.

        The result always has one entry per class, so a class without any
        samples is reported as 0 instead of being missing.
        """
        labels = np.asarray([label for _, label in self.samples], dtype=np.int64)
        return np.bincount(labels, minlength=len(self.classes))

In [None]:
# Data Augmentation Pipeline
def get_train_transforms(img_size=224):
    """Build the Albumentations pipeline applied to training images.

    Images are resized to `img_size` x `img_size`, randomly augmented
    (flip, brightness/contrast, shift/scale/rotate, noise, blur), normalized
    per channel, and converted to a tensor.

    Args:
        img_size: Side length of the square output image.

    Returns:
        An `A.Compose` pipeline.
    """
    # Blur group: applied with p=0.2, using one of the three blur variants
    blur_choice = A.OneOf(
        [
            A.MotionBlur(p=0.2),
            A.MedianBlur(blur_limit=3, p=0.1),
            A.Blur(blur_limit=3, p=0.1),
        ],
        p=0.2,
    )

    # Per-channel normalization constants (same values as the validation pipeline)
    normalize = A.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225],
    )

    steps = [
        A.Resize(img_size, img_size),
        A.HorizontalFlip(p=0.5),
        A.RandomBrightnessContrast(p=0.5),
        A.ShiftScaleRotate(shift_limit=0.1, scale_limit=0.1, rotate_limit=15, p=0.5),
        A.GaussNoise(p=0.2),
        blur_choice,
        normalize,
        ToTensorV2(),
    ]
    return A.Compose(steps)

def get_val_transforms(img_size=224):
    """Build the deterministic Albumentations pipeline for validation images.

    Only resizing, per-channel normalization, and tensor conversion are
    applied; no random augmentation is involved.

    Args:
        img_size: Side length of the square output image.

    Returns:
        An `A.Compose` pipeline.
    """
    resize = A.Resize(img_size, img_size)
    normalize = A.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225],
    )
    return A.Compose([resize, normalize, ToTensorV2()])

## 5. Load and Explore Dataset


In [None]:
# Datasets: augmented pipeline for training, deterministic one for validation
train_dataset = ParkingLotDataset(
    config.TRAIN_DIR,
    transform=get_train_transforms(config.IMG_SIZE),
)
val_dataset = ParkingLotDataset(
    config.VAL_DIR,
    transform=get_val_transforms(config.IMG_SIZE),
)

# Loaders: only the training loader shuffles
train_loader = DataLoader(
    train_dataset,
    shuffle=True,
    batch_size=config.BATCH_SIZE,
    num_workers=config.NUM_WORKERS,
    pin_memory=True,
)
val_loader = DataLoader(
    val_dataset,
    shuffle=False,
    batch_size=config.BATCH_SIZE,
    num_workers=config.NUM_WORKERS,
    pin_memory=True,
)

# Sizes of each split, in samples and in batches
print(f"\nDataset Statistics:")
print(f"  Training samples: {len(train_dataset)}")
print(f"  Validation samples: {len(val_dataset)}")
print(f"  Training batches: {len(train_loader)}")
print(f"  Validation batches: {len(val_loader)}")

# Per-class counts (index 0 = occupied, index 1 = vacant)
train_dist = train_dataset.get_class_distribution()
val_dist = val_dataset.get_class_distribution()

print(f"\nClass Distribution:")
print(f"  Training - Occupied: {train_dist[0]}, Vacant: {train_dist[1]}")
print(f"  Validation - Occupied: {val_dist[0]}, Vacant: {val_dist[1]}")

In [None]:
# Visualize sample images
def visualize_samples(dataset, num_samples=8):
    """Show a grid of randomly drawn samples from `dataset`.

    Each sample is de-normalized with the same mean/std used by the
    transform pipelines, clipped to [0, 1], and titled with its class name.
    The grid has four columns and as many rows as `num_samples` requires
    (the default of 8 yields a 2x4 grid).

    Args:
        dataset: Indexable dataset returning `(image, label)` and exposing
            `classes`. The image must support `.permute(1, 2, 0).numpy()`,
            i.e. it is assumed to be a normalized channel-first tensor as
            produced by the transform pipelines.
        num_samples: Number of images to draw; nothing is plotted when <= 0.

    Raises:
        ValueError: If `dataset` is empty.
    """
    if num_samples <= 0:
        return
    if len(dataset) == 0:
        raise ValueError("Cannot visualize samples from an empty dataset")

    # Size the grid from num_samples instead of assuming exactly 2x4 panels
    n_cols = 4
    n_rows = -(-num_samples // n_cols)  # ceiling division
    fig, axes = plt.subplots(
        n_rows, n_cols, figsize=(4 * n_cols, 4 * n_rows), squeeze=False
    )
    axes = axes.ravel()

    # Normalization constants to undo before display
    mean = np.array([0.485, 0.456, 0.406])
    std = np.array([0.229, 0.224, 0.225])

    for ax in axes[:num_samples]:
        idx = random.randint(0, len(dataset) - 1)
        img, label = dataset[idx]

        # Channel-first tensor -> channel-last array, then invert the normalization
        img = img.permute(1, 2, 0).numpy()
        img = np.clip(std * img + mean, 0, 1)

        ax.imshow(img)
        ax.set_title(f"Label: {dataset.classes[label]}")

    # Hide the frame on every panel, including unused ones in the last row
    for ax in axes:
        ax.axis('off')

    plt.tight_layout()
    plt.show()

# Sanity check: preview random training samples after the augmentation pipeline
print("Sample Training Images:")
visualize_samples(train_dataset)

## 6. Model Architecture


In [None]:
def create_model(model_name, num_classes=2, pretrained=True):
    """
    Create a model for parking lot occupancy classification

    torchvision architectures (resnet50, resnet101, vgg16) get their final
    linear layer replaced by one with `num_classes` outputs; timm
    architectures (efficientnet_b0, efficientnet_b3) are created directly
    with the requested number of classes.

    Args:
        model_name: Name of the model architecture
        num_classes: Number of output classes
        pretrained: Whether to use pretrained weights

    Returns:
        model: PyTorch model

    Raises:
        ValueError: If `model_name` is not one of the supported architectures.
    """
    # torchvision deprecated the boolean `pretrained=` flag in favour of
    # `weights=`. 'IMAGENET1K_V1' selects the same weights the legacy
    # `pretrained=True` loaded, so results stay comparable.
    tv_weights = 'IMAGENET1K_V1' if pretrained else None

    if model_name in ('resnet50', 'resnet101'):
        model = getattr(models, model_name)(weights=tv_weights)
        # Swap the ImageNet head for a fresh classifier
        model.fc = nn.Linear(model.fc.in_features, num_classes)

    elif model_name == 'vgg16':
        model = models.vgg16(weights=tv_weights)
        # classifier[6] is the last linear layer of VGG's head
        num_features = model.classifier[6].in_features
        model.classifier[6] = nn.Linear(num_features, num_classes)

    elif model_name in ('efficientnet_b0', 'efficientnet_b3'):
        model = timm.create_model(model_name, pretrained=pretrained, num_classes=num_classes)

    else:
        raise ValueError(f"Model {model_name} not supported")

    return model

# Build the configured architecture and place it on the training device
model = create_model(
    model_name=config.MODEL_NAME,
    num_classes=config.NUM_CLASSES,
    pretrained=config.PRETRAINED,
).to(config.DEVICE)

# Parameter counts: everything vs. only the weights that receive gradients
total_params = sum(param.numel() for param in model.parameters())
trainable_params = sum(
    param.numel() for param in model.parameters() if param.requires_grad
)

print(f"\nModel: {config.MODEL_NAME}")
print(f"  Total parameters: {total_params:,}")
print(f"  Trainable parameters: {trainable_params:,}")

## 7. Loss Function and Optimizer


In [None]:
# Loss function
criterion = nn.CrossEntropyLoss()

# Optimizer
optimizer = optim.Adam(
    model.parameters(),
    lr=config.LEARNING_RATE,
    weight_decay=config.WEIGHT_DECAY
)

# Learning rate scheduler: multiply the LR by `factor` once the monitored
# value (mode='min', i.e. lower is better) has not improved for `patience` epochs.
# The `verbose` argument is deprecated in recent PyTorch releases, so it is
# not passed; read optimizer.param_groups[0]['lr'] to report the current LR.
scheduler = optim.lr_scheduler.ReduceLROnPlateau(
    optimizer,
    mode='min',
    factor=config.LR_SCHEDULER_FACTOR,
    patience=config.LR_SCHEDULER_PATIENCE
)

# TensorBoard writer
writer = SummaryWriter(log_dir=config.LOG_DIR)

print("Training setup complete!")

## 8. Training and Validation Functions


In [None]:
def train_epoch(model, dataloader, criterion, optimizer, device):
    """
    Train for one epoch

    Args:
        model: Network to optimise; switched to training mode here.
        dataloader: Iterable yielding `(images, labels)` batches.
        criterion: Loss called as `criterion(outputs, labels)`. The epoch
            loss assumes it returns the batch mean (the default for
            `nn.CrossEntropyLoss`).
        optimizer: Optimizer stepped once per batch.
        device: Device the batches are moved to.

    Returns:
        (epoch_loss, epoch_acc): loss and accuracy averaged over the samples
        actually processed in this epoch.

    Raises:
        ValueError: If the dataloader yields no samples.
    """
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0

    pbar = tqdm(dataloader, desc='Training')
    for images, labels in pbar:
        # non_blocking only has an effect for pinned-memory batches; harmless otherwise
        images = images.to(device, non_blocking=True)
        labels = labels.to(device, non_blocking=True)

        # Forward pass
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)

        # Backward pass
        loss.backward()
        optimizer.step()

        # Statistics: weight the batch-mean loss by the batch size so a
        # smaller last batch does not skew the epoch average
        batch_size = labels.size(0)
        running_loss += loss.item() * batch_size
        predicted = outputs.detach().argmax(dim=1)
        total += batch_size
        correct += (predicted == labels).sum().item()

        # Update progress bar
        pbar.set_postfix({
            'loss': f'{loss.item():.4f}',
            'acc': f'{100 * correct / total:.2f}%'
        })

    if total == 0:
        raise ValueError("dataloader yielded no samples; cannot compute epoch metrics")

    # Normalise by the samples actually seen, so loss and accuracy stay
    # consistent even when the loader drops or resamples data
    epoch_loss = running_loss / total
    epoch_acc = correct / total

    return epoch_loss, epoch_acc


def validate_epoch(model, dataloader, criterion, device):
    """
    Validate for one epoch

    Runs the model in eval mode without gradient tracking and collects the
    predictions for loss, accuracy, and binary precision/recall/F1.

    Args:
        model: Network to evaluate; switched to eval mode here.
        dataloader: Iterable yielding `(images, labels)` batches.
        criterion: Loss called as `criterion(outputs, labels)`. The epoch
            loss assumes it returns the batch mean.
        device: Device the batches are moved to.

    Returns:
        (epoch_loss, epoch_acc, precision, recall, f1). Loss and accuracy are
        averaged over the samples actually processed. Precision, recall and
        F1 use `average='binary'`, which scores label 1 as the positive class.

    Raises:
        ValueError: If the dataloader yields no samples.
    """
    model.eval()
    running_loss = 0.0
    correct = 0
    total = 0

    all_preds = []
    all_labels = []

    with torch.no_grad():
        pbar = tqdm(dataloader, desc='Validation')
        for images, labels in pbar:
            # non_blocking only has an effect for pinned-memory batches; harmless otherwise
            images = images.to(device, non_blocking=True)
            labels = labels.to(device, non_blocking=True)

            # Forward pass
            outputs = model(images)
            loss = criterion(outputs, labels)

            # Statistics: weight the batch-mean loss by the batch size
            batch_size = labels.size(0)
            running_loss += loss.item() * batch_size
            predicted = outputs.argmax(dim=1)
            total += batch_size
            correct += (predicted == labels).sum().item()

            all_preds.extend(predicted.cpu().tolist())
            all_labels.extend(labels.cpu().tolist())

            # Update progress bar
            pbar.set_postfix({
                'loss': f'{loss.item():.4f}',
                'acc': f'{100 * correct / total:.2f}%'
            })

    if total == 0:
        raise ValueError("dataloader yielded no samples; cannot compute epoch metrics")

    # Normalise by the samples actually seen so loss and accuracy agree
    epoch_loss = running_loss / total
    epoch_acc = correct / total

    # Calculate additional metrics; zero_division=0 reports 0.0 (without a
    # warning) when a metric is undefined, e.g. no positive predictions yet
    precision, recall, f1, _ = precision_recall_fscore_support(
        all_labels, all_preds, average='binary', zero_division=0
    )

    return epoch_loss, epoch_acc, precision, recall, f1

## 9. Training Loop


In [None]:
# Training history
history = {
    'train_loss': [],
    'train_acc': [],
    'val_loss': [],
    'val_acc': [],
    'val_precision': [],
    'val_recall': [],
    'val_f1': []
}

# Early stopping
best_val_loss = float('inf')
patience_counter = 0

print("\n" + "="*50)
print("Starting Training")
print("="*50 + "\n")

for epoch in range(config.NUM_EPOCHS):
    print(f"\nEpoch {epoch + 1}/{config.NUM_EPOCHS}")
    print("-" * 50)
    
    # Train
    train_loss, train_acc = train_epoch(model, train_loader, criterion, optimizer, config.DEVICE)
    
    # Validate
    val_loss, val_acc, val_precision, val_recall, val_f1 = validate_epoch(
        model, val_loader, criterion, config.DEVICE
    )
    
    # Update learning rate
    scheduler.step(val_loss)
    
    # Save history
    history['train_loss'].append(train_loss)
    history['train_acc'].append(train_acc)
    history['val_loss'].append(val_loss)
    history['val_acc'].append(val_acc)
    history['val_precision'].append(val_precision)
    history['val_recall'].append(val_recall)
    history['val_f1'].append(val_f1)
    
    # Log to TensorBoard
    writer.add_scalar('Loss/train', train_loss, epoch)
    writer.add_scalar('Loss/val', val_loss, epoch)
    writer.add_scalar('Accuracy/train', train_acc, epoch)
    writer.add_scalar('Accuracy/val', val_acc, epoch)
    writer.add_scalar('Metrics/precision', val_precision, epoch)
    writer.add_scalar('Metrics/recall', val_recall, epoch)
    writer.add_scalar('Metrics/f1', val_f1, epoch)
    
    # Print epoch summary
    print(f"\nEpoch {epoch + 1} Summary:")
    print(f"  Train Loss: {train_loss:.4f} | Train Acc: {train_acc:.4f}")
    print(f"  Val Loss: {val_loss:.4f} | Val Acc: {val_acc:.4f}")
    print(f"  Val Precision: {val_precision:.4f} | Val Recall: {val_recall:.4f} | Val F1: {val_f1:.4f}")
    
    # Save best model
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        patience_counter = 0
        
        checkpoint_path = os.path.join(config.CHECKPOINT_DIR, f'best_model_{config.MODEL_NAME}.pth')
        torch.save({
            'epoch': epoch,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'val_loss': val_loss,
            'val_acc': val_acc,
            'history': history
        }, checkpoint_path)
        print(f"  ✓ Model saved to {checkpoint_path}")
    else:
        patience_counter += 1
    
    # Early stopping
    if patience_counter >= config.EARLY_STOPPING_PATIENCE:
        print(f"\n⚠ Early stopping triggered after {epoch + 1} epochs")
        break

writer.close()
print("\n" + "="*50)
print("Training Complete!")
print("="*50)

## 10. Plot Training History


In [None]:
def plot_training_history(history):
    """
    Plot training and validation metrics

    Draws a 2x2 figure (loss, accuracy, precision/recall, F1) from the
    per-epoch lists in `history`, saves it as 'training_history.png' in
    `config.OUTPUT_DIR`, and displays it.

    Args:
        history: Dict with the keys 'train_loss', 'val_loss', 'train_acc',
            'val_acc', 'val_precision', 'val_recall' and 'val_f1'.
    """
    fig, axes = plt.subplots(2, 2, figsize=(15, 10))

    # One entry per panel:
    # (axis, y-label, title, [(history key, legend label, extra plot kwargs), ...])
    panels = [
        (axes[0, 0], 'Loss', 'Training and Validation Loss',
         [('train_loss', 'Train Loss', {}), ('val_loss', 'Val Loss', {})]),
        (axes[0, 1], 'Accuracy', 'Training and Validation Accuracy',
         [('train_acc', 'Train Acc', {}), ('val_acc', 'Val Acc', {})]),
        (axes[1, 0], 'Score', 'Validation Precision and Recall',
         [('val_precision', 'Precision', {}), ('val_recall', 'Recall', {})]),
        (axes[1, 1], 'F1 Score', 'Validation F1 Score',
         [('val_f1', 'F1 Score', {'color': 'green'})]),
    ]

    for ax, y_label, title, curves in panels:
        for key, legend_label, plot_kwargs in curves:
            ax.plot(history[key], label=legend_label, **plot_kwargs)
        ax.set_xlabel('Epoch')
        ax.set_ylabel(y_label)
        ax.set_title(title)
        ax.legend()
        ax.grid(True)

    plt.tight_layout()
    figure_path = os.path.join(config.OUTPUT_DIR, 'training_history.png')
    plt.savefig(figure_path, dpi=300, bbox_inches='tight')
    plt.show()

# Plot the metrics of the run above; this also writes training_history.png to OUTPUT_DIR
plot_training_history(history)

## 11. Save Final Results


In [None]:
# Persist the per-epoch metrics (one row per epoch, one column per metric)
history_df = pd.DataFrame.from_dict(history)
history_df.to_csv(
    path_or_buf=os.path.join(config.OUTPUT_DIR, 'training_history.csv'),
    index=False,
)
print(f"Training history saved to {config.OUTPUT_DIR}/training_history.csv")

# Summary: each "best" value is taken independently over all epochs,
# so the three numbers may come from different epochs
print("\n" + "="*50)
print("Final Results")
print("="*50)
print(f"Best Validation Loss: {min(history['val_loss']):.4f}")
print(f"Best Validation Accuracy: {max(history['val_acc']):.4f}")
print(f"Best Validation F1 Score: {max(history['val_f1']):.4f}")
print("="*50)