# Training CNN for crowd level classification

The model takes in pictures of the bus stop, classified into their respective crowd levels.
These classified images are then fed into the model for training. The resulting model can then be used to classify the current state of the bus stop.

Folder Structure:
```
/home/root/my_project_training/
│
├── train/
│   ├── 0_empty/
│   │   ├── empty_01.jpg
│   │   ├── empty_02.jpg
│   │   └── ...
│   ├── 1_low/
│   │   ├── 1_low_01.jpg
│   │   └── ...
│   └── 2_medium/
│       ├── 2_medium_01.jpg
│       └── ...
│
└── validation/
    ├── 0_empty/
    │   └── ... (move ~10-20% of your images here)
    ├── 1_low/
    │   └── ...
    └── 2_medium/
        └── ...
```

Install dependencies:

In [1]:
pip install torch torchvision numpy

Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 25.1.1 -> 25.3
[notice] To update, run: python.exe -m pip install --upgrade pip


Import libraries:

In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
from torch.optim.lr_scheduler import ReduceLROnPlateau
import numpy as np

Define Data Transforms and Load Data

Add black bars (letterboxing) to make each image square, so that resizing it to the 224x224 input used with the pre-trained MobileNetV2 does not distort the aspect ratio.

In [3]:
# Custom transform to "letterbox" (pad) the image to a square
class SquarePad:
    """Pad an image with black borders so that its width equals its height.

    The instance is callable and is meant to be used as a step inside
    ``transforms.Compose``. The input must expose ``.size`` as a
    ``(width, height)`` pair (as PIL images do).
    """

    @staticmethod
    def _padding_for(w, h):
        """Return ``(left, top, right, bottom)`` padding that squares a w x h image.

        When the difference between the sides is odd, the extra pixel goes to
        the right/bottom edge. Using the same truncated half on both sides
        would leave the result one pixel short of square.
        """
        max_wh = max(w, h)
        pad_w = max_wh - w
        pad_h = max_wh - h
        left = pad_w // 2
        top = pad_h // 2
        return (left, top, pad_w - left, pad_h - top)

    def __call__(self, image):
        """Return ``image`` padded with zeros (black) to a square."""
        w, h = image.size
        padding = self._padding_for(w, h)
        return transforms.functional.pad(image, padding, 0, 'constant')

# Preprocessing pipeline applied to every image before it reaches the model.
# Order matters: pad to a square first so the resize keeps the aspect ratio.
preprocessing_steps = [
    SquarePad(),                    # letterbox to a square
    transforms.Resize((224, 224)),  # scale the square down to 224x224
    transforms.ToTensor(),          # convert the image to a tensor
    # Per-channel normalization; these look like the statistics commonly used
    # with ImageNet-pretrained torchvision models (confirm against the weights).
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
]
data_transform = transforms.Compose(preprocessing_steps)

Load image data for training and validation:

In [4]:
# --- 2. Load Your Custom Data from Folders ---
# ImageFolder is the PyTorch counterpart of "image_dataset_from_directory":
# every sub-folder of the root becomes one class.
# data_dir = '/home/root/my_project_training/'
data_dir = './test_train_esp32cam/'

# Both splits are read the same way and share the same preprocessing pipeline.
train_dataset, validation_dataset = (
    datasets.ImageFolder(root=data_dir + split, transform=data_transform)
    for split in ('train', 'validation')
)

Create Data Loaders & Get Classes

In [5]:
# --- 3. Create DataLoaders ---
BATCH_SIZE = 16 # Keep it small for your laptop/server

train_loader = DataLoader(
    train_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True # IMPORTANT: Shuffle training data so it doesn't learn order
)

# Validation order does not affect the metrics, so no shuffling is needed.
test_loader = DataLoader(
    validation_dataset,
    batch_size=BATCH_SIZE,
    shuffle=False
)

# --- 4. Get Class Names ---
# Automatically finds your folders: e.g., ['0_empty', '1_low', '2_medium', '3_high']
class_names = train_dataset.classes

# Each ImageFolder numbers its classes from its own sub-folders. If the
# validation split is missing (or has an extra) class folder, its label indices
# no longer line up with the training ones and the validation metrics become
# silently wrong, so fail loudly instead.
if validation_dataset.classes != class_names:
    raise ValueError(
        "Class folders differ between train and validation: "
        f"{class_names} vs {validation_dataset.classes}"
    )

NUM_CLASSES = len(class_names)
print(f"Found {NUM_CLASSES} classes: {class_names}")

Found 4 classes: ['0_empty', '1_low', '2_medium', '3_high']


Define the Model (Transfer Learning)


In [6]:
# --- 5. Load Pre-trained MobileNetV2 ---
# 'DEFAULT' selects the default pre-trained weights shipped with torchvision.
model = torchvision.models.mobilenet_v2(weights='DEFAULT')

# --- 6. Freeze the "Feature Extractor" ---
# Turn off gradients for every existing parameter so that training leaves the
# pre-trained weights untouched. This must happen before the new head is
# attached, otherwise the head would be frozen as well.
model.requires_grad_(False)

# --- 7. Replace the "Classifier Head" ---
# Swap the stock 'classifier' for a fresh head with one output per crowd-level
# class. Its parameters are newly created, so they remain trainable.
model.classifier = nn.Sequential(
    nn.Dropout(p=0.2),  # regularization against overfitting on a small dataset
    nn.Linear(model.last_channel, NUM_CLASSES),  # NUM_CLASSES-way output layer
)

# Run on the GPU when one is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f"Using device: {device}")
model.to(device)

Using device: cpu


MobileNetV2(
  (features): Sequential(
    (0): Conv2dNormActivation(
      (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU6(inplace=True)
    )
    (1): InvertedResidual(
      (conv): Sequential(
        (0): Conv2dNormActivation(
          (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
          (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (2): ReLU6(inplace=True)
        )
        (1): Conv2d(32, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
    )
    (2): InvertedResidual(
      (conv): Sequential(
        (0): Conv2dNormActivation(
          (0): Conv2d(16, 96, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (1): BatchNorm2d(96, eps=

Training Setup & Loop

In [7]:
# --- 8. Define Hyperparameters ---
lr = 0.001
num_epochs = 20 # 20 epochs is usually enough for transfer learning

criterion = nn.CrossEntropyLoss()
# Only optimize the parameters of the new classifier head
optimizer = optim.Adam(model.classifier.parameters(), lr=lr)
# Halve the learning rate when the validation loss has not improved for 3 epochs
scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=3)

# Number of samples in each split, used to turn summed losses into per-sample means
num_train_samples = len(train_loader.dataset)
num_val_samples = len(test_loader.dataset)

# --- 9. Training Loop ---
print("Starting training...")
for epoch in range(num_epochs):
    # Training phase
    model.train()
    running_loss = 0.0

    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # The criterion returns the mean over the batch. Weight it by the batch
        # size so that a smaller final batch is not over-represented in the
        # epoch average.
        running_loss += loss.item() * images.size(0)

    # Validation phase
    model.eval()
    val_loss = 0.0
    val_correct = 0
    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            loss = criterion(outputs, labels)
            val_loss += loss.item() * images.size(0)
            preds = outputs.argmax(dim=1)
            # Keep the counter a plain Python int rather than a tensor
            val_correct += (preds == labels).sum().item()

    epoch_loss = running_loss / num_train_samples
    epoch_val_loss = val_loss / num_val_samples
    val_acc = val_correct / num_val_samples

    print(f'Epoch {epoch+1}/{num_epochs} - Train Loss: {epoch_loss:.4f} - Val Loss: {epoch_val_loss:.4f} - Val Acc: {val_acc:.4f}')

    # Step the scheduler on the validation loss (mode='min')
    scheduler.step(epoch_val_loss)

print("Training complete!")

Starting training...
Epoch 1/20 - Train Loss: 1.0575 - Val Loss: 0.7572 - Val Acc: 0.7447
Epoch 2/20 - Train Loss: 0.6954 - Val Loss: 0.4798 - Val Acc: 0.9149
Epoch 3/20 - Train Loss: 0.5706 - Val Loss: 0.3579 - Val Acc: 0.9149
Epoch 4/20 - Train Loss: 0.4553 - Val Loss: 0.2848 - Val Acc: 0.9574
Epoch 5/20 - Train Loss: 0.3838 - Val Loss: 0.2381 - Val Acc: 0.9787
Epoch 6/20 - Train Loss: 0.3239 - Val Loss: 0.2073 - Val Acc: 0.9574
Epoch 7/20 - Train Loss: 0.2929 - Val Loss: 0.1770 - Val Acc: 0.9787
Epoch 8/20 - Train Loss: 0.2877 - Val Loss: 0.1557 - Val Acc: 1.0000
Epoch 9/20 - Train Loss: 0.2358 - Val Loss: 0.1281 - Val Acc: 1.0000
Epoch 10/20 - Train Loss: 0.2360 - Val Loss: 0.1230 - Val Acc: 1.0000
Epoch 11/20 - Train Loss: 0.2100 - Val Loss: 0.1003 - Val Acc: 1.0000
Epoch 12/20 - Train Loss: 0.1893 - Val Loss: 0.0980 - Val Acc: 1.0000
Epoch 13/20 - Train Loss: 0.1786 - Val Loss: 0.0847 - Val Acc: 1.0000
Epoch 14/20 - Train Loss: 0.1787 - Val Loss: 0.0822 - Val Acc: 1.0000
Epoch 15

Save the model for future use

In [8]:
import json

# --- 10. Save the Model ---
# Persist only the weights (the 'state_dict'), not the whole pickled model object.
trained_weights = model.state_dict()
torch.save(trained_weights, 'smartstop_mobilenet_v2_esp32cam.pth')
print("Model saved as 'smartstop_mobilenet_v2_esp32cam.pth'")

# Store the class names next to the weights, so that the consumer of the model
# (the Flask app) can map predicted indices back to labels.
with open('class_names.json', 'w') as names_file:
    names_file.write(json.dumps(class_names))
print("Class names saved to 'class_names.json'")

Model saved as 'smartstop_mobilenet_v2_esp32cam.pth'
Class names saved to 'class_names.json'
