In [8]:
%pip install pathlib
%pip install numpy
%pip install pandas
%pip install pillow
%pip install torch
%pip install torchvision

Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.
Collecting torchvision
  Using cached torchvision-0.24.1-cp312-cp312-manylinux_2_28_x86_64.whl.metadata (5.9 kB)
Using cached torchvision-0.24.1-cp312-cp312-manylinux_2_28_x86_64.whl (8.0 MB)
Installing collected packages: torchvision
Successfully installed torchvision-0.24.1
Note: you may need to restart the kernel to use updated packages.


In [10]:
import os
import json
import random
from pathlib import Path

import numpy as np
import pandas as pd
from PIL import Image

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, random_split
from torchvision import transforms


In [11]:
# === PATHS – ADJUST THESE IF NEEDED ===
# Root folder that contains subfolders like:
# picture/2nd_Ave_49_st, picture/Queens_Midtown_Tunnel, ...
BASE_PICTURE_DIR = Path("picture")

# Folder that contains JSON label files like:
# 2nd_Ave_49_st_labels.json, Queens_Midtown_Tunnel_labels.json, ...
# If yours are in "car_counter/labels", change this to Path("car_counter/labels")
LABELS_DIR = Path("labels")

# Where to save the trained model
MODEL_DIR = Path("models")
# parents=True so a nested MODEL_DIR (e.g. "out/models") also works
MODEL_DIR.mkdir(parents=True, exist_ok=True)

MODEL_PATH = MODEL_DIR / "car_count_cnn.pth"

# Training config
IMAGE_SIZE = 224      # images will be resized to 224x224
BATCH_SIZE = 32
NUM_EPOCHS = 15
LEARNING_RATE = 1e-4
VAL_SPLIT = 0.2       # 80% train, 20% validation

# Reproducibility: seed every RNG the notebook draws from, so that weight
# initialisation, DataLoader shuffling, augmentation and DataFrame.sample()
# behave the same after "Restart Kernel -> Run All".
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device


device(type='cuda')

In [12]:
# Build one record per usable labelled image found in LABELS_DIR/*_labels.json.
# An entry is used only if it is not flagged as an error, has a car count,
# and its image file can be located under BASE_PICTURE_DIR.
all_rows = []
missing_images = 0  # entries whose image file could not be found on disk

# Fixed column order so that an empty result still yields a frame with the
# expected schema instead of a column-less DataFrame.
LABEL_COLUMNS = [
    "filepath", "description", "filename", "car_amount",
    "date", "time", "description_total",
]

if not LABELS_DIR.exists():
    raise FileNotFoundError(f"Labels directory not found: {LABELS_DIR.resolve()}")

# sorted(): glob order is filesystem dependent; a stable row order keeps the
# seeded train/validation split identical across machines and runs.
for json_path in sorted(LABELS_DIR.glob("*_labels.json")):
    with open(json_path, "r", encoding="utf-8") as f:
        data = json.load(f)

    # Only a top-level JSON list of entries is understood; skip anything else.
    if not isinstance(data, list):
        continue

    for entry in data:
        if not isinstance(entry, dict):
            continue

        # Skip bad / issue images
        if entry.get("error") == "YES":
            continue
        car_amount = entry.get("car_amount")
        if car_amount is None:
            continue

        desc = entry.get("description")
        filename = entry.get("filename")
        if not filename:
            # Without a filename there is nothing to look up (and an empty
            # name would wrongly match the picture directory itself).
            continue

        # Try picture/<description>/<filename>, fall back to picture/<filename>
        candidates = []
        if desc:
            candidates.append(BASE_PICTURE_DIR / desc / filename)
        candidates.append(BASE_PICTURE_DIR / filename)

        img_path = next((p for p in candidates if p.exists()), None)
        if img_path is None:
            # image not found → skip, but keep count so it is not silent
            missing_images += 1
            continue

        all_rows.append({
            "filepath": str(img_path),
            "description": desc,
            "filename": filename,
            "car_amount": int(car_amount),
            "date": entry.get("date"),
            "time": entry.get("time"),
            "description_total": entry.get("description_total", None)
        })

df = pd.DataFrame(all_rows, columns=LABEL_COLUMNS)
print("Total labeled images:", len(df))
if missing_images:
    print("Skipped (image file not found):", missing_images)
df.head()


Total labeled images: 15


Unnamed: 0,filepath,description,filename,car_amount,date,time,description_total
0,picture/2nd_Ave_49_st/2nd_Ave_49_st_1.png,2nd_Ave_49_st,2nd_Ave_49_st_1.png,9,2025-12-08,12-57-30,1
1,picture/2nd_Ave_49_st/2nd_Ave_49_st_2.png,2nd_Ave_49_st,2nd_Ave_49_st_2.png,6,2025-12-08,12-58-30,2
2,picture/E_63_St/E_63_St_1.png,E_63_St,E_63_St_1.png,7,2025-12-08,12-57-30,1
3,picture/Queens_Midtown_Tunnel/Queens_Midtown_T...,Queens_Midtown_Tunnel,Queens_Midtown_Tunnel_1.png,7,2025-12-08,12-57-30,1
4,picture/Queens_Plaza_North/Queens_Plaza_North_...,Queens_Plaza_North,Queens_Plaza_North_1.png,5,2025-12-08,12-57-30,1


In [13]:
# Preprocessing pipelines.
# Both pipelines resize to a square of IMAGE_SIZE pixels, convert to a tensor
# and normalise each channel with the widely used ImageNet statistics.
# Only the training pipeline adds random augmentation (flip + colour jitter).
_target_hw = (IMAGE_SIZE, IMAGE_SIZE)
_channel_mean = [0.485, 0.456, 0.406]
_channel_std = [0.229, 0.224, 0.225]

_train_steps = [
    transforms.Resize(_target_hw),
    transforms.RandomHorizontalFlip(),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2),
    transforms.ToTensor(),
    transforms.Normalize(mean=_channel_mean, std=_channel_std),
]
train_transform = transforms.Compose(_train_steps)

_val_steps = [
    transforms.Resize(_target_hw),
    transforms.ToTensor(),
    transforms.Normalize(mean=_channel_mean, std=_channel_std),
]
val_transform = transforms.Compose(_val_steps)


class CarCountDataset(Dataset):
    """Map-style dataset yielding ``(image, car_count)`` pairs for regression.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide a ``filepath`` column (path to an image file) and a
        ``car_amount`` column (numeric label). The frame is re-indexed to
        0..n-1 so that positional dataset indices always line up with rows.
    transform : callable, optional
        Applied to the RGB ``PIL.Image`` before it is returned. When omitted
        the raw PIL image is returned.
    """

    def __init__(self, df, transform=None):
        self.df = df.reset_index(drop=True)
        self.transform = transform

    def __len__(self):
        """Number of samples (rows) in the dataset."""
        return len(self.df)

    def __getitem__(self, idx):
        """Return ``(image, target)`` for row ``idx``.

        ``target`` is a 0-dim ``float32`` tensor holding the car count.
        """
        row = self.df.iloc[idx]
        # Image.open keeps the file handle open until the image object is
        # closed; convert() returns an independent in-memory copy, so the
        # handle can be released right away instead of waiting for the GC.
        with Image.open(row["filepath"]) as handle:
            img = handle.convert("RGB")
        if self.transform:
            img = self.transform(img)
        # Regression target (float)
        target = torch.tensor(row["car_amount"], dtype=torch.float32)
        return img, target


In [14]:
# Train / validation split and the DataLoaders built on top of it.
full_dataset = CarCountDataset(df, transform=train_transform)

n_total = len(full_dataset)
if n_total < 2:
    raise ValueError(
        f"Need at least 2 labeled images for a train/validation split, found {n_total}"
    )

# Keep at least one validation sample: with very few images
# int(n_total * VAL_SPLIT) would be 0 and validation metrics would be
# averaged over zero items.
val_size = max(1, int(n_total * VAL_SPLIT))
train_size = n_total - val_size

# Fixed generator seed -> the same images land in each split on every run.
train_dataset, val_subset = random_split(
    full_dataset,
    [train_size, val_size],
    generator=torch.Generator().manual_seed(42)
)

# Important: validation should use val_transform instead of train_transform.
# random_split only produces index views over full_dataset (which augments),
# so rebuild the validation set from the same row positions without augmentation.
val_dataset = CarCountDataset(df.iloc[val_subset.indices], transform=val_transform)

train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=2)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=2)

len(train_dataset), len(val_dataset)


(12, 3)

In [15]:
class SimpleCarCounterCNN(nn.Module):
    """Small CNN that regresses a single car count from an RGB image.

    Four ``Conv -> BatchNorm -> ReLU -> MaxPool(2)`` stages (32, 64, 128, 256
    channels) are followed by an adaptive average pool to 14x14 and a
    two-layer fully connected regressor.

    The adaptive pool has no parameters and sits at the end of ``features``,
    so ``state_dict`` keys are the same as without it. For 224x224 inputs the
    feature map is already 14x14 and the pool is an identity; for other input
    sizes it keeps the flattened size at ``256 * 14 * 14`` so the regressor
    still fits.
    """

    _STAGE_CHANNELS = (32, 64, 128, 256)
    _POOLED_HW = 14  # spatial size fed to the regressor

    def __init__(self):
        super().__init__()

        # Flat layer list (not nested blocks) so parameter names stay
        # features.0, features.1, ... and existing checkpoints keep loading.
        layers = []
        in_channels = 3
        for out_channels in self._STAGE_CHANNELS:
            layers += [
                nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=1, padding=1),
                nn.BatchNorm2d(out_channels),
                nn.ReLU(inplace=True),
                nn.MaxPool2d(2),  # halves H and W: 224 -> 112 -> 56 -> 28 -> 14
            ]
            in_channels = out_channels
        layers.append(nn.AdaptiveAvgPool2d((self._POOLED_HW, self._POOLED_HW)))
        self.features = nn.Sequential(*layers)

        self.regressor = nn.Sequential(
            nn.Flatten(),
            nn.Linear(in_channels * self._POOLED_HW * self._POOLED_HW, 512),
            nn.ReLU(inplace=True),
            nn.Dropout(0.3),
            nn.Linear(512, 1)  # output: predicted car count
        )

    def forward(self, x):
        """Map a ``(batch, 3, H, W)`` tensor to predictions of shape ``(batch,)``."""
        x = self.features(x)
        x = self.regressor(x)
        return x.squeeze(1)  # shape: (batch,)

# Build the network on the selected device, then the loss (mean squared error
# on the predicted count) and the Adam optimiser over all of its parameters.
model = SimpleCarCounterCNN()
model = model.to(device)

criterion = nn.MSELoss()
optimizer = optim.Adam(params=model.parameters(), lr=LEARNING_RATE)

model


SimpleCarCounterCNN(
  (features): Sequential(
    (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
    (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (4): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (5): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (6): ReLU(inplace=True)
    (7): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (8): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (10): ReLU(inplace=True)
    (11): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (12): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): BatchNorm2d(256, eps=1e-05, moment

In [16]:
def evaluate(model, loader):
    """Compute the sample-weighted mean loss and MAE of ``model`` over ``loader``.

    Relies on the notebook-level ``criterion`` (loss with mean reduction) and
    ``device``. The model is switched to eval mode and left in it; gradients
    are disabled during the pass.

    Parameters
    ----------
    model : torch.nn.Module
        Network returning one prediction per sample, shape ``(batch,)``.
    loader : iterable
        Yields ``(images, targets)`` batches.

    Returns
    -------
    tuple of float
        ``(mean criterion value, mean absolute error)``; MSE and MAE when
        ``criterion`` is ``nn.MSELoss``.

    Raises
    ------
    ValueError
        If ``loader`` yields no samples (averages would be undefined).
    """
    model.eval()
    total_loss = 0.0
    total_mae = 0.0
    n = 0
    with torch.no_grad():
        for images, targets in loader:
            images = images.to(device)
            targets = targets.to(device)

            outputs = model(images)
            loss = criterion(outputs, targets)

            batch_size = images.size(0)
            # criterion returns the batch mean; re-weight by batch size so a
            # smaller last batch does not skew the overall average.
            total_loss += loss.item() * batch_size
            total_mae += torch.abs(outputs - targets).sum().item()
            n += batch_size

    if n == 0:
        raise ValueError("evaluate() received an empty loader: no samples to average over")

    return total_loss / n, total_mae / n  # MSE, MAE


# Training loop: one pass over train_loader per epoch, followed by a
# validation pass. The weights with the lowest validation MAE seen so far
# are written to MODEL_PATH.
best_val_mae = float("inf")

for epoch in range(1, NUM_EPOCHS + 1):
    model.train()  # evaluate() leaves the model in eval mode; switch back each epoch
    running_loss = 0.0

    for images, targets in train_loader:
        images = images.to(device)
        targets = targets.to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()

        # loss is the batch mean; weight by batch size for a per-sample average
        running_loss += loss.item() * images.size(0)

    train_mse = running_loss / len(train_dataset)
    val_mse, val_mae = evaluate(model, val_loader)

    print(f"Epoch {epoch:02d}/{NUM_EPOCHS} "
          f"| Train MSE: {train_mse:.3f} "
          f"| Val MSE: {val_mse:.3f} "
          f"| Val MAE: {val_mae:.3f}")

    # Save best model (based on MAE)
    if val_mae < best_val_mae:
        best_val_mae = val_mae
        torch.save(model.state_dict(), MODEL_PATH)
        print(f"  🔥 New best model saved to {MODEL_PATH}")


Epoch 01/15 | Train MSE: 15.827 | Val MSE: 31.296 | Val MAE: 4.888
  ðŸ”¥ New best model saved to models/car_count_cnn.pth
Epoch 02/15 | Train MSE: 279.729 | Val MSE: 36.964 | Val MAE: 5.382
Epoch 03/15 | Train MSE: 8.658 | Val MSE: 41.266 | Val MAE: 5.747
Epoch 04/15 | Train MSE: 27.159 | Val MSE: 41.899 | Val MAE: 5.800
Epoch 05/15 | Train MSE: 30.928 | Val MSE: 41.946 | Val MAE: 5.805
Epoch 06/15 | Train MSE: 26.094 | Val MSE: 41.499 | Val MAE: 5.767
Epoch 07/15 | Train MSE: 20.816 | Val MSE: 40.773 | Val MAE: 5.705
Epoch 08/15 | Train MSE: 17.688 | Val MSE: 40.096 | Val MAE: 5.646
Epoch 09/15 | Train MSE: 13.792 | Val MSE: 39.280 | Val MAE: 5.574
Epoch 10/15 | Train MSE: 11.518 | Val MSE: 38.263 | Val MAE: 5.483
Epoch 11/15 | Train MSE: 9.267 | Val MSE: 37.049 | Val MAE: 5.372
Epoch 12/15 | Train MSE: 7.587 | Val MSE: 35.534 | Val MAE: 5.230
Epoch 13/15 | Train MSE: 4.380 | Val MSE: 33.731 | Val MAE: 5.057
Epoch 14/15 | Train MSE: 3.823 | Val MSE: 31.777 | Val MAE: 4.864
  ðŸ”¥ New

In [17]:
# Recreate the architecture on the target device and restore the best
# checkpoint written during training, then switch to inference mode.
best_model = SimpleCarCounterCNN()
best_model = best_model.to(device)

best_state = torch.load(MODEL_PATH, map_location=device)
best_model.load_state_dict(best_state)

best_model.eval()


SimpleCarCounterCNN(
  (features): Sequential(
    (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
    (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (4): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (5): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (6): ReLU(inplace=True)
    (7): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (8): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (10): ReLU(inplace=True)
    (11): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (12): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): BatchNorm2d(256, eps=1e-05, moment

In [18]:
single_image_transform = val_transform  # same as validation

def predict_cars(image_path: str, model=best_model):
    """Predict the number of cars in a single image file.

    Parameters
    ----------
    image_path : str
        Path to the image; it is opened with PIL and converted to RGB.
    model : torch.nn.Module, optional
        Network used for the prediction. The default is bound when this
        function is defined, so re-run this cell after reloading
        ``best_model`` or pass the model explicitly. The model is used as-is;
        this function does not change its train/eval mode.

    Returns
    -------
    int
        The rounded prediction, clamped so it is never negative.
    """
    # Release the file handle immediately; convert() returns an independent copy.
    with Image.open(image_path) as raw:
        img = raw.convert("RGB")
    # Add the batch dimension the model expects and move to the compute device.
    x = single_image_transform(img).unsqueeze(0).to(device)

    with torch.no_grad():
        pred = model(x).item()

    # We treat the output as a real number and round to the nearest integer
    # (Python's round() resolves exact .5 ties to the even neighbour).
    return max(0, int(round(pred)))


# Quick sanity check on the first labelled image: show its path, the model's
# prediction and the ground-truth count.
first_row = df.iloc[0]
example_path = first_row["filepath"]
print("Example path:", example_path)

pred_count = predict_cars(example_path)
print("Predicted cars:", pred_count)
print("True cars:", first_row["car_amount"])


Example path: picture/2nd_Ave_49_st/2nd_Ave_49_st_1.png
Predicted cars: 1
True cars: 9


In [19]:
# Spot-check: compare prediction and label on up to five distinct images.
# Drawing them in one sample() call (without replacement) avoids showing the
# same image twice, and min() keeps this working for frames with < 5 rows.
spot_check_rows = df.sample(n=min(5, len(df)))

for _, row in spot_check_rows.iterrows():
    p = row["filepath"]
    true_cars = int(row["car_amount"])
    pred_cars = predict_cars(p)
    print(f"{Path(p).name:40s}  true={true_cars:2d},  pred={pred_cars:2d}")


S_Conduit_Ave_150_5.png                   true= 4,  pred= 1
S_Conduit_Ave_150_2.png                   true= 2,  pred= 1
S_Conduit_Ave_150_3.png                   true= 1,  pred= 1
S_Conduit_Ave_150_9.png                   true= 3,  pred= 1
Queens_Plaza_North_2.png                  true= 0,  pred= 1
