In [1]:
import cv2
import numpy as np
import torch
import torch.nn as nn
import torchvision.transforms.functional as TF
import gradio as gr

# Device (CPU-only for your setup)
device = torch.device("cpu")

# Input resolution used during training
TARGET_H = 768
TARGET_W = 1024

# ImageNet normalization used in training
IMAGENET_MEAN = [0.485, 0.456, 0.406]
IMAGENET_STD  = [0.229, 0.224, 0.225]

print("Device:", device)
print("Input size:", TARGET_H, "x", TARGET_W)


  from .autonotebook import tqdm as notebook_tqdm


Device: cpu
Input size: 768 x 1024


In [2]:
import torchvision.models as models

class CSRNet(nn.Module):
    def __init__(self, load_pretrained_vgg: bool = False):
        super().__init__()

        vgg = models.vgg16(weights=None)

        # Front-end (VGG-16 up to Conv4_3)
        self.frontend = nn.Sequential(*list(vgg.features.children())[:23])

        # Back-end (Dilated CNN layers)
        self.backend = nn.Sequential(
            nn.Conv2d(512, 512, 3, padding=2, dilation=2),
            nn.ReLU(inplace=True),
            nn.Conv2d(512, 512, 3, padding=2, dilation=2),
            nn.ReLU(inplace=True),
            nn.Conv2d(512, 512, 3, padding=2, dilation=2),
            nn.ReLU(inplace=True),
            nn.Conv2d(512, 256, 3, padding=2, dilation=2),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 128, 3, padding=2, dilation=2),
            nn.ReLU(inplace=True),
            nn.Conv2d(128, 64, 3, padding=2, dilation=2),
            nn.ReLU(inplace=True),
        )

        # Output: 1-channel density map
        self.output_layer = nn.Conv2d(64, 1, 1)

    def forward(self, x):
        x = self.frontend(x)
        x = self.backend(x)
        x = self.output_layer(x)
        return x

print("CSRNet model class ready.")

CSRNet model class ready.


In [3]:
# Path to your trained model weights (from training notebook)
WEIGHTS_PATH = "csrnet_weights.pth"

model = CSRNet().to(device)

print("Loading weights from:", WEIGHTS_PATH)
state = torch.load(WEIGHTS_PATH, map_location=device)

missing, unexpected = model.load_state_dict(state, strict=False)
print("Missing keys:", missing)
print("Unexpected keys:", unexpected)

model.eval()
print("Model loaded and ready for inference.")

Loading weights from: csrnet_weights.pth
Missing keys: []
Unexpected keys: []
Model loaded and ready for inference.


In [4]:
def preprocess_frame(frame_bgr):
    """
    Convert BGR webcam frame into model-ready tensor.
    EXACT same processing as training notebook.
    """

    # BGR → RGB → float32
    rgb = cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2RGB).astype(np.float32) / 255.0

    # Resize to fixed training shape
    resized = cv2.resize(rgb, (TARGET_W, TARGET_H))

    # Convert to tensor CHW
    tensor = torch.from_numpy(resized.transpose(2, 0, 1)).float()

    # Apply ImageNet normalization
    tensor = TF.normalize(tensor, mean=IMAGENET_MEAN, std=IMAGENET_STD)

    return tensor.unsqueeze(0).to(device)


In [5]:
def predict_count(frame_bgr):
    """
    Perform CSRNet inference:
    - Input frame (BGR)
    - Output: (count, full-resolution density map)
    """

    inp = preprocess_frame(frame_bgr)

    with torch.no_grad():
        dm_small = model(inp)[0,0].cpu().numpy()

    # Upsample density map to original frame size
    dm_full = cv2.resize(dm_small, (TARGET_W, TARGET_H), interpolation=cv2.INTER_CUBIC)

    # Count = sum of density
    count = float(dm_full.sum())

    return count, dm_full


def generate_heatmap(frame_bgr, density_map):
    """
    Overlay heatmap on the frame.
    """

    dm = density_map.copy()
    if dm.max() > 0:
        dm = dm / dm.max()

    heatmap = (dm * 255).astype(np.uint8)
    heatmap = cv2.applyColorMap(heatmap, cv2.COLORMAP_JET)

    blended = cv2.addWeighted(frame_bgr, 0.6, heatmap, 0.6, 0)

    return blended


In [6]:
def process_frame(frame_rgb):
    """
    Gradio provides RGB frame.
    Convert to BGR for OpenCV processing.
    """

    frame_bgr = cv2.cvtColor(frame_rgb, cv2.COLOR_RGB2BGR)

    # Resize for display & processing
    frame_bgr = cv2.resize(frame_bgr, (TARGET_W, TARGET_H))

    # Predict
    count, dm = predict_count(frame_bgr)

    # Heatmap overlay
    heatmap_bgr = generate_heatmap(frame_bgr, dm)

    # Convert outputs back to RGB
    live_rgb = cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2RGB)
    heatmap_rgb = cv2.cvtColor(heatmap_bgr, cv2.COLOR_BGR2RGB)

    # Alert message
    if count > 50:
        alert = f"⚠ ALERT: Crowd exceeds limit! Count = {count:.1f}"
    else:
        alert = f"Count = {count:.1f}"

    return live_rgb, heatmap_rgb, alert


In [None]:
gr.Interface(
    fn=process_frame,
    inputs=gr.Image(sources=["webcam"], streaming=True),   # <-- FIXED
    outputs=[
        gr.Image(label="Live Webcam Feed"),
        gr.Image(label="Density Heatmap"),
        gr.Textbox(label="Crowd Status")
    ],
    title="Real-Time Crowd Monitoring (CSRNet)",
    description="Webcam-based crowd estimation using CSRNet.",
    live=True
).launch()

* Running on local URL:  http://127.0.0.1:7860
* To create a public link, set `share=True` in `launch()`.




Created dataset file at: .gradio\flagged\dataset1.csv
Error while flagging: field larger than field limit (131072)
