In [1]:
# Mount Google Drive at /content/drive (Colab-only helper); the checkpoint and
# video paths used later in this notebook all live under that mount point.
from google.colab import drive
drive.mount('/content/drive')

import cv2
import torch
import numpy as np
from PIL import Image
import torchvision.transforms as transforms
from tqdm import tqdm
import os
import torch.nn as nn
import torchvision.models as models

Mounted at /content/drive


In [2]:
# Per-channel mean / std used for ImageNet normalization.
IMAGENET_MEAN = [0.485, 0.456, 0.406]
IMAGENET_STD = [0.229, 0.224, 0.225]

# Preprocessing pipeline applied to every frame: convert to a tensor first,
# then normalize each channel with the statistics above.
to_tensor = transforms.ToTensor()
normalize = transforms.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD)
transform = transforms.Compose([to_tensor, normalize])

In [3]:
class CSRNet(nn.Module):
    """Density-map network: truncated VGG16-BN frontend plus a dilated-conv backend.

    The frontend is the first 33 modules of ``vgg16_bn().features``. The backend
    is a stack of 3x3 convolutions (four with dilation 2, one undilated), each
    followed by ReLU, and ends in a 1x1 convolution producing a single channel.

    Args:
        pretrained: When True (the default, identical to the previous behaviour)
            the VGG16-BN frontend is initialised from
            ``VGG16_BN_Weights.IMAGENET1K_V1``, which downloads the weights if
            they are not cached. Pass False to skip that download when a full
            state dict is loaded into the model right after construction; the
            module structure (and therefore the state-dict keys) is the same
            either way.
    """

    def __init__(self, pretrained=True):
        super().__init__()
        weights = models.VGG16_BN_Weights.IMAGENET1K_V1 if pretrained else None
        vgg = models.vgg16_bn(weights=weights)

        # Keep only the leading 33 feature modules of VGG16-BN.
        self.frontend = nn.Sequential(*list(vgg.features.children())[:33])

        # padding == dilation for the 3x3 convs, so the backend keeps the
        # spatial size of the frontend's output.
        self.backend = nn.Sequential(
            nn.Conv2d(512, 512, 3, padding=2, dilation=2), nn.ReLU(inplace=True),
            nn.Conv2d(512, 512, 3, padding=2, dilation=2), nn.ReLU(inplace=True),
            nn.Conv2d(512, 512, 3, padding=2, dilation=2), nn.ReLU(inplace=True),
            nn.Conv2d(512, 256, 3, padding=2, dilation=2), nn.ReLU(inplace=True),
            nn.Conv2d(256, 128, 3, padding=1), nn.ReLU(inplace=True),
            nn.Conv2d(128, 1, 1)
        )

    def forward(self, x):
        """Run ``x`` through the frontend and then the backend.

        Args:
            x: Input batch. Presumably (N, 3, H, W) normalized RGB images, as
                VGG expects -- confirm against the caller.

        Returns:
            The raw single-channel output of the final 1x1 convolution (no
            activation is applied here).
        """
        x = self.frontend(x)
        x = self.backend(x)
        return x

In [4]:
# Prefer the GPU when CUDA is available; otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Fine-tuned weights stored on the mounted Drive.
CHECKPOINT_PATH = "/content/drive/MyDrive/deepvision/checkpoints/best_finetuned.pth"

# Build the network, move it to the target device, then restore the checkpoint.
# map_location makes the stored tensors land on `device` regardless of where
# they were saved from.
model = CSRNet()
model = model.to(device)
state_dict = torch.load(CHECKPOINT_PATH, map_location=device)
model.load_state_dict(state_dict)

# Inference only: switch BatchNorm (and any dropout) layers to eval behaviour.
model.eval()

print("‚úÖ CSRNet model loaded successfully")

Downloading: "https://download.pytorch.org/models/vgg16_bn-6c64b313.pth" to /root/.cache/torch/hub/checkpoints/vgg16_bn-6c64b313.pth


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 528M/528M [00:02<00:00, 196MB/s]


‚úÖ CSRNet model loaded successfully


In [5]:
INPUT_VIDEO = "/content/drive/MyDrive/deepvision/video.mp4"
OUTPUT_VIDEO = "/content/drive/MyDrive/deepvision/output.mp4"

# A frame triggers the alert when its estimated count is strictly greater than this.
THRESHOLD = 12

# Frame rate used only when the input container does not report a usable one.
DEFAULT_FPS = 25.0

# Create the directory that will actually hold the output file, derived from
# OUTPUT_VIDEO so the two cannot drift apart.
os.makedirs(os.path.dirname(OUTPUT_VIDEO), exist_ok=True)

cap = cv2.VideoCapture(INPUT_VIDEO)
if not cap.isOpened():
    # VideoCapture does not raise on failure; without this check every later
    # property read returns 0 and the notebook silently writes an empty video.
    raise IOError(f"Could not open input video: {INPUT_VIDEO}")

# Keep the frame rate as a float: truncating e.g. 29.97 to 29 makes the output
# play at a different speed/duration than the input.
fps = cap.get(cv2.CAP_PROP_FPS)
if not (fps > 0):  # also catches NaN
    fps = DEFAULT_FPS

width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

fourcc = cv2.VideoWriter_fourcc(*'mp4v')
out = cv2.VideoWriter(OUTPUT_VIDEO, fourcc, fps, (width, height))
if not out.isOpened():
    # Do not leave the capture handle dangling if the writer could not be created.
    cap.release()
    raise IOError(f"Could not open output video for writing: {OUTPUT_VIDEO}")

print("‚úÖ Video loaded")

‚úÖ Video loaded


In [6]:
from tqdm import tqdm

# Reads frames from `cap`, estimates the crowd count with `model`, overlays the
# count and an alert banner on each frame, and writes the result to `out`.
# NOTE: `cap` and `out` are created by the video setup cell and are consumed and
# released here, so that cell must be re-run before this one is run again.
frame_count = 0

try:
    with tqdm(
        total=total_frames,
        desc="Processing Video Frames",
        unit="frame"
    ) as pbar:

        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                # End of stream (or a read failure): stop processing.
                break

            frame_count += 1

            # OpenCV delivers BGR; the preprocessing pipeline expects an RGB image.
            img = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
            img_tensor = transform(img).unsqueeze(0).to(device)

            # Inference (no autograd bookkeeping needed).
            with torch.no_grad():
                density_map = torch.relu(model(img_tensor))

            # The count is the sum of the density map. ReLU makes every term
            # non-negative, so the rounded sum can never be below zero.
            count = int(round(density_map.sum().item()))

            # Alert logic: red banner above the threshold, green otherwise (BGR).
            if count > THRESHOLD:
                alert_text = "ALERT: Crowd Limit Exceeded"
                color = (0, 0, 255)
            else:
                alert_text = "Crowd Level Safe"
                color = (0, 255, 0)

            # Draw the overlay directly onto the original BGR frame.
            cv2.putText(frame, f"Count: {count}", (20, 40),
                        cv2.FONT_HERSHEY_SIMPLEX, 1, color, 2)

            cv2.putText(frame, alert_text, (20, 80),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.8, color, 2)

            out.write(frame)
            pbar.update(1)
finally:
    # Always release both handles, even if a frame fails mid-run; otherwise the
    # writer never finalizes the MP4 and the capture handle is leaked.
    cap.release()
    out.release()

print("\n‚úÖ Processing complete")
print("üìÅ Output saved at:", OUTPUT_VIDEO)


Processing Video Frames: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 331/331 [00:15<00:00, 21.27frame/s]


‚úÖ Processing complete
üìÅ Output saved at: /content/drive/MyDrive/deepvision/output.mp4



