In [1]:
!pip install ultralytics gradio opencv-python torchvision torch


Collecting ultralytics
  Downloading ultralytics-8.3.237-py3-none-any.whl.metadata (37 kB)
Collecting ultralytics-thop>=2.0.18 (from ultralytics)
  Downloading ultralytics_thop-2.0.18-py3-none-any.whl.metadata (14 kB)
Downloading ultralytics-8.3.237-py3-none-any.whl (1.2 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.2/1.2 MB[0m [31m13.3 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading ultralytics_thop-2.0.18-py3-none-any.whl (28 kB)
Installing collected packages: ultralytics-thop, ultralytics
Successfully installed ultralytics-8.3.237 ultralytics-thop-2.0.18


In [5]:
from google.colab import drive
drive.mount('/content/drive')


Mounted at /content/drive


In [2]:
import torch
import torch.nn as nn
import torchvision.models as models
import cv2
import gradio as gr
from ultralytics import YOLO
import tempfile
import os


Creating new Ultralytics Settings v0.0.6 file ✅ 
View Ultralytics Settings with 'yolo settings' or at '/root/.config/Ultralytics/settings.json'
Update Settings with 'yolo settings key=value', i.e. 'yolo settings runs_dir=path/to/dir'. For help see https://docs.ultralytics.com/quickstart/#ultralytics-settings.


In [6]:
class CSRNet(nn.Module):
    def __init__(self):
        super().__init__()
        vgg = models.vgg16_bn(weights=models.VGG16_BN_Weights.IMAGENET1K_V1)
        self.frontend = nn.Sequential(*list(vgg.features.children())[:33])
        self.backend = nn.Sequential(
            nn.Conv2d(512, 512, 3, padding=2, dilation=2), nn.ReLU(),
            nn.Conv2d(512, 512, 3, padding=2, dilation=2), nn.ReLU(),
            nn.Conv2d(512, 512, 3, padding=2, dilation=2), nn.ReLU(),
            nn.Conv2d(512, 256, 3, padding=2, dilation=2), nn.ReLU(),
            nn.Conv2d(256, 128, 3, padding=1), nn.ReLU(),
            nn.Conv2d(128, 1, 1)
        )

    def forward(self, x):
        return self.backend(self.frontend(x))


device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# ⚠️ Upload partB_best.pth OR give full Drive path
csrnet = CSRNet().to(device)
csrnet.load_state_dict(
    torch.load(
        "/content/drive/MyDrive/deepvision/checkpoints/partB_best.pth",
        map_location=device
    )
)
csrnet.eval()

print("✅ CSRNet trained model loaded")


✅ CSRNet trained model loaded


In [7]:
yolo = YOLO("yolov8n.pt")   # pretrained people detector
THRESHOLD = 10             # overcrowding limit


[KDownloading https://github.com/ultralytics/assets/releases/download/v8.3.0/yolov8n.pt to 'yolov8n.pt': 100% ━━━━━━━━━━━━ 6.2MB 49.9MB/s 0.1s


In [8]:
def detect_people(video_file):
    video_path = video_file.name   # Gradio temp file path

    cap = cv2.VideoCapture(video_path)

    fps = int(cap.get(cv2.CAP_PROP_FPS))
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

    # Create temporary output file
    temp_dir = tempfile.mkdtemp()
    output_path = os.path.join(temp_dir, "output.mp4")

    fourcc = cv2.VideoWriter_fourcc(*"mp4v")
    out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))

    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        results = yolo(frame, classes=[0], conf=0.3)  # person class
        count = 0

        for r in results:
            for box in r.boxes:
                count += 1
                x1, y1, x2, y2 = map(int, box.xyxy[0])
                conf = float(box.conf[0])

                cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
                cv2.putText(frame, f"{conf:.2f}", (x1, y1 - 5),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

        # ALERT LOGIC
        if count > THRESHOLD:
            alert_text = "ALERT: Overcrowding"
            color = (0, 0, 255)
        else:
            alert_text = "Crowd Level Safe"
            color = (0, 255, 0)

        # Draw count + alert
        cv2.putText(frame, f"People Count: {count}", (20, 40),
                    cv2.FONT_HERSHEY_SIMPLEX, 1, color, 2)

        cv2.putText(frame, alert_text, (20, 80),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.9, color, 2)

        out.write(frame)

    cap.release()
    out.release()

    return output_path


In [9]:
interface = gr.Interface(
    fn=detect_people,
    inputs=gr.File(label="Upload a video file"),
    outputs=gr.Video(label=" People Detection Output"),
    title="Overcrowding Detection for videos",
    description="Upload any video to detect people, show bounding boxes, count, and overcrowding alerts."
)

interface.launch(debug=True)


It looks like you are running Gradio on a hosted Jupyter notebook, which requires `share=True`. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
* Running on public URL: https://812d82bc9cceb4746c.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)



0: 384x640 10 persons, 326.1ms
Speed: 7.9ms preprocess, 326.1ms inference, 41.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 9 persons, 136.6ms
Speed: 2.6ms preprocess, 136.6ms inference, 1.3ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 8 persons, 231.6ms
Speed: 2.5ms preprocess, 231.6ms inference, 1.8ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 9 persons, 212.7ms
Speed: 2.3ms preprocess, 212.7ms inference, 2.2ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 9 persons, 215.5ms
Speed: 4.9ms preprocess, 215.5ms inference, 1.8ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 9 persons, 198.7ms
Speed: 2.4ms preprocess, 198.7ms inference, 1.5ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 9 persons, 201.8ms
Speed: 3.4ms preprocess, 201.8ms inference, 1.7ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 10 persons, 226.9ms
Speed: 2.8ms preprocess, 226.9ms inference, 1.4ms postprocess p



Keyboard interruption in main thread... closing server.
Killing tunnel 127.0.0.1:7860 <> https://812d82bc9cceb4746c.gradio.live


