In [8]:
!pip install opencv-python numpy ultralytics Flask



In [9]:
!pip install "pymongo[srv]"



In [13]:
from flask import Flask, Response
import cv2
import numpy as np
from ultralytics import YOLO
from pymongo import MongoClient
from datetime import datetime, date
from dotenv import load_dotenv
import os

# Load YOLO model
model = YOLO('yolov8n.pt')  # or use a different YOLO version

# RTSP stream URL
# Retrive the RTSP stream URL from iSpy or Wireshark
# Replace the rtsp_url with your own RTSP stream URL
rtsp_url = ''



# Connect to the RTSP stream
cap = cv2.VideoCapture(rtsp_url)

#MongoDB connection
client = MongoClient('')
db = client["CrowTracking"]
collection = db["Crowd"]

frame_id = 0
current_date = date.today()


while True:
        now = datetime.now()
        # Read the frame from the stream
        # If the frame was not read, then break the loop and print an error
        ret, frame = cap.read()
        if not ret:
            print('Error reading the frame')
            break

        # Perform YOLO detection
        results = model(frame)

        # Process results with box coordinates and confidence scores
        for result in results:
            boxes = result.boxes.cpu().numpy()
            for box in boxes:
                x1, y1, x2, y2 = map(int, box.xyxy[0])
                conf = box.conf[0]
                cls = int(box.cls[0])
            
                if cls == 0:  # Assuming class 0 is person
                    cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
                    cv2.putText(frame, f'Person: {conf:.2f}', (x1, y1 - 10),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
         
        # Save the number of persons detected to MongoDB
        # Save the frame_id, timestamp and the total number of persons detected
        data = {
            
            "frame_id": frame_id,
            "timestamp": now.strftime("%d/%m/%Y %H:%M:%S"),
            "total_persons": len(boxes)
        }
        collection.insert_one(data)

        # Display the number of persons detected on the frame       
        cv2.rectangle(frame, (10, 10), (310, 60), (255, 255, 255), -1)
        cv2.putText(frame, f'Total Persons: {len(boxes)}', (20, 40),
                cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 0), 2)

        frame_id += 1

        # Display the frame
        cv2.imshow('Crowd Detection', frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

cap.release()
cv2.destroyAllWindows()





0: 384x640 1 person, 3 tvs, 3 laptops, 41.1ms
Speed: 2.3ms preprocess, 41.1ms inference, 1.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 3 tvs, 3 laptops, 36.4ms
Speed: 1.4ms preprocess, 36.4ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 2 tvs, 4 laptops, 46.9ms
Speed: 1.3ms preprocess, 46.9ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 2 tvs, 4 laptops, 40.5ms
Speed: 1.3ms preprocess, 40.5ms inference, 0.4ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 2 tvs, 3 laptops, 32.7ms
Speed: 1.0ms preprocess, 32.7ms inference, 0.4ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 2 tvs, 3 laptops, 32.2ms
Speed: 2.3ms preprocess, 32.2ms inference, 0.4ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 2 tvs, 3 laptops, 38.5ms
Speed: 1.1ms preprocess, 38.5ms inference, 0.4ms postprocess per image at shape (1, 3, 38

[h264 @ 0x29c936f80] cabac decode of qscale diff failed at 42 26
[h264 @ 0x29c936f80] error while decoding MB 42 26, bytestream 323



0: 384x640 1 person, 2 tvs, 2 laptops, 1 mouse, 60.8ms
Speed: 1.9ms preprocess, 60.8ms inference, 0.4ms postprocess per image at shape (1, 3, 384, 640)


[h264 @ 0x29c940530] cabac decode of qscale diff failed at 28 1
[h264 @ 0x29c940530] error while decoding MB 28 1, bytestream 0



0: 384x640 1 person, 2 tvs, 3 laptops, 1 mouse, 55.4ms
Speed: 1.1ms preprocess, 55.4ms inference, 0.4ms postprocess per image at shape (1, 3, 384, 640)


[h264 @ 0x29c949ae0] error while decoding MB 25 6, bytestream -13



0: 384x640 1 person, 2 tvs, 4 laptops, 1 mouse, 34.6ms
Speed: 1.3ms preprocess, 34.6ms inference, 0.4ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 2 tvs, 4 laptops, 1 mouse, 81.4ms
Speed: 1.4ms preprocess, 81.4ms inference, 0.8ms postprocess per image at shape (1, 3, 384, 640)


[h264 @ 0x2970594b0] top block unavailable for requested intra mode -1
[h264 @ 0x2970594b0] error while decoding MB 10 0, bytestream 676



0: 384x640 1 person, 2 tvs, 4 laptops, 1 mouse, 48.3ms
Speed: 1.8ms preprocess, 48.3ms inference, 0.4ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 2 tvs, 3 laptops, 1 mouse, 53.0ms
Speed: 1.9ms preprocess, 53.0ms inference, 0.4ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 2 tvs, 4 laptops, 1 mouse, 36.5ms
Speed: 1.1ms preprocess, 36.5ms inference, 0.4ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 2 tvs, 6 laptops, 1 mouse, 39.3ms
Speed: 1.6ms preprocess, 39.3ms inference, 1.3ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 2 tvs, 5 laptops, 1 mouse, 48.6ms
Speed: 1.2ms preprocess, 48.6ms inference, 0.9ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 2 tvs, 5 laptops, 1 mouse, 41.0ms
Speed: 1.6ms preprocess, 41.0ms inference, 0.4ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 2 tvs, 5 laptops, 1 mouse, 44.0ms
Speed: 1.2ms preproces

[h264 @ 0x29706f720] left block unavailable for requested intra4x4 mode -1
[h264 @ 0x29706f720] error while decoding MB 0 6, bytestream 12



0: 384x640 2 persons, 1 hot dog, 2 tvs, 2 laptops, 52.5ms
Speed: 1.1ms preprocess, 52.5ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 1 hot dog, 2 tvs, 2 laptops, 43.6ms
Speed: 1.1ms preprocess, 43.6ms inference, 0.4ms postprocess per image at shape (1, 3, 384, 640)


[h264 @ 0x16a0bb200] error while decoding MB 24 4, bytestream -14



0: 384x640 2 persons, 2 tvs, 2 laptops, 53.7ms
Speed: 1.5ms preprocess, 53.7ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 2 tvs, 3 laptops, 56.7ms
Speed: 1.4ms preprocess, 56.7ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 2 tvs, 2 laptops, 42.6ms
Speed: 1.3ms preprocess, 42.6ms inference, 0.4ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 2 hot dogs, 2 tvs, 3 laptops, 55.9ms
Speed: 1.6ms preprocess, 55.9ms inference, 0.4ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 1 hot dog, 2 tvs, 3 laptops, 48.4ms
Speed: 1.4ms preprocess, 48.4ms inference, 0.4ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 1 hot dog, 2 tvs, 2 laptops, 36.0ms
Speed: 1.0ms preprocess, 36.0ms inference, 0.4ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 1 hot dog, 3 tvs, 2 laptops, 39.2ms
Speed: 1.2ms preprocess, 39.2ms inferenc

[h264 @ 0x29706f720] left block unavailable for requested intra mode
[h264 @ 0x29706f720] error while decoding MB 0 17, bytestream 570
[h264 @ 0x16a0bb200] left block unavailable for requested intra4x4 mode -1
[h264 @ 0x16a0bb200] error while decoding MB 0 23, bytestream 78



0: 384x640 1 person, 1 hot dog, 2 tvs, 2 laptops, 58.1ms
Speed: 1.2ms preprocess, 58.1ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 2 tvs, 3 laptops, 50.2ms
Speed: 1.2ms preprocess, 50.2ms inference, 0.4ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 2 tvs, 2 laptops, 42.7ms
Speed: 2.4ms preprocess, 42.7ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 1 cake, 2 tvs, 3 laptops, 43.7ms
Speed: 1.0ms preprocess, 43.7ms inference, 1.2ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 5 persons, 2 tvs, 2 laptops, 58.3ms
Speed: 1.3ms preprocess, 58.3ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 4 persons, 3 tvs, 2 laptops, 58.4ms
Speed: 1.3ms preprocess, 58.4ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 5 persons, 2 tvs, 2 laptops, 47.0ms
Speed: 1.3ms preprocess, 47.0ms inference, 0.4ms postprocess per 

[h264 @ 0x29c949ae0] error while decoding MB 65 1, bytestream 380



0: 384x640 3 persons, 2 tvs, 2 laptops, 42.3ms
Speed: 1.1ms preprocess, 42.3ms inference, 0.4ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 3 persons, 2 tvs, 4 laptops, 39.8ms
Speed: 1.2ms preprocess, 39.8ms inference, 0.4ms postprocess per image at shape (1, 3, 384, 640)


[h264 @ 0x2970594b0] error while decoding MB 24 30, bytestream -6



0: 384x640 3 persons, 2 tvs, 4 laptops, 34.3ms
Speed: 1.3ms preprocess, 34.3ms inference, 0.4ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 3 persons, 2 tvs, 3 laptops, 38.1ms
Speed: 1.1ms preprocess, 38.1ms inference, 0.4ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 3 persons, 2 tvs, 3 laptops, 34.7ms
Speed: 1.2ms preprocess, 34.7ms inference, 0.4ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 3 persons, 2 tvs, 3 laptops, 37.7ms
Speed: 1.4ms preprocess, 37.7ms inference, 0.4ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 3 persons, 2 tvs, 3 laptops, 39.0ms
Speed: 1.3ms preprocess, 39.0ms inference, 0.4ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 3 persons, 2 tvs, 2 laptops, 37.8ms
Speed: 1.2ms preprocess, 37.8ms inference, 0.4ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 3 persons, 2 tvs, 2 laptops, 38.2ms
Speed: 1.6ms preprocess, 38.2ms inference, 0.4ms postprocess per image at shape (1

[h264 @ 0x16a0bb200] error while decoding MB 72 40, bytestream -5
[h264 @ 0x29c92db00] error while decoding MB 73 10, bytestream -13


Speed: 1.4ms preprocess, 39.3ms inference, 0.4ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 2 tvs, 2 laptops, 1 mouse, 37.6ms
Speed: 1.3ms preprocess, 37.6ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 2 tvs, 2 laptops, 1 mouse, 38.8ms
Speed: 2.6ms preprocess, 38.8ms inference, 0.4ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 2 tvs, 2 laptops, 1 mouse, 39.9ms
Speed: 1.5ms preprocess, 39.9ms inference, 0.4ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 2 tvs, 2 laptops, 1 mouse, 36.0ms
Speed: 1.3ms preprocess, 36.0ms inference, 0.4ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 2 tvs, 2 laptops, 1 mouse, 38.5ms
Speed: 1.2ms preprocess, 38.5ms inference, 0.4ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 2 tvs, 2 laptops, 1 mouse, 41.0ms
Speed: 1.1ms preprocess, 41.0ms inference, 0.4ms postprocess per image at sh

[h264 @ 0x29c949ae0] left block unavailable for requested intra4x4 mode -1
[h264 @ 0x29c949ae0] error while decoding MB 0 7, bytestream 1077



0: 384x640 2 persons, 2 tvs, 2 laptops, 1 mouse, 45.0ms
Speed: 1.2ms preprocess, 45.0ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 2 tvs, 3 laptops, 1 mouse, 38.7ms
Speed: 1.4ms preprocess, 38.7ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 2 tvs, 2 laptops, 1 mouse, 41.9ms
Speed: 2.9ms preprocess, 41.9ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)


[h264 @ 0x16a0bb200] left block unavailable for requested intra mode
[h264 @ 0x16a0bb200] error while decoding MB 0 28, bytestream 193



0: 384x640 2 persons, 2 tvs, 2 laptops, 1 mouse, 37.7ms
Speed: 1.2ms preprocess, 37.7ms inference, 0.4ms postprocess per image at shape (1, 3, 384, 640)


[h264 @ 0x29c92db00] error while decoding MB 51 10, bytestream -14



0: 384x640 1 person, 2 tvs, 2 laptops, 1 mouse, 44.7ms
Speed: 1.6ms preprocess, 44.7ms inference, 0.4ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 2 tvs, 2 laptops, 1 mouse, 44.0ms
Speed: 1.1ms preprocess, 44.0ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


[h264 @ 0x29c936f80] cabac decode of qscale diff failed at 75 10
[h264 @ 0x29c936f80] error while decoding MB 75 10, bytestream 0



0: 384x640 2 persons, 2 tvs, 2 laptops, 1 mouse, 43.4ms
Speed: 1.0ms preprocess, 43.4ms inference, 0.4ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 2 tvs, 2 laptops, 1 mouse, 69.8ms
Speed: 1.5ms preprocess, 69.8ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 2 tvs, 2 laptops, 1 mouse, 36.7ms
Speed: 1.7ms preprocess, 36.7ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 2 tvs, 2 laptops, 1 mouse, 47.1ms
Speed: 2.2ms preprocess, 47.1ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 4 persons, 2 tvs, 4 laptops, 1 mouse, 44.2ms
Speed: 1.1ms preprocess, 44.2ms inference, 0.4ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 3 persons, 2 tvs, 4 laptops, 1 mouse, 45.5ms
Speed: 1.3ms preprocess, 45.5ms inference, 0.9ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 4 persons, 2 tvs, 2 laptops, 1 mouse, 49.2ms
Speed: 6.0ms prepro

[h264 @ 0x29c949ae0] error while decoding MB 42 13, bytestream -20



0: 384x640 3 persons, 2 tvs, 3 laptops, 1 mouse, 43.9ms
Speed: 1.3ms preprocess, 43.9ms inference, 0.4ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 3 persons, 2 tvs, 2 laptops, 1 mouse, 41.1ms
Speed: 1.1ms preprocess, 41.1ms inference, 0.4ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 3 persons, 2 tvs, 3 laptops, 1 mouse, 42.1ms
Speed: 0.9ms preprocess, 42.1ms inference, 0.4ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 3 persons, 2 tvs, 3 laptops, 1 mouse, 39.3ms
Speed: 1.2ms preprocess, 39.3ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 2 tvs, 3 laptops, 1 mouse, 41.5ms
Speed: 1.3ms preprocess, 41.5ms inference, 0.4ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 2 tvs, 3 laptops, 1 mouse, 49.3ms
Speed: 1.2ms preprocess, 49.3ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)


KeyboardInterrupt: 