In [28]:
!pip install ultralytics



In [29]:
!pip install opencv-python-headless



In [30]:
import cv2
from ultralytics import YOLO
from collections import defaultdict

In [31]:
# Build the detector/tracker from the "yolo11l.pt" checkpoint.
model = YOLO("yolo11l.pt")

In [32]:
import os

# Resolve the checkpoint filename against the current working directory and
# print its absolute location.
weights_file = "yolo11l.pt"
print(os.path.abspath(weights_file))


/content/yolo11l.pt


In [33]:
# Mount Google Drive so the checkpoint can be copied somewhere that outlives
# this runtime.
from google.colab import drive

drive.mount("/content/drive")



Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [34]:
# Copy (not move) the checkpoint into the project folder on Drive.
# Done in Python instead of `!cp` so the destination folder is created when it
# does not exist yet, and so a failure raises instead of only printing a
# shell error.
import os
import shutil

drive_project_dir = "/content/drive/MyDrive/Colab Notebooks/Detect_count_and_track_the_objects_in_real_time/"
os.makedirs(drive_project_dir, exist_ok=True)
drive_weights_path = shutil.copy("/content/yolo11l.pt", drive_project_dir)


In [35]:

# `model.names` maps each integer class index to its label (the COCO
# categories listed in the output below). The tracking cells use this mapping
# to turn predicted class indices into readable names.
class_list = model.names
# Bare last expression: display the mapping as the cell output.
class_list

{0: 'person',
 1: 'bicycle',
 2: 'car',
 3: 'motorcycle',
 4: 'airplane',
 5: 'bus',
 6: 'train',
 7: 'truck',
 8: 'boat',
 9: 'traffic light',
 10: 'fire hydrant',
 11: 'stop sign',
 12: 'parking meter',
 13: 'bench',
 14: 'bird',
 15: 'cat',
 16: 'dog',
 17: 'horse',
 18: 'sheep',
 19: 'cow',
 20: 'elephant',
 21: 'bear',
 22: 'zebra',
 23: 'giraffe',
 24: 'backpack',
 25: 'umbrella',
 26: 'handbag',
 27: 'tie',
 28: 'suitcase',
 29: 'frisbee',
 30: 'skis',
 31: 'snowboard',
 32: 'sports ball',
 33: 'kite',
 34: 'baseball bat',
 35: 'baseball glove',
 36: 'skateboard',
 37: 'surfboard',
 38: 'tennis racket',
 39: 'bottle',
 40: 'wine glass',
 41: 'cup',
 42: 'fork',
 43: 'knife',
 44: 'spoon',
 45: 'bowl',
 46: 'banana',
 47: 'apple',
 48: 'sandwich',
 49: 'orange',
 50: 'broccoli',
 51: 'carrot',
 52: 'hot dog',
 53: 'pizza',
 54: 'donut',
 55: 'cake',
 56: 'chair',
 57: 'couch',
 58: 'potted plant',
 59: 'bed',
 60: 'dining table',
 61: 'toilet',
 62: 'tv',
 63: 'laptop',
 64: 'mou

In [36]:
# Input video, read from the mounted Google Drive.
# Alternative test clip:
# video_path = "/content/drive/MyDrive/Colab Notebooks/Detect_count_and_track_the_objects_in_real_time/test_videos/4.mp4"

video_path = "/content/drive/MyDrive/Colab Notebooks/Detect_count_and_track_the_objects_in_real_time/Test_video_of_indian_people_cars/4.mp4"
cap = cv2.VideoCapture(video_path)

# cv2.VideoCapture does not raise when the file is missing or unreadable; it
# just stays closed. Fail here so the problem is not discovered later as an
# empty output video.
if not cap.isOpened():
    raise FileNotFoundError(f"Could not open video: {video_path}")


In [37]:
from google.colab.patches import cv2_imshow

# Track objects in the video opened as `cap`, count each tracked object once
# when its box centre is below the red line, and save the annotated frames to
# an .mp4 on Drive.
#
# Reads from other cells: `cap`, `model`, `class_list`, `os`, `cv2`, `defaultdict`.

# Refuse to run on a closed capture (e.g. this cell was re-run after `cap` was
# released): the loop would do nothing and the previous output file would be
# overwritten with an empty video.
if not cap.isOpened():
    raise RuntimeError("Video capture is not open; re-run the cell that creates `cap`.")

line_y_red = 430  # y-coordinate (pixels) of the red counting line
line_x_start, line_x_end = 690, 1130  # horizontal extent of the drawn line (drawing only)

# Per-class totals of counted objects
class_counts = defaultdict(int)

# Track IDs that were already counted, so each tracked object counts once
crossed_ids = set()

# ------------------------

# Define output path
output_dir = "/content/drive/MyDrive/Colab Notebooks/Detect_count_and_track_the_objects_in_real_time/output_video_of_indian_people_detetction"
os.makedirs(output_dir, exist_ok=True)
output_path = os.path.join(output_dir, "tracked_output_of_indian_people.mp4")

# Get video properties for VideoWriter
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
# Keep fractional frame rates (e.g. 29.97) instead of truncating them, and
# fall back to 30 when the container reports 0.
fps = cap.get(cv2.CAP_PROP_FPS) or 30.0

# Define video writer
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))

try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        # Run YOLO tracking on the frame; persist=True keeps tracker state
        # between consecutive calls so IDs stay stable across frames.
        results = model.track(frame, persist=True)
        detections = results[0].boxes

        # Draw the counting line on every frame, with or without detections.
        cv2.line(frame, (line_x_start, line_y_red), (line_x_end, line_y_red), (0, 0, 255), 3)

        boxes = detections.xyxy.cpu()
        class_indices = detections.cls.int().cpu().tolist()
        # `boxes.id` is None when the tracker assigned no IDs for this frame;
        # such detections are still drawn but cannot be counted.
        if detections.id is not None:
            track_ids = detections.id.int().cpu().tolist()
        else:
            track_ids = [None] * len(boxes)

        # Loop through each detected object
        for box, track_id, class_idx in zip(boxes, track_ids, class_indices):
            x1, y1, x2, y2 = map(int, box)
            cx = (x1 + x2) // 2  # box centre
            cy = (y1 + y2) // 2

            class_name = class_list[class_idx]
            label = f"ID: {track_id} {class_name}" if track_id is not None else class_name

            cv2.circle(frame, (cx, cy), 4, (0, 0, 255), -1)
            cv2.putText(frame, label, (x1, y1 - 10),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 255), 2)
            cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)

            # Count the object the first time its centre is seen below the line.
            # NOTE(review): this also counts objects that first appear below
            # the line, and ignores the line's horizontal extent - confirm
            # that this is the intended counting rule.
            if track_id is not None and cy > line_y_red and track_id not in crossed_ids:
                crossed_ids.add(track_id)
                class_counts[class_name] += 1

        # Display the running counts on the frame
        y_offset = 30
        for class_name, count in class_counts.items():
            cv2.putText(frame, f"{class_name}: {count}", (50, y_offset),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 0), 2)
            y_offset += 30

        # Save the processed frame to the output video
        out.write(frame)
finally:
    # Always release the capture and finalize the output file, even if
    # tracking raised part-way through.
    cap.release()
    out.release()




0: 384x640 11 persons, 2 bicycles, 5 cars, 2 motorcycles, 4 buss, 1 truck, 31.7ms
Speed: 3.3ms preprocess, 31.7ms inference, 1.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 10 persons, 2 bicycles, 5 cars, 2 motorcycles, 4 buss, 1 truck, 31.2ms
Speed: 3.1ms preprocess, 31.2ms inference, 1.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 10 persons, 1 bicycle, 6 cars, 2 motorcycles, 4 buss, 1 truck, 30.8ms
Speed: 3.4ms preprocess, 30.8ms inference, 1.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 12 persons, 1 bicycle, 5 cars, 2 motorcycles, 4 buss, 28.7ms
Speed: 3.2ms preprocess, 28.7ms inference, 1.5ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 11 persons, 1 bicycle, 6 cars, 2 motorcycles, 4 buss, 28.6ms
Speed: 3.2ms preprocess, 28.6ms inference, 1.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 10 persons, 1 bicycle, 8 cars, 2 motorcycles, 4 buss, 28.6ms
Speed: 4.9ms preprocess, 28.6ms inference, 1.5ms

<IPython.core.display.Javascript object>

**FOR WEBCAM AND RECORDING FROM MOBILE**

In [None]:
import os
import cv2
import time
from collections import defaultdict
from ultralytics import YOLO



# Track and count objects from a phone's IP-webcam stream, show a live preview
# and record the annotated frames to output_videos/result_from_webcam.mp4.
#
# NOTE(review): the stream URL is a private-network address and the preview
# uses cv2.imshow, so this cell is presumably meant to run on a local machine
# with a GUI-enabled OpenCV build, not on a hosted runtime - confirm.

# Create output folder
os.makedirs('output_videos', exist_ok=True)

# Load the model in this cell so it does not depend on a `model` variable left
# over from another cell. A fresh instance also starts with fresh tracker
# state (persist=True keeps tracker state on the model between calls).
model = YOLO('yolo11l.pt')

# Load class list
class_list = model.names

# Use mobile IP webcam as source
video_url = 'http://192.168.100.7:8080/video'
cap = cv2.VideoCapture(video_url)

# Define red line position
line_y_red = 430
line_x_start, line_x_end = 690, 1130  # horizontal extent of the drawn line (drawing only)

# Initialize counts
class_counts = defaultdict(int)
crossed_ids = set()  # track IDs already counted

# Read first frame to get size
ret, frame = cap.read()
if not ret:
    cap.release()
    # Raise instead of calling exit(), which would shut down the kernel.
    raise RuntimeError("Failed to read from webcam. Check URL or connection.")

frame_height, frame_width = frame.shape[:2]

# Fixed output frame rate (a low value, since inference may be slow)
fps = 5
output_path = 'output_videos/result_from_webcam.mp4'
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
out = cv2.VideoWriter(output_path, fourcc, fps, (frame_width, frame_height))

try:
    # `ret`/`frame` already hold the first frame, so it is processed too
    # instead of being thrown away after the size probe.
    while ret:
        # Run YOLO detection + tracking
        results = model.track(frame, persist=True)
        detections = results[0].boxes

        # Draw red line on every frame
        cv2.line(frame, (line_x_start, line_y_red), (line_x_end, line_y_red), (0, 0, 255), 3)

        boxes = detections.xyxy.cpu()
        class_indices = detections.cls.int().cpu().tolist()
        # `boxes.id` is None when the tracker assigned no IDs for this frame.
        # Use None (not a shared placeholder such as -1) so untracked
        # detections are drawn but never entered into `crossed_ids`.
        if detections.id is not None:
            track_ids = detections.id.int().cpu().tolist()
        else:
            track_ids = [None] * len(boxes)

        for box, track_id, class_idx in zip(boxes, track_ids, class_indices):
            x1, y1, x2, y2 = map(int, box)
            cx = (x1 + x2) // 2
            cy = (y1 + y2) // 2
            class_name = class_list[class_idx]
            label = f"ID: {track_id} {class_name}" if track_id is not None else class_name

            # Draw detection
            cv2.circle(frame, (cx, cy), 4, (0, 0, 255), -1)
            cv2.putText(frame, label, (x1, y1 - 10),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 255), 2)
            cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)

            # Count the object the first time its centre is seen below the line.
            # NOTE(review): this also counts objects that first appear below
            # the line - confirm that this is the intended counting rule.
            if track_id is not None and cy > line_y_red and track_id not in crossed_ids:
                crossed_ids.add(track_id)
                class_counts[class_name] += 1

        # Show total counts
        y_offset = 30
        for class_name, count in class_counts.items():
            cv2.putText(frame, f"{class_name}: {count}", (50, y_offset),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 0), 2)
            y_offset += 30

        # Show and Save
        out.write(frame)
        cv2.imshow("YOLO Live Webcam Detection", frame)

        # Stop when 'q' is pressed in the preview window
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

        ret, frame = cap.read()
finally:
    # Cleanup, also when tracking or the preview raised
    cap.release()
    out.release()
    cv2.destroyAllWindows()
