In [25]:
# First, install the necessary libraries
!pip install deep-sort-realtime ultralytics opencv-python-headless



In [26]:
!pip install torch torchvision opencv-python tqdm
!pip install ultralytics  # YOLOv8
!pip install filterpy  # Kalman filtering (for DeepSORT)
!pip install moviepy  # For video processing and output




In [27]:
!pip install yt_dlp





In [28]:
import yt_dlp

video_urls = [
    "https://www.youtube.com/watch?v=aWV7UUMddCU",
    "https://www.youtube.com/watch?v=f6wqlpG9rd0",
    "https://www.youtube.com/watch?v=GNVTuLHdeSo",
    "https://www.youtube.com/watch?v=SWtmkjd45so",
    "https://www.youtube.com/watch?v=RzI6Ar5mu2Q",
    "https://www.youtube.com/watch?v=aulLej6Z6W8",
    "https://www.youtube.com/watch?v=7pN6ydLE4EQ",
    "https://www.youtube.com/watch?v=fEEelCgBkWA",
    "https://www.youtube.com/watch?v=ckZQbQwM3oU",
    "https://www.youtube.com/watch?v=E8Wgwg3F4X0",
    "https://www.youtube.com/watch?v=rvIPH4ccfpI",
    "https://www.youtube.com/watch?v=F6iqlW6ovZc",
    "https://www.youtube.com/watch?v=9qjk-Sq415s&list=PL5B0D2D5B4BFE92C1&index=6",
    "https://www.youtube.com/watch?v=DI25kGJis0w",
    "https://www.youtube.com/watch?v=rrLhFZG6iQY",
    "https://www.youtube.com/watch?v=RKOZbT0ftL4&t=1s",
    "https://www.youtube.com/watch?v=N7TBbWHB01E",
    "https://www.youtube.com/watch?v=1YqVEVbXQ1c",

]

def download_videos(urls):
    ydl_opts = {
        'format': 'mp4',
        'outtmpl': '%(id)s.%(ext)s',
    }

    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        ydl.download(urls)

download_videos(video_urls)


[youtube] Extracting URL: https://www.youtube.com/watch?v=aWV7UUMddCU
[youtube] aWV7UUMddCU: Downloading webpage
[youtube] aWV7UUMddCU: Downloading ios player API JSON
[youtube] aWV7UUMddCU: Downloading web creator player API JSON
[youtube] aWV7UUMddCU: Downloading m3u8 information
[info] aWV7UUMddCU: Downloading 1 format(s): 18
[download] aWV7UUMddCU.mp4 has already been downloaded
[download] 100% of   22.69MiB
[youtube] Extracting URL: https://www.youtube.com/watch?v=f6wqlpG9rd0
[youtube] f6wqlpG9rd0: Downloading webpage
[youtube] f6wqlpG9rd0: Downloading ios player API JSON
[youtube] f6wqlpG9rd0: Downloading web creator player API JSON
[youtube] f6wqlpG9rd0: Downloading m3u8 information
[info] f6wqlpG9rd0: Downloading 1 format(s): 18
[download] f6wqlpG9rd0.mp4 has already been downloaded
[download] 100% of   23.56MiB
[youtube] Extracting URL: https://www.youtube.com/watch?v=GNVTuLHdeSo
[youtube] GNVTuLHdeSo: Downloading webpage
[youtube] GNVTuLHdeSo: Downloading ios player API JSON




[youtube:tab] PL5B0D2D5B4BFE92C1 page 1: Downloading API JSON




[youtube:tab] PL5B0D2D5B4BFE92C1 page 1: Downloading API JSON




[youtube:tab] PL5B0D2D5B4BFE92C1 page 1: Downloading API JSON




[youtube:tab] Playlist In The Son-Rise Program® Playroom for Autism: Downloading 6 items of 6
[download] Downloading item 1 of 6
[youtube] Extracting URL: https://www.youtube.com/watch?v=Hd9marUY5GQ
[youtube] Hd9marUY5GQ: Downloading webpage
[youtube] Hd9marUY5GQ: Downloading ios player API JSON
[youtube] Hd9marUY5GQ: Downloading web creator player API JSON
[youtube] Hd9marUY5GQ: Downloading m3u8 information
[info] Hd9marUY5GQ: Downloading 1 format(s): 18
[download] Hd9marUY5GQ.mp4 has already been downloaded
[download] 100% of   21.01MiB
[download] Downloading item 2 of 6
[youtube] Extracting URL: https://www.youtube.com/watch?v=81fShK4roew
[youtube] 81fShK4roew: Downloading webpage
[youtube] 81fShK4roew: Downloading ios player API JSON
[youtube] 81fShK4roew: Downloading web creator player API JSON
[youtube] 81fShK4roew: Downloading m3u8 information
[info] 81fShK4roew: Downloading 1 format(s): 18
[download] 81fShK4roew.mp4 has already been downloaded
[download] 100% of   21.64MiB
[dow

In [29]:
# Import necessary libraries
import cv2
import torch
import numpy as np
from deep_sort_realtime.deepsort_tracker import DeepSort
from ultralytics import YOLO

In [30]:
# Initialize YOLOv8 model (or any other model of choice)
model = YOLO('yolov8n.pt')


In [31]:
# Initialize DeepSORT tracker
deepsort = DeepSort(max_age=30, n_init=3, nms_max_overlap=1.0, max_iou_distance=0.7)

In [32]:
import numpy as np

def process_frame(frame):
    # Perform detection
    results = model(frame)
    detections = results[0].boxes.data.cpu().numpy()

    # Extract bounding boxes and confidence scores
    bbox_xywh = []
    confidences = []
    for *xyxy, conf, cls in detections:
        if int(cls) == 0:  # Class 0 is 'person' in COCO dataset
            x1, y1, x2, y2 = map(int, xyxy)
            w = x2 - x1
            h = y2 - y1
            x_center = x1 + w / 2.0
            y_center = y1 + h / 2.0
            bbox_xywh.append([x_center, y_center, w, h])
            confidences.append(float(conf))

    bbox_xywh = np.array(bbox_xywh, dtype=np.float32)
    confidences = np.array(confidences)

    # Debugging: Print bounding boxes and their shape
    print("Bounding boxes:", bbox_xywh)
    print("Bounding box shape:", bbox_xywh.shape)
    print("Confidences:", confidences)
    print("Confidences shape:", confidences.shape)

    # Format detections for DeepSORT
    detections = [([x, y, w, h], conf, -1) for [x, y, w, h], conf in zip(bbox_xywh, confidences)]

    print("Formatted detections:", detections)

    # Perform tracking with DeepSORT if there are any detections
    if len(detections) > 0:
        try:
            outputs = deepsort.update_tracks(detections, embeds=None, frame=frame)
        except Exception as e:
            print(f"Error in DeepSORT update: {e}")
            outputs = []
    else:
        outputs = []

    # Draw bounding boxes and labels
    for track in outputs:
        if not track.is_confirmed():
            continue
        bbox = track.to_tlbr()
        track_id = track.track_id
        cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (0, 255, 0), 2)
        cv2.putText(frame, f"ID: {track_id}", (int(bbox[0]), int(bbox[1]) - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

    return frame

In [33]:
def process_video(input_video_path, output_video_path):
    video_capture = cv2.VideoCapture(input_video_path)
    width = int(video_capture.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(video_capture.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = int(video_capture.get(cv2.CAP_PROP_FPS))

    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(output_video_path, fourcc, fps, (width, height))

    frame_count = 0
    while video_capture.isOpened():
        ret, frame = video_capture.read()
        if not ret:
            break

        frame_count += 1
        if frame_count % 100 == 0:
            print(f"Processing frame {frame_count}")

        try:
            processed_frame = process_frame(frame)
            out.write(processed_frame)
        except Exception as e:
            print(f"Error processing frame {frame_count}: {e}")
            break

    video_capture.release()
    out.release()
    print(f"Finished processing {input_video_path}")


In [34]:
# List of downloaded video paths (assuming they are in /content/)
video_paths = [
    '/content/1YqVEVbXQ1c.mp4',
    '/content/7pN6ydLE4EQ.mp4',
    '/content/81fShK4roew.mp4',
    '/content/9qjk-Sq415s.mp4',
    '/content/DI25kGJis0w.mp4',
    '/content/E8Wgwg3F4X0.mp4',
    '/content/F6iqlW6ovZc.mp4',
    '/content/GNVTuLHdeSo.mp4',
    '/content/HGz2M8kvWqM.mp4',
    '/content/Hd9marUY5GQ.mp4',
    '/content/N7TBbWHB01E.mp4',
    '/content/RKOZbT0ftL4.mp4',
    '/content/RzI6Ar5mu2Q.mp4',
    '/content/SWtmkjd45so.mp4',
    '/content/V9YDDpo9LWg.mp4',
    '/content/Wd3X_3C0et8.mp4',
    '/content/aWV7UUMddCU.mp4',
    '/content/aulLej6Z6W8.mp4',
    '/content/ckZQbQwM3oU.mp4',
    '/content/f6wqlpG9rd0.mp4',
    '/content/rrLhFZG6iQY.mp4',
    '/content/rvIPH4ccfpI.mp4',
]


In [None]:
for video_path in video_paths:
    output_video_path = video_path.replace('.webm', '_output.mp4').replace('.mkv', '_output.mp4').replace('.mp4', '_output.mp4')
    try:
        process_video(video_path, output_video_path)
    except Exception as e:
        print(f"Error processing video {video_path}: {e}")

print("All videos processed!")

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Confidences: [    0.85012     0.83595]
Confidences shape: (2,)
Formatted detections: [([232.0, 184.5, 150.0, 271.0], 0.8501189351081848, -1), ([256.5, 213.0, 75.0, 218.0], 0.8359506726264954, -1)]

0: 480x640 2 persons, 1 car, 2 chairs, 1 refrigerator, 216.2ms
Speed: 3.4ms preprocess, 216.2ms inference, 1.4ms postprocess per image at shape (1, 3, 480, 640)
Bounding boxes: [[      231.5         184         149         270]
 [        254         213          76         220]]
Bounding box shape: (2, 4)
Confidences: [    0.86041     0.83576]
Confidences shape: (2,)
Formatted detections: [([231.5, 184.0, 149.0, 270.0], 0.8604108095169067, -1), ([254.0, 213.0, 76.0, 220.0], 0.8357559442520142, -1)]

0: 480x640 2 persons, 1 car, 2 chairs, 1 refrigerator, 199.1ms
Speed: 3.7ms preprocess, 199.1ms inference, 1.3ms postprocess per image at shape (1, 3, 480, 640)
Bounding boxes: [[      231.5       185.5         149         271]
 [  