<a href="https://colab.research.google.com/github/Hikari006/oa_cogniAble/blob/main/oa_internship.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install --upgrade pytube



In [None]:
!pip install pytubefix
!pip install ultralytics
!pip install deep_sort_realtime




In [None]:
from pytubefix import YouTube
from ultralytics import YOLO
import cv2
from deep_sort_realtime.deepsort_tracker import DeepSort
import numpy as np

def download_youtube_video(url, resolution='720p'):
    """Download a YouTube video at the requested resolution.

    Args:
        url: URL of the YouTube video to download.
        resolution: Resolution label used to filter the available streams
            (for example ``'720p'``).

    Returns:
        The value returned by the selected stream's ``download()`` call; the
        caller in this file passes it on as the path of the video to process.

    Raises:
        ValueError: If the video has no stream at ``resolution``.
    """
    yt = YouTube(url)
    stream = yt.streams.filter(res=resolution).first()
    if stream is None:
        # first() returns None when the filter matches nothing; fail with a
        # clear message instead of an AttributeError on None.download().
        raise ValueError(
            f"No stream with resolution {resolution!r} available for {url}"
        )
    return stream.download()

# YOLO detector built from the 'yolov8n.pt' weights file; used by detect_objects().
model = YOLO("yolov8n.pt")

# Single DeepSort tracker instance (max_age=30); used by track_objects().
tracker = DeepSort(max_age=30)

def detect_objects(frame):
    """Run the module-level YOLO ``model`` on ``frame`` and return its results."""
    return model(frame)

def track_objects(frame, detections):
    """Feed one frame's detections to the module-level DeepSort ``tracker``.

    Args:
        frame: The image the detections were computed on; forwarded to the
            tracker as ``frame``.
        detections: Iterable of ``[x1, y1, x2, y2, confidence, class_id]``
            entries, where the first four values are corner coordinates.

    Returns:
        Whatever ``tracker.update_tracks`` returns for this frame.
    """
    print("Detections before tracking:", detections)

    formatted_detections = []
    for det in detections:
        x1, y1, x2, y2 = (float(v) for v in det[:4])
        confidence = float(det[4])
        class_id = det[5]
        # DeepSort.update_tracks expects each box as [left, top, width, height],
        # not corner coordinates, so convert from x1/y1/x2/y2 here.
        ltwh = [x1, y1, x2 - x1, y2 - y1]
        formatted_detections.append((ltwh, confidence, class_id))

    # Pass the converted detections to the tracker.
    return tracker.update_tracks(formatted_detections, frame=frame)

def process_video(video_path, output_path='output_video.mp4'):
    """Detect and track objects in a video and write an annotated copy.

    Each frame is passed through ``detect_objects`` and ``track_objects``;
    every confirmed track is drawn as a green rectangle labelled with its
    track ID, and the frame is appended to the output video.

    Args:
        video_path: Path of the input video.
        output_path: Path of the annotated ``mp4v``-encoded output video.

    Raises:
        IOError: If the input video cannot be opened.
    """
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        cap.release()
        raise IOError(f"Could not open video: {video_path}")

    frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

    # Keep the source frame rate so the output plays at the original speed;
    # fall back to 30 when the container does not report a usable value.
    fps = cap.get(cv2.CAP_PROP_FPS)
    if not fps > 0:
        fps = 30

    out = cv2.VideoWriter(
        output_path,
        cv2.VideoWriter_fourcc(*'mp4v'),
        fps,
        (frame_width, frame_height),
    )

    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            results = detect_objects(frame)
            detections = []

            # Collect [x1, y1, x2, y2, confidence, class_id] for every box.
            if results and results[0].boxes:
                for box in results[0].boxes:
                    x1, y1, x2, y2 = box.xyxy[0].cpu().numpy()
                    confidence = box.conf.cpu().numpy()[0]
                    class_id = box.cls.cpu().numpy()[0]
                    detections.append([x1, y1, x2, y2, confidence, class_id])

            tracked_objects = track_objects(frame, detections)

            for obj in tracked_objects:
                # Skip tentative tracks so only confirmed ones are drawn.
                if not obj.is_confirmed():
                    continue
                left, top, right, bottom = (int(v) for v in obj.to_tlbr()[:4])
                label = f"ID: {obj.track_id}"
                cv2.rectangle(frame, (left, top), (right, bottom), (0, 255, 0), 2)
                cv2.putText(frame, label, (left, top - 10),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

            out.write(frame)
    finally:
        # Release the capture and writer even if detection/tracking raised,
        # so the output file is finalized and handles are not leaked.
        cap.release()
        out.release()
        cv2.destroyAllWindows()


if __name__ == "__main__":
    # Script entry point: download the sample video, then run detection and
    # tracking on it (the annotated video goes to process_video's default
    # output path).
    video_url = "https://www.youtube.com/watch?v=V9YDDpo9LWg"
    video_path = download_youtube_video(video_url)
    process_video(video_path)


Downloading https://github.com/ultralytics/assets/releases/download/v8.2.0/yolov8n.pt to 'yolov8n.pt'...


100%|██████████| 6.25M/6.25M [00:00<00:00, 282MB/s]


[1;30;43mStreaming output truncated to the last 5000 lines.[0m

0: 384x640 2 persons, 2 chairs, 7 books, 156.3ms
Speed: 3.0ms preprocess, 156.3ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)
Detections before tracking: [[699.07776, 50.192566, 1178.3055, 703.2127, 0.87159574, 0.0], [277.63464, 77.08905, 524.382, 698.8041, 0.7860043, 0.0], [127.46619, 308.5424, 381.58844, 701.34216, 0.4212057, 56.0], [463.46118, 4.6575165, 513.4042, 91.657715, 0.35240635, 73.0], [524.0188, 3.3043518, 590.30396, 83.45822, 0.34405118, 73.0], [476.34167, 4.797638, 527.2235, 90.657074, 0.33482113, 73.0], [0.048965454, 309.86304, 167.99129, 563.92126, 0.31602913, 56.0], [489.77417, 3.938652, 541.5112, 88.00941, 0.29981202, 73.0], [558.4741, 1.2077179, 609.7998, 81.12764, 0.29512137, 73.0], [505.08646, 2.7181854, 571.56226, 85.96039, 0.29310068, 73.0], [456.3602, 4.8349876, 494.6243, 92.89484, 0.2929936, 73.0]]

0: 384x640 2 persons, 2 chairs, 9 books, 172.2ms
Speed: 2.9ms preprocess, 172