In [1]:
import torch
import cv2
import csv
import json
import numpy as np
import pandas as pd
import math
import subprocess
from tqdm import tqdm

In [2]:
# Load the pretrained medium YOLOv5 checkpoint through PyTorch Hub.
YOLO_HUB_REPO = 'ultralytics/yolov5'
YOLO_HUB_ENTRY = 'yolov5m'
model = torch.hub.load(YOLO_HUB_REPO, YOLO_HUB_ENTRY, pretrained=True)

# Put the network in evaluation mode; as the cell's last expression this
# also displays the module structure.
model.eval()

Using cache found in C:\Users\ahmed/.cache\torch\hub\ultralytics_yolov5_master
YOLOv5  2023-4-18 Python-3.9.13 torch-2.0.0+cu118 CUDA:0 (NVIDIA GeForce GTX 1660 SUPER, 6144MiB)



[31m[1mrequirements:[0m C:\Users\ahmed\.cache\torch\hub\requirements.txt not found, check failed.


Fusing layers... 
YOLOv5m summary: 290 layers, 21172173 parameters, 0 gradients
Adding AutoShape... 


AutoShape(
  (model): DetectMultiBackend(
    (model): DetectionModel(
      (model): Sequential(
        (0): Conv(
          (conv): Conv2d(3, 48, kernel_size=(6, 6), stride=(2, 2), padding=(2, 2))
          (act): SiLU(inplace=True)
        )
        (1): Conv(
          (conv): Conv2d(48, 96, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
          (act): SiLU(inplace=True)
        )
        (2): C3(
          (cv1): Conv(
            (conv): Conv2d(96, 48, kernel_size=(1, 1), stride=(1, 1))
            (act): SiLU(inplace=True)
          )
          (cv2): Conv(
            (conv): Conv2d(96, 48, kernel_size=(1, 1), stride=(1, 1))
            (act): SiLU(inplace=True)
          )
          (cv3): Conv(
            (conv): Conv2d(96, 96, kernel_size=(1, 1), stride=(1, 1))
            (act): SiLU(inplace=True)
          )
          (m): Sequential(
            (0): Bottleneck(
              (cv1): Conv(
                (conv): Conv2d(48, 48, kernel_size=(1, 1), stride=(1, 1))
  

In [5]:
def _nearest_detection(point, detections, claimed, max_distance):
    """Return the index of the closest unclaimed detection strictly within max_distance of point, or None."""
    best_idx, best_dist = None, max_distance
    for idx, (center, _box) in enumerate(detections):
        if idx in claimed:
            continue
        dist = math.hypot(center[0] - point[0], center[1] - point[1])
        if dist < best_dist:
            best_idx, best_dist = idx, dist
    return best_idx


def _update_tracks(tracks, detections, next_id, max_distance):
    """Update tracks in place from this frame's detections and return the next unused track id."""
    claimed = set()

    # Iterate over a snapshot of the keys because lost tracks are deleted as we go.
    for object_id in list(tracks):
        last_center, _last_box = tracks[object_id]
        idx = _nearest_detection(last_center, detections, claimed, max_distance)
        if idx is None:
            # No detection close enough: the object is considered lost.
            del tracks[object_id]
        else:
            # Each detection may feed at most one track.
            claimed.add(idx)
            tracks[object_id] = detections[idx]

    # Every detection nobody claimed starts a new track.
    for idx, detection in enumerate(detections):
        if idx not in claimed:
            tracks[next_id] = detection
            next_id += 1

    return next_id


def track_objects(video_path, output_path, frame_cut:int, json_filename,
                  max_distance:float=20, detector=None):
    """Detect and track objects in a video, writing an annotated video and a JSON log.

    Each frame is read with OpenCV, converted to RGB and passed (with the top
    ``frame_cut`` pixel rows sliced off) to the detector. Detections are
    associated with existing tracks by nearest centre point: a track continues
    when an unclaimed detection lies strictly within ``max_distance`` pixels
    of its last centre, otherwise the track is dropped. Unmatched detections
    start new tracks with increasing integer ids. Bounding boxes, centre
    points and ids are drawn on the frame, which is piped as raw BGR bytes to
    an ``ffmpeg`` subprocess that encodes ``output_path`` with libx264.

    Parameters
    ----------
    video_path : str
        Path of the input video, passed to ``cv2.VideoCapture``.
    output_path : str
        Path of the encoded output video (overwritten: ffmpeg runs with ``-y``).
    frame_cut : int
        Number of pixel rows removed from the top of each frame before
        detection; box y-coordinates are shifted back by this amount.
        Assumed to be non-negative and smaller than the frame height.
    json_filename : str
        Path of the JSON file to write. It maps each track id to a list of
        ``{'frame', 'x1', 'y1', 'x2', 'y2'}`` records, one per frame in which
        the track was alive (``frame`` is 1-based). ``json.dump`` serialises
        the integer ids as string keys.
    max_distance : float, optional
        Centre-to-centre pixel distance below which a detection continues an
        existing track. Defaults to 20.
    detector : callable, optional
        Called with an RGB image array; must return an object whose
        ``xyxy[0].tolist()`` yields ``(x1, y1, x2, y2, conf, class)`` rows.
        Defaults to the notebook-level ``model``.

    Raises
    ------
    IOError
        If ``video_path`` cannot be opened.
    """
    if detector is None:
        # Fall back to the detector created in the notebook's setup cell.
        detector = model

    cap = cv2.VideoCapture(video_path)
    try:
        if not cap.isOpened():
            raise IOError(f"Could not open video: {video_path}")

        # Get the video frame rate and dimensions
        fps = cap.get(cv2.CAP_PROP_FPS)
        frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

        # Build the command as an argument list so that an output path
        # containing spaces is passed to ffmpeg as a single argument.
        ffmpeg_cmd = [
            "ffmpeg", "-y",
            "-f", "rawvideo", "-pix_fmt", "bgr24",
            "-s", f"{frame_width}x{frame_height}", "-r", str(fps),
            "-i", "-",
            "-c:v", "libx264", "-preset", "fast", "-crf", "30",
            "-pix_fmt", "nv12", "-an",
            str(output_path),
        ]

        # Open the encoder; frames are streamed to its stdin.
        output_file = subprocess.Popen(ffmpeg_cmd, stdin=subprocess.PIPE)

        # Progress bar (unknown length if the container reports no frame count)
        pbar = tqdm(total=total_frames if total_frames > 0 else None,
                    desc="Processing video", unit="frames")

        count = 0               # 1-based index of the frame being processed
        tracking_objects = {}   # track id -> ((cx, cy), (x1, y1, x2, y2))
        track_id = 0            # next unused track id
        object_dicts = {}       # track id -> list of per-frame box records

        try:
            while True:
                ret, frame = cap.read()
                if not ret:
                    break
                count += 1

                # Detect objects on the cropped RGB frame
                frame_model = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                results = detector(frame_model[frame_cut:])

                detections = []
                for x1, y1, x2, y2, _conf, _class_id in results.xyxy[0].tolist():
                    # Shift y back into full-frame coordinates.
                    x1, y1, x2, y2 = int(x1), int(y1 + frame_cut), int(x2), int(y2 + frame_cut)
                    center = (int((x1 + x2) / 2), int((y1 + y2) / 2))
                    detections.append((center, (x1, y1, x2, y2)))

                    # Draw bounding box around detected object
                    cv2.rectangle(frame, (x1, y1), (x2, y2), (255, 174, 66), 2)

                track_id = _update_tracks(tracking_objects, detections, track_id, max_distance)

                for object_id, (pt, (x1, y1, x2, y2)) in tracking_objects.items():
                    # Record the box that belongs to this track in this frame.
                    object_dicts.setdefault(object_id, []).append({
                        'frame': count,
                        'x1': x1,
                        'y1': y1,
                        'x2': x2,
                        'y2': y2
                    })

                    cv2.circle(frame, pt, 5, (160, 32, 140), -1)
                    cv2.putText(frame, str(object_id), (pt[0], pt[1] - 7), 0, 1, (255, 255, 255), 2)

                # Update the progress bar
                pbar.update(1)

                # Write the annotated frame to the encoder
                output_file.stdin.write(frame.tobytes())
        finally:
            # Always finish the encoder, even if a frame failed mid-way.
            pbar.close()
            try:
                output_file.stdin.close()
            finally:
                output_file.wait()
    finally:
        cap.release()

    # Write object dictionaries to JSON file
    with open(json_filename, 'w') as f:
        json.dump(object_dicts, f, indent=4)


In [6]:
# Track objects in the sample clip; the top 400 pixel rows of every frame are
# excluded from detection.
track_objects(
    video_path='los_angeles.mp4',
    output_path='track_test.mp4',
    frame_cut=400,
    json_filename='tracked_objects.json',
)

Processing video: 100%|██████████████████████████████████████████████████████████| 332/332 [00:12<00:00, 26.59frames/s]


In [None]:
# Load the tracking log back from disk
with open('tracked_objects.json', 'r') as json_file:
    object_dicts = json.load(json_file)

# Render it as indented JSON text and show it
pretty_tracks = json.dumps(object_dicts, indent=4)
print(pretty_tracks)
