In [2]:
# Install all required Python packages
!python -m pip install --upgrade pip
!pip install --quiet ultralytics supervision "torch>=1.8.0" "torchvision>=0.9.0" scikit-learn pycocotools

Collecting pip
  Downloading pip-25.2-py3-none-any.whl.metadata (4.7 kB)
Downloading pip-25.2-py3-none-any.whl (1.8 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.8/1.8 MB[0m [31m20.6 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pip
  Attempting uninstall: pip
    Found existing installation: pip 24.1.2
    Uninstalling pip-24.1.2:
      Successfully uninstalled pip-24.1.2
Successfully installed pip-25.2


In [7]:
import os
import json
from ultralytics import YOLO
import supervision as sv

# --- CONFIGURATION ---
# IMPORTANT: Update these paths to match your uploaded files.

# Path to your fine-tuned model weights file.
MODEL_PATH = '/content/best (1).pt'

# Path to your test video file.
VIDEO_PATH = '/content/14414218_1080_1920_60fps.mp4'

# This is the final output file required by the assignment.
OUTPUT_JSON_PATH = '/content/results.json'


# --- VIDEO TRACKING SCRIPT ---
print(f"Loading fine-tuned model: {MODEL_PATH}")
model = YOLO(MODEL_PATH)

# Initialize the ByteTrack tracker.
tracker = sv.ByteTrack()

# Open the video file and prepare to loop through its frames.
frames_generator = sv.get_video_frames_generator(VIDEO_PATH)
tracking_results = []

print(f"Processing video: {VIDEO_PATH}")
# Loop through each frame of the video.
for frame_number, frame in enumerate(frames_generator):
    # Get model predictions for the current frame.
    results = model(frame)[0]
    detections = sv.Detections.from_ultralytics(results)

    # Update the tracker with the detections from the current frame.
    tracked_detections = tracker.update_with_detections(detections=detections)

    # Loop through all the tracked objects in the current frame.
    for detection in tracked_detections:
        xyxy, mask, confidence, class_id, tracker_id, _ = detection

        # Get the class name from the model's list of names.
        class_name = model.model.names[int(class_id)]

        # Append the tracking data to our results list.
        tracking_results.append({
            "frame_number": frame_number,
            "tracker_id": int(tracker_id),
            "class": class_name,
            "bounding_box": [int(coord) for coord in xyxy]
        })

# Save the complete list of tracking data to the output JSON file.
with open(OUTPUT_JSON_PATH, 'w') as f:
    json.dump(tracking_results, f, indent=4)

print("\n✅ Video processing complete!")
print(f"Tracking results have been saved to: {OUTPUT_JSON_PATH}")

Loading fine-tuned model: /content/best (1).pt
Processing video: /content/14414218_1080_1920_60fps.mp4

0: 640x384 7 Vehicles, 198.9ms
Speed: 3.9ms preprocess, 198.9ms inference, 16.5ms postprocess per image at shape (1, 3, 640, 384)

0: 640x384 7 Vehicles, 178.2ms
Speed: 4.5ms preprocess, 178.2ms inference, 15.6ms postprocess per image at shape (1, 3, 640, 384)

0: 640x384 7 Vehicles, 177.9ms
Speed: 5.6ms preprocess, 177.9ms inference, 15.5ms postprocess per image at shape (1, 3, 640, 384)

0: 640x384 6 Vehicles, 174.0ms
Speed: 5.8ms preprocess, 174.0ms inference, 13.0ms postprocess per image at shape (1, 3, 640, 384)

0: 640x384 6 Vehicles, 183.8ms
Speed: 5.1ms preprocess, 183.8ms inference, 16.5ms postprocess per image at shape (1, 3, 640, 384)

0: 640x384 6 Vehicles, 250.6ms
Speed: 5.9ms preprocess, 250.6ms inference, 26.3ms postprocess per image at shape (1, 3, 640, 384)

0: 640x384 6 Vehicles, 269.4ms
Speed: 4.4ms preprocess, 269.4ms inference, 22.2ms postprocess per image at sha


KeyboardInterrupt



In [5]:
print(dir(tracker))

['__class__', '__delattr__', '__dict__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattribute__', '__getstate__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__le__', '__lt__', '__module__', '__ne__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__sizeof__', '__str__', '__subclasshook__', '__weakref__', 'det_thresh', 'external_id_counter', 'frame_id', 'internal_id_counter', 'kalman_filter', 'lost_tracks', 'max_time_lost', 'minimum_consecutive_frames', 'minimum_matching_threshold', 'removed_tracks', 'reset', 'shared_kalman', 'track_activation_threshold', 'tracked_tracks', 'update_with_detections', 'update_with_tensors']
