# SETUP

In [None]:
import os
# Record the working directory the notebook starts in and show it.
HOME = os.getcwd()
print(HOME)

/content


In [None]:
!pip install ultralytics


import ultralytics
from ultralytics import YOLO

In [None]:
!pip install supervision
import supervision as sv


In [None]:
import numpy as np

In [None]:
import pandas as pd

In [None]:
import cv2

## GOOGLE DRIVE MOUNT

In [None]:
from google.colab import drive
# Unmount before mounting -- presumably so that re-running this cell
# remounts Drive cleanly (TODO confirm intent).
drive.flush_and_unmount()
# Mount Google Drive at /content/drive; later cells read from and write to it.
drive.mount('/content/drive')

# RESTORE DATA

In [None]:
# Switch to /content and copy the input video, the trained model weights and
# the ground-truth file from Google Drive into it.
%cd "/content"
!cp "/content/drive/MyDrive/Colab Notebooks/MVI_1482_VIS.avi" .

!cp "/content/drive/MyDrive/Colab Notebooks/maritime-best.pt" .

!cp "/content/drive/MyDrive/Colab Notebooks/gt.txt" .


/content


In [None]:
# Use the local copy made by the restore step (like MODEL_PATH does) instead of
# reading the video through the Google Drive mount.
SOURCE_VIDEO_PATH = "/content/MVI_1482_VIS.avi"

In [None]:
# Path of the model weights file under /content.
MODEL_PATH = "/content/maritime-best.pt"

# Model Import

In [None]:
# Load the YOLO model from the weights file at MODEL_PATH.
model = YOLO(MODEL_PATH)


In [None]:
# dict mapping class_id to class_name, taken from the loaded model
CLASS_NAMES_DICT = model.model.names
print(CLASS_NAMES_DICT)
# class ids to keep in the cells below; id 2 is 'Ferry' in the printed mapping
SELECTED_CLASS_IDS = [2]

{0: 'Boat', 1: 'Buoy', 2: 'Ferry', 3: 'Flying bird-plane', 4: 'Kayak', 5: 'Other', 6: 'Sail boat', 7: 'Speed boat', 8: 'Vessel-ship'}


# MODEL FRAME TEST

In [None]:
# create frame generator
generator = sv.get_video_frames_generator(SOURCE_VIDEO_PATH)
# create instance of BoxAnnotator and LabelAnnotator
box_annotator = sv.BoxAnnotator(thickness=4)
label_annotator = sv.LabelAnnotator(text_thickness=2, text_scale=1.5, text_color=sv.Color.BLACK)
# acquire first video frame
iterator = iter(generator)
frame = next(iterator)
# model prediction on single frame and conversion to supervision Detections
results = model(frame, verbose=False)[0]

# convert to Detections
detections = sv.Detections.from_ultralytics(results)
# only consider class id from selected_classes define above
detections = detections[np.isin(detections.class_id, SELECTED_CLASS_IDS)]

# format custom labels
labels = [
    f"{CLASS_NAMES_DICT[class_id]} {confidence:0.2f}"
    for confidence, class_id in zip(detections.confidence, detections.class_id)
]

# annotate and display frame
annotated_frame = frame.copy()
annotated_frame = box_annotator.annotate(
    scene=annotated_frame, detections=detections)
annotated_frame = label_annotator.annotate(
    scene=annotated_frame, detections=detections, labels=labels)

%matplotlib inline
sv.plot_image(annotated_frame, (16, 16))

# TRACK

In [None]:
def process_video_with_bytetrack(video_path, model, output_file, video_output_path,
                                 selected_class_ids=None, confidence_threshold=0.5):
    """Detect and track the selected classes through a video with ByteTrack.

    Each frame is run through ``model``; detections are filtered by class id
    and confidence and then passed to a ByteTrack tracker. Every tracked box
    is appended to ``output_file`` in MOT Challenge format
    (``frame,id,bb_left,bb_top,bb_width,bb_height,conf,-1,-1,-1``, frames
    numbered from 1) and an annotated copy of the video is written to
    ``video_output_path``.

    Args:
        video_path: Path of the input video.
        model: Callable detection model; ``model(frame)[0]`` must be a result
            accepted by ``sv.Detections.from_ultralytics``.
        output_file: Path of the text file receiving the tracking rows.
        video_output_path: Path of the annotated output video (mp4v codec).
        selected_class_ids: Class ids to track. Defaults to the notebook-level
            ``SELECTED_CLASS_IDS``.
        confidence_threshold: Detections with confidence not strictly above
            this value are dropped before tracking.

    Raises:
        IOError: If ``video_path`` cannot be opened.
    """
    if selected_class_ids is None:
        selected_class_ids = SELECTED_CLASS_IDS

    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        # Without this check a wrong path silently produces empty outputs.
        cap.release()
        raise IOError(f"Could not open video: {video_path}")

    out = None
    try:
        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

        # Keep the fractional frame rate (e.g. 29.97) instead of truncating it;
        # fall back to 30 when the container does not report a usable value.
        fps = cap.get(cv2.CAP_PROP_FPS)
        if not fps > 0:
            fps = 30.0

        # create BYTETracker instance, driven by the actual video frame rate
        byte_tracker = sv.ByteTrack(
            track_activation_threshold=0.25,
            lost_track_buffer=30,
            minimum_matching_threshold=0.7,
            frame_rate=int(round(fps)),
            minimum_consecutive_frames=3)
        # Reset ByteTrack each time, as the original code did
        # (presumably so track ids restart on every run -- TODO confirm).
        byte_tracker.reset()

        out = cv2.VideoWriter(video_output_path,
                              cv2.VideoWriter_fourcc(*'mp4v'),
                              fps, (width, height))

        box_annotator = sv.BoxAnnotator()
        label_annotator = sv.LabelAnnotator(text_thickness=2, text_scale=1.5, text_color=sv.Color.BLACK)

        with open(output_file, 'w') as f:
            frame_id = 1  # MOT Challenge frame numbers start at 1
            while True:
                ret, frame = cap.read()
                if not ret:
                    break

                # verbose=False keeps the model from logging every single frame
                results = model(frame, verbose=False)[0]
                detections = sv.Detections.from_ultralytics(results)

                # Keep only the selected classes above the confidence threshold
                keep = (np.isin(detections.class_id, selected_class_ids)
                        & (detections.confidence > confidence_threshold))
                tracked_detections = byte_tracker.update_with_detections(detections[keep])

                for bbox, track_id, conf in zip(tracked_detections.xyxy,
                                                tracked_detections.tracker_id,
                                                tracked_detections.confidence):
                    x1, y1, x2, y2 = bbox
                    w = x2 - x1
                    h = y2 - y1
                    # MOT Challenge format
                    f.write(f"{frame_id},{track_id},{x1},{y1},{w},{h},{conf},-1,-1,-1\n")

                # Visualize: label each track with its own class name rather
                # than a hard-coded one, so other class selections read correctly
                labels = [
                    f"#{track_id} {results.names[int(class_id)]} ({conf:.2f})"
                    for track_id, class_id, conf in zip(tracked_detections.tracker_id,
                                                        tracked_detections.class_id,
                                                        tracked_detections.confidence)
                ]

                frame = box_annotator.annotate(scene=frame, detections=tracked_detections)
                frame = label_annotator.annotate(scene=frame, detections=tracked_detections, labels=labels)
                out.write(frame)

                frame_id += 1
    finally:
        # Release the capture and writer even if inference or tracking fails
        cap.release()
        if out is not None:
            out.release()



## Run


In [None]:
# Track the source video: tracking rows go to bytetrack_results.txt and the
# annotated video to bytetrack_tracking_output.mp4 (both in the working directory).
process_video_with_bytetrack(SOURCE_VIDEO_PATH, model,
                           'bytetrack_results.txt',
                           'bytetrack_tracking_output.mp4')


## Save outputs to Google Drive and print tracking statistics

In [None]:
output_dir = "/content/drive/MyDrive/Colab Notebooks/bytetrack"
os.makedirs(output_dir, exist_ok=True)


!cp bytetrack_tracking_output.mp4 "/content/drive/MyDrive/Colab Notebooks/bytetrack/"
!cp bytetrack_results.txt "/content/drive/MyDrive/Colab Notebooks/bytetrack/"


# Column names of the MOT Challenge rows written by the tracking step.
mot_columns = ['frame', 'id', 'bb_left', 'bb_top', 'bb_width', 'bb_height', 'conf', 'x', 'y', 'z']

try:
    df = pd.read_csv('bytetrack_results.txt', header=None, names=mot_columns)
except pd.errors.EmptyDataError:
    # The results file is empty when nothing was tracked; report zeros
    # instead of failing the cell.
    df = pd.DataFrame(columns=mot_columns)

print("\nStats:")
# Only frames that have at least one track row appear in the file, so this is
# not the total number of frames in the video.
print(f"Frames with at least one track: {df['frame'].nunique()}")
print(f"Total track count: {df['id'].nunique()}")
print("\nTrack ID counts:")
print(df.groupby('id').size())
