In [None]:
import torch
import torchvision
import cv2
import numpy as np
from pathlib import Path

from boxmot import BotSort

# Load a pre-trained Faster R-CNN model
device = torch.device('cpu')  # Use 'cuda' if you have a GPU
detector = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
detector.eval().to(device)

# Initialize the tracker
tracker = BotSort(
    reid_weights=Path('osnet_x0_25_msmt17.pt'),  # Path to ReID model
    device=device,  # Use CPU for inference
    half=False
)

# Open the video file
vid = cv2.VideoCapture(0)  # or 'path/to/your.avi

while True:
    # Capture frame-by-frame
    ret, frame = vid.read()

    # If ret is False, it means we have reached the end of the video
    if not ret:
        break

    # Convert frame to tensor and move to device
    frame_tensor = torchvision.transforms.functional.to_tensor(frame).to(device)

    # Perform detection
    with torch.no_grad():
        detections = detector([frame_tensor])[0]

    # Filter the detections (e.g., based on confidence threshold)
    confidence_threshold = 0.5
    dets = []
    for i, score in enumerate(detections['scores']):
        if score >= confidence_threshold:
            bbox = detections['boxes'][i].cpu().numpy()
            label = detections['labels'][i].item()
            conf = score.item()
            dets.append([*bbox, conf, label])

    # Convert detections to numpy array (N X (x, y, x, y, conf, cls))
    dets = np.array(dets)

    # Update the tracker
    res = tracker.update(dets, frame)  # --> M X (x, y, x, y, id, conf, cls, ind)

    # Plot tracking results on the image
    tracker.plot_results(frame, show_trajectories=True)
    
    cv2.imshow('BoXMOT + Torchvision', frame)

    # Simulate wait for key press to continue, press 'q' to exit
    key = cv2.waitKey(1) & 0xFF
    if key == ord('q'):
        break

# Release resources
vid.release()
cv2.destroyAllWindows()

In [1]:
import torch

# Check if CUDA is available
if torch.cuda.is_available():
    print("CUDA is available. Number of devices:", torch.cuda.device_count())
else:
    print("CUDA is not available. Using CPU instead.")

torch.cuda.empty_cache()


CUDA is available. Number of devices: 1


In [1]:
import os
import cv2
import numpy as np
from pathlib import Path
from boxmot import BotSort
import torch
import torchvision

# Initialize the tracker
device = torch.device('cuda:0')  # Use 'cuda' if you have a GPU
tracker = BotSort(
    reid_weights=Path('osnet_x0_25_msmt17.pt'),  # Path to ReID model
    device=device,
    half=False
)

# Open the existing video file
video_path = '/home/oussama/Documents/EPFL/PDS_LUTS/Dataset/DJI_0763.MOV'  # Replace with your video file path
vid = cv2.VideoCapture(video_path)

# Check if video was opened successfully
if not vid.isOpened():
    print("Error opening video file")
    exit()

# Directory containing bounding box files for each frame
bounding_box_dir = '/home/oussama/Documents/EPFL/PDS_LUTS/Dataset/DJI_0763_detection'  # Replace with your bounding box directory

# Video writer setup
output_video_path = '/home/oussama/Documents/EPFL/PDS_LUTS/Dataset/DJI_0763_tracked_output.mp4'
frame_width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
frame_height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
fps = vid.get(cv2.CAP_PROP_FPS)
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
out = cv2.VideoWriter(output_video_path, fourcc, fps, (frame_width, frame_height))

# Function to parse bounding box data for each frame
def parse_bounding_boxes(line):
    data = line.strip().split(',')
    if len(data) < 10:
        return None  # Skip invalid entries
    
    try:
        # Parse bounding box coordinates
        x1, y1, x2, y2, x3, y3, x4, y4 = map(float, data[:8])
        x_min, y_min = min(x1, x2, x3, x4), min(y1, y2, y3, y4)
        x_max, y_max = max(x1, x2, x3, x4), max(y1, y2, y3, y4)
        score = float(data[9])  # Confidence score
        class_id = 0  # Assuming all are vehicles (class_id=0)
        
        return [x_min, y_min, x_max, y_max, score, class_id]
    
    except ValueError:
        return None
    
# Initialize a resizable window
cv2.namedWindow('BoXMOT + Pre-existing Bounding Boxes', cv2.WINDOW_NORMAL)

frame_idx = 0
while True:
    # Capture frame-by-frame from video file
    ret, frame = vid.read()
    if not ret:
        break

    # Read bounding boxes for the current frame from file
    bounding_box_file = os.path.join(bounding_box_dir, f'det_fr_{frame_idx:04d}.txt')
    
    if os.path.isfile(bounding_box_file):
        with open(bounding_box_file, 'r') as f:
            frame_detections = [parse_bounding_boxes(line) for line in f.readlines()]
            frame_detections = [detection for detection in frame_detections if detection is not None]
    else:
        frame_detections = []

    # Convert detections to numpy array format required by BoxMOT
    dets = np.array(frame_detections)
    
    # Update tracker if detections are available
    if dets.size > 0:
        res = tracker.update(dets, frame)

        # Plot tracking results on the frame
        tracker.plot_results(frame, show_trajectories=True)
    
    # Write the processed frame with tracking to the output video
    out.write(frame)
    
    # Show the frame in the resizable window
    cv2.imshow('BoXMOT + Pre-existing Bounding Boxes', frame)

    # Press 'q' to exit
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

    # Move to the next frame
    frame_idx += 1

# Release resources
vid.release()
out.release()  # Release the VideoWriter
cv2.destroyAllWindows()

print(f"Video with tracking saved as '{output_video_path}'")


[32m2024-12-03 01:13:28.833[0m | [1mINFO    [0m | [36mboxmot.utils.torch_utils[0m:[36mselect_device[0m:[36m52[0m - [1mYolo Tracking v11.0.5 🚀 Python-3.10.15 torch-2.2.2+cu121
CUDA:0 (NVIDIA GeForce RTX 4070, 11987MiB)[0m
[32m2024-12-03 01:13:28.857[0m | [32m[1mSUCCESS [0m | [36mboxmot.appearance.reid_model_factory[0m:[36mload_pretrained_weights[0m:[36m183[0m - [32m[1mLoaded pretrained weights from osnet_x0_25_msmt17.pt[0m


[[1.85400000e+03 2.97000000e+02 1.88300000e+03 ... 9.85135496e-01
  0.00000000e+00 0.00000000e+00]
 [2.19100000e+03 3.58000000e+02 2.21900000e+03 ... 9.81923044e-01
  0.00000000e+00 1.00000000e+00]
 [1.86000000e+03 3.68000000e+02 1.88600000e+03 ... 9.81635451e-01
  0.00000000e+00 2.00000000e+00]
 ...
 [2.09800000e+03 1.66100000e+03 2.13800000e+03 ... 6.44052744e-01
  0.00000000e+00 2.53000000e+02]
 [2.59900000e+03 1.10300000e+03 2.66200000e+03 ... 6.40957475e-01
  0.00000000e+00 2.54000000e+02]
 [6.10000000e+02 1.26300000e+03 6.68000000e+02 ... 6.34681106e-01
  0.00000000e+00 2.55000000e+02]]
[[1.85399567e+03 2.96998976e+02 1.88299570e+03 ... 9.83681679e-01
  0.00000000e+00 0.00000000e+00]
 [2.19186347e+03 3.57998742e+02 2.21986351e+03 ... 9.81781363e-01
  0.00000000e+00 2.00000000e+00]
 [1.85999572e+03 3.67998971e+02 1.88599575e+03 ... 9.82877612e-01
  0.00000000e+00 1.00000000e+00]
 ...
 [2.09799662e+03 1.66013104e+03 2.13799667e+03 ... 6.37839019e-01
  0.00000000e+00 2.54000000e+02]

KeyboardInterrupt: 

: 

In [1]:
import os
import cv2
import numpy as np
from pathlib import Path
from boxmot import BotSort
import torch
import shutil
import torchvision

# Initialize the tracker
device = torch.device('cuda:0')  # Use 'cuda' if you have a GPU
tracker = BotSort(
    reid_weights=Path('osnet_x0_25_msmt17.pt'),  # Path to ReID model
    device=device,
    half=False
)

# Open the existing video file
video_path = '/home/oussama/Documents/EPFL/PDS_LUTS/Dataset/DJI_0763.MOV'  # Replace with your video file path
vid = cv2.VideoCapture(video_path)

# Check if video was opened successfully
if not vid.isOpened():
    print("Error opening video file")
    exit()

# Directory containing bounding box files for each frame
bounding_box_dir = '/home/oussama/Documents/EPFL/PDS_LUTS/Dataset/DJI_0763_detection'  # Replace with your bounding box directory

# Directory to save tracking files
tracking_output_dir = '/home/oussama/Documents/EPFL/PDS_LUTS/Dataset/DJI_0763_tracking'  # Replace with desired tracking output directory
os.makedirs(tracking_output_dir, exist_ok=True)  # Create directory if it doesn't exist

# Video writer setup
output_video_path = '/home/oussama/Documents/EPFL/PDS_LUTS/Dataset/DJI_0763_tracked_output.mp4'
frame_width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
frame_height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
fps = vid.get(cv2.CAP_PROP_FPS)
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
out = cv2.VideoWriter(output_video_path, fourcc, fps, (frame_width, frame_height))

# Function to parse bounding box data for each frame
def parse_bounding_boxes(line):
    data = line.strip().split(',')
    if len(data) < 10:
        return None  # Skip invalid entries
    
    try:
        # Parse bounding box coordinates
        x1, y1, x2, y2, x3, y3, x4, y4 = map(float, data[:8])
        x_min, y_min = min(x1, x2, x3, x4), min(y1, y2, y3, y4)
        x_max, y_max = max(x1, x2, x3, x4), max(y1, y2, y3, y4)
        score = float(data[9])  # Confidence score
        class_id = 0  # Assuming all are vehicles (class_id=0)
        
        return [x_min, y_min, x_max, y_max, score, class_id]
    
    except ValueError:
        return None
    
def clear_folder(folder_path):
    """Delete all contents of a folder by removing and recreating it."""
    if os.path.exists(folder_path):
        shutil.rmtree(folder_path)  # Delete the entire folder
    os.makedirs(folder_path)  # Recreate the empty folder

clear_folder(tracking_output_dir)
    
def calculate_bbox_center(x_min, y_min, x_max, y_max):
    """Calculate and round the center of the bounding box."""
    center_x = round((x_min + x_max) / 2)
    center_y = round((y_min + y_max) / 2)
    return center_x, center_y

# Function to compute Intersection over Union (IoU)
def compute_iou(box1, box2):
    """Compute IoU between two bounding boxes"""
    x1_min, y1_min, x1_max, y1_max = box1
    x2_min, y2_min, x2_max, y2_max = box2

    # Determine the coordinates of the intersection rectangle
    x_left = max(x1_min, x2_min)
    y_top = max(y1_min, y2_min)
    x_right = min(x1_max, x2_max)
    y_bottom = min(y1_max, y2_max)

    if x_right < x_left or y_bottom < y_top:
        return 0.0

    # Compute the area of intersection rectangle
    intersection_area = (x_right - x_left) * (y_bottom - y_top)

    # Compute the area of both bounding boxes
    bb1_area = (x1_max - x1_min) * (y1_max - y1_min)
    bb2_area = (x2_max - x2_min) * (y2_max - y2_min)

    # Compute the IoU
    iou = intersection_area / float(bb1_area + bb2_area - intersection_area)
    return iou

# Initialize a resizable window
cv2.namedWindow('BoXMOT + Pre-existing Bounding Boxes', cv2.WINDOW_NORMAL)

frame_idx = 0
while True:
    # Capture frame-by-frame from video file
    ret, frame = vid.read()
    if not ret:
        break

    # Read bounding boxes for the current frame from file
    bounding_box_file = os.path.join(bounding_box_dir, f'det_fr_{frame_idx:04d}.txt')
    
    if os.path.isfile(bounding_box_file):
        with open(bounding_box_file, 'r') as f:
            frame_detections = [parse_bounding_boxes(line) for line in f.readlines()]
            frame_detections = [detection for detection in frame_detections if detection is not None]
    else:
        frame_detections = []

    # Convert detections to numpy array format required by BoxMOT
    dets = np.array(frame_detections)
    
    # Update tracker if detections are available
    if dets.size > 0:
        res = tracker.update(dets, frame)

        # Plot tracking results on the frame
        tracker.plot_results(frame, show_trajectories=True)

        # Save tracking results to a text file
        tracking_file = os.path.join(tracking_output_dir, f'track_fr_{frame_idx:04d}.txt')
        with open(tracking_file, 'w') as f_track:
            for track in res:
                x_min, y_min, x_max, y_max, track_id = track[:5]
                # Attempt to find matching detection to get the confidence score
                max_iou = 0
                confidence = None
                for det in dets:
                    det_x_min, det_y_min, det_x_max, det_y_max, det_confidence, _ = det
                    iou = compute_iou([x_min, y_min, x_max, y_max], [det_x_min, det_y_min, det_x_max, det_y_max])
                    if iou > max_iou:
                        max_iou = iou
                        confidence = det_confidence
                # Write tracking information to file
                # Format: id,x_min,y_min,x_max,y_max,confidence
                if confidence is not None:
                    center_x, center_y = calculate_bbox_center(x_min, y_min, x_max, y_max)
                    f_track.write(f"{int(track_id)},{center_x},{center_y},{confidence}\n")
                else:
                    center_x, center_y = calculate_bbox_center(x_min, y_min, x_max, y_max)
                    f_track.write(f"{int(track_id)},{center_x},{center_y}\n")
    else:
        res = []

    # # Write the processed frame with tracking to the output video
    # out.write(frame)
    
    # # Show the frame in the resizable window
    # cv2.imshow('BoXMOT + Pre-existing Bounding Boxes', frame)

    # # Press 'q' to exit
    # if cv2.waitKey(1) & 0xFF == ord('q'):
    #     break

    # Move to the next frame
    frame_idx += 1

# Release resources
vid.release()
out.release()  # Release the VideoWriter
cv2.destroyAllWindows()

print(f"Video with tracking saved as '{output_video_path}'")
print(f"Tracking data saved in '{tracking_output_dir}'")


[32m2024-12-10 12:05:11.192[0m | [1mINFO    [0m | [36mboxmot.utils.torch_utils[0m:[36mselect_device[0m:[36m52[0m - [1mYolo Tracking v11.0.5 🚀 Python-3.10.15 torch-2.2.2+cu121
CUDA:0 (NVIDIA GeForce RTX 4070, 11987MiB)[0m
[32m2024-12-10 12:05:11.235[0m | [32m[1mSUCCESS [0m | [36mboxmot.appearance.reid_model_factory[0m:[36mload_pretrained_weights[0m:[36m183[0m - [32m[1mLoaded pretrained weights from osnet_x0_25_msmt17.pt[0m


Video with tracking saved as '/home/oussama/Documents/EPFL/PDS_LUTS/Dataset/DJI_0763_tracked_output.mp4'
Tracking data saved in '/home/oussama/Documents/EPFL/PDS_LUTS/Dataset/DJI_0763_tracking'
