<a href="https://colab.research.google.com/github/Keeron1/com.mcast.research_design_I_2025.spiteri_keeron/blob/main/src/notebook.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# 1. Install Packages

In [120]:
# Detector dependencies (YOLO via ultralytics). Versions are not pinned, so the
# environment depends on what pip resolves on the day the cell is run.
!pip install torch torchvision opencv-python ultralytics
# Remove any checkout left behind by a previous run so the clone below starts clean.
!rm -rf /content/com.mcast.research_design_I_2025.spiteri_keeron

# Clone DeepSORT and YOLO model
# -n (no checkout) + --depth=1 + --filter=tree:0 fetches as little as possible up front;
# the sparse-checkout below then restricts the working tree to the two folders needed.
# NOTE: the clone target is relative to the current directory while the %cd that follows
# is absolute, so this cell assumes the kernel's working directory is /content.
!git clone -n --depth=1 --filter=tree:0 \
  https://github.com/Keeron1/com.mcast.research_design_I_2025.spiteri_keeron.git
%cd /content/com.mcast.research_design_I_2025.spiteri_keeron
!git sparse-checkout set --no-cone src/deep_sort_pytorch src/yolo
!git checkout
# Copy folders
# rsync moves both folders one level up (next to the repo directory);
# --remove-source-files deletes each source file once it has been copied.
!rsync -av --remove-source-files src/deep_sort_pytorch/ ../deep_sort_pytorch/
!rsync -av --remove-source-files src/yolo/ ../yolo/
%cd ..
!rm -rf com.mcast.research_design_I_2025.spiteri_keeron # Delete repo directory

# Requirements shipped with the copied deep_sort_pytorch folder (path is relative to
# the directory reached by the `%cd ..` above).
!pip install -r deep_sort_pytorch/requirements.txt

Cloning into 'com.mcast.research_design_I_2025.spiteri_keeron'...
remote: Enumerating objects: 1, done.[K
remote: Counting objects: 100% (1/1), done.[K
Receiving objects: 100% (1/1), done.
remote: Total 1 (delta 0), reused 1 (delta 0), pack-reused 0 (from 0)[K
/content/com.mcast.research_design_I_2025.spiteri_keeron
remote: Enumerating objects: 12, done.[K
remote: Counting objects: 100% (12/12), done.[K
remote: Compressing objects: 100% (10/10), done.[K
remote: Total 12 (delta 0), reused 11 (delta 0), pack-reused 0 (from 0)[K
Receiving objects: 100% (12/12), 2.09 KiB | 2.09 MiB/s, done.
remote: Enumerating objects: 41, done.[K
remote: Counting objects: 100% (41/41), done.[K
remote: Compressing objects: 100% (39/39), done.[K
remote: Total 41 (delta 0), reused 40 (delta 0), pack-reused 0 (from 0)[K
Receiving objects: 100% (41/41), 86.80 MiB | 34.36 MiB/s, done.
Updating files: 100% (44/44), done.
Your branch is up to date with 'origin/main'.
sending incremental file list
./
.g

# 2. Import Libraries

In [184]:
from google.colab.patches import cv2_imshow # cv2.imshow("title", frane) doesn't work in Colab so this is the fix cv2_imshow(frame)
from IPython.display import HTML, Video, display
from base64 import b64encode
from ultralytics import YOLO
import cv2
import glob
import json
import os
import re

# 3. Config

In [122]:
# --- Paths ---
dataset_path = "/content/dataset"
dataset_data_path = f"{dataset_path}/data.yaml"
idx_to_class_path = "/content/idx_to_class.json"
output_video_path = "/content/output.mp4"

# --- Object detector ---
model = YOLO("/content/yolo/best.pt")
model.info()  # prints the model summary

# --- DeepSORT settings (merged into the tracker config in section 8.1) ---
deep_sort_cfg = {
    "USE_FASTREID": False,
    "DEEPSORT": {
        "REID_CKPT": "deep_sort_pytorch/deep_sort/deep/checkpoint/ckpt.t7",
        # Maximum cosine distance
        "MAX_DIST": 0.2,
        # Minimum detector confidence
        "MIN_CONFIDENCE": 0.5,
        # Maximum IoU for suppressing overlapping boxes
        "NMS_MAX_OVERLAP": 0.5,
        # Max IoU distance for motion-only matching in the cascade
        # (the original note mentions 0.7, presumably an earlier value)
        "MAX_IOU_DISTANCE": 0.5,
        # Number of consecutive frames a track is kept "alive" without matching
        "MAX_AGE": 70,
        # Number of frames a track must be consistently matched before confirmed
        "N_INIT": 3,
        # Maximum number of appearance features to store per track
        "NN_BUDGET": 100,
    },
}

Model summary: 169 layers, 25,856,899 parameters, 0 gradients, 79.1 GFLOPs


# 4. Download Dataset

## 4.1 Download

In [67]:
# Location of the dataset archive that section 4.2 extracts.
# NOTE: this cell only records the path; nothing here downloads the file, so the zip
# must already exist at this location before the unzip step is run. The file name
# (including its spelling) has to match the archive exactly.
dataset_download_path = "/content/pedestrain-data.zip"

## 4.2 Unzip

In [None]:
# Extract the archive quietly (-q) into dataset_path, then delete the zip.
# Because the archive is removed afterwards, re-running this cell requires the zip
# to be placed at dataset_download_path again.
!unzip -q {dataset_download_path} -d {dataset_path}
!rm -rf {dataset_download_path}

# 5. Functions

## 5.1 Play Video

In [198]:
def play_video(video_path):
  """Re-encode a video with ffmpeg and embed the result in the cell output.

  An ``.avi`` input is first converted to an H.264 ``.mp4`` written next to
  the source file. That file (or the input itself for any other extension)
  is then re-encoded to ``<stem>_compressed.mp4``, which is what gets
  displayed (the uncompressed video cannot be played in Colab).

  Args:
    video_path: Path of the video to show.

  Raises:
    subprocess.CalledProcessError: If an ffmpeg invocation exits non-zero.
    FileNotFoundError: If the ``ffmpeg`` executable is not on PATH.
  """
  import subprocess  # local import: only this helper shells out

  # splitext() splits on the LAST dot only, so directory names that contain
  # dots no longer corrupt the stem/extension pair.
  stem, extension = os.path.splitext(video_path)
  compressed_video_path = f"{stem}_compressed.mp4"
  source_path = video_path

  if extension.lower() == ".avi":
    source_path = f"{stem}.mp4"
    # Argument lists (no shell) keep paths with spaces or shell metacharacters
    # intact; -y overwrites output left by a previous run instead of aborting.
    subprocess.run(
        ["ffmpeg", "-y", "-i", video_path,
         "-c:v", "libx264", "-crf", "20", "-preset", "slow",
         "-pix_fmt", "yuv420p", source_path],
        check=True,
    )

  # Compress the mp4. The input is the converted/original file, not
  # `video_path[0]` (the first character of the path) as before.
  subprocess.run(
      ["ffmpeg", "-y", "-i", source_path, "-vcodec", "libx264", compressed_video_path],
      check=True,
  )

  display(Video(compressed_video_path, embed=True, width=640))

# 6. Train Model

In [None]:
# Train the loaded detector on the dataset described by data.yaml.
results = model.train(
    data=dataset_data_path,
    epochs=50,
    imgsz=640,
)

# 7. Test Model

In [None]:
# To test freshly trained weights instead of the loaded ones, reload them first:
# model = YOLO("runs/detect/train/weights/best.pt")

# Run inference on the sample video; save=True stores the rendered predictions
# (section 7.1.1 looks for them under /content/runs/detect).
# Alternative source (validation images): dataset_path + "/valid/images"
results = model.predict(
    source="people.mp4",
    save=True,
    verbose=True,
)

## 7.1 Display Results

### 7.1.1 Play Results Video

In [None]:
base_dir = "/content/runs/detect"

# Gather (folder name, trailing number) for every "predict*" directory under
# base_dir; a folder without a numeric suffix is given the number 0.
dirs = []
for entry in os.listdir(base_dir):
  if not entry.startswith('predict'):
    continue
  if not os.path.isdir(os.path.join(base_dir, entry)):
    continue
  suffix = re.search(r"(\d+)$", entry)
  dirs.append((entry, int(suffix.group(1)) if suffix else 0))

if dirs:
  # The folder with the largest number is treated as the latest run
  dirs.sort(key=lambda item: item[1], reverse=True)
  latest_pred_dir = dirs[0][0]
  latest_pred_dir_path = os.path.join(base_dir, latest_pred_dir)

  # Play only the first .avi file found in that folder
  first_avi = next(
      (name for name in os.listdir(latest_pred_dir_path) if name.endswith('.avi')),
      None,
  )
  if first_avi is not None:
    play_video(os.path.join(latest_pred_dir_path, first_avi))

### 7.1.2 From predict function

In [None]:
# Process results list
for result in results[:10]:
    boxes = result.boxes  # Boxes object for bounding box outputs
    # print(boxes.data.tolist()) # x1, y1, x2, y2, conf score, class id
    masks = result.masks  # Masks object for segmentation masks outputs
    keypoints = result.keypoints  # Keypoints object for pose outputs
    probs = result.probs  # Probs object for classification outputs
    obb = result.obb  # Oriented boxes object for OBB outputs
    result.show()  # display to screen
    # result.save(filename="result.jpg")  # save to disk

### 7.1.3 From predict folder

In [202]:
import glob
from IPython.display import Image, display

# Show at most 10 of the .jpg files saved by a prediction run, with a blank
# line printed after each one as spacing.
# NOTE: only the un-numbered "predict" folder is searched, and the path is
# relative to the current working directory.
prediction_images = glob.glob("runs/detect/predict/*.jpg")
for image_path in prediction_images[:10]:
  preview = Image(filename=image_path, height=400)
  display(preview)
  print("\n")

# 8. Run Model

## 8.1 Initialize DeepSORT

In [None]:
import torch  # installed in section 1; used here only to detect a GPU

from deep_sort_pytorch.deepsort import DeepSortTracker
from deep_sort_pytorch.utils.parser import get_config
from deep_sort_pytorch.utils.draw import draw_boxes
from deep_sort_pytorch.utils.tools import generate_idx_to_class

# Class-index -> class-name lookup: reuse the JSON file when it exists,
# otherwise build the mapping from the dataset's data.yaml.
if os.path.isfile(idx_to_class_path):
  with open(idx_to_class_path, 'r') as f:
    idx_to_class = json.load(f)
else:
  idx_to_class = generate_idx_to_class(dataset_data_path)

# Start from the parser's config object and merge the settings from section 3
cfg = get_config()
cfg.merge_from_dict(deep_sort_cfg)

# Initialize DeepSORT tracker.
# Request CUDA only when a GPU is actually available so this cell also runs on
# a CPU-only runtime; on a GPU runtime this is identical to use_cuda=True.
deep_sort_tracker = DeepSortTracker(cfg, use_cuda=torch.cuda.is_available())

## 8.2 Images

In [None]:
# Loop through images/frames
for img_path in glob.glob(dataset_path + "/valid/images/*.jpg"):

    frame = cv2.imread(img_path)
    if frame is None:
        continue

    # Perform object detection
    results = model.predict(source=frame, verbose=True)

    # Prepare detections for DeepSORT
    bbox_xywh = results[0].boxes.xywh.cpu().numpy()
    conf = results[0].boxes.conf.cpu().numpy()
    cls_ids = results[0].boxes.cls.cpu().numpy()

    # Update tracker
    outputs, _ = deep_sort_tracker.update(bbox_xywh, conf, cls_ids, frame) #outputs bbox, track id, class

    # Draw results
    if len(outputs) > 0:
      bbox_xyxy = outputs[:, :4] # Bounding Boxes
      identities = outputs[:, -1] # Unique Track IDs
      cls = outputs[:, -2] # Classes

      # Get class names
      names = [idx_to_class[str(label)] for label in cls]
      # Draw bounding boxes
      ori_im = draw_boxes(frame, bbox_xyxy, names, identities)
      # Display the frame
      cv2_imshow(ori_im)

    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cv2.destroyAllWindows()

## 8.3 Video

### 8.3.1 Run

In [None]:
save_video = True    # write annotated frames to output_video_path
play_frames = False  # also show every annotated frame inline

input_video_path = "/content/people.mp4"
cap = cv2.VideoCapture(input_video_path) # Load video
out = None # VideoWriter; only created when save_video is set

try:
  # Fail loudly instead of silently producing an empty output file
  if not cap.isOpened():
    raise RuntimeError(f"Could not open video: {input_video_path}")

  ret, frame = cap.read() # Read first frame

  if save_video:
    # Retrieve frame dimensions and frames per second (fps)
    frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = cap.get(cv2.CAP_PROP_FPS)

    # Define the codec and create VideoWriter
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')  # Codec for .mp4 files
    out = cv2.VideoWriter(output_video_path, fourcc, fps, (frame_width, frame_height))

  while ret:
    # Perform object detection
    results = model.predict(source=frame, verbose=True)

    # Prepare detections for DeepSORT
    bbox_xywh = results[0].boxes.xywh.cpu().numpy()
    conf = results[0].boxes.conf.cpu().numpy()
    cls_ids = results[0].boxes.cls.cpu().numpy()

    # Update tracker
    outputs, _ = deep_sort_tracker.update(bbox_xywh, conf, cls_ids, frame) #outputs bbox, track id, class

    # Default to the current frame. Previously `ori_im` was assigned only when
    # the tracker returned tracks, so a frame without tracks either raised
    # NameError (first frame) or re-used an image from an earlier frame/cell.
    ori_im = frame

    # Draw results
    if len(outputs) > 0:
      bbox_xyxy = outputs[:, :4] # Bounding Boxes
      identities = outputs[:, -1] # Unique Track IDs
      cls = outputs[:, -2] # Classes

      # Get class names
      names = [idx_to_class[str(label)] for label in cls]
      # Draw bounding boxes
      ori_im = draw_boxes(frame, bbox_xyxy, names, identities)

    if save_video: out.write(ori_im) # Write current frame to output
    if play_frames: cv2_imshow(ori_im) # output current frame

    ret, frame = cap.read() # Read next frame

    if cv2.waitKey(1) & 0xFF == ord('q'):
      break
finally:
  # Always release the capture/writer, even if detection or tracking raised,
  # so the output file is closed and can be played afterwards.
  cap.release()
  if out is not None: out.release()
  cv2.destroyAllWindows()

### 8.3.2 Play Video

In [None]:
# Re-encode and embed the tracked video written to output_video_path in section 8.3.1.
play_video(output_video_path)