<a href="https://colab.research.google.com/github/Delta-K-rist/kai-ai-model/blob/main/crowd_counting_yolov11.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Repo Git

In [None]:
# Clone own repo

from google.colab import userdata
import os

# Coordinates of the repository to clone
github_username = 'Delta-K-rist'
github_repo_name = 'kai-ai-model'

# Read the access token from Colab's secret store so it is never hardcoded in the notebook
github_token = userdata.get('GITHUB_TOKEN_DELTA')

# Embed the credentials in the HTTPS URL so the clone can authenticate.
# NOTE(review): the token becomes part of the clone URL, which git normally records as the
# remote in kai-ai-model/.git/config — avoid sharing/zipping that folder; confirm and consider
# resetting the remote URL after cloning.
authenticated_github_url = f'https://{github_username}:{github_token}@github.com/{github_username}/{github_repo_name}.git'

# `{...}` is IPython interpolation of the Python variable into the shell command
!git clone {authenticated_github_url}

Cloning into 'kai-ai-model'...
remote: Enumerating objects: 49, done.[K
remote: Counting objects: 100% (11/11), done.[K
remote: Compressing objects: 100% (7/7), done.[K
remote: Total 49 (delta 2), reused 8 (delta 2), pack-reused 38 (from 3)[K
Receiving objects: 100% (49/49), 241.15 MiB | 22.01 MiB/s, done.
Resolving deltas: 100% (2/2), done.


# Installations

In [None]:
# Quietly (-q) install Ultralytics (provides the YOLO class) and the headless OpenCV build
!pip install -q ultralytics opencv-python-headless

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/1.1 MB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m [32m1.1/1.1 MB[0m [31m49.1 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.1/1.1 MB[0m [31m29.7 MB/s[0m eta [36m0:00:00[0m
[?25h

In [None]:
# Install `lap` (presumably needed by the Ultralytics trackers used later — TODO confirm).
# NOTE(review): `ultralytics` is already installed by the previous cell; listing it again is redundant.
!pip install -q ultralytics "lap>=0.5.12"

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/1.7 MB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[90m╺[0m[90m━━━━━━━━━━━[0m [32m1.2/1.7 MB[0m [31m36.7 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.7/1.7 MB[0m [31m33.0 MB/s[0m eta [36m0:00:00[0m
[?25h

# Model Initialization (YOLO11m)

In [None]:
# -----------------------------------------------------------------------------
# STEP 1.2: IMPORTS AND THE `initialize_model` FUNCTION
# -----------------------------------------------------------------------------
import torch
from ultralytics import YOLO

def initialize_model(weights='yolo11m.pt'):
    """
    Loads an Ultralytics YOLO detection model and moves it to the best available device.
    The same model object is later used for tracking via its `track` method.

    Args:
        weights (str): Name or path of the weights file passed to `YOLO(...)`.
            Defaults to 'yolo11m.pt' (the medium YOLO11 variant), which keeps
            existing zero-argument calls working unchanged.

    Returns:
        A dictionary containing the initialized model under the key 'yolo'.
    """
    print("🧠 Initializing YOLOv11 model...")

    # Load the requested weights (YOLO11 *medium* by default).
    # Official weights are downloaded automatically on the first run.
    yolo_model = YOLO(weights)

    # Move model to GPU if available, otherwise CPU
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    yolo_model.to(device)
    print(f"YOLOv11 model loaded on {device}.")

    # Returned as a dictionary so callers index by name ('yolo') and
    # further models can be added later without changing the call sites.
    return {'yolo': yolo_model}

Creating new Ultralytics Settings v0.0.6 file ✅ 
View Ultralytics Settings with 'yolo settings' or at '/root/.config/Ultralytics/settings.json'
Update Settings with 'yolo settings key=value', i.e. 'yolo settings runs_dir=path/to/dir'. For help see https://docs.ultralytics.com/quickstart/#ultralytics-settings.


In [None]:
# -----------------------------------------------------------------------------
# STEP 1.3: TEST THE INITIALIZATION
# -----------------------------------------------------------------------------
# Build the model dictionary (downloads the weights on the first run)
ai_models = initialize_model()

# Print the device of the loaded YOLO model to confirm initialization worked:
# a CUDA device (e.g. 'cuda:0') on a GPU runtime, otherwise 'cpu'
print("\n🔍 Model loaded successfully and is running on:", ai_models['yolo'].device)

🧠 Initializing YOLOv11 model...
[KDownloading https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11m.pt to 'yolo11m.pt': 100% ━━━━━━━━━━━━ 38.8MB 183.0MB/s 0.2s
YOLOv11 model loaded on cuda.

🔍 Model loaded successfully and is running on: cuda:0


# Detection & Tracker

In [None]:
# -----------------------------------------------------------------------------
# STEP 2.1 (UPDATED): THE CORE VIDEO PROCESSING FUNCTION
# -----------------------------------------------------------------------------
import cv2
import numpy as np
import os

def process_video_with_tracker(video_path, model, output_dir):
    """
    Runs ByteTrack-based person tracking over a video, writes an annotated copy
    to `<output_dir>/output_video.mp4`, and collects per-frame statistics.

    Args:
        video_path (str): Path of the video to analyse.
        model: Ultralytics YOLO model exposing `.track(...)`.
        output_dir (str): Directory for the annotated video (created if missing).

    Returns:
        A 5-tuple containing:
        - A list of person counts (number of tracked ids) for each frame.
        - The annotated frame with the highest person count (None if no frames).
        - The original, un-annotated version of that same frame (None if no frames).
        - A list of inference times (in ms) for each frame.
        - A list of box confidences from the frame with the highest count
          (empty when that frame had no tracked boxes).

    Raises:
        IOError: If the video cannot be opened.
    """
    print(f"📹 Starting video processing and annotation for: {video_path}")

    # Probe the video once for the writer settings; always release the handle.
    cap = cv2.VideoCapture(video_path)
    try:
        if not cap.isOpened():
            raise IOError(f"Could not open video: {video_path}")
        frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        # Keep the frame rate as a float: truncating e.g. 29.97 to 29 would
        # change the playback speed of the annotated video.
        fps = cap.get(cv2.CAP_PROP_FPS)
    finally:
        cap.release()

    # Some containers report no usable frame rate; fall back to a sane default
    # so the writer still produces a playable file.
    if not fps > 0:
        fps = 30.0

    os.makedirs(output_dir, exist_ok=True)
    output_video_path = os.path.join(output_dir, "output_video.mp4")
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    writer = cv2.VideoWriter(output_video_path, fourcc, fps, (frame_width, frame_height))

    # stream=True yields one Results object per frame instead of holding them all in RAM.
    results_generator = model.track(source=video_path, persist=True, tracker="bytetrack.yaml", classes=0, stream=True, verbose=False)

    inference_times = []
    frame_by_frame_counts = []
    max_person_count = -1  # -1 so the very first frame always becomes the initial snapshot
    annotated_max_frame = None
    original_max_frame = None
    confidences_at_max = []

    try:
        for frame_results in results_generator:
            annotated_frame = frame_results.plot(
                line_width=1,  # Thinner bounding box
                font_size=0.4  # Smaller font size
            )

            # The 'speed' attribute is a dict: {'preprocess': ms, 'inference': ms, 'postprocess': ms}
            inference_times.append(frame_results.speed['inference'])

            writer.write(annotated_frame)

            # A frame only counts people when the tracker assigned ids to its boxes.
            boxes = frame_results.boxes
            has_tracked_boxes = boxes is not None and boxes.id is not None
            current_count = len(boxes.id) if has_tracked_boxes else 0

            frame_by_frame_counts.append(current_count)

            if current_count > max_person_count:
                max_person_count = current_count
                annotated_max_frame = annotated_frame.copy()
                original_max_frame = frame_results.orig_img.copy()

                # Keep the confidences in sync with the snapshot frame: use the same
                # guard as the count, and never leave values from an earlier frame behind.
                if has_tracked_boxes and boxes.conf is not None:
                    confidences_at_max = boxes.conf.tolist()
                else:
                    confidences_at_max = []
    finally:
        # Finalize the output file even if inference fails part-way through.
        writer.release()

    print(f"✅ Annotated video saved to {output_video_path}")

    return frame_by_frame_counts, annotated_max_frame, original_max_frame, inference_times, confidences_at_max

In [None]:
# -----------------------------------------------------------------------------
# STEP 2.2 (UPDATED): THE SAVE RESULTS FUNCTION
# -----------------------------------------------------------------------------
import os
import json
import cv2

def save_analysis_summary(output_dir, counts, annotated_snapshot, original_snapshot, inference_times, video_basename, confidences_at_max, medium_threshold=15, dangerous_threshold=35):
    """
    Calculates final metrics and saves the output files, including both snapshot versions.

    Files written into `output_dir` (created if missing):
    - 'snapshot_annotated.jpg' (only if `annotated_snapshot` is not None)
    - 'snapshot_original.jpg' (only if `original_snapshot` is not None)
    - 'results.json'

    Args:
        output_dir (str): Destination directory.
        counts (list): Person count for each frame.
        annotated_snapshot: Annotated image array of the busiest frame, or None.
            The caller's array is not modified; the label is drawn on a copy.
        original_snapshot: Un-annotated image array of the same frame, or None.
        inference_times (list): Per-frame inference times in milliseconds.
        video_basename (str): Video file name without extension; used to look up
            the `gerbong_id` written to the JSON.
        confidences_at_max (list): Box confidences of the busiest frame.
        medium_threshold (int): Minimum count reported as "Medium Density".
        dangerous_threshold (int): Minimum count reported as "High Density".

    Returns:
        None
    """
    print(f"💾 Saving analysis to: {output_dir}")

    # Make the function usable on its own, not only after the caller created the folder.
    os.makedirs(output_dir, exist_ok=True)

    # Known video basenames and the id they are reported under.
    gerbong_by_video = {
        'd_1': 'gerbong_3',
        'm_1': 'gerbong_2',
        's_2': 'gerbong_1',
    }
    gerbong_id = gerbong_by_video.get(video_basename, 'gerbong_unknown')

    avg_confidence = 0
    if confidences_at_max:
        avg_confidence = sum(confidences_at_max) / len(confidences_at_max)

    # Plain ints keep the JSON serializable even if numpy integers slip in.
    frame_counts = [int(c) for c in (counts or [])]
    human_count = max(frame_counts, default=0)

    if human_count >= dangerous_threshold:
        crowdness_level = "High Density"
    elif human_count >= medium_threshold:
        crowdness_level = "Medium Density"
    else:
        crowdness_level = "Low Density"

    total_inference_seconds = sum(inference_times) / 1000.0
    avg_inference_ms = sum(inference_times) / len(inference_times) if inference_times else 0

    if annotated_snapshot is not None:
        # Draw on a copy so the caller's frame is left untouched.
        labelled_snapshot = annotated_snapshot.copy()
        text = f"Human Count: {human_count} ({crowdness_level})"
        cv2.putText(labelled_snapshot, text, (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 0, 255), 2, cv2.LINE_AA)
        snapshot_path = os.path.join(output_dir, 'snapshot_annotated.jpg')
        # imwrite reports failure via its return value instead of raising.
        if cv2.imwrite(snapshot_path, labelled_snapshot):
            print(f"📸 Annotated snapshot saved to {snapshot_path}")
        else:
            print(f"⚠️ Could not write annotated snapshot to {snapshot_path}")

    if original_snapshot is not None:
        snapshot_path_orig = os.path.join(output_dir, 'snapshot_original.jpg')
        if cv2.imwrite(snapshot_path_orig, original_snapshot):
            print(f"📸 Original snapshot saved to {snapshot_path_orig}")
        else:
            print(f"⚠️ Could not write original snapshot to {snapshot_path_orig}")

    results_data = {
        'gerbong_id': gerbong_id,
        'max_human_count': human_count,
        'confidence_score': round(avg_confidence, 2),
        'crowdness_level': crowdness_level,
        'performance': {
            'total_inference_seconds': round(total_inference_seconds, 2),
            'average_inference_ms': round(avg_inference_ms, 2),
            # The string 'inf' is kept for the no-timing case so the JSON schema is unchanged.
            'average_fps': round(1000 / avg_inference_ms, 1) if avg_inference_ms > 0 else 'inf'
        },
        'frame_count_data': frame_counts
    }
    json_path = os.path.join(output_dir, 'results.json')
    with open(json_path, 'w', encoding='utf-8') as f:
        json.dump(results_data, f, indent=4)
    print(f"📄 JSON summary saved to {json_path}")

# Preprocessing

In [None]:
import cv2
import os

def preprocess_video(input_path, output_path, target_fps=15, target_width=1280):
    """
    Creates an optimized version of a video for faster model processing.

    Frames are dropped at a fixed interval to approximate `target_fps`, and every
    kept frame is resized to `target_width` with the aspect ratio preserved.

    Args:
        input_path (str): Path to the original video file.
        output_path (str): Path to save the new, preprocessed video. The parent
            directory is created if it does not exist.
        target_fps (int): The desired frames per second. Because whole frames are
            skipped, the written rate is `original_fps / skip_interval`, which
            equals `target_fps` only when the original rate is a multiple of it.
        target_width (int): The desired frame width. Height is scaled automatically.

    Returns:
        str: The path to the newly created video file, or None if the input
        could not be opened/read or the output could not be created.

    Raises:
        ValueError: If `target_fps` or `target_width` is not positive.
    """
    if target_fps <= 0 or target_width <= 0:
        raise ValueError("target_fps and target_width must be positive")

    print(f"🔧 Starting preprocessing for {input_path}...")

    # 1. Open the original video
    cap = cv2.VideoCapture(input_path)
    if not cap.isOpened():
        print("Error: Could not open video.")
        return None

    writer = None
    try:
        # 2. Get original video properties
        original_fps = cap.get(cv2.CAP_PROP_FPS)
        original_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        original_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        if original_width <= 0 or original_height <= 0:
            # Avoids a ZeroDivisionError in the aspect-ratio computation below.
            print("Error: Could not read video dimensions.")
            return None

        # 3. Calculate resampling and resizing parameters
        if original_fps > 0:
            skip_interval = max(1, round(original_fps / target_fps))
            # Stamp the file with the rate actually produced by the frame skipping,
            # so playback speed matches the source (e.g. 25 fps / 2 = 12.5 fps, not 15).
            output_fps = original_fps / skip_interval
        else:
            # Unknown source rate: keep every frame and trust the requested rate.
            skip_interval = 1
            output_fps = float(target_fps)
        aspect_ratio = original_height / original_width
        target_height = max(1, int(target_width * aspect_ratio))

        print(f"Original: {original_width}x{original_height} @ {original_fps:.2f} FPS")
        print(f"Target:   {target_width}x{target_height} @ {output_fps:.2f} FPS (keeping 1 in every {skip_interval} frames)")

        # 4. Initialize the Video Writer for the new video
        output_folder = os.path.dirname(output_path)
        if output_folder:
            os.makedirs(output_folder, exist_ok=True)
        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        writer = cv2.VideoWriter(output_path, fourcc, output_fps, (target_width, target_height))
        if not writer.isOpened():
            print("Error: Could not create output video.")
            return None

        # 5. Loop, Resample, Resize, and Write
        frame_number = 0
        while True:
            ret, frame = cap.read()
            if not ret:
                break  # End of video

            # Only process a frame if it's at the correct interval
            if frame_number % skip_interval == 0:
                writer.write(cv2.resize(frame, (target_width, target_height)))

            frame_number += 1
    finally:
        # 6. Finalize and clean up, even when an error interrupts the loop
        cap.release()
        if writer is not None:
            writer.release()

    print(f"✅ Preprocessing complete. Optimized video saved to: {output_path}")
    return output_path

# Run

In [None]:
import os
import glob
from IPython.display import Image, display, Video
import json

# Folder scanned for input videos, and the root folder that receives one sub-folder per video
INPUT_FOLDER = '/content/kai-ai-model/dataset/test/captured/new'
BASE_OUTPUT_FOLDER = '/content/kai-ai-model/dataset/test result'
os.makedirs(BASE_OUTPUT_FOLDER, exist_ok=True)

# Collect .mp4/.mov files in both lower- and upper-case spellings;
# set() removes duplicates and sorted() gives a stable processing order
mp4_files = glob.glob(os.path.join(INPUT_FOLDER, '*.mp4')) + glob.glob(os.path.join(INPUT_FOLDER, '*.MP4'))
mov_files = glob.glob(os.path.join(INPUT_FOLDER, '*.mov')) + glob.glob(os.path.join(INPUT_FOLDER, '*.MOV'))
video_files = sorted(list(set(mp4_files + mov_files)))
print(f"Found {len(video_files)} videos to process.")

# Load the model a single time and reuse it for every video in the batch
print("\n--- INITIALIZING MODEL (ONCE) ---")
models = initialize_model()

for original_video_path in video_files:
    print(f"\n{'='*50}\n🎬 PROCESSING VIDEO: {os.path.basename(original_video_path)}\n{'='*50}")

    # Each video gets its own output folder named after the file (without extension)
    video_basename = os.path.splitext(os.path.basename(original_video_path))[0]
    final_output_dir = os.path.join(BASE_OUTPUT_FOLDER, video_basename)
    os.makedirs(final_output_dir, exist_ok=True)

    preprocessed_video_path = os.path.join(final_output_dir, 'preprocessed.mp4')

    # Returns the preprocessed path on success, or None if the video could not be opened
    actual_video_to_process = preprocess_video(original_video_path, preprocessed_video_path)

    if actual_video_to_process:
        # Track people in the preprocessed video; the function returns a 5-tuple
        counts, annotated_snap, original_snap, times, confidences_at_max = process_video_with_tracker(
            actual_video_to_process,
            models['yolo'],
            final_output_dir
        )

        # Write the snapshots and results.json for this video
        save_analysis_summary(
            final_output_dir,
            counts,
            annotated_snap,
            original_snap,
            times,
            video_basename,
            confidences_at_max
        )
        print(f"✅ SUCCESSFULLY PROCESSED: {os.path.basename(original_video_path)}")
    else:
        print(f"❌ FAILED to preprocess: {os.path.basename(original_video_path)}")

print(f"\n\n{'='*50}\n🎉 BATCH PROCESSING COMPLETE! 🎉\n{'='*50}")

Found 9 videos to process.

--- INITIALIZING MODEL (ONCE) ---
🧠 Initializing YOLOv11 model...
YOLOv11 model loaded on cuda.

🎬 PROCESSING VIDEO: d_1.mp4
🔧 Starting preprocessing for /content/kai-ai-model/dataset/test/captured/new/d_1.mp4...
Original: 1280x720 @ 30.00 FPS
Target:   1280x720 @ 15 FPS (keeping 1 in every 2 frames)
✅ Preprocessing complete. Optimized video saved to: /content/kai-ai-model/dataset/test result/d_1/preprocessed.mp4
📹 Starting video processing and annotation for: /content/kai-ai-model/dataset/test result/d_1/preprocessed.mp4
✅ Annotated video saved to /content/kai-ai-model/dataset/test result/d_1/output_video.mp4
💾 Saving analysis to: /content/kai-ai-model/dataset/test result/d_1
📸 Annotated snapshot saved to /content/kai-ai-model/dataset/test result/d_1/snapshot_annotated.jpg
📸 Original snapshot saved to /content/kai-ai-model/dataset/test result/d_1/snapshot_original.jpg
📄 JSON summary saved to /content/kai-ai-model/dataset/test result/d_1/results.json
✅ SUCCE

In [None]:
# Bundle all results into a single archive at /content/results.zip.
# The '-r' flag means 'recursive' to include all subfolders;
# the path is quoted because the folder name contains a space.
!zip -r /content/results.zip "/content/kai-ai-model/dataset/test result"

  adding: content/kai-ai-model/dataset/test result/ (stored 0%)
  adding: content/kai-ai-model/dataset/test result/d_1/ (stored 0%)
  adding: content/kai-ai-model/dataset/test result/d_1/results.json (deflated 91%)
  adding: content/kai-ai-model/dataset/test result/d_1/output_video.mp4 (deflated 2%)
  adding: content/kai-ai-model/dataset/test result/d_1/preprocessed.mp4 (deflated 0%)
  adding: content/kai-ai-model/dataset/test result/d_1/snapshot_annotated.jpg (deflated 1%)
  adding: content/kai-ai-model/dataset/test result/d_1/snapshot_original.jpg (deflated 0%)
  adding: content/kai-ai-model/dataset/test result/m_3/ (stored 0%)
  adding: content/kai-ai-model/dataset/test result/m_3/results.json (deflated 89%)
  adding: content/kai-ai-model/dataset/test result/m_3/output_video.mp4 (deflated 2%)
  adding: content/kai-ai-model/dataset/test result/m_3/preprocessed.mp4 (deflated 0%)
  adding: content/kai-ai-model/dataset/test result/m_3/snapshot_annotated.jpg (deflated 1%)
  adding: conte

# Dump

In [None]:
from ultralytics import YOLO

# Scratch cell: load the YOLO11m weights directly into a module-level `model`
# (separate from the `models` dict built by initialize_model)
model = YOLO("yolo11m.pt")

Creating new Ultralytics Settings v0.0.6 file ✅ 
View Ultralytics Settings with 'yolo settings' or at '/root/.config/Ultralytics/settings.json'
Update Settings with 'yolo settings key=value', i.e. 'yolo settings runs_dir=path/to/dir'. For help see https://docs.ultralytics.com/quickstart/#ultralytics-settings.
[KDownloading https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11m.pt to 'yolo11m.pt': 100% ━━━━━━━━━━━━ 38.8MB 86.5MB/s 0.4s


In [None]:
# Perform object detection on video (all classes) and save the annotated output.
# NOTE(review): without stream=True the per-frame results accumulate in RAM
# (see the warning printed below this cell).
results = model("/content/kai-ai-model/dataset/captured/vid_1.mp4", save=True)


inference results will accumulate in RAM unless `stream=True` is passed, causing potential out-of-memory
errors for large sources or long-running streams and videos. See https://docs.ultralytics.com/modes/predict/ for help.

Example:
    results = model(source=..., stream=True)  # generator of Results objects
    for r in results:
        boxes = r.boxes  # Boxes object for bbox outputs
        masks = r.masks  # Masks object for segment masks outputs
        probs = r.probs  # Class probabilities for classification outputs

video 1/1 (frame 1/351) /content/kai-ai-model/dataset/captured/vid_1.mp4: 384x640 13 persons, 2044.9ms
video 1/1 (frame 2/351) /content/kai-ai-model/dataset/captured/vid_1.mp4: 384x640 13 persons, 15.4ms
video 1/1 (frame 3/351) /content/kai-ai-model/dataset/captured/vid_1.mp4: 384x640 12 persons, 1 backpack, 11.8ms
video 1/1 (frame 4/351) /content/kai-ai-model/dataset/captured/vid_1.mp4: 384x640 13 persons, 1 backpack, 11.8ms
video 1/1 (frame 5/351) /content/kai-a

In [None]:
# Same detection run on /content/p.mp4, restricted to class 0 (reported as "persons" in the output)
results = model("/content/p.mp4",classes=0, save=True)



errors for large sources or long-running streams and videos. See https://docs.ultralytics.com/modes/predict/ for help.

Example:
    results = model(source=..., stream=True)  # generator of Results objects
    for r in results:
        boxes = r.boxes  # Boxes object for bbox outputs
        masks = r.masks  # Masks object for segment masks outputs
        probs = r.probs  # Class probabilities for classification outputs

video 1/1 (frame 1/188) /content/p.mp4: 384x640 10 persons, 24.4ms
video 1/1 (frame 2/188) /content/p.mp4: 384x640 10 persons, 24.3ms
video 1/1 (frame 3/188) /content/p.mp4: 384x640 10 persons, 24.3ms
video 1/1 (frame 4/188) /content/p.mp4: 384x640 10 persons, 24.4ms
video 1/1 (frame 5/188) /content/p.mp4: 384x640 10 persons, 24.3ms
video 1/1 (frame 6/188) /content/p.mp4: 384x640 10 persons, 24.3ms
video 1/1 (frame 7/188) /content/p.mp4: 384x640 10 persons, 24.3ms
video 1/1 (frame 8/188) /content/p.mp4: 384x640 10 persons, 24.3ms
video 1/1 (frame 9/188) /content/p.m

# Track

In [None]:
# Track with the default tracker (no `tracker=` argument is given; presumably BoT-SORT —
# TODO confirm against the Ultralytics defaults) and save the annotated output
results = model.track(source="/content/p.mp4", persist=True, save=True)

[31m[1mrequirements:[0m Ultralytics requirement ['lapx>=0.5.2'] not found, attempting AutoUpdate...
Collecting lapx>=0.5.2
  Downloading lapx-0.5.11-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.1 kB)
Downloading lapx-0.5.11-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.7 MB)
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 1.7/1.7 MB 118.2 MB/s eta 0:00:00
Installing collected packages: lapx
Successfully installed lapx-0.5.11

[31m[1mrequirements:[0m AutoUpdate success ✅ 3.4s, installed 1 package: ['lapx>=0.5.2']
[31m[1mrequirements:[0m ⚠️ [1mRestart runtime or rerun command for updates to take effect[0m



errors for large sources or long-running streams and videos. See https://docs.ultralytics.com/modes/predict/ for help.

Example:
    results = model(source=..., stream=True)  # generator of Results objects
    for r in results:
        boxes = r.boxes  # Boxe

In [None]:
# Track class 0 only, using the ByteTrack configuration.
# NOTE(review): show=True asks for a live preview window, which may not be
# available in a Colab runtime — confirm, or use save=True instead.
results = model.track("/content/p.mp4", persist=True, show=True, classes=0, tracker="bytetrack.yaml")