In [None]:
!pip install ultralytics

In [2]:
from google.colab import drive
drive.mount('/content/drive')


Mounted at /content/drive


# Step 1: Data Preprocessing
**Extract Frames from Videos:**

Extract frames from videos in the enemy and ally folders at 2 frames per second.
Run YOLOv11 on Each Frame:

Use a pre-trained YOLOv11 model to detect persons in each frame.
Filter detections for people (class ID 0 in YOLO).
Labeling:

Label detections as enemy (class 0) for frames from the enemy videos.
Label detections as ally (class 1) for frames from the ally videos.
Save the frame image and corresponding label in YOLO format (x_center y_center width height).
Organize Data:

Store frames as images in the images/train folder.
Store corresponding labels as .txt files in the labels/train folder.

In [9]:
import os
import cv2
import numpy as np
from pathlib import Path
from ultralytics import YOLO

# Paths to your data on Google Drive
enemy_folder = "/content/drive/MyDrive/Counter Strike/Counter Strike/counter-terrorists"  # Path to enemy folder
ally_folder = "/content/drive/MyDrive/Counter Strike/Counter Strike/terrorists"    # Path to ally folder
output_images_folder = "/content/drive/MyDrive/yolov11_data/images/train"  # Output folder for images
output_labels_folder = "/content/drive/MyDrive/yolov11_data/labels/train"  # Output folder for labels

# Ensure the output labels folder exists
os.makedirs(output_labels_folder, exist_ok=True)
os.makedirs(output_images_folder, exist_ok=True)

# Load YOLOv8 model (pre-trained weights)
model = YOLO("yolov11s.pt")  # You can use a larger model if needed, like yolov11l.pt

# Function to process each frame and label it
def process_and_label_frame(frame, label_class, frame_idx, video_name):
    height, width = frame.shape[:2]

    # Inference: Get the results (detected persons)
    results = model(frame)  # Perform inference on frame

    # Process detections
    detections = results[0].boxes.xywh  # xywh format of bounding boxes
    confidences = results[0].boxes.conf  # Confidence scores
    class_ids = results[0].boxes.cls  # Class IDs for detected objects

    # Ensure the label directory exists before writing label file
    label_file = os.path.join(output_labels_folder, f"{video_name}_frame_{frame_idx}.txt")

    with open(label_file, 'w') as f:
        for i, det in enumerate(detections):
            if class_ids[i] == 0:  # We are only interested in persons (class 0 in COCO)
                # Convert to YOLO format (normalized coordinates)
                x_center = det[0] / width
                y_center = det[1] / height
                w = det[2] / width
                h = det[3] / height

                # Write the label to file (0 for enemy, 1 for ally)
                f.write(f"{label_class} {x_center} {y_center} {w} {h}\n")

    # Save frame (optional)
    output_img_path = os.path.join(output_images_folder, f"{video_name}_frame_{frame_idx}.jpg")
    cv2.imwrite(output_img_path, frame)

# Function to process videos from enemy and ally folders
def process_video(video_path, label_class):
    video_name = os.path.basename(video_path).split('.')[0]

    # Open the video file
    cap = cv2.VideoCapture(video_path)
    frame_rate = cap.get(cv2.CAP_PROP_FPS)  # Get the FPS of the video
    interval = int(frame_rate / 2)  # Capture two frames per second (every 1/(FPS/2) seconds)

    frame_idx = 0
    current_frame = 0  # Initialize frame counter

    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        # Only process every second frame
        if current_frame % interval == 0:
            # Process and label the frame
            process_and_label_frame(frame, label_class, frame_idx, video_name)
            frame_idx += 1

        current_frame += 1

    cap.release()

# Process videos in both enemy and ally folders
def process_videos():
    # Process enemy videos and label persons as "enemy" (class 0)
    for video_name in os.listdir(enemy_folder):
        video_path = os.path.join(enemy_folder, video_name)
        if video_path.endswith(('.mp4', '.avi', '.mov')):
            process_video(video_path, label_class=0)  # Enemy label

    # Process ally videos and label persons as "ally" (class 1)
    for video_name in os.listdir(ally_folder):
        video_path = os.path.join(ally_folder, video_name)
        if video_path.endswith(('.mp4', '.avi', '.mov')):
            process_video(video_path, label_class=1)  # Ally label

# Run the function to process videos
process_videos()



0: 384x640 1 person, 1 bus, 11.6ms
Speed: 1.8ms preprocess, 11.6ms inference, 1.3ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 1 train, 10.9ms
Speed: 1.8ms preprocess, 10.9ms inference, 1.3ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 10.9ms
Speed: 2.6ms preprocess, 10.9ms inference, 1.3ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 13.7ms
Speed: 2.6ms preprocess, 13.7ms inference, 2.4ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 1 train, 10.9ms
Speed: 3.3ms preprocess, 10.9ms inference, 1.7ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 1 car, 10.9ms
Speed: 2.4ms preprocess, 10.9ms inference, 1.4ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 1 train, 10.9ms
Speed: 3.4ms preprocess, 10.9ms inference, 1.3ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 10.9ms
Speed: 3.2ms preprocess, 10.9ms inference, 2.6ms

# Step 2: Model Training
**Dataset Configuration:**

Create a YAML file specifying paths to training and validation datasets, class names (['enemy', 'ally']), and number of classes (nc: 2).
Train YOLOv11 Model:

Use a pre-trained YOLOv11 model (e.g., yolov11s.pt).
Train the model on the labeled dataset for a set number of epochs (e.g., 50).
Use parameters like batch_size, imgsz=640, and epochs=50 for training.
Save Weights:

After training, save the model weights (best.pt).


In [8]:
from ultralytics import YOLO

# Load the pre-trained YOLOv8 model (choose small or large model depending on needs)
model = YOLO("yolov11s.pt")  # You can change this to yolov11m.pt or yolov11l.pt for larger models

# Start training the model
model.train(
    data='/content/drive/MyDrive/yolov11_data/data.yaml',  # Path to your dataset.yaml file
    epochs=5,  # Number of epochs, you can increase it
    imgsz=640,  # Image size, should be a multiple of 32 (e.g., 640, 416)
    batch=16,  # Batch size, adjust according to your available GPU
    name='enemy_vs_ally',  # The name for your training run
    exist_ok=True  # If True, it overwrites the existing run folder
)


Ultralytics 8.3.58 🚀 Python-3.10.12 torch-2.5.1+cu121 CUDA:0 (Tesla T4, 15102MiB)
[34m[1mengine/trainer: [0mtask=detect, mode=train, model=/content/runs/detect/enemy_vs_ally/weights/best.pt, data=/content/drive/MyDrive/yolov8_data/data.yaml, epochs=5, time=None, patience=100, batch=16, imgsz=640, save=True, save_period=-1, cache=False, device=None, workers=8, project=None, name=enemy_vs_ally, exist_ok=True, pretrained=True, optimizer=auto, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=10, resume=False, amp=True, fraction=1.0, profile=False, freeze=None, multi_scale=False, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, save_hybrid=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, vid_stride=1, stream_buffer=False, visualize=False, augment=False, agnostic_nms=False, classes=None, retina_masks=False, embed=None, show=False, save_frames=False, save_txt=False, s

[34m[1mtrain: [0mScanning /content/drive/MyDrive/yolov8_data/labels/train.cache... 4621 images, 772 backgrounds, 0 corrupt: 100%|██████████| 4621/4621 [00:00<?, ?it/s]

[34m[1malbumentations: [0mBlur(p=0.01, blur_limit=(3, 7)), MedianBlur(p=0.01, blur_limit=(3, 7)), ToGray(p=0.01, num_output_channels=3, method='weighted_average'), CLAHE(p=0.01, clip_limit=(1.0, 4.0), tile_grid_size=(8, 8))



[34m[1mval: [0mScanning /content/drive/MyDrive/yolov8_data/labels/train.cache... 4621 images, 772 backgrounds, 0 corrupt: 100%|██████████| 4621/4621 [00:00<?, ?it/s]


Plotting labels to runs/detect/enemy_vs_ally/labels.jpg... 
[34m[1moptimizer:[0m 'optimizer=auto' found, ignoring 'lr0=0.01' and 'momentum=0.937' and determining best 'optimizer', 'lr0' and 'momentum' automatically... 
[34m[1moptimizer:[0m AdamW(lr=0.001667, momentum=0.9) with parameter groups 57 weight(decay=0.0), 64 weight(decay=0.0005), 63 bias(decay=0.0)
[34m[1mTensorBoard: [0mmodel graph visualization added ✅
Image sizes 640 train, 640 val
Using 2 dataloader workers
Logging results to [1mruns/detect/enemy_vs_ally[0m
Starting training for 5 epochs...

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


        1/5      4.03G     0.9713      1.259      1.151         43        640: 100%|██████████| 289/289 [02:40<00:00,  1.80it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 145/145 [01:21<00:00,  1.79it/s]


                   all       4621       7539      0.855      0.825      0.904      0.754

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


        2/5       4.1G     0.7397     0.7621      1.017         47        640: 100%|██████████| 289/289 [02:32<00:00,  1.89it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 145/145 [01:20<00:00,  1.81it/s]


                   all       4621       7539      0.877      0.838      0.921      0.783

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


        3/5      4.13G     0.6971     0.6789     0.9957         50        640: 100%|██████████| 289/289 [02:33<00:00,  1.88it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 145/145 [01:19<00:00,  1.83it/s]


                   all       4621       7539       0.88      0.828      0.915      0.787

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


        4/5       4.1G     0.6551      0.628     0.9784         32        640: 100%|██████████| 289/289 [02:41<00:00,  1.79it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 145/145 [01:22<00:00,  1.76it/s]


                   all       4621       7539      0.876      0.871      0.933      0.816

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


        5/5      4.12G     0.6113     0.5808     0.9617         61        640: 100%|██████████| 289/289 [02:36<00:00,  1.85it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 145/145 [01:21<00:00,  1.78it/s]


                   all       4621       7539      0.901       0.86      0.941      0.836

5 epochs completed in 0.340 hours.
Optimizer stripped from runs/detect/enemy_vs_ally/weights/last.pt, 22.5MB
Optimizer stripped from runs/detect/enemy_vs_ally/weights/best.pt, 22.5MB

Validating runs/detect/enemy_vs_ally/weights/best.pt...
Ultralytics 8.3.58 🚀 Python-3.10.12 torch-2.5.1+cu121 CUDA:0 (Tesla T4, 15102MiB)
Model summary (fused): 168 layers, 11,126,358 parameters, 0 gradients, 28.4 GFLOPs


                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 145/145 [01:20<00:00,  1.80it/s]


                   all       4621       7539      0.901      0.859      0.941      0.837
                 enemy       1320       2264      0.889      0.845      0.938      0.848
                  ally       2529       5275      0.913      0.874      0.944      0.825
Speed: 0.2ms preprocess, 2.6ms inference, 0.0ms loss, 2.5ms postprocess per image
Results saved to [1mruns/detect/enemy_vs_ally[0m


ultralytics.utils.metrics.DetMetrics object with attributes:

ap_class_index: array([0, 1])
box: ultralytics.utils.metrics.Metric object
confusion_matrix: <ultralytics.utils.metrics.ConfusionMatrix object at 0x7bea8db733d0>
curves: ['Precision-Recall(B)', 'F1-Confidence(B)', 'Precision-Confidence(B)', 'Recall-Confidence(B)']
curves_results: [[array([          0,    0.001001,    0.002002,    0.003003,    0.004004,    0.005005,    0.006006,    0.007007,    0.008008,    0.009009,     0.01001,    0.011011,    0.012012,    0.013013,    0.014014,    0.015015,    0.016016,    0.017017,    0.018018,    0.019019,     0.02002,    0.021021,    0.022022,    0.023023,
          0.024024,    0.025025,    0.026026,    0.027027,    0.028028,    0.029029,     0.03003,    0.031031,    0.032032,    0.033033,    0.034034,    0.035035,    0.036036,    0.037037,    0.038038,    0.039039,     0.04004,    0.041041,    0.042042,    0.043043,    0.044044,    0.045045,    0.046046,    0.047047,
          0.04804

In [4]:
# Save the model weights after training
model.save('/content/drive/MyDrive/yolov11_data/best_model.pt')


# Step 3: Inference

Load test videos and process them frame by frame.
Run the trained YOLOv11 model to detect people in each frame.
Draw red bounding boxes for enemies and green bounding boxes for allies.
Label each bounding box with the respective class name and confidence score.
Save Results:

Save the annotated frames into a new video.

In [7]:
import cv2
from ultralytics import YOLO
import os

# Load the trained YOLOv8 model
model = YOLO('/content/runs/detect/enemy_vs_ally/weights/best.pt')

# Function to draw bounding boxes with labels
def draw_bounding_boxes(frame, results):
    height, width = frame.shape[:2]

    # Ensure there are detections before drawing bounding boxes
    if len(results) > 0 and len(results[0].boxes.xyxy) > 0:
        # Loop through the results and draw bounding boxes
        for result in results:
            # Bounding box (xyxy format)
            x1, y1, x2, y2 = result.boxes.xyxy[0]
            class_id = int(result.boxes.cls[0])
            confidence = result.boxes.conf[0]
            label = model.names[class_id]  # Get the class name ('enemy' or 'ally')

            # Draw bounding box: red for enemy, green for ally
            color = (0, 0, 255) if label == 'enemy' else (0, 255, 0)  # Red for enemy, Green for ally

            # Draw rectangle
            cv2.rectangle(frame, (int(x1), int(y1)), (int(x2), int(y2)), color, 2)

            # Add label with confidence score
            label_text = f"{label} {confidence:.2f}"
            cv2.putText(frame, label_text, (int(x1), int(y1) - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.6, color, 2)

    return frame

# Function to process a video and save results
def process_video_for_inference(video_path, output_path):
    # Open the video file
    cap = cv2.VideoCapture(video_path)
    frame_idx = 0
    output_video = None

    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        # Perform inference on the current frame
        results = model(frame)  # Get predictions (detected objects)

        # Draw bounding boxes and labels
        frame = draw_bounding_boxes(frame, results)

        # Initialize the output video writer if it's not already done
        if output_video is None:
            fourcc = cv2.VideoWriter_fourcc(*'XVID')
            output_video = cv2.VideoWriter(output_path, fourcc, 30.0, (frame.shape[1], frame.shape[0]))

        # Write the frame to the output video
        output_video.write(frame)
        frame_idx += 1

    cap.release()
    output_video.release()

# Path to your test clips folder
test_clips_folder = '/content/drive/MyDrive/Counter Strike/Counter Strike/test'  # Replace with the path to your test clips folder
output_folder = '/content/drive/MyDrive/Counter Strike/Counter Strike/outputs'  # Replace with the path where you want to save the output

# Ensure the output folder exists
os.makedirs(output_folder, exist_ok=True)

# Process all videos in the test clips folder
for video_name in os.listdir(test_clips_folder):
    video_path = os.path.join(test_clips_folder, video_name)
    if video_path.endswith(('.mp4', '.avi', '.mov')):  # Process only video files
        output_video_path = os.path.join(output_folder, f"output_{os.path.splitext(video_name)[0]}.mp4")

        # Process the video and save the output with bounding boxes
        process_video_for_inference(video_path, output_video_path)
        print(f"Processed video saved to: {output_video_path}")


[1;30;43mStreaming output truncated to the last 5000 lines.[0m

0: 384x640 1 ally, 12.6ms
Speed: 2.4ms preprocess, 12.6ms inference, 2.2ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 ally, 10.8ms
Speed: 2.1ms preprocess, 10.8ms inference, 1.8ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 enemy, 12.7ms
Speed: 2.3ms preprocess, 12.7ms inference, 2.1ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 enemy, 16.5ms
Speed: 2.4ms preprocess, 16.5ms inference, 3.5ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 enemy, 1 ally, 16.0ms
Speed: 2.3ms preprocess, 16.0ms inference, 3.3ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 enemy, 1 ally, 10.9ms
Speed: 2.2ms preprocess, 10.9ms inference, 2.3ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 10.8ms
Speed: 2.7ms preprocess, 10.8ms inference, 0.9ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 ally, 10.9ms
Speed: 2.6m