In [1]:
# 1. Import necessary libraries
from ultralytics import YOLO # Here we import YOLO
import yaml                  # for yaml files
import torch
from PIL import Image
import os
import cv2
import time

In [None]:
# Extract & convert

import os
import cv2
import json

def extract_frames(video_path, output_folder, no_labels=False):
    """Write every frame of a video into ``output_folder`` as numbered JPEG files.

    Frames are named ``frame_0000.jpg``, ``frame_0001.jpg``, ... in decoding
    order, so the number in the file name is the zero-based frame index.

    Args:
        video_path: Path of the video file to read.
        output_folder: Directory that receives the frames; created if missing.
        no_labels: Accepted for backward compatibility; it does not influence
            the extraction.

    Returns:
        The number of frames decoded from the video (0 when the video could
        not be opened).
    """
    # cv2.imwrite does not create missing directories, so make sure it exists.
    os.makedirs(output_folder, exist_ok=True)

    cap = cv2.VideoCapture(video_path)
    count = 0
    try:
        if not cap.isOpened():
            print(f"Warning: could not open video: {video_path}")
            return 0

        while True:
            ret, frame = cap.read()
            if not ret:
                break
            frame_path = os.path.join(output_folder, f'frame_{count:04d}.jpg')
            # imwrite reports failure through its return value; surface it
            # instead of silently leaving a gap in the frame sequence.
            if not cv2.imwrite(frame_path, frame):
                print(f"Warning: could not write frame: {frame_path}")
            # Always advance so file names keep matching the frame index.
            count += 1
    finally:
        # Release the capture even if decoding or writing raised.
        cap.release()
    return count

def convert_bbox_format(bbox, img_width, img_height):
    """Convert a top-left pixel box into a normalised, centre-based box.

    Args:
        bbox: Sequence ``(x_min, y_min, width, height)`` in pixels.
        img_width: Image width used to normalise the horizontal values.
        img_height: Image height used to normalise the vertical values.

    Returns:
        Tuple ``(x_center, y_center, width, height)`` where each value is
        divided by the corresponding image dimension.
    """
    left, top, box_w, box_h = bbox
    center_x = (left + box_w / 2) / img_width
    center_y = (top + box_h / 2) / img_height
    return center_x, center_y, box_w / img_width, box_h / img_height

def convert_labels_json(json_path, frame_folder, class_mapping):
    """Write one YOLO label file per annotated frame from a JSON annotation file.

    The JSON document must provide ``categories`` (``id``, ``name``),
    ``images`` (``id``, ``width``, ``height``) and ``annotations``
    (``image_id``, ``category_id``, ``bbox``). For every annotated image whose
    frame ``frame_<id:04d>.jpg`` exists in ``frame_folder`` a sibling ``.txt``
    file is written with one ``class x_center y_center width height`` line per
    annotation.

    Args:
        json_path: Path of the JSON annotation file.
        frame_folder: Folder containing the extracted ``frame_XXXX.jpg`` files.
        class_mapping: Dict mapping category names to YOLO class ids.

    Raises:
        KeyError: If a category name is missing from ``class_mapping`` or an
            annotation references an unknown image or category id.
    """
    with open(json_path, 'r') as f:
        data = json.load(f)

    categories = {cat['id']: class_mapping[cat['name']] for cat in data['categories']}
    images = {img['id']: img for img in data['images']}

    # Gather all lines of a frame before writing: opening the label file in
    # 'w' mode once per annotation would keep only the last box of a frame.
    lines_by_label_file = {}
    for ann in data['annotations']:
        image_info = images[ann['image_id']]
        frame_index = image_info['id']  # Assuming image_id corresponds to frame index
        frame_file = os.path.join(frame_folder, f'frame_{frame_index:04d}.jpg')
        if not os.path.exists(frame_file):
            # Annotation for a frame that was not extracted: nothing to label.
            continue

        class_id = categories[ann['category_id']]
        x_center, y_center, width, height = convert_bbox_format(
            ann['bbox'], image_info['width'], image_info['height'])

        # Swap only the extension, never a '.jpg' occurring in a directory name.
        label_file = os.path.splitext(frame_file)[0] + '.txt'
        lines_by_label_file.setdefault(label_file, []).append(
            f"{class_id} {x_center} {y_center} {width} {height}\n")

    for label_file, lines in lines_by_label_file.items():
        with open(label_file, 'w') as out:
            out.writelines(lines)

# Dataset root, the category folders to process and the YOLO class id of each.
base_path = './Gun_Action_Recognition_Dataset'
categories = ['Handgun', 'Machine_Gun', 'No_Gun']
class_ids = {'Handgun': 0, 'Machine_Gun': 1, 'No_Gun': 2}

for category in categories:
    category_path = os.path.join(base_path, category)
    # Sorted so the processing order (and the log) is the same on every run.
    for subdir in sorted(os.listdir(category_path)):
        subdir_path = os.path.join(category_path, subdir)
        if not os.path.isdir(subdir_path):  # Only recording folders are processed
            print(f"!!!!!!!!!!!!!!!!Skipping non-directory item: {subdir_path}")
            continue

        video_path = os.path.join(subdir_path, "video.mp4")
        frame_folder = os.path.join(subdir_path, 'frames')
        os.makedirs(frame_folder, exist_ok=True)

        print(f"Processing video: {video_path}")
        print(f"Frame folder: {frame_folder}")

        # Extract the frames
        extract_frames(video_path, frame_folder, no_labels=(category == 'No_Gun'))

        # Convert the annotations only for categories other than 'No_Gun'
        if category != 'No_Gun':
            json_path = os.path.join(subdir_path, 'label.json')
            if os.path.isfile(json_path):
                convert_labels_json(json_path, frame_folder, class_ids)
            else:
                # A single recording without annotations must not abort the
                # conversion of all remaining recordings.
                print(f"Warning: missing annotation file, no labels written: {json_path}")


In [2]:
import yaml

# Dataset description: location of each split, number of classes and the
# class names.
data = {
    'train': './train',
    'val': './val',
    'test': './test',
    'nc': 2,  # Number of classes
    'names': ['Handgun', 'Machine_Gun'],
}

# Write the guns_dataset.yaml file
with open('guns_dataset.yaml', 'w') as yaml_file:
    yaml.dump(data, yaml_file)

In [3]:
import os
import shutil
import random
from collections import Counter, defaultdict


def create_dirs(base_path):
    """Create ``images`` and ``labels`` folders for the train, val and test splits.

    Existing folders are left untouched.

    Args:
        base_path: Directory under which ``<split>/images`` and
            ``<split>/labels`` are created.
    """
    for split_name in ('train', 'val', 'test'):
        for kind in ('images', 'labels'):
            os.makedirs(os.path.join(base_path, split_name, kind), exist_ok=True)


def copy_files(src, dst, files):
    """Copy the named files from one directory to another, keeping their names.

    Args:
        src: Directory the files are read from.
        dst: Directory the files are copied to.
        files: Iterable of file names relative to ``src``.
    """
    for name in files:
        shutil.copy(os.path.join(src, name), os.path.join(dst, name))


def _collect_recordings(base_path, categories):
    """Describe every recording folder of the given categories that has a 'frames' directory."""
    recordings = []
    for category in categories:
        category_path = os.path.join(base_path, category)
        # Sorted so that a given seed always shuffles the same starting order.
        for subfolder in sorted(os.listdir(category_path)):
            if not os.path.isdir(os.path.join(category_path, subfolder)):
                continue

            # Folder names are '_'-separated; fields 1-4 hold camera, place,
            # subject and brightness.
            info = subfolder.split('_')
            if len(info) < 5:
                print(f"Skipping folder with unexpected name: {subfolder}")
                continue

            frames_path = os.path.join(category_path, subfolder, 'frames')
            if not os.path.exists(frames_path):
                continue

            frame_files = sorted(os.listdir(frames_path))
            recordings.append({
                'category': category,
                'folder': subfolder,
                'frames': frames_path,
                'images': [f for f in frame_files if f.endswith('.jpg')],
                'labels': [f for f in frame_files if f.endswith('.txt')],
                'subject': info[3],
                'brightness': info[4],
                'camera': info[1],
                'place': info[2]
            })
    return recordings


def _split_by_feature(data, split_ratios):
    """Split recordings into train/val/test within each (category, place, subject) group."""
    feature_groups = defaultdict(list)
    for item in data:
        # Key features used for stratification; extend the tuple (for example
        # with 'brightness' or 'camera') to stratify on more features.
        key = (item['category'], item['place'], item['subject'])
        feature_groups[key].append(item)

    train_data, val_data, test_data = [], [], []
    for items in feature_groups.values():
        total_items = len(items)
        # Round the cumulative boundaries instead of truncating each size:
        # truncation pushed every remainder into the test split, so a group
        # with a single recording never reached the training set.
        train_end = min(total_items, round(total_items * split_ratios[0]))
        val_end = round(total_items * (split_ratios[0] + split_ratios[1]))
        val_end = max(train_end, min(total_items, val_end))

        train_data.extend(items[:train_end])
        val_data.extend(items[train_end:val_end])
        test_data.extend(items[val_end:])

    return train_data, val_data, test_data


def _copy_split_files(base_path, split, items):
    """Copy the frames and labels of the given recordings into one split folder."""
    split_images_path = os.path.join(base_path, split, 'images')
    split_labels_path = os.path.join(base_path, split, 'labels')
    os.makedirs(split_images_path, exist_ok=True)
    os.makedirs(split_labels_path, exist_ok=True)

    leftovers = os.listdir(split_images_path)
    if leftovers:
        # Old files are never deleted here; after a different shuffle the same
        # recording could therefore sit in more than one split.
        print(f"Warning: {split_images_path} already contains {len(leftovers)} file(s); "
              "files from an earlier split are not removed.")

    for item in items:
        frames_path = item['frames']
        print(frames_path)

        # Prefix with category and folder so names stay unique across recordings.
        prefix = f"{item['category']}_{item['folder']}_frame_"
        for key, extension, destination in (('images', '.jpg', split_images_path),
                                            ('labels', '.txt', split_labels_path)):
            for name in item[key]:
                frame_number = name.split('.')[0].split('_')[-1]
                shutil.copy(os.path.join(frames_path, name),
                            os.path.join(destination, f"{prefix}{frame_number}{extension}"))


def split_dataset(base_path, split_ratios=(0.7, 0.2, 0.1), seed=None):
    """Split the recordings into train/val/test and copy their frames and labels.

    Whole recordings (one folder each) are assigned to a split, so frames of
    one video never end up in different splits within a single run. The
    assignment is stratified by (category, place, subject).

    Args:
        base_path: Dataset root containing the ``Handgun``, ``Machine_Gun``
            and ``No_Gun`` folders; the split folders are created inside it.
        split_ratios: ``(train, val, test)`` fractions; must sum to 1.
        seed: Optional seed for a reproducible shuffle. ``None`` (default)
            uses the global ``random`` state as before.

    Returns:
        Dict with keys ``'train'``, ``'val'`` and ``'test'``; each value is the
        list of recording descriptions (dicts) assigned to that split.
    """
    assert round(sum(split_ratios), 10) == 1, "The split ratios must sum to 1."
    create_dirs(base_path)

    categories = ['Handgun', 'Machine_Gun', 'No_Gun']
    all_data = _collect_recordings(base_path, categories)

    if seed is None:
        random.shuffle(all_data)
    else:
        # Private generator: reproducible without touching the global RNG state.
        random.Random(seed).shuffle(all_data)

    train_data, val_data, test_data = _split_by_feature(all_data, split_ratios)

    splits = {
        'train': train_data,
        'val': val_data,
        'test': test_data
    }

    for split, data in splits.items():
        _copy_split_files(base_path, split, data)

    print("Data split complete.")
    return splits

def check_frames_in_same_split(splits):
    """Verify that every recording folder is assigned to exactly one split.

    Args:
        splits: Dict mapping a split name to a list of recording dicts, each
            with a ``'folder'`` entry.

    Returns:
        ``True`` when no folder appears more than once, ``False`` otherwise.
        One error line is printed per repeated folder.
    """
    folder_to_split = {}
    conflicts = 0

    for split, split_data in splits.items():
        for item in split_data:
            folder = item['folder']
            if folder in folder_to_split:
                print(f"Error: Folder {folder} is present in both {folder_to_split[folder]} and {split}")
                conflicts += 1
            else:
                folder_to_split[folder] = split

    if conflicts:
        # Do not claim success after reporting errors.
        print(f"Verification failed - {conflicts} folder assignment conflict(s) found.")
        return False

    print("Verification complete - every folder is assigned to one split.")
    return True


def check_balance(splits, feature):
    """Print a table of image counts per value of ``feature`` and per split.

    For each distinct value of ``item[feature]`` the table lists the total
    number of images, the number in train/val/test and the share of that
    value's images falling in each split.

    Args:
        splits: Dict with the keys ``'train'``, ``'val'`` and ``'test'``, each
            mapping to a list of recording dicts carrying ``feature`` and an
            ``'images'`` list.
        feature: Key of the recording dicts to group by (e.g. ``'category'``).
    """
    split_names = ('train', 'val', 'test')
    total_counts = Counter()
    split_counts = {name: Counter() for name in split_names}

    for split, data in splits.items():
        for item in data:
            value = item[feature]
            n_images = len(item['images'])
            total_counts[value] += n_images
            split_counts[split][value] += n_images

    print(f"\nBalancing based on {feature}:")
    print(
        f"{'Feature':<20} {'Total':<10} {'Train':<10} {'Val':<10} {'Test':<10} {'Train %':<10} {'Val %':<10} {'Test %':<10}")
    print("-" * 95)

    for value in sorted(total_counts):
        value_total = total_counts[value]
        per_split = [split_counts[name][value] for name in split_names]
        if value_total > 0:
            shares = [(count / value_total) * 100 for count in per_split]
        else:
            # No images at all for this value: report 0 instead of dividing by zero.
            shares = [0, 0, 0]
        print(
            f"{value:<20} {value_total:<10} {per_split[0]:<10} {per_split[1]:<10} {per_split[2]:<10} {shares[0]:<10.2f} {shares[1]:<10.2f} {shares[2]:<10.2f}")

In [8]:
# Run the split: dataset root and the (train, val, test) fractions
base_path = './Gun_Action_Recognition_Dataset'
split_ratios = (0.7, 0.2, 0.1)

# `splits` maps 'train' / 'val' / 'test' to the recordings assigned to each split
splits = split_dataset(base_path, split_ratios)

./Gun_Action_Recognition_Dataset/Handgun/PCH2_C1_P1_V1_HB_1/frames
./Gun_Action_Recognition_Dataset/Handgun/PCH1_C1_P1_V1_HB_1/frames
./Gun_Action_Recognition_Dataset/Handgun/PAH8_C1_P1_V1_HB_1/frames
./Gun_Action_Recognition_Dataset/Handgun/PAH2_C1_P1_V1_HB_1/frames
./Gun_Action_Recognition_Dataset/Handgun/PCH3_C1_P1_V1_HB_1/frames
./Gun_Action_Recognition_Dataset/Handgun/PAH6_C1_P1_V1_HB_2/frames
./Gun_Action_Recognition_Dataset/Handgun/PAH5_C1_P1_V1_HB_2/frames
./Gun_Action_Recognition_Dataset/Handgun/PCH5_C1_P1_V1_HB_1/frames
./Gun_Action_Recognition_Dataset/Handgun/PAH2_C1_P1_V1_HB_4/frames
./Gun_Action_Recognition_Dataset/Handgun/PAH7_C1_P1_V1_HB_2/frames
./Gun_Action_Recognition_Dataset/Handgun/PAH1_C1_P1_V1_HB_4/frames
./Gun_Action_Recognition_Dataset/No_Gun/N12_C1_P1_V1_HB_1/frames
./Gun_Action_Recognition_Dataset/No_Gun/N6_C1_P1_V1_HB_2/frames
./Gun_Action_Recognition_Dataset/No_Gun/N2_C1_P1_V1_HB_1/frames
./Gun_Action_Recognition_Dataset/No_Gun/N11_C1_P1_V1_HB_1/frames
./Gun

In [4]:
# Choose the features for which the balance across splits is displayed.
# Requires `splits` from the split_dataset(...) cell to be defined first.
check_frames_in_same_split(splits)
check_balance(splits, 'category')
check_balance(splits, 'place')
check_balance(splits, 'subject')
# check_balance(splits, 'brightness')
# check_balance(splits, 'camera')

NameError: name 'splits' is not defined

In [7]:
# Dataset YAML file name handed to custom_train(), which passes it as `data=` to model.train()
yaml_filename = 'guns_dataset.yaml'

In [11]:
def check_dataset_structure(base_path):
    """Check that the train/val/test folders hold a consistent set of images and labels.

    For every split the ``images`` and ``labels`` folders must exist and be
    non-empty, and every label file must have an image with the same base
    name. Images without a label file are accepted. Only image files
    (``.jpg``, ``.jpeg``, ``.png``, ``.bmp``) and ``.txt`` label files are
    considered; hidden or unrelated files are ignored.

    Args:
        base_path: Dataset root containing ``<split>/images`` and
            ``<split>/labels``.

    Returns:
        ``True`` if every split passes, ``False`` at the first split that
        fails (the reason is printed).
    """
    image_extensions = ('.jpg', '.jpeg', '.png', '.bmp')
    splits = ['train', 'val', 'test']
    for split in splits:
        images_dir = os.path.join(base_path, split, 'images')
        labels_dir = os.path.join(base_path, split, 'labels')

        if not os.path.exists(images_dir) or not os.path.exists(labels_dir):
            print(f"Error: {images_dir} or {labels_dir} does not exist.")
            return False

        # Filter by extension: stray entries such as '.DS_Store' would
        # otherwise be counted as images or reported as orphan labels.
        images = {name for name in os.listdir(images_dir)
                  if not name.startswith('.') and name.lower().endswith(image_extensions)}
        labels = {name for name in os.listdir(labels_dir)
                  if not name.startswith('.') and name.lower().endswith('.txt')}

        if len(images) == 0 or len(labels) == 0:
            print(f"Error: {split} set is empty.")
            return False

        image_names = set(os.path.splitext(img)[0] for img in images)
        label_names = set(os.path.splitext(lbl)[0] for lbl in labels)

        # Find images that have corresponding labels
        images_with_labels = image_names & label_names
        # Find images without labels (which is acceptable in this dataset)
        images_without_labels = image_names - label_names
        # Find labels without corresponding images (which is an error)
        labels_without_images = label_names - image_names

        if labels_without_images:
            print(f"Error: There are labels without corresponding images in {split} set.")
            print(f"Labels without images: {sorted(labels_without_images)}")
            return False

        print(f"{split.capitalize()} set is correctly structured.")
        print(f"Total images: {len(images)} (with labels: {len(images_with_labels)}, without labels: {len(images_without_labels)})")
        print(f"Total labels: {len(labels)}")
        print("-------------------------------------------------------------")

    return True

# Check the dataset structure.
# Use the dataset root already configured in `base_path` rather than a
# machine-specific absolute path, so the check runs on any checkout.
dataset_base_path = base_path
if check_dataset_structure(dataset_base_path):
    print("Dataset structure is correct.")
else:
    print("Dataset structure has issues.")

Train set is correctly structured.
Total images: 66795 (with labels: 28711, without labels: 38084)
Total labels: 28711
-------------------------------------------------------------
Val set is correctly structured.
Total images: 17165 (with labels: 7360, without labels: 9805)
Total labels: 7360
-------------------------------------------------------------
Test set is correctly structured.
Total images: 15755 (with labels: 6859, without labels: 8896)
Total labels: 6859
-------------------------------------------------------------
Dataset structure is correct.


In [8]:
# GPU

# Custom training function
def custom_train(model, yaml_filename, epochs=10, batch_size=16, imgsz=480, device = "mps"):
    """Train the model and, on a 'negative dimension' error, save a sample batch for inspection.

    Reads the module-level ``base_path`` to locate ``train/images``,
    ``train/labels`` and the ``problematic_batches`` output folder.

    Args:
        model: Object exposing ``train(data=..., epochs=..., batch=...,
            device=..., imgsz=..., verbose=...)``.
        yaml_filename: Dataset YAML passed to ``model.train`` as ``data``.
        epochs: Number of training epochs.
        batch_size: Batch size; also the number of files inspected on error.
        imgsz: Training image size.
        device: Device identifier forwarded to ``model.train``.

    Raises:
        RuntimeError: Re-raised unchanged when the message does not contain
            'negative dimension'.
    """
    problematic_batches_dir = os.path.join(base_path, 'problematic_batches')
    os.makedirs(problematic_batches_dir, exist_ok=True)

    try:
        # Run the training for the requested total number of epochs
        model.train(data=yaml_filename, epochs=epochs, batch=batch_size, device = device, imgsz=imgsz, verbose=True)
    except RuntimeError as e:
        if 'negative dimension' not in str(e):
            # Bare raise keeps the original traceback intact.
            raise

        print(f"Error during training: {e}")
        print("Checking the problematic batch...")

        image_folder = os.path.join(base_path, 'train', 'images')
        label_folder = os.path.join(base_path, 'train', 'labels')

        # NOTE: these are the first `batch_size` directory entries, which are
        # not necessarily the files of the batch that actually failed.
        problematic_images = os.listdir(image_folder)[:batch_size]

        # Folder receiving the copied files and the annotated images
        save_dir = os.path.join(problematic_batches_dir, 'batch_1')
        os.makedirs(save_dir, exist_ok=True)

        for image_file in problematic_images:
            image_path = os.path.join(image_folder, image_file)
            if not os.path.isfile(image_path):
                continue
            label_path = os.path.join(label_folder, os.path.splitext(image_file)[0] + '.txt')

            shutil.copy(image_path, save_dir)
            if not os.path.exists(label_path):
                # Images without a label file are valid in this dataset;
                # copying the missing label used to crash the handler.
                print(f"No label file for {image_file}; copied the image only.")
                continue
            shutil.copy(label_path, save_dir)

            verify_and_fix_image_and_label(image_path, label_path)
            display_and_save_image_with_annotations(image_path, label_path, save_dir)

        print(f"Saved problematic batch to {save_dir}.")

# Run the custom training on the "mps" device
# NOTE(review): `model` is not created in this cell — confirm the cell that builds it runs first
custom_train(model, yaml_filename, epochs=10, batch_size=16, imgsz=640, device = "mps")

Ultralytics YOLOv8.2.28 🚀 Python-3.12.3 torch-2.3.0 MPS (Apple M1 Pro)
[34m[1mengine/trainer: [0mtask=detect, mode=train, model=yolov8n.pt, data=guns_dataset.yaml, epochs=10, time=None, patience=100, batch=16, imgsz=640, save=True, save_period=-1, cache=False, device=mps, workers=8, project=None, name=train12, exist_ok=False, pretrained=True, optimizer=auto, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=10, resume=False, amp=True, fraction=1.0, profile=False, freeze=None, multi_scale=False, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, save_hybrid=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, vid_stride=1, stream_buffer=False, visualize=False, augment=False, agnostic_nms=False, classes=None, retina_masks=False, embed=None, show=False, save_frames=False, save_txt=False, save_conf=False, save_crop=False, show_labels=True, show_conf=True, show_boxes=True,

[34m[1mtrain: [0mScanning /Users/andreavisi/Desktop/PYTHON/Computer Vision e Deep Learning 2024/PROGETTO/Gun_Action_Recognition_Dataset/train/labels.cache... 66095 images, 0 backgrounds, 0 corrupt: 100%|██████████| 66095/66095 [00:00<?, ?it/s]
[34m[1mval: [0mScanning /Users/andreavisi/Desktop/PYTHON/Computer Vision e Deep Learning 2024/PROGETTO/Gun_Action_Recognition_Dataset/val/labels.cache... 17965 images, 0 backgrounds, 0 corrupt: 100%|██████████| 17965/17965 [00:00<?, ?it/s]


Plotting labels to runs/detect/train12/labels.jpg... 
[34m[1moptimizer:[0m 'optimizer=auto' found, ignoring 'lr0=0.01' and 'momentum=0.937' and determining best 'optimizer', 'lr0' and 'momentum' automatically... 
[34m[1moptimizer:[0m SGD(lr=0.01, momentum=0.9) with parameter groups 57 weight(decay=0.0), 64 weight(decay=0.0005), 63 bias(decay=0.0)
[34m[1mTensorBoard: [0mmodel graph visualization added ✅
Image sizes 640 train, 640 val
Using 0 dataloader workers
Logging results to [1mruns/detect/train12[0m
Starting training for 10 epochs...
Closing dataloader mosaic

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       1/10         0G      2.484      11.39      1.727          9        640:   4%|▍         | 166/4131 [04:59<1:59:24,  1.81s/it]


Error during training: Trying to create tensor with negative dimension -1: [16, -1, 5]
Checking the problematic batch...
Saved annotated image to /Users/andreavisi/Desktop/PYTHON/Computer Vision e Deep Learning 2024/PROGETTO/Gun_Action_Recognition_Dataset/problematic_batches/batch_1/Handgun_PAH9_C2_P3_V2_HB_2_frame_0090.jpg
Saved annotated image to /Users/andreavisi/Desktop/PYTHON/Computer Vision e Deep Learning 2024/PROGETTO/Gun_Action_Recognition_Dataset/problematic_batches/batch_1/Machine_Gun_PCW1_C2_P3_V2_HB_2_frame_0143.jpg
Saved annotated image to /Users/andreavisi/Desktop/PYTHON/Computer Vision e Deep Learning 2024/PROGETTO/Gun_Action_Recognition_Dataset/problematic_batches/batch_1/Machine_Gun_PAW1_C2_P3_V1_HB_2_frame_0276.jpg
Saved annotated image to /Users/andreavisi/Desktop/PYTHON/Computer Vision e Deep Learning 2024/PROGETTO/Gun_Action_Recognition_Dataset/problematic_batches/batch_1/Machine_Gun_PAW4_C1_P1_V1_HB_2_frame_0151.jpg
Saved annotated image to /Users/andreavisi/Desk

In [8]:
# CPU

# Initialise the YOLO model from the local weights file
model = YOLO('basic_models/yolov8n.pt')

# Custom training function
def custom_train(model, yaml_filename, epochs=10, batch_size=16, imgsz=480, device=None):
    """Train the model and, on a 'negative dimension' error, save a sample batch for inspection.

    Reads the module-level ``base_path`` to locate ``train/images``,
    ``train/labels`` and the ``problematic_batches`` output folder.

    Args:
        model: Object exposing ``train(data=..., epochs=..., batch=...,
            imgsz=..., verbose=...)``.
        yaml_filename: Dataset YAML passed to ``model.train`` as ``data``.
        epochs: Number of training epochs.
        batch_size: Batch size; also the number of files inspected on error.
        imgsz: Training image size.
        device: Optional device identifier. When ``None`` (default) no
            ``device`` argument is passed to ``model.train`` at all.

    Raises:
        RuntimeError: Re-raised unchanged when the message does not contain
            'negative dimension'.
    """
    problematic_batches_dir = os.path.join(base_path, 'problematic_batches')
    os.makedirs(problematic_batches_dir, exist_ok=True)

    train_kwargs = dict(data=yaml_filename, epochs=epochs, batch=batch_size, imgsz=imgsz, verbose=True)
    if device is not None:
        # Only forward the device when the caller asked for one.
        train_kwargs['device'] = device

    try:
        # Run the training for the requested total number of epochs
        model.train(**train_kwargs)
    except RuntimeError as e:
        if 'negative dimension' not in str(e):
            # Bare raise keeps the original traceback intact.
            raise

        print(f"Error during training: {e}")
        print("Checking the problematic batch...")

        image_folder = os.path.join(base_path, 'train', 'images')
        label_folder = os.path.join(base_path, 'train', 'labels')

        # NOTE: these are the first `batch_size` directory entries, which are
        # not necessarily the files of the batch that actually failed.
        problematic_images = os.listdir(image_folder)[:batch_size]

        # Folder receiving the copied files and the annotated images
        save_dir = os.path.join(problematic_batches_dir, 'batch_1')
        os.makedirs(save_dir, exist_ok=True)

        for image_file in problematic_images:
            image_path = os.path.join(image_folder, image_file)
            if not os.path.isfile(image_path):
                continue
            label_path = os.path.join(label_folder, os.path.splitext(image_file)[0] + '.txt')

            shutil.copy(image_path, save_dir)
            if not os.path.exists(label_path):
                # Images without a label file are valid in this dataset;
                # copying the missing label used to crash the handler.
                print(f"No label file for {image_file}; copied the image only.")
                continue
            shutil.copy(label_path, save_dir)

            verify_and_fix_image_and_label(image_path, label_path)
            display_and_save_image_with_annotations(image_path, label_path, save_dir)

        print(f"Saved problematic batch to {save_dir}.")

# Run the custom training (no device argument is passed here)
custom_train(model, yaml_filename, epochs=10, batch_size=16, imgsz=640)

Ultralytics YOLOv8.2.28 🚀 Python-3.12.3 torch-2.3.0 CPU (Apple M1 Pro)
[34m[1mengine/trainer: [0mtask=detect, mode=train, model=yolov8n.pt, data=guns_dataset.yaml, epochs=10, time=None, patience=100, batch=16, imgsz=640, save=True, save_period=-1, cache=False, device=None, workers=8, project=None, name=train7, exist_ok=False, pretrained=True, optimizer=auto, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=10, resume=False, amp=True, fraction=1.0, profile=False, freeze=None, multi_scale=False, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, save_hybrid=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, vid_stride=1, stream_buffer=False, visualize=False, augment=False, agnostic_nms=False, classes=None, retina_masks=False, embed=None, show=False, save_frames=False, save_txt=False, save_conf=False, save_crop=False, show_labels=True, show_conf=True, show_boxes=True,

[34m[1mtrain: [0mScanning /Users/andreavisi/Desktop/PYTHON/Computer Vision e Deep Learning 2024/PROGETTO/Gun_Action_Recognition_Dataset/train/labels... 28711 images, 38084 backgrounds, 0 corrupt: 100%|██████████| 66795/66795 [00:19<00:00, 3423.99it/s]


[34m[1mtrain: [0mNew cache created: /Users/andreavisi/Desktop/PYTHON/Computer Vision e Deep Learning 2024/PROGETTO/Gun_Action_Recognition_Dataset/train/labels.cache


[34m[1mval: [0mScanning /Users/andreavisi/Desktop/PYTHON/Computer Vision e Deep Learning 2024/PROGETTO/Gun_Action_Recognition_Dataset/val/labels... 7360 images, 9805 backgrounds, 0 corrupt: 100%|██████████| 17165/17165 [00:04<00:00, 3588.40it/s]


[34m[1mval: [0mNew cache created: /Users/andreavisi/Desktop/PYTHON/Computer Vision e Deep Learning 2024/PROGETTO/Gun_Action_Recognition_Dataset/val/labels.cache
Plotting labels to runs/detect/train7/labels.jpg... 
[34m[1moptimizer:[0m 'optimizer=auto' found, ignoring 'lr0=0.01' and 'momentum=0.937' and determining best 'optimizer', 'lr0' and 'momentum' automatically... 
[34m[1moptimizer:[0m SGD(lr=0.01, momentum=0.9) with parameter groups 57 weight(decay=0.0), 64 weight(decay=0.0005), 63 bias(decay=0.0)
[34m[1mTensorBoard: [0mmodel graph visualization added ✅
Image sizes 640 train, 640 val
Using 0 dataloader workers
Logging results to [1mruns/detect/train7[0m
Starting training for 10 epochs...
Closing dataloader mosaic

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       1/10         0G       3.32      16.58        2.3          8        640:   0%|          | 14/4175 [02:41<13:21:41, 11.56s/it]


KeyboardInterrupt: 