# **Dataset Preprocessing: COCO to YOLO Conversion and Train/Validation Organization**

In [None]:
from google.colab import drive
drive.mount('/content/drive')


Mounted at /content/drive


In [None]:
cd /content/drive/MyDrive/output

/content/drive/MyDrive/output


In [None]:
# Root of the raw dataset on Drive; it is passed to process_folders() below, which
# walks it as <category>/<clip>/ folders looking for one .mp4 and one .json per clip.
base_dir='/content/drive/MyDrive/gun_dataset'

In [None]:
# Output root for the extracted frames and their YOLO label files; the conversion
# cell below creates it if needed and writes <category>/<clip>/frame_<n>.jpg|.txt here.
frames_base_dir = '/content/drive/MyDrive/preprocessing'

In [None]:
import os
import json
import cv2

# **Converting COCO to YOLO Format and Extracting Frames from Videos**


In [None]:
# YOLO class id for each dataset category. Keys must be lower-case because
# process_folders() looks folders up with `category.lower()`, so a folder named
# 'Handgun' or 'Machine_Gun' on disk still resolves to the right entry.
category_to_class_id = {
    'handgun': 0,
    'machine_gun': 1
}

# Make sure the frames output root exists. exist_ok=True keeps the cell safe to
# re-run and avoids the check-then-create race of a separate os.path.exists() test.
os.makedirs(frames_base_dir, exist_ok=True)

# Function to convert bbox from COCO to YOLO format (x_center, y_center, width, height)
def convert_to_yolo_format(bbox, img_width, img_height):
    """Turn a COCO-style box into a normalised YOLO box.

    Args:
        bbox: Indexable of at least four numbers read as
            (x_min, y_min, box_width, box_height) in pixels.
        img_width: Image width in pixels, used to normalise x values.
        img_height: Image height in pixels, used to normalise y values.

    Returns:
        Tuple (x_center, y_center, width, height), each divided by the
        corresponding image dimension.
    """
    left, top, box_w, box_h = bbox[0], bbox[1], bbox[2], bbox[3]

    # The box centre is the top-left corner shifted by half the box size.
    center_x = left + box_w / 2
    center_y = top + box_h / 2

    return (
        center_x / img_width,
        center_y / img_height,
        box_w / img_width,
        box_h / img_height,
    )

# Function to extract frames and save annotations in YOLO format
def extract_frames_and_annotations(video_path, labels_path, frames_dir, class_id):
    """Write every frame of a video as a JPEG plus a matching YOLO label file.

    For frame number n (0-based) this writes `frame_n.jpg` and `frame_n.txt`
    into `frames_dir`. The label file gets one line per annotation belonging to
    that frame, formatted as `<class_id> <x_center> <y_center> <width> <height>`;
    frames without annotations get an empty label file.

    Args:
        video_path: Path of the video to decode with OpenCV.
        labels_path: Path of a JSON file whose top-level 'annotations' list
            holds dicts with 'image_id' and 'bbox' keys.
        frames_dir: Existing directory that receives the images and labels.
        class_id: Class id written at the start of every label line.

    Returns:
        None.

    Note:
        Frames are matched to annotations with `image_id == frame index + 1`,
        i.e. this assumes image ids are 1-based and follow the video's frame
        order - TODO confirm against the annotation export.
    """
    with open(labels_path, 'r') as labels_file:
        annotations = json.load(labels_file)['annotations']

    # Group annotations by image id once, instead of rescanning the whole list
    # for every frame. Per-frame annotation order is preserved.
    annotations_by_image = {}
    for ann in annotations:
        annotations_by_image.setdefault(ann['image_id'], []).append(ann)

    cap = cv2.VideoCapture(video_path)
    try:
        if not cap.isOpened():
            # Previously indistinguishable from an empty video; make it visible.
            print(f"Could not open video {video_path}, no frames extracted")
            return

        frame_id = 0
        while True:
            ret, frame = cap.read()
            if not ret:
                break

            frame_path = os.path.join(frames_dir, f'frame_{frame_id}.jpg')
            cv2.imwrite(frame_path, frame)

            # frame.shape is (height, width, ...) for OpenCV images.
            img_height, img_width = frame.shape[0], frame.shape[1]

            # Save YOLO annotations for this frame (image ids are offset by one).
            label_path = os.path.join(frames_dir, f'frame_{frame_id}.txt')
            with open(label_path, 'w') as label_out:
                for ann in annotations_by_image.get(frame_id + 1, []):
                    yolo_bbox = convert_to_yolo_format(ann['bbox'], img_width, img_height)
                    label_out.write(f"{class_id} {yolo_bbox[0]} {yolo_bbox[1]} {yolo_bbox[2]} {yolo_bbox[3]}\n")

            frame_id += 1
    finally:
        # Always free the decoder, even if reading or writing raised.
        cap.release()

# Function to iterate over subfolders and process each video and label file
def _first_file_with_suffix(directory, suffix):
    """Return the first entry of `directory` whose name ends with `suffix`, else None."""
    for entry in os.listdir(directory):
        if entry.endswith(suffix):
            return entry
    return None


def process_folders(base_dir, frames_base_dir, category_to_class_id):
    """Walk `base_dir/<category>/<clip>/` and convert every clip found.

    Each category folder is mapped to a class id through its lower-cased name;
    categories without a mapping are reported and skipped. Inside a category,
    every sub-directory that contains both an .mp4 and a .json file is handed
    to extract_frames_and_annotations(), writing into
    `frames_base_dir/<category>/<clip>/` (created when missing). Clips missing
    either file are reported and skipped.

    Args:
        base_dir: Root directory holding one folder per category.
        frames_base_dir: Root directory for the extracted frames and labels.
        category_to_class_id: Mapping from lower-case category name to class id.
    """
    for category in os.listdir(base_dir):
        category_path = os.path.join(base_dir, category)
        if not os.path.isdir(category_path):
            continue

        class_id = category_to_class_id.get(category.lower())
        if class_id is None:
            print(f"No class ID assigned for category {category}, skipping...")
            continue

        for subfolder in os.listdir(category_path):
            subfolder_path = os.path.join(category_path, subfolder)
            if not os.path.isdir(subfolder_path):
                continue

            video_file = _first_file_with_suffix(subfolder_path, '.mp4')
            label_file = _first_file_with_suffix(subfolder_path, '.json')

            if not (video_file and label_file):
                print(f"Skipping {subfolder_path}: Video file or label file not found")
                continue

            video_path = os.path.join(subfolder_path, video_file)
            labels_path = os.path.join(subfolder_path, label_file)

            # Mirror the <category>/<clip> layout under the output root.
            frames_dir = os.path.join(frames_base_dir, category, subfolder)
            if not os.path.exists(frames_dir):
                os.makedirs(frames_dir)

            print(f"Processing {video_file} in {subfolder_path}")

            # Extract frames and save annotations
            extract_frames_and_annotations(video_path, labels_path, frames_dir, class_id)
            print(f'Processed {video_file} in {subfolder_path}')
# Run the conversion: for every recognised category under base_dir, extract the
# frames and YOLO label files of each clip into frames_base_dir.
process_folders(base_dir, frames_base_dir, category_to_class_id)

Processing video.mp4 in /content/drive/MyDrive/gun_dataset/Handgun/PAH1_C2_P3_V1_HB_1
Processed video.mp4 in /content/drive/MyDrive/gun_dataset/Handgun/PAH1_C2_P3_V1_HB_1
Processing video.mp4 in /content/drive/MyDrive/gun_dataset/Handgun/PAH1_C1_P1_V1_HB_4
Processed video.mp4 in /content/drive/MyDrive/gun_dataset/Handgun/PAH1_C1_P1_V1_HB_4
Processing video.mp4 in /content/drive/MyDrive/gun_dataset/Handgun/PAH1_C1_P2_V1_HB_3
Processed video.mp4 in /content/drive/MyDrive/gun_dataset/Handgun/PAH1_C1_P2_V1_HB_3
Processing video.mp4 in /content/drive/MyDrive/gun_dataset/Handgun/PAH1_C1_P2_V1_HB_1
Processed video.mp4 in /content/drive/MyDrive/gun_dataset/Handgun/PAH1_C1_P2_V1_HB_1
Processing video.mp4 in /content/drive/MyDrive/gun_dataset/Handgun/PAH1_C2_P3_V1_HB_3
Processed video.mp4 in /content/drive/MyDrive/gun_dataset/Handgun/PAH1_C2_P3_V1_HB_3
Processing video.mp4 in /content/drive/MyDrive/gun_dataset/Handgun/PAH1_C2_P3_V2_HB_3
Processed video.mp4 in /content/drive/MyDrive/gun_dataset/H

In [None]:
cd /content/drive/MyDrive/preprocessing

/content/drive/MyDrive/preprocessing


In [None]:
ls

[0m[01;34mHandgun[0m/  [01;34mMachine_Gun[0m/


# **Streamlining Dataset Preparation: Train and Validation Folder Organization (Clear output is printed; files are renamed to have unique names and organized into train/val folders. Please refer to base_yolo.ipynb for details.)**

In [None]:
import os
import shutil

def process_dataset(dataset_path, output_path):
    """Flatten the train/val splits of a dataset into images/labels folders.

    Creates `output_path/<split>/images` and `output_path/<split>/labels` for
    the "train" and "val" splits, then, for each split that exists under
    `dataset_path`, hands it to process_folder() to copy its files across.

    Args:
        dataset_path: Directory that may contain "train" and/or "val" folders.
        output_path: Directory that receives the flattened layout.
    """
    split_names = ("train", "val")

    # Create every destination folder up front, even for splits that turn out
    # to be missing from the source dataset.
    destinations = {}
    for split in split_names:
        images_out = os.path.join(output_path, split, "images")
        labels_out = os.path.join(output_path, split, "labels")
        destinations[split] = (images_out, labels_out)
    for split in split_names:
        for folder in destinations[split]:
            os.makedirs(folder, exist_ok=True)

    # Copy each split that is actually present in the source dataset.
    for split in split_names:
        source_split = os.path.join(dataset_path, split)
        if not os.path.exists(source_split):
            continue
        images_out, labels_out = destinations[split]
        process_folder(source_split, images_out, labels_out)

def process_folder(folder_path, output_images_folder, output_labels_folder):
    """Copy the images and labels of every clip folder into flat output folders.

    Each sub-directory of `folder_path` is scanned (non-recursively). Files
    ending in '.jpg' are copied to `output_images_folder` and files ending in
    '.txt' to `output_labels_folder`; every copy is renamed to
    `<subfolder>_<original name>` so that files from different clips cannot
    collide. Other files are ignored.

    Entries of `folder_path` that are not directories (stray files such as
    notes or system metadata) are skipped instead of crashing the listing.

    Args:
        folder_path: Split folder whose sub-directories hold frames and labels.
        output_images_folder: Existing directory that receives the .jpg copies.
        output_labels_folder: Existing directory that receives the .txt copies.
    """
    for subfolder in os.listdir(folder_path):
        subfolder_path = os.path.join(folder_path, subfolder)

        # os.listdir() below would raise NotADirectoryError on a plain file.
        if not os.path.isdir(subfolder_path):
            continue

        for file in os.listdir(subfolder_path):
            if file.endswith('.jpg'):
                destination_folder = output_images_folder
            elif file.endswith('.txt'):
                destination_folder = output_labels_folder
            else:
                continue

            # Prefix with the clip folder name to keep file names unique.
            source = os.path.join(subfolder_path, file)
            destination = os.path.join(destination_folder, f"{subfolder}_{file}")
            shutil.copy(source, destination)

# Example usage: read the train/ and val/ splits found under dataset_folder and
# copy their files into output_folder/{train,val}/{images,labels}, prefixing each
# file name with the name of the clip folder it came from.
# NOTE(review): dataset_folder is expected to already contain train/ and/or val/
# folders; nothing in this notebook creates them - confirm where that split is made.
dataset_folder = "/content/drive/MyDrive/output"
output_folder = "/content/drive/MyDrive/dataset"
process_dataset(dataset_folder, output_folder)
