## Bibliotecas Utilizadas

- Json: 
- Shutil:
- Random: 
- Argparse: 
- Logging:
- Sys:
- Pathlib: 

In [1]:
import json
import shutil
import random
import argparse
import logging
import logging.config
import sys
from pathlib import Path
#from utils.json import get_json_from_file

In [13]:
# Configure the root logger to emit INFO and above. Note that basicConfig()
# does nothing when the root logger already has handlers, so re-running this
# cell in the same session leaves the existing configuration in place.
logging.basicConfig(level=logging.INFO)
# Module-level logger shared by the functions defined below.
logger = logging.getLogger(__name__)

def copy_images(images, src_dir, dest_dir):
    """
    Copy selected images to a specified directory.

    Each entry of ``images`` is expected to be a mapping with a ``'file_name'``
    key holding a path relative to ``src_dir``. Files are copied into
    ``dest_dir`` under their base name only, so any sub-directory present in
    ``'file_name'`` is dropped.

    The copy is best-effort: a malformed entry or a failed copy is logged and
    the remaining images are still processed.

    Args:
        images: Iterable of image records (dicts with a ``'file_name'`` key).
        src_dir: Directory the ``'file_name'`` values are relative to.
        dest_dir: Existing directory that receives the copies.

    Returns:
        None.
    """
    src_dir = Path(src_dir)
    dest_dir = Path(dest_dir)

    for image in images:
        # Resolve the paths outside the copy's try block so the error handler
        # below never refers to unbound (or stale, previous-iteration) paths.
        try:
            src_path = src_dir / image['file_name']
        except (KeyError, TypeError) as e:
            logger.error(f"Skipping image entry without a usable 'file_name': {image!r} ({e!r})")
            continue
        dest_path = dest_dir / src_path.name

        try:
            shutil.copy(src_path, dest_path)
        except Exception as e:
            # Deliberately broad: one bad file must not abort the whole copy.
            logger.error(f"Failed to copy {src_path} to {dest_path}: {e}")
        else:
            logger.info(f"Successfully copied {src_path} to {dest_path}")

def split_dataset(images_dir, labels_json_path, output_dir, train_ratio=0.75, val_ratio=0.1, seed=None):
    """
    Splits a COCO dataset into training, validation, and testing sets based on given ratios.

    The image list is shuffled and cut into three consecutive slices. For each
    split the images are copied to ``<output_dir>/images/<split>`` and a COCO
    file containing that split's images, their annotations and the categories
    is written to ``<output_dir>/labels/<split>/coco.json``.

    Split sizes are truncated with ``int()``, so the test split receives
    whatever remains after the train and validation slices.

    Args:
        images_dir: Directory containing the source images.
        labels_json_path: Path to the COCO annotation JSON file.
        output_dir: Root directory for the generated splits (created on demand).
        train_ratio: Fraction of images used for training; must be in (0, 1).
        val_ratio: Fraction of images used for validation; must be in [0, 1)
            and ``train_ratio + val_ratio`` must not exceed 1.
        seed: Optional seed for a reproducible shuffle. When ``None`` (the
            default) the module-level ``random`` state is used, as before.

    Returns:
        None. A missing or malformed annotation file, a missing images
        directory or invalid ratios are logged and abort the split; a failure
        while processing one split is logged and the next split is attempted.
    """
    try:
        logger.info("Loading COCO annotations...")
        # JSON is UTF-8 by specification; do not depend on the locale default.
        with open(labels_json_path, 'r', encoding='utf-8') as f:
            coco_data = json.load(f)
    except FileNotFoundError:
        logger.error(f"File not found: {labels_json_path}")
        return
    except json.JSONDecodeError:
        logger.error(f"Invalid JSON in file: {labels_json_path}")
        return

    images_dir = Path(images_dir)
    output_dir = Path(output_dir)

    if not images_dir.exists():
        logger.error(f"Images directory does not exist: {images_dir}")
        return

    # Extract image and annotation details (shuffle a copy, not the loaded data)
    images = list(coco_data.get('images', []))
    annotations = coco_data.get('annotations', [])
    if 'categories' not in coco_data:
        logger.warning("No 'categories' section found; writing an empty category list.")
    categories = coco_data.get('categories', [])

    # Validate ratios
    if not (0 < train_ratio < 1 and 0 <= val_ratio < 1 and train_ratio + val_ratio <= 1):
        logger.error("Invalid training/validation ratios.")
        return

    if seed is None:
        random.shuffle(images)
    else:
        # Private generator: reproducible without touching the global state.
        random.Random(seed).shuffle(images)

    total_images = len(images)
    train_end = int(total_images * train_ratio)
    val_end = train_end + int(total_images * val_ratio)

    splits = (
        ("train", images[:train_end]),
        ("val", images[train_end:val_end]),
        ("test", images[val_end:]),
    )

    def filter_annotations(images_set):
        # Keep the annotations (in their original order) that belong to the split.
        image_ids = {image['id'] for image in images_set}
        return [annotation for annotation in annotations if annotation['image_id'] in image_ids]

    for split_name, images_set in splits:
        try:
            images_output_path = output_dir / "images" / split_name
            images_output_path.mkdir(parents=True, exist_ok=True)

            labels_output_path = output_dir / "labels" / split_name
            labels_output_path.mkdir(parents=True, exist_ok=True)

            # NOTE(review): copy_images stores every file under its base name,
            # while 'file_name' in the JSON written below is left untouched, so
            # entries that include a sub-directory no longer match the copied
            # layout - confirm what downstream tooling expects.
            copy_images(images_set, images_dir, images_output_path)

            coco_file = {
                'images': images_set,
                'annotations': filter_annotations(images_set),
                'categories': categories,
            }
            with open(labels_output_path / "coco.json", 'w', encoding='utf-8') as file:
                json.dump(coco_file, file, indent=4)
            logger.info(f"Dataset for {split_name} saved successfully.")
        except Exception as e:
            # Best-effort per split: report the problem and move on.
            logger.error(f"Failed to process data for {split_name}: {e}")

# Set arguments for running in Jupyter
origin = Path.cwd().parent
images_dir = origin / "Dados/Dataset/images"
coco_json_path = origin / "Dados/Dataset/annotations/instances_default.json"  # Pointing directly to the JSON file
output_dir = origin / "Dados/Saida"
train_ratio = 0.75
val_ratio = 0.1

# Call the function exactly once. Every call reshuffles the images and copies
# them into the same output folders without cleaning them first, so a second
# run would leave images from two different random splits mixed together.
split_dataset(images_dir, coco_json_path, output_dir, train_ratio, val_ratio)



INFO:__main__:Loading COCO annotations...
INFO:__main__:Successfully copied /home/thales/Documents/object_detection_workshop/Dados/Dataset/images/IMG_0323.jpg to /home/thales/Documents/object_detection_workshop/Dados/Saida/images/train/IMG_0323.jpg
INFO:__main__:Successfully copied /home/thales/Documents/object_detection_workshop/Dados/Dataset/images/DC38FA5A-9241-462A-858E-60566C0D0393.jpeg to /home/thales/Documents/object_detection_workshop/Dados/Saida/images/train/DC38FA5A-9241-462A-858E-60566C0D0393.jpeg
INFO:__main__:Successfully copied /home/thales/Documents/object_detection_workshop/Dados/Dataset/images/Deep/1714838861377.jpg to /home/thales/Documents/object_detection_workshop/Dados/Saida/images/train/1714838861377.jpg
INFO:__main__:Successfully copied /home/thales/Documents/object_detection_workshop/Dados/Dataset/images/IMG_0337.jpg to /home/thales/Documents/object_detection_workshop/Dados/Saida/images/train/IMG_0337.jpg
INFO:__main__:Successfully copied /home/thales/Documents/