## Bibliotecas Utilizadas

In [None]:
import json
import shutil
import random
import argparse
import logging
import logging.config
import sys
from pathlib import Path
#from utils.json import get_json_from_file

In [None]:
import json
import logging
from pathlib import Path

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

def read_coco_json(json_path):
    """
    Read a COCO format JSON file and return the parsed data.

    Every failure (missing path, path that is not a regular file, unreadable
    file, undecodable or malformed JSON) is logged and reported by returning
    None instead of raising, so callers only need a truthiness check.

    Args:
    - json_path (str or Path): Path to the JSON file.

    Returns:
    - The parsed JSON data, or None if the file could not be read or parsed.
    """
    json_path = Path(json_path)

    # Check if the path exists at all
    if not json_path.exists():
        logger.error(f"File not found: {json_path}")
        return None

    # exists() is also true for directories; opening one would raise
    # IsADirectoryError (or PermissionError on Windows), so reject it here.
    if not json_path.is_file():
        logger.error(f"Not a file: {json_path}")
        return None

    logger.info("Loading COCO annotations...")
    try:
        # JSON is UTF-8 by specification; do not depend on the locale default.
        with open(json_path, 'r', encoding='utf-8') as f:
            coco_data = json.load(f)
    except FileNotFoundError:
        # The file can still disappear between the checks above and open().
        logger.error(f"File not found: {json_path}")
        return None
    except OSError as e:
        # Permission problems and other I/O failures.
        logger.error(f"Could not read file {json_path}: {e}")
        return None
    except (json.JSONDecodeError, UnicodeDecodeError):
        logger.error(f"Invalid JSON in file: {json_path}")
        return None

    logger.info("Successfully loaded the JSON file.")
    return coco_data

def main():
    """
    Load a COCO annotations file and log a short summary of its contents.

    Logs the number of images, annotations and categories, followed by the
    first five image entries and the first five annotation entries. Nothing
    is logged here when the file could not be loaded or the data is empty.
    """
    # Placeholder location of the COCO annotations JSON file
    json_path = "/"

    coco_data = read_coco_json(json_path)

    # Nothing to summarise if loading failed or the data is empty
    if not coco_data:
        return

    # Pull out the three top-level COCO sections, defaulting to empty lists
    images = coco_data.get('images', [])
    annotations = coco_data.get('annotations', [])
    categories = coco_data.get('categories', [])

    # Overall counts
    logger.info(f"Number of images: {len(images)}")
    logger.info(f"Number of annotations: {len(annotations)}")
    logger.info(f"Number of categories: {len(categories)}")

    # Preview of the first five image records
    image_preview = images[:5]
    for image in image_preview:
        logger.info(f"Image ID: {image['id']}, File Name: {image['file_name']}")

    # Preview of the first five annotation records
    annotation_preview = annotations[:5]
    for annotation in annotation_preview:
        logger.info(f"Annotation ID: {annotation['id']}, Image ID: {annotation['image_id']}, Category ID: {annotation['category_id']}")

if __name__ == "__main__":
    main()


In [None]:
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

def copy_images(images, src_dir, dest_dir):
    """
    Copy selected images to a specified directory.

    Copying is best-effort: an entry that cannot be copied is logged and
    skipped, and the remaining entries are still processed.

    Args:
    - images: Iterable of image records; each record's 'file_name' entry is
      resolved relative to src_dir.
    - src_dir (str or Path): Directory containing the source images.
    - dest_dir (str or Path): Directory receiving the copies. Only the final
      path component of 'file_name' is kept, so copies land directly in
      dest_dir.

    Returns:
    - None
    """
    # Convert once instead of on every iteration
    src_dir = Path(src_dir)
    dest_dir = Path(dest_dir)

    for image in images:
        # Resolve the paths outside the copy's try-block so the error message
        # below can never refer to an unbound or stale path from a previous
        # iteration.
        try:
            src_path = src_dir / image['file_name']
        except (KeyError, TypeError) as e:
            logger.error(f"Skipping image entry without a usable 'file_name': {image!r} ({e!r})")
            continue
        dest_path = dest_dir / src_path.name

        try:
            shutil.copy(src_path, dest_path)
        except OSError as e:
            # Covers missing sources, permission errors and shutil.Error
            logger.error(f"Failed to copy {src_path} to {dest_path}: {e}")
        else:
            logger.info(f"Successfully copied {src_path} to {dest_path}")

def split_dataset(images_dir, labels_json_path, output_dir, train_ratio=0.75, val_ratio=0.1, seed=None):
    """
    Splits a COCO dataset into training, validation, and testing sets based on given ratios.

    The image records are shuffled and cut into three consecutive slices.
    Split sizes are truncated to whole images, so the test split receives
    whatever remains after the train and validation slices. For each split
    the images are copied to ``<output_dir>/images/<split>`` and a COCO file
    holding the split's images, their annotations and the categories is
    written to ``<output_dir>/labels/<split>/coco.json``.

    Problems are logged rather than raised: a bad annotations file, a missing
    images directory or invalid ratios abort the run; a failure while writing
    one split is logged and the remaining splits are still processed.

    Args:
    - images_dir (str or Path): Directory containing the source images.
    - labels_json_path (str or Path): Path to the COCO annotations JSON file.
    - output_dir (str or Path): Root directory for the generated splits.
    - train_ratio (float): Fraction of images for training, in (0, 1).
    - val_ratio (float): Fraction of images for validation, in [0, 1);
      train_ratio + val_ratio must not exceed 1.
    - seed (int or None): When given, shuffle with a dedicated generator
      seeded with this value so the split is reproducible; when None, the
      module-level random state is used.

    Returns:
    - None
    """
    try:
        logger.info("Loading COCO annotations...")
        # JSON is UTF-8 by specification; do not depend on the locale default.
        with open(labels_json_path, 'r', encoding='utf-8') as f:
            coco_data = json.load(f)
    except FileNotFoundError:
        logger.error(f"File not found: {labels_json_path}")
        return
    except OSError as e:
        # e.g. the path is a directory or the file is not readable
        logger.error(f"Could not read file {labels_json_path}: {e}")
        return
    except (json.JSONDecodeError, UnicodeDecodeError):
        logger.error(f"Invalid JSON in file: {labels_json_path}")
        return

    images_dir = Path(images_dir)
    output_dir = Path(output_dir)

    if not images_dir.exists():
        logger.error(f"Images directory does not exist: {images_dir}")
        return

    # Extract image, annotation and category details. A missing section is
    # treated as empty so that an absent 'categories' key no longer makes
    # every split fail after its images were already copied.
    images = coco_data.get('images', [])
    annotations = coco_data.get('annotations', [])
    categories = coco_data.get('categories', [])

    # Validate ratios
    if not (0 < train_ratio < 1 and 0 <= val_ratio < 1 and train_ratio + val_ratio <= 1):
        logger.error("Invalid training/validation ratios.")
        return

    if seed is None:
        random.shuffle(images)
    else:
        random.Random(seed).shuffle(images)

    total_images = len(images)
    train_end = int(total_images * train_ratio)
    val_end = train_end + int(total_images * val_ratio)

    splits = {
        "train": images[:train_end],
        "val": images[train_end:val_end],
        "test": images[val_end:],
    }

    for split_name, split_images in splits.items():
        # Deliberately broad: one failing split must not abort the others.
        try:
            images_output_path = output_dir / "images" / split_name
            images_output_path.mkdir(parents=True, exist_ok=True)

            labels_output_path = output_dir / "labels" / split_name
            labels_output_path.mkdir(parents=True, exist_ok=True)

            copy_images(split_images, images_dir, images_output_path)

            # Keep only the annotations that belong to this split's images
            image_ids = {image['id'] for image in split_images}
            coco_file = {
                'images': split_images,
                'annotations': [
                    annotation for annotation in annotations
                    if annotation['image_id'] in image_ids
                ],
                'categories': categories,
            }
            with open(labels_output_path / "coco.json", 'w', encoding='utf-8') as file:
                json.dump(coco_file, file, indent=4)
            logger.info(f"Dataset for {split_name} saved successfully.")
        except Exception as e:
            logger.error(f"Failed to process data for {split_name}: {e}")

# Notebook configuration: dataset locations and split proportions
images_dir = "/home/thales/Documents/object_detection_workshop/Códigos Originais e Dados/Dataset/images000000000000"
# Points directly at the annotations JSON file
coco_json_path = "/home/thales/Documents/object_detection_workshop/Códigos Originais e Dados/Dataset/annotations/instances_default.json"
output_dir = "/home/thales/Documents/object_detection_workshop/Saida"
train_ratio, val_ratio = 0.75, 0.1

# Run the split with the settings above
split_dataset(
    images_dir=images_dir,
    labels_json_path=coco_json_path,
    output_dir=output_dir,
    train_ratio=train_ratio,
    val_ratio=val_ratio,
)

