In [22]:
import os
import json
import random
from PIL import Image

# Function to create a directory if it doesn't exist
def create_directory(dir_path):
    """Create ``dir_path`` (including missing parent directories) if needed.

    Calling this on a directory that already exists is a no-op.

    Args:
        dir_path: Path of the directory to create.

    Raises:
        FileExistsError: If ``dir_path`` exists but is not a directory.
    """
    # exist_ok=True replaces a separate os.path.exists() check, which is racy:
    # another process could create the directory between the check and the call.
    os.makedirs(dir_path, exist_ok=True)

# Function to read and parse the JSON file
def read_json_file(json_file_path):
    """Load a JSON file and return its parsed contents.

    Args:
        json_file_path: Path of the JSON file to read.

    Returns:
        The Python object produced by ``json.load`` for the file's contents.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    # Decode explicitly as UTF-8 instead of relying on the platform's locale
    # default, which would garble non-ASCII text on non-UTF-8 systems.
    with open(json_file_path, 'r', encoding='utf-8') as file:
        return json.load(file)

# Function to create patches from an image
def create_patches(image, patch_size=(128, 128), num_patches=10, rng=None):
    """Crop randomly positioned, fixed-size patches out of an image.

    Args:
        image: Object exposing ``size`` as ``(width, height)`` and a
            ``crop(box)`` method taking ``(left, upper, right, lower)``.
        patch_size: ``(width, height)`` of every patch.
        num_patches: Number of patches to cut.
        rng: Optional object with a ``randint(a, b)`` method (for example a
            ``random.Random`` instance) used to pick patch positions. Defaults
            to the global ``random`` module, so ``random.seed`` still applies.

    Returns:
        A list of ``num_patches`` values returned by ``image.crop``.

    Raises:
        ValueError: If the patch is larger than the image in either dimension.
    """
    patch_width, patch_height = patch_size
    img_width, img_height = image.size

    # Fail with a clear message rather than the opaque "empty range" error
    # that randint raises when its upper bound is negative.
    if patch_width > img_width or patch_height > img_height:
        raise ValueError(
            f"Patch size {patch_width}x{patch_height} does not fit inside "
            f"image of size {img_width}x{img_height}"
        )

    if rng is None:
        rng = random

    patches = []
    for _ in range(num_patches):
        # Draw the top-left corner so the whole patch stays inside the image.
        left = rng.randint(0, img_width - patch_width)
        upper = rng.randint(0, img_height - patch_height)
        box = (left, upper, left + patch_width, upper + patch_height)
        patches.append(image.crop(box))
    return patches

# Function to process and save the patches for each json file and update the JSON with patch filenames
def process_and_save_patches(images_data, root_image_folder, json_file_name, json_directory):
    """Cut patches from every listed image, save them, and build patch entries.

    Patches are written as PNG files under
    ``<json_directory>/<json_file_name without extension>/train2014`` for
    entries whose ``'split'`` is ``'train'``, and under ``.../val2014`` for
    every other split value.

    Args:
        images_data: Mapping with an ``'images'`` list; each entry is a dict
            providing at least ``'filename'``, ``'split'`` and ``'filepath'``.
        root_image_folder: Folder joined with each entry's ``'filepath'`` and
            ``'filename'`` to locate the source image.
        json_file_name: Name of the JSON file; its stem names the output folder.
        json_directory: Directory in which the output folder is created.

    Returns:
        A list with one shallow copy of the source entry per saved patch, with
        ``'filename'`` replaced by the patch file name. Images that fail to
        open, crop, or save are reported on stdout and processing continues
        with the next image.

    Raises:
        KeyError: If ``images_data`` or an entry lacks a required key.
    """
    # splitext drops only the final extension, so a name such as
    # "cats.v2.json" maps to "cats.v2" instead of being cut at the first dot.
    output_dir = os.path.join(json_directory, os.path.splitext(json_file_name)[0])
    create_directory(output_dir)

    # Create subdirectories for train2014 and val2014 inside the output directory
    train_folder = os.path.join(output_dir, 'train2014')
    val_folder = os.path.join(output_dir, 'val2014')
    create_directory(train_folder)
    create_directory(val_folder)

    # List to store updated image data with patch filenames
    updated_images_data = []

    for entry in images_data['images']:
        image_file = entry['filename']
        split = entry['split']

        # Anything that is not explicitly 'train' is stored with the val images.
        subfolder = train_folder if split == 'train' else val_folder

        # Full path to the image
        image_path = os.path.join(root_image_folder, entry['filepath'], image_file)

        try:
            image_stem = os.path.splitext(image_file)[0]

            # The context manager closes the underlying file once all patches
            # of this image are written, so no file handles pile up.
            with Image.open(image_path) as img:
                patches = create_patches(img)

                # Save the patches and create new image entries with patch filenames
                for i, patch in enumerate(patches):
                    patch_filename = f"{image_stem}_patch_{i}.png"
                    patch.save(os.path.join(subfolder, patch_filename))

                    # Shallow copy: nested values are shared with the source
                    # entry; only 'filename' is replaced.
                    updated_entry = entry.copy()
                    updated_entry['filename'] = patch_filename
                    updated_images_data.append(updated_entry)

        except Exception as e:
            # Best effort: report the failing image and continue with the rest.
            print(f"Error processing image {image_file}: {e}")

    # Return updated JSON data
    return updated_images_data

# Function to process all JSON files in the directory
def process_json_files(json_directory, root_image_folder):
    """Generate patches and a ``*_patch.json`` file for each JSON in a directory.

    For every ``.json`` file in ``json_directory`` the file is parsed, its
    images are turned into patches via ``process_and_save_patches``, and the
    resulting entries are written to ``<stem>_patch.json`` in the same
    directory as ``{'images': [...]}``.

    Files whose name already ends in ``_patch.json`` are skipped: they are
    outputs of an earlier run living in the same directory, and treating them
    as inputs would make every re-run process its own results.

    Args:
        json_directory: Directory scanned for input JSON files; also receives
            the generated patch folders and ``*_patch.json`` files.
        root_image_folder: Folder the image paths in the JSON files are
            resolved against.
    """
    patch_suffix = '_patch.json'

    # Sorted so the processing order does not depend on os.listdir's
    # arbitrary ordering.
    json_files = sorted(
        f for f in os.listdir(json_directory)
        if f.endswith('.json') and not f.endswith(patch_suffix)
    )

    for json_file in json_files:
        json_file_path = os.path.join(json_directory, json_file)
        print(f"Processing {json_file_path}...")

        # Step 1: Read and parse the JSON file
        images_data = read_json_file(json_file_path)

        # Step 2: Process and save the patches for this JSON file and get the updated images data
        updated_images_data = process_and_save_patches(images_data, root_image_folder, json_file, json_directory)

        # Step 3: Save the updated JSON with patch filenames. splitext keeps
        # dots inside the stem, so "a.b.json" and "a.c.json" get distinct outputs.
        updated_json_file_path = os.path.join(
            json_directory, os.path.splitext(json_file)[0] + patch_suffix
        )
        with open(updated_json_file_path, 'w', encoding='utf-8') as f:
            json.dump({'images': updated_images_data}, f, indent=4)

    print("Processing of all JSON files is complete.")


In [23]:
# Directory scanned for the input JSON files (machine-specific; adjust as needed)
json_directory = '/mnt/abka03/mscoco2014/xl-vlm'
# Root folder passed on for locating the images (machine-specific; adjust as needed)
root_image_folder = '/mnt/abka03/mscoco2014'

# Generate patches and the updated JSON for every JSON file in json_directory
process_json_files(
    json_directory=json_directory,
    root_image_folder=root_image_folder,
)

Processing /mnt/abka03/mscoco2014/xl-vlm/dog_dataset_coco.json...
Processing /mnt/abka03/mscoco2014/xl-vlm/cat_dataset_coco.json...
Processing of all JSON files is complete.
