In [None]:
import json
import pandas as pd

# Annotation JSON that describes the val2019 images.
json_file_path = 'instances_val2019.json'

# Parse the whole file into memory; the following cell reads `data['images']`.
with open(json_file_path) as annotation_file:
    data = json.load(annotation_file)

# Show which top-level sections the annotation file contains.
print(data.keys())

In [44]:
import os
import shutil

val2019_dir = 'val2019'
difficulty_levels = ('easy', 'medium', 'hard')

# Make sure one sub-folder per level exists. exist_ok=True keeps the cell
# safe to re-run and removes the separate exists() check.
for level in difficulty_levels:
    level_dir = os.path.join(val2019_dir, level)
    os.makedirs(level_dir, exist_ok=True)

# Move every image listed in the annotations into the folder named after its level.
# NOTE: relies on `data` still holding the parsed 'instances_val2019.json';
# a later cell rebinds `data` to the test annotations, so run the cells in order.
for image_info in data['images']:
    file_name = image_info['file_name']
    level = image_info.get('level')  # None when the entry has no 'level' key

    # Entries with a missing or unexpected level are left where they are.
    if level not in difficulty_levels:
        continue

    src = os.path.join(val2019_dir, file_name)
    dest = os.path.join(val2019_dir, level, file_name)

    if os.path.exists(src):
        shutil.move(src, dest)
    elif not os.path.exists(dest):
        # Only report files that are truly missing; a file already sitting in
        # its level folder (e.g. from an earlier run of this cell) is fine.
        print(f"Image file {file_name} not found.")


In [None]:
# Annotation JSON that describes the test2019 images.
json_file_path = 'instances_test2019.json'

# Parse the whole file into memory. This rebinds `data`, replacing whatever
# annotations were loaded into that name before.
with open(json_file_path) as annotation_file:
    data = json.load(annotation_file)

# Show which top-level sections the annotation file contains.
print(data.keys())

In [31]:
test2019_dir = 'test2019'
difficulty_levels = ('easy', 'medium', 'hard')

# Make sure one sub-folder per level exists. exist_ok=True keeps the cell
# safe to re-run and removes the separate exists() check.
for level in difficulty_levels:
    level_dir = os.path.join(test2019_dir, level)
    os.makedirs(level_dir, exist_ok=True)

# Move every image listed in the annotations into the folder named after its level.
# NOTE: relies on `data` holding the parsed 'instances_test2019.json' (loaded
# in the preceding cell); `data` is shared with the val2019 cells, so run in order.
for image_info in data['images']:
    file_name = image_info['file_name']
    level = image_info.get('level')  # None when the entry has no 'level' key

    # Entries with a missing or unexpected level are left where they are.
    if level not in difficulty_levels:
        continue

    src = os.path.join(test2019_dir, file_name)
    dest = os.path.join(test2019_dir, level, file_name)

    if os.path.exists(src):
        shutil.move(src, dest)
    elif not os.path.exists(dest):
        # Only report files that are truly missing; a file already sitting in
        # its level folder (e.g. from an earlier run of this cell) is fine.
        print(f"Image file {file_name} not found.")

In [52]:
import os
import shutil

# Source directories whose images are merged into the new training set
train2019_dir = 'train2019'
val2019_medium_dir = 'val2019/medium'
test2019_medium_dir = 'test2019/medium'

# Destination directory for the merged images
new_dir = 'newtrain_dataset'

# exist_ok=True makes this a no-op when the directory is already present, so
# the cell can be re-run without a separate existence check. It also fails
# right here (instead of later, during copying) if the path exists as a file.
os.makedirs(new_dir, exist_ok=True)

def copy_images(src_dir, dest_dir):
    """Copy every regular file directly inside ``src_dir`` into ``dest_dir``.

    Sub-directories of ``src_dir`` are ignored (no recursion). A file that
    already exists in ``dest_dir`` under the same name is overwritten by
    ``shutil.copy``. ``dest_dir`` must already exist.

    Args:
        src_dir: Directory to read files from.
        dest_dir: Directory to copy the files into.
    """
    for entry_name in os.listdir(src_dir):
        origin = os.path.join(src_dir, entry_name)
        # Skip anything that is not a regular file (e.g. nested folders).
        if not os.path.isfile(origin):
            continue
        shutil.copy(origin, os.path.join(dest_dir, entry_name))

# Merge train2019 with the medium-level val2019 and test2019 images,
# processing the sources in that order.
for source_dir in (train2019_dir, val2019_medium_dir, test2019_medium_dir):
    copy_images(source_dir, new_dir)

print(f"Images successfully copied to {new_dir}")


Images successfully copied to newtrain_dataset


In [6]:
import json
import os

def convert_coco_to_yolo(json_path, output_path, level='easy'):
    """Write one YOLO-style label file per image of the requested level.

    Reads the annotation JSON at ``json_path`` and, for every entry of its
    ``images`` list whose ``level`` field equals ``level``, writes
    ``<image file stem>.txt`` into ``output_path``. Each line of that file is
    ``class x_center y_center width height`` where the box values are divided
    by the image width/height taken from the same JSON.

    Images that have no ``level`` field are skipped. An image of the requested
    level that has no annotations still gets an (empty) label file.

    Args:
        json_path: Path to the annotation JSON; it must contain ``images`` and
            ``annotations`` lists.
        output_path: Directory for the generated ``.txt`` files; created when
            it does not exist yet.
        level: Value of the image ``level`` field to convert. Defaults to
            ``'easy'``, the value that used to be hard-coded.
    """
    with open(json_path) as json_file:
        data = json.load(json_file)

    os.makedirs(output_path, exist_ok=True)

    # Group the annotations by image in a single pass instead of re-scanning
    # the full annotation list for every image. Appending keeps the original
    # annotation order within each image.
    annotations_by_image = {}
    for ann in data['annotations']:
        annotations_by_image.setdefault(ann['image_id'], []).append(ann)

    for image in data['images']:
        # .get() so entries lacking a 'level' key are skipped rather than
        # aborting the whole conversion with a KeyError.
        if image.get('level') != level:
            continue

        width = image['width']
        height = image['height']
        stem = os.path.splitext(image['file_name'])[0]
        label_file_path = os.path.join(output_path, f"{stem}.txt")

        with open(label_file_path, 'w') as label_file:
            for ann in annotations_by_image.get(image['id'], []):
                # Shift class ids down by one to make them zero-based.
                # NOTE(review): assumes category ids start at 1 and are
                # contiguous - confirm against the 'categories' section.
                category_id = ann['category_id'] - 1
                # bbox is read as [x_min, y_min, box_width, box_height].
                x_min, y_min, box_w, box_h = ann['bbox'][:4]
                x_center = (x_min + box_w / 2) / width
                y_center = (y_min + box_h / 2) / height
                w = box_w / width
                h = box_h / height
                label_file.write(f"{category_id} {x_center} {y_center} {w} {h}\n")

json_path = r'instances_test2019.json'
output_path = r'labels_test'
# exist_ok=True: a plain os.mkdir() raises FileExistsError as soon as this
# cell is executed a second time.
os.makedirs(output_path, exist_ok=True)
# NOTE(review): a later cell also reads labels from 'labels_val', which no
# visible cell generates - presumably this cell was also run once with the
# val2019 annotations and 'labels_val' as output; confirm.
convert_coco_to_yolo(json_path, output_path)

In [10]:
import os
import shutil
import random

# Image folders and their label folders, paired by position.
# Built with os.path.join so the separator is right on every OS; the previous
# hard-coded backslashes ('val2019\easy') only resolved on Windows.
dataset_paths = [os.path.join('val2019', 'easy'), os.path.join('test2019', 'easy')]
label_paths = [r'labels_val', r'labels_test']

# Paths for the new dataset structure
new_dataset_path = 'multiobject_easy'
train_images_path = os.path.join(new_dataset_path, 'train', 'images')
train_labels_path = os.path.join(new_dataset_path, 'train', 'labels')
val_images_path = os.path.join(new_dataset_path, 'val', 'images')
val_labels_path = os.path.join(new_dataset_path, 'val', 'labels')
test_images_path = os.path.join(new_dataset_path, 'test', 'images')
test_labels_path = os.path.join(new_dataset_path, 'test', 'labels')

# Create the directories if they don't exist
for split_dir in (
    train_images_path, train_labels_path,
    val_images_path, val_labels_path,
    test_images_path, test_labels_path,
):
    os.makedirs(split_dir, exist_ok=True)

# Gather all image files and the label path each one is expected to have
image_label_pairs = []
for dataset_path, label_path in zip(dataset_paths, label_paths):
    # os.listdir() returns entries in arbitrary order. Sorting gives the
    # seeded shuffle below a fixed starting order, so the same files always
    # produce the same split.
    image_files = sorted(
        f for f in os.listdir(dataset_path)
        if os.path.isfile(os.path.join(dataset_path, f))
    )
    for image_file in image_files:
        # The label shares the image's stem: 'name.jpg' -> 'name.txt'.
        label_file = os.path.splitext(image_file)[0] + '.txt'
        image_label_pairs.append((os.path.join(dataset_path, image_file), os.path.join(label_path, label_file)))

# Set a fixed random seed for deterministic behavior
random.seed(42)

# Shuffle the image-label pairs to ensure random selection
random.shuffle(image_label_pairs)

# Calculate split sizes: 90% train, 5% val, and whatever remains goes to test
total_images = len(image_label_pairs)
train_size = int(total_images * 0.9)
val_size = int(total_images * 0.05)
test_size = total_images - train_size - val_size

# Split the data into train, val, and test sets (consecutive slices of the shuffled list)
train_pairs = image_label_pairs[:train_size]
val_pairs = image_label_pairs[train_size:train_size + val_size]
test_pairs = image_label_pairs[train_size + val_size:]

def copy_files(pairs, images_dest, labels_dest):
    """Copy image/label pairs into the given destination folders.

    A pair is copied only when its label file exists; pairs whose label file
    is missing are skipped entirely (neither file is copied). Files keep
    their base names in the destination folders.

    Args:
        pairs: Iterable of ``(image_path, label_path)`` tuples.
        images_dest: Existing directory that receives the image files.
        labels_dest: Existing directory that receives the label files.
    """
    for img_src, lbl_src in pairs:
        # No label on disk -> leave the image out of the dataset as well.
        if not os.path.exists(lbl_src):
            continue

        shutil.copy(img_src, os.path.join(images_dest, os.path.basename(img_src)))
        shutil.copy(lbl_src, os.path.join(labels_dest, os.path.basename(lbl_src)))

# Populate the train, val and test folders (in that order) from their pair lists.
for split_pairs, split_images_dir, split_labels_dir in (
    (train_pairs, train_images_path, train_labels_path),
    (val_pairs, val_images_path, val_labels_path),
    (test_pairs, test_images_path, test_labels_path),
):
    copy_files(split_pairs, split_images_dir, split_labels_dir)

print("Dataset split and copied successfully!")


Dataset split and copied successfully!
