Split the Dataset from 1 Folder with their JSON File Annotations 

In [55]:
import json
import pandas as pd
import tensorflow as tf
import os
from sklearn.model_selection import train_test_split
import shutil

In [6]:
# Load and parse JSON data
annotation_path = './train/_annotations.coco.json'

with open(annotation_path, 'r') as json_file:
    data = json.load(json_file)

Extract the Image Paths, Bounding Boxes, and Class Labels

In [7]:
annotation_ids = [entry['image_id'] for entry in data['annotations']]

In [70]:
image_file_paths = []
bounding_boxes = []
class_labels = []

for entry in data['images']:
    if entry['id'] in annotation_ids:
        image_file_paths.append(entry['file_name'])
        
for entry in data['annotations']:
    bounding_boxes.append(entry['bbox'])
    class_labels.append(entry['category_id'])

In [26]:
df_dataset = pd.DataFrame({'image_filename': image_file_paths, 'bounding_boxes': bounding_boxes, 'class_labels': class_labels})

In [None]:
# Create 2 Dataframe; First dataset, contains the image paths 
# Second, contains both 
X = df_dataset.drop(columns='class_labels')
y = df_dataset['class_labels']

Split the Dataframe in Three Folders (Train, Test, Valid)

In [27]:
train_dataset, temp = train_test_split(df_dataset, test_size=0.3, random_state=42)

In [28]:
test_dataset, valid_dataset = train_test_split(temp, test_size=0.5, random_state=42)

In [32]:
# Set the dataframes to a dictionary
train_dataset = dict(train_dataset)
test_dataset = dict(test_dataset)
valid_dataset = dict(valid_dataset)

Save the dataframes to their designated Folders

In [37]:
# Setup the folders
Dataset = 'Dataset\\'
train = Dataset + 'train\\'
valid = Dataset + 'valid\\'
test = Dataset + 'test\\'

In [58]:
# Setup the Directories
current_dir = os.getcwd()

dataset_path = os.path.join(current_dir, Dataset)
train_path = os.path.join(current_dir, train)
test_path = os.path.join(current_dir, test)
valid_path = os.path.join(current_dir, valid)

if not os.path.exists(train_path) and not os.path.exists(test_path) and not os.path.exists(valid_path):
    os.mkdir(train_path)
    os.mkdir(test_path)
    os.mkdir(valid_path)

In [61]:
# Save the train images to the train folder
image_directory = 'train\\'
for filename in train_dataset['image_filename']:
    if filename.endswith(".jpg"):
        image_path = os.path.join(current_dir, image_directory, filename)
        shutil.move(image_path, train_path)

In [59]:
# Save the test images to the test folder
image_directory = 'train\\'
for filename in test_dataset['image_filename']:
    if filename.endswith(".jpg"):
        image_path = os.path.join(current_dir, image_directory, filename)
        shutil.move(image_path, test_path)

In [63]:
# Save the valid images to the valid folder
image_directory = 'train\\'
for filename in valid_dataset['image_filename']:
    if filename.endswith(".jpg"):
        image_path = os.path.join(current_dir, image_directory, filename)
        shutil.move(image_path, valid_path)

In [78]:
train_dataset

{'image_filename': 765      1000_new_70e13adfbcbc40fbbdd57a89636f7201_jpg....
 5927     50_c5d27dd2cd5642b69e128a5666a2a124_jpg.rf.049...
 14029    500_c96f0fdf87314bd7b3d0603def1daa40_jpg.rf.ae...
 6187     20_1d62f6303bda4c568de28bb99406fb85_jpg.rf.08b...
 3872     50_f479582edef346909ac4bf31f71f7047_jpg.rf.22d...
                                ...                        
 5191     20_4268baf0abc547b9bc60db0145bc7a69_jpg.rf.382...
 13418    50_c95f69fd402045779ae099156fc3d2f2_jpg.rf.a45...
 5390     1000_new_f09cb00135054725a378126b59127449_jpg....
 860      100_5ca13652b1274ce3908fbc9a10684ff6_jpg.rf.70...
 7270     1000_new_a8be507fac6e4ccdb33a29dead28f46a_jpg....
 Name: image_filename, Length: 10581, dtype: object,
 'bounding_boxes': 765          [338, 0, 297, 179]
 5927         [0, 227, 298, 185]
 14029         [341, 0, 297, 88]
 6187          [0, 191, 63, 449]
 3872         [0, 225, 124, 184]
                   ...          
 5191        [270, 191, 49, 449]
 13418         [170,

Create a JSON File for train images with annotations and class label

In [85]:
train_image_filename = dict()
train_image_bbox = dict()
train_image_class_label = dict()

i = 0
for entry in train_dataset['image_filename']:
    train_image_filename[i] = entry
    i += 1

i = 0
for entry in train_dataset['bounding_boxes']:
    train_image_bbox[i] = entry
    i += 1
    
i = 0
for entry in train_dataset['class_labels']:
    train_image_class_label[i] = entry
    i += 1

In [92]:
train_json = [{'file_name': train_image_filename}, {'bbox': train_image_bbox}, {'class_category': train_image_class_label}]
jsonString = json.dumps(train_json)

In [97]:
file = 'annotation.json'
file_path = os.path.join(train_path, file)

jsonFile = open(file_path, "w")
jsonFile.write(jsonString)
jsonFile.close()

Create a JSON File for test images with annotations and class label

In [99]:
test_image_filename = dict()
test_image_bbox = dict()
test_image_class_label = dict()

i = 0
for entry in test_dataset['image_filename']:
    test_image_filename[i] = entry
    i += 1

i = 0
for entry in test_dataset['bounding_boxes']:
    test_image_bbox[i] = entry
    i += 1
    
i = 0
for entry in test_dataset['class_labels']:
    test_image_class_label[i] = entry
    i += 1

In [100]:
test_json = [{'file_name': test_image_filename}, {'bbox': test_image_bbox}, {'class_category': test_image_class_label}]
test_jsonString = json.dumps(test_json)

In [103]:
test_file_path = os.path.join(test_path, file)

test_jsonFile = open(test_file_path, "w")
test_jsonFile.write(test_jsonString)
test_jsonFile.close()

Create a JSON File for valid images with annotations and class label

In [105]:
valid_image_filename = dict()
valid_image_bbox = dict()
valid_image_class_label = dict()

i = 0
for entry in valid_dataset['image_filename']:
    valid_image_filename[i] = entry
    i += 1

i = 0
for entry in valid_dataset['bounding_boxes']:
    valid_image_bbox[i] = entry
    i += 1
    
i = 0
for entry in valid_dataset['class_labels']:
    valid_image_class_label[i] = entry
    i += 1

In [106]:
valid_json = [{'file_name': valid_image_filename}, {'bbox': valid_image_bbox}, {'class_category': valid_image_class_label}]
valid_jsonString = json.dumps(valid_json)

In [109]:
valid_file_path = os.path.join(valid_path, file)

valid_jsonFile = open(valid_file_path, "w")
valid_jsonFile.write(valid_jsonString)
valid_jsonFile.close()