In [None]:
import json
import os
import csv
import shutil
from sklearn.model_selection import train_test_split


In [None]:
# Define the path to your dataset and COCO annotation file
folder_path = '/content/drive/MyDrive/IIT_Delhi'

dataset_path = os.path.join(folder_path, 'Pedestrian_dataset_for_internship_assignment/Pedestrian_dataset_for_internship_assignment')

# Fail fast when the dataset folder is not reachable (for example, Google
# Drive has not been mounted yet). os.makedirs below creates every missing
# intermediate directory, so without this check it would silently build an
# empty dataset tree and the notebook would only fail later on.
if not os.path.isdir(dataset_path):
    raise FileNotFoundError(
        f"Dataset folder not found: {dataset_path}. "
        "Mount Google Drive and check folder_path before running this cell."
    )

# Define directories for train and validation sets
train_dir = os.path.join(dataset_path, 'train')
val_dir = os.path.join(dataset_path, 'val')

# Create directories if they don't exist
os.makedirs(train_dir, exist_ok=True)
os.makedirs(val_dir, exist_ok=True)


In [None]:
# Read the full COCO annotation file that sits next to the dataset folder
annotations_file = os.path.join(folder_path, 'random_sample_mavi_2_gt.json')

with open(annotations_file, 'r') as annotation_stream:
    coco_data = json.loads(annotation_stream.read())

# Keep a handle on the image records; these are what gets split later on
all_images = coco_data['images']
image_total = len(all_images)
print(f"Total images: {image_total}")  # Expected: 199 for this dataset


Total images: 199


In [None]:
# Hold out a fixed number of images for validation (39); the remainder (160)
# is used for training. The seed is fixed so the split is reproducible.
val_count = 39
split_seed = 42
train_images, val_images = train_test_split(
    all_images,
    test_size=val_count,
    random_state=split_seed,
)

n_train, n_val = len(train_images), len(val_images)
print(f"Training set: {n_train} images, Validation set: {n_val} images")


Training set: 160 images, Validation set: 39 images


In [None]:
def filter_annotations(images, annotations):
    """Return the annotations that belong to one of the given images.

    Args:
        images: Iterable of image records; each must have an ``'id'`` key.
        annotations: Iterable of annotation records; each must have an
            ``'image_id'`` key.

    Returns:
        A new list with the annotation records (the same objects, in their
        original order) whose ``'image_id'`` matches the ``'id'`` of an image.
    """
    # Collect the ids first so each membership check is a set lookup
    wanted_ids = set()
    for image in images:
        wanted_ids.add(image['id'])

    selected = []
    for annotation in annotations:
        if annotation['image_id'] in wanted_ids:
            selected.append(annotation)
    return selected

# Partition the annotation records so they mirror the image split
all_annotations = coco_data['annotations']
train_annotations = filter_annotations(train_images, all_annotations)
val_annotations = filter_annotations(val_images, all_annotations)


In [None]:
def _coco_subset(images, annotations):
    """Build a COCO-style dict for one split, reusing the original categories."""
    return {
        'images': images,
        'annotations': annotations,
        'categories': coco_data['categories'],
    }


# COCO-style data for each split
train_coco_data = _coco_subset(train_images, train_annotations)
val_coco_data = _coco_subset(val_images, val_annotations)

# Write each split's annotation file into its own folder
for split_dir, json_name, payload in (
    (train_dir, 'train_annotations.json', train_coco_data),
    (val_dir, 'val_annotations.json', val_coco_data),
):
    with open(os.path.join(split_dir, json_name), 'w') as out_file:
        json.dump(payload, out_file)

print("Train and validation annotations saved.")


Train and validation annotations saved.


In [None]:
# Step 8: Copy Images to Train and Validation Folders

def copy_images(images, source_dir, target_dir):
    """Copy the files named by ``images`` from ``source_dir`` into ``target_dir``.

    Every source path is checked before anything is copied, so a missing file
    cannot leave ``target_dir`` partially populated.

    Args:
        images: Sequence of image records; each must have a ``'file_name'`` key
            holding a path relative to ``source_dir``.
        source_dir: Directory that contains the original image files.
        target_dir: Existing directory that receives the copies.

    Returns:
        The number of files copied.

    Raises:
        FileNotFoundError: If one or more source files do not exist.
    """
    sources = [os.path.join(source_dir, img['file_name']) for img in images]

    missing = [path for path in sources if not os.path.isfile(path)]
    if missing:
        # Show only a few paths so the message stays readable
        preview = ', '.join(missing[:5])
        raise FileNotFoundError(
            f"{len(missing)} image file(s) not found, e.g.: {preview}"
        )

    for path in sources:
        # shutil.copy leaves the original in place: this is a copy, not a move
        shutil.copy(path, target_dir)
    return len(sources)


# Copy training images
copy_images(train_images, dataset_path, train_dir)

# Copy validation images
copy_images(val_images, dataset_path, val_dir)

print("Images copied to train and validation folders.")

# Step 9: Save COCO Annotations for Train and Validation

# Save the annotations for train and val sets into their respective folders
with open(os.path.join(train_dir, 'train_annotations.json'), 'w') as f:
    json.dump(train_coco_data, f)

with open(os.path.join(val_dir, 'val_annotations.json'), 'w') as f:
    json.dump(val_coco_data, f)

print("Train and validation annotations saved in their respective folders.")


Images moved to train and validation folders.
Train and validation annotations saved in their respective folders.


In [None]:
# Mount Google Drive at /content/drive.
# NOTE(review): the cells above already read from and write to paths under
# /content/drive, so on a fresh runtime this cell presumably has to run before
# them — consider moving it to the top of the notebook.
from google.colab import drive
drive.mount('/content/drive')


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
