In [None]:
# Install the COCO API; the %pip magic installs into the environment of the running kernel
%pip install pycocotools



In [None]:
from pycocotools.coco import COCO
import requests
import os
import random
import json

In [None]:
# Where the train2017 instance annotations are read from, and where this
# notebook writes the downloaded images and the per-image annotation files
annotation_file = '/content/drive/MyDrive/instances_train2017.json'
image_output_dir = 'coco_images/train'
annotation_output_dir = 'coco_annotations/train'

# Make sure both output folders exist (a no-op when they are already there)
for output_dir in (image_output_dir, annotation_output_dir):
    os.makedirs(output_dir, exist_ok=True)

# Load the annotation file through the COCO API
coco = COCO(annotation_file)

loading annotations into memory...
Done (t=28.18s)
creating index...
index created!


In [None]:
# Seed the global RNG so that a fresh kernel run in order draws the same
# categories (and, in the cells below, the same images). Without a seed every
# restart selects a different subset and mixes it with files already on disk.
RANDOM_SEED = 42
random.seed(RANDOM_SEED)

# Number of categories to draw
NUM_CATEGORIES = 10

# Get all available categories
categories = coco.loadCats(coco.getCatIds())
category_names = [cat['name'] for cat in categories]

# Randomly choose NUM_CATEGORIES names; min() keeps random.sample from raising
# if the annotation file defines fewer categories than requested
selected_categories = random.sample(category_names, min(NUM_CATEGORIES, len(category_names)))
print("Selected Categories:", selected_categories)

# Get category IDs for the selected categories
cat_ids = coco.getCatIds(catNms=selected_categories)

Selected Categories: ['dog', 'surfboard', 'kite', 'fire hydrant', 'boat', 'bowl', 'wine glass', 'baseball bat', 'giraffe', 'cat']


In [None]:
# Upper bound on the number of training images sampled per category
TRAIN_IMAGES_PER_CATEGORY = 500

# Maps each selected category ID to the list of image IDs sampled for it
selected_data = {}

# Sample image IDs for each category
for cat_id in cat_ids:
    # Get image IDs for the category
    img_ids = coco.getImgIds(catIds=[cat_id])

    # Randomly select up to TRAIN_IMAGES_PER_CATEGORY images; min() avoids
    # asking random.sample for more images than the category has
    sample_size = min(TRAIN_IMAGES_PER_CATEGORY, len(img_ids))
    selected_img_ids = random.sample(img_ids, sample_size)
    selected_data[cat_id] = selected_img_ids

In [None]:
# Download images and save annotations

# Seconds to wait for the server before giving up on one image; without a
# timeout a single stalled connection would block the whole cell
REQUEST_TIMEOUT_S = 30

# Every selected category ID. Annotation files are named per image, so each
# file must hold the annotations of ALL selected categories present in that
# image; filtering by a single category would let one category's write
# replace another's for images that contain several of them.
all_selected_cat_ids = list(selected_data)

# An image can be sampled under more than one category; process it only once
# (dict.fromkeys de-duplicates while keeping first-seen order)
unique_img_ids = list(dict.fromkeys(
    img_id for img_ids in selected_data.values() for img_id in img_ids
))

for img_id in unique_img_ids:
    # Get image information
    img_info = coco.loadImgs(img_id)[0]
    img_url = img_info['coco_url']
    img_name = os.path.join(image_output_dir, img_info['file_name'])

    # Download the image unless a previous run already stored it
    if not os.path.exists(img_name):
        try:
            response = requests.get(img_url, timeout=REQUEST_TIMEOUT_S)
            # Turn HTTP error statuses into exceptions so an error page is
            # never written to disk as if it were a JPEG
            response.raise_for_status()
        except requests.RequestException as exc:
            print(f"Failed to download image: {img_url} ({exc})")
            # No image on disk, so do not write an annotation file for it
            continue
        with open(img_name, 'wb') as f:
            f.write(response.content)
        print(f"Downloaded image: {img_name}")

    # Get the image's annotations for all selected categories
    ann_ids = coco.getAnnIds(imgIds=[img_id], catIds=all_selected_cat_ids, iscrowd=None)
    annotations = coco.loadAnns(ann_ids)

    # Save annotations to a file named after the image
    annotation_file_name = os.path.join(annotation_output_dir, f"{img_info['file_name']}.json")
    with open(annotation_file_name, 'w') as f:
        json.dump(annotations, f, indent=4)
    print(f"Saved annotations: {annotation_file_name}")


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Downloaded image: coco_images/train/000000205131.jpg
Saved annotations: coco_annotations/train/000000205131.jpg.json
Downloaded image: coco_images/train/000000435889.jpg
Saved annotations: coco_annotations/train/000000435889.jpg.json
Downloaded image: coco_images/train/000000430654.jpg
Saved annotations: coco_annotations/train/000000430654.jpg.json
Downloaded image: coco_images/train/000000027208.jpg
Saved annotations: coco_annotations/train/000000027208.jpg.json
Downloaded image: coco_images/train/000000043232.jpg
Saved annotations: coco_annotations/train/000000043232.jpg.json
Downloaded image: coco_images/train/000000253125.jpg
Saved annotations: coco_annotations/train/000000253125.jpg.json
Downloaded image: coco_images/train/000000516049.jpg
Saved annotations: coco_annotations/train/000000516049.jpg.json
Downloaded image: coco_images/train/000000168377.jpg
Saved annotations: coco_annotations/train/000000168377.jpg.json

In [None]:
import os
import random
import json
import requests
from pycocotools.coco import COCO

# File paths and directories
val_annotation_file = '/content/drive/MyDrive/instances_val2017.json'
val_image_output_dir = 'coco_images/val'
val_annotation_output_dir = 'coco_annotations/val'

# Upper bound on the number of validation images sampled per category
VAL_IMAGES_PER_CATEGORY = 50

# Seconds to wait for the server before giving up on one image; without a
# timeout a single stalled connection would block the whole cell
VAL_REQUEST_TIMEOUT_S = 30

# Initialize COCO object for validation dataset
coco_val = COCO(val_annotation_file)

# Get category IDs for the selected categories in the validation dataset
val_cat_ids = coco_val.getCatIds(catNms=selected_categories)

# NOTE: this rebinds `selected_data`, replacing the training selection built
# earlier in the notebook; re-run the training sampling cell before re-running
# the training download cell.
selected_data = {}
for cat_id in val_cat_ids:
    img_ids = coco_val.getImgIds(catIds=[cat_id])
    # min() avoids sampling more images than the category has
    selected_img_ids = random.sample(img_ids, min(VAL_IMAGES_PER_CATEGORY, len(img_ids)))
    selected_data[cat_id] = selected_img_ids

# Ensure output directories exist
os.makedirs(val_image_output_dir, exist_ok=True)
os.makedirs(val_annotation_output_dir, exist_ok=True)

# An image can be sampled under more than one category; process it only once
# (dict.fromkeys de-duplicates while keeping first-seen order)
val_unique_img_ids = list(dict.fromkeys(
    img_id for img_ids in selected_data.values() for img_id in img_ids
))

# Download images and save annotations for validation set
for img_id in val_unique_img_ids:
    # Get image information
    img_info = coco_val.loadImgs(img_id)[0]
    img_url = img_info['coco_url']
    img_name = os.path.join(val_image_output_dir, img_info['file_name'])

    # Download image
    if not os.path.exists(img_name):  # Avoid re-downloading
        try:
            response = requests.get(img_url, timeout=VAL_REQUEST_TIMEOUT_S)
        except requests.RequestException as exc:
            # Connection errors and timeouts: report and move on to the next image
            print(f"Failed to download image: {img_url} ({exc})")
            continue
        if response.status_code == 200:  # Ensure the image was downloaded successfully
            with open(img_name, 'wb') as f:
                f.write(response.content)
            print(f"Downloaded image: {img_name}")
        else:
            print(f"Failed to download image: {img_url}")
            # No image on disk, so do not write an annotation file for it
            continue

    # Get the image's annotations for ALL selected categories. The file is
    # named per image, so filtering by one category at a time would keep only
    # one category's annotations for images that contain several of them.
    ann_ids = coco_val.getAnnIds(imgIds=[img_id], catIds=val_cat_ids, iscrowd=None)
    annotations = coco_val.loadAnns(ann_ids)

    # Save annotations to a file. Each image is visited once per run, and
    # rewriting refreshes files left behind by earlier runs.
    annotation_file_name = os.path.join(val_annotation_output_dir, f"{img_info['file_name']}.json")
    with open(annotation_file_name, 'w') as f:
        json.dump(annotations, f, indent=4)
    print(f"Saved annotations: {annotation_file_name}")


loading annotations into memory...
Done (t=3.18s)
creating index...
index created!
Downloaded image: coco_images/val/000000384949.jpg
Saved annotations: coco_annotations/val/000000384949.jpg.json
Downloaded image: coco_images/val/000000078565.jpg
Saved annotations: coco_annotations/val/000000078565.jpg.json
Downloaded image: coco_images/val/000000449432.jpg
Saved annotations: coco_annotations/val/000000449432.jpg.json
Downloaded image: coco_images/val/000000455872.jpg
Saved annotations: coco_annotations/val/000000455872.jpg.json
Downloaded image: coco_images/val/000000248334.jpg
Saved annotations: coco_annotations/val/000000248334.jpg.json
Downloaded image: coco_images/val/000000171611.jpg
Saved annotations: coco_annotations/val/000000171611.jpg.json
Downloaded image: coco_images/val/000000057672.jpg
Saved annotations: coco_annotations/val/000000057672.jpg.json
Downloaded image: coco_images/val/000000472298.jpg
Saved annotations: coco_annotations/val/000000472298.jpg.json
Downloaded im