In [7]:
import pandas as pd
import numpy as np

# Code to write image IDs to a .txt file.
# Then use the .txt file to download the images with the following command:
"""
python downloader.py {txt filename} --download_folder={target folder directory} --num_processes=4
"""

# Load the CSV files.
# Only ImageID and LabelName are used when building the image ID lists, so the
# annotation files are read with just those two columns; skipping the remaining
# columns keeps memory use and load time down. Add column names here if another
# cell needs more of the annotation data.
annotation_columns = ['ImageID', 'LabelName']
train_annotations = pd.read_csv('oidv6-train-annotations-bbox.csv', usecols=annotation_columns)
class_descriptions = pd.read_csv('oidv7-class-descriptions-boxable.csv')
test_annotations = pd.read_csv('test-annotations-bbox.csv', usecols=annotation_columns)

# User defined parameters
save_training = False  # Write the training ID list for `display_name`
save_test = True  # Write the test ID list for `display_name`
save_random = False  # Flag to control saving random images
number_of_images = 10000  # Max number of training images for the class
number_of_images_test = 1000  # Max number of test images for the class
number_of_images_random = 20000  # Number of random images to select
display_name = "Hat"  # Class to select; compared for exact equality against the DisplayName column

In [8]:

def write_image_id_list(path, split, image_ids):
    """Write one ``{split}/{image_id}`` line per image ID to ``path``.

    Parameters
    ----------
    path : str
        Destination .txt file; an existing file is overwritten.
    split : str
        Prefix written before each ID ("train" or "test" in this notebook).
    image_ids : iterable
        Image IDs to write, in the order given.
    """
    with open(path, 'w') as f:
        f.writelines(f"{split}/{image_id}\n" for image_id in image_ids)


# Seed for the random sample so that re-running the notebook yields the same list.
random_seed = 42

# Step 1: Get the LabelName
matching_labels = class_descriptions.loc[
    class_descriptions['DisplayName'] == display_name, 'LabelName'
]
if matching_labels.empty:
    # Fail with the offending name instead of an opaque IndexError.
    raise ValueError(
        f"No class with DisplayName {display_name!r} found in class_descriptions "
        "(the comparison is exact and case-sensitive)."
    )
label = matching_labels.iloc[0]

# Step 2: Filter the annotations to get bounding boxes for specific class.
# The filtered test frame gets its own name: rebinding `test_annotations` would
# discard the loaded data, so re-running this cell for a different class without
# reloading the CSV would silently select zero test images.
annotations = train_annotations[train_annotations['LabelName'] == label]
test_class_annotations = test_annotations[test_annotations['LabelName'] == label]

# Step 3: Get unique ImageIDs (since multiple bounding boxes can share the same ImageID)
unique_image_ids = annotations['ImageID'].unique()
test_unique_image_ids = test_class_annotations['ImageID'].unique()

# Get random unique ImageIDs from entire training set (any class).
# The sample size is capped at the number of available IDs because sampling
# without replacement cannot return more items than exist.
all_unique_image_ids = train_annotations['ImageID'].unique()
rng = np.random.default_rng(random_seed)
random_image_ids = rng.choice(
    all_unique_image_ids,
    size=min(number_of_images_random, len(all_unique_image_ids)),
    replace=False,
)

# Step 4: Limit to specified number of unique ImageIDs
# (slicing simply returns everything when fewer IDs are available).
limited_image_ids = unique_image_ids[:number_of_images]
test_limited_image_ids = test_unique_image_ids[:number_of_images_test]

# Step 5: Write the ImageIDs to .txt files in the required format
if save_training:
    write_image_id_list(f'{display_name}_image_id_list.txt', 'train', limited_image_ids)
    print(f"Wrote {len(limited_image_ids)} unique ImageIDs to {display_name}_image_id_list.txt")

if save_test:
    write_image_id_list(f'{display_name}_test_image_id_list.txt', 'test', test_limited_image_ids)
    print(f"Wrote {len(test_limited_image_ids)} unique ImageIDs to {display_name}_test_image_id_list.txt")

if save_random:
    # Random IDs are drawn from the training annotations, hence the "train" prefix.
    write_image_id_list('random_image_id_list.txt', 'train', random_image_ids)
    print(f"Wrote {len(random_image_ids)} random unique ImageIDs to random_image_id_list.txt")

Wrote 393 unique ImageIDs to Hat_test_image_id_list.txt
