# Convert raw COCO data to cropped out images (for testing purposes)

In [18]:
!pip install matplotlib

Collecting matplotlib
  Using cached matplotlib-3.6.3-cp310-cp310-win_amd64.whl (7.2 MB)
Collecting fonttools>=4.22.0
  Using cached fonttools-4.38.0-py3-none-any.whl (965 kB)
Collecting cycler>=0.10
  Using cached cycler-0.11.0-py3-none-any.whl (6.4 kB)
Collecting pyparsing>=2.2.1
  Using cached pyparsing-3.0.9-py3-none-any.whl (98 kB)
Collecting kiwisolver>=1.0.1
  Using cached kiwisolver-1.4.4-cp310-cp310-win_amd64.whl (55 kB)
Collecting contourpy>=1.0.1
  Using cached contourpy-1.0.7-cp310-cp310-win_amd64.whl (162 kB)
Installing collected packages: pyparsing, kiwisolver, fonttools, cycler, contourpy, matplotlib
Successfully installed contourpy-1.0.7 cycler-0.11.0 fonttools-4.38.0 kiwisolver-1.4.4 matplotlib-3.6.3 pyparsing-3.0.9


In [6]:
DATA_FOLDER = "../data/input/24specprep/raw"
OUTPUT_FOLDER = "../data/input/24specprep/crops"
CLASSES_ID = ["5mL Syringe", "8-Channel Finnpipet", "8 Channel Pipette", "Eppendorf Repeater", "Micropipette", "Styrofoam Tube Rack", "Trash", "50mL Tube", "50mL Tube Rack", "96 Well Plate", "Pipette Tip Box", "Reservoir", "Vortexer", "Picogreen Buffer", "Picogreen Kit"] # in distribution classes, rest will be labeled 'ood'

In [7]:
# Load coco annotations (train and val)
import os
jsons = []
for file in os.listdir(DATA_FOLDER):
    if file.endswith(".json"):
        jsons.append(os.path.join(DATA_FOLDER, file))
        
print(jsons)

['../data/input/24specprep/raw\\specprep24_05102022_train.json', '../data/input/24specprep/raw\\specprep24_05102022_val.json']


In [13]:
# Get the category names and their id
import json
categories = {}
for jsonfile in jsons:
    with open(jsonfile) as f:
        data = json.load(f)
    for category in data['categories']:
        categories[category["id"]] = category["name"]
        
# Change all categories that are not in CLASSES_ID to 'ood'
for key in categories.keys():
    if categories[key] not in CLASSES_ID:
        categories[key] = 'ood'

print(categories)

{0: 'ood', 1: '5mL Syringe', 2: 'ood', 3: '8 Channel Pipette', 4: '8-Channel Finnpipet', 5: 'ood', 6: 'ood', 7: 'Eppendorf Repeater', 8: 'Micropipette', 9: 'Styrofoam Tube Rack', 10: 'Trash', 11: '50mL Tube', 12: '50mL Tube Rack', 13: '96 Well Plate', 14: 'Pipette Tip Box', 15: 'Reservoir', 16: 'Vortexer', 17: 'ood', 18: 'ood', 19: 'ood', 20: 'ood', 21: 'ood', 22: 'Picogreen Buffer', 23: 'Picogreen Kit', 24: 'ood'}


In [16]:
# Get all image paths and their ids. Append the prefix.
images = {}
for jsonfile in jsons:
    with open(jsonfile) as f:
        data = json.load(f)
    for image in data['images']:
        images[image["id"]] = os.path.join(DATA_FOLDER,'images',image["file_name"])
        
print(images)

{0: '../data/input/24specprep/raw\\images\\blackbenchpico_11.png', 1: '../data/input/24specprep/raw\\images\\blackbenchpico_12.png', 2: '../data/input/24specprep/raw\\images\\blackbenchpico_13.png', 3: '../data/input/24specprep/raw\\images\\blackbenchpico_14.png', 4: '../data/input/24specprep/raw\\images\\blackbenchpico_15.png', 5: '../data/input/24specprep/raw\\images\\blackbenchpico_16.png', 6: '../data/input/24specprep/raw\\images\\blackbenchpico_17.png', 7: '../data/input/24specprep/raw\\images\\blackbenchpico_18.png', 8: '../data/input/24specprep/raw\\images\\blackbenchpico_19.png', 9: '../data/input/24specprep/raw\\images\\blackbenchpico_20.png', 10: '../data/input/24specprep/raw\\images\\blackbenchpico_21.png', 11: '../data/input/24specprep/raw\\images\\Snapshot_0.png', 12: '../data/input/24specprep/raw\\images\\Snapshot_1.png', 13: '../data/input/24specprep/raw\\images\\Snapshot_10.png', 14: '../data/input/24specprep/raw\\images\\Snapshot_11.png', 15: '../data/input/24specprep/

In [19]:
from PIL import Image
import os
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.patches as patches

def crop_coco(image_path, annotation, category_name, image_id):
    # Load image
    image = Image.open(image_path)
    image = np.array(image)
    
    # Create output folder
    output_folder = os.path.join(OUTPUT_FOLDER, category_name)
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)
    
    # Crop
    x1 = annotation["bbox"][0]
    y1 = annotation["bbox"][1]
    x2 = annotation["bbox"][0] + annotation["bbox"][2]
    y2 = annotation["bbox"][1] + annotation["bbox"][3]
    crop = image[int(y1):int(y2), int(x1):int(x2)]
    
    # Save
    output_path = os.path.join(output_folder, str(image_id) + '.jpg')
    plt.imsave(output_path, crop)

In [24]:
# For each JSON, load the annotations and crop the images
# save the crops under the folder with the same name as the class
import json
for jsonfile in jsons:
    with open(jsonfile) as f:
        data = json.load(f)
    annotations = data['annotations']
    for annotation in annotations:
        # get the image id
        image_id = annotation['image_id']
        image_path =images[image_id]
        category_name = categories[annotation["category_id"]]
        try:
            crop_coco(image_path, annotation, category_name, image_id)
        except Exception as e:
            print(e)
            print(image_path)

ndarray is not C-contiguous
../data/input/24specprep/raw\images\picogreenBlack_0.png
ndarray is not C-contiguous
../data/input/24specprep/raw\images\picogreenBlack_0.png
ndarray is not C-contiguous
../data/input/24specprep/raw\images\picogreenBlack_0.png
ndarray is not C-contiguous
../data/input/24specprep/raw\images\picogreenBlack_0.png
ndarray is not C-contiguous
../data/input/24specprep/raw\images\picogreenBlack_0.png
ndarray is not C-contiguous
../data/input/24specprep/raw\images\picogreenBlack_0.png
ndarray is not C-contiguous
../data/input/24specprep/raw\images\picogreenBlack_0.png
ndarray is not C-contiguous
../data/input/24specprep/raw\images\picogreenBlack_0.png
ndarray is not C-contiguous
../data/input/24specprep/raw\images\picogreenBlack_0.png
ndarray is not C-contiguous
../data/input/24specprep/raw\images\picogreenBlack_0.png
ndarray is not C-contiguous
../data/input/24specprep/raw\images\picogreenBlack_1.png
ndarray is not C-contiguous
../data/input/24specprep/raw\images\p