In [1]:
import pandas as pd
import json
import numpy as np
import os
import os.path
import PIL
from PIL import Image, ImageFilter

In [2]:
# Root folder of the local TACO checkout; keep the trailing slash, other paths are
# built from it by plain string concatenation.
TACO_path = "../raw_data/TACO/"
# COCO-style annotation file that ships inside the TACO data folder.
annotations_path = TACO_path + "data/annotations.json"

In [3]:
# Parse the annotation JSON file into a plain Python dict.
with open(annotations_path, 'r') as annotations_file:
    dataset = json.load(annotations_file)

In [4]:
# Pull the three top-level sections out of the parsed annotation file.
categories, anns, imgs = (
    dataset[section] for section in ('categories', 'annotations', 'images')
)
# Number of entries in each section.
nr_cats, nr_annotations, nr_images = len(categories), len(anns), len(imgs)

In [5]:
# Every category record as a table.
cat_df = pd.DataFrame(categories)
# Names of the categories to single out.
keyValList = ['Cigarette','Clear plastic bottle','Drink can','Plastic straw','Plastic film']
# Keep only the category records whose name is on that list, in their original order.
selected_categories = []
for category in categories:
    if category['name'] in keyValList:
        selected_categories.append(category)
category_df = pd.DataFrame(selected_categories)

In [6]:
# Preview the selected category records.
category_df.head()

Unnamed: 0,supercategory,id,name
0,Bottle,5,Clear plastic bottle
1,Can,12,Drink can
2,Plastic bag & wrapper,36,Plastic film
3,Straw,55,Plastic straw
4,Cigarette,59,Cigarette


In [7]:
# Maps each coarse class name to the list of annotation category ids that belong to it.
# NOTE(review): id 56 is not in any list, so annotations with that id are not assigned
# a class by the matching loop that consumes this mapping - confirm this is intended.
category_conversion = {
    'metal': [0, 8, 10, 11, 12, 28],
    'cardboard': [13, 14, 15, 16, 17, 18, 19, 20],
    'glass': [6, 9, 23, 26],
    'paper': [21, 30, 31, 32, 33, 34],
    'plastic': [4, 5, 7, 24, 27, 43, 44, 47, 49, 55],
    'trash': [
        1, 2, 3, 22, 25, 29, 35, 36, 37, 38, 39, 40, 41, 42,
        45, 46, 48, 50, 51, 52, 53, 54, 57, 58, 59,
    ],
}

In [8]:
# Build one row per annotation whose category id appears in category_conversion;
# annotations whose id is in none of the lists are left out.
# The rows are collected in a plain list and turned into a DataFrame once:
# DataFrame.append was removed in pandas 2.0 and copied the whole frame on every call.
cropping_records = [
    {
        "image_id": ann["image_id"],
        "cat_name": cat_name,
        "area": ann["area"],
        "bbox": ann["bbox"],
        "iscrowd": ann["iscrowd"],
        "segmentation": ann["segmentation"],
    }
    for ann in anns
    for cat_name, type_nums in category_conversion.items()
    if ann["category_id"] in type_nums
]
# The column order is pinned so the frame keeps the same layout as before
# (image_id, cat_name, area, bbox, iscrowd, segmentation), even when no row matched.
cropping_df = pd.DataFrame(
    cropping_records,
    columns=["image_id", "cat_name", "area", "bbox", "iscrowd", "segmentation"],
)

In [9]:
# Preview the per-annotation table (one row per matched annotation).
cropping_df.head()

Unnamed: 0,image_id,cat_name,area,bbox,iscrowd,segmentation
0,0,glass,403954.0,"[517.0, 127.0, 447.0, 1322.0]",0.0,"[[561.0, 1238.0, 568.0, 1201.0, 567.0, 1175.0,..."
1,1,cardboard,1071259.5,"[1.0, 457.0, 1429.0, 1519.0]",0.0,"[[928.0, 1876.0, 938.0, 1856.0, 968.0, 1826.0,..."
2,1,cardboard,99583.5,"[531.0, 292.0, 1006.0, 672.0]",0.0,"[[617.0, 383.0, 703.0, 437.0, 713.0, 456.0, 72..."
3,2,plastic,73832.5,"[632.0, 987.0, 500.0, 374.0]",0.0,"[[670.0, 993.0, 679.0, 998.0, 684.0, 1001.0, 6..."
4,2,plastic,915.0,"[632.0, 989.0, 44.0, 51.0]",0.0,"[[647.0, 1028.0, 650.0, 1022.0, 653.0, 1016.0,..."


In [10]:
# Index the image records by id once, instead of rescanning the whole image list
# for every annotation row.
file_name_by_id = {img["id"]: img["file_name"] for img in imgs}
# One file name per row of cropping_df, in row order. An unknown image_id raises
# KeyError here; silently skipping it would shift every following file name onto
# the wrong annotation row.
file_name = [file_name_by_id[image_id] for image_id in cropping_df["image_id"]]

In [11]:
# Wrap the list of file names in a Series (default 0..n-1 index).
file_name = pd.Series(file_name)

In [12]:
# Attach the file names as a new column; values are matched to rows by index label.
cropping_df["file_name"]=file_name

In [13]:
# Preview the table again, now including the file_name column.
cropping_df.head()

Unnamed: 0,image_id,cat_name,area,bbox,iscrowd,segmentation,file_name
0,0,glass,403954.0,"[517.0, 127.0, 447.0, 1322.0]",0.0,"[[561.0, 1238.0, 568.0, 1201.0, 567.0, 1175.0,...",batch_1/000006.jpg
1,1,cardboard,1071259.5,"[1.0, 457.0, 1429.0, 1519.0]",0.0,"[[928.0, 1876.0, 938.0, 1856.0, 968.0, 1826.0,...",batch_1/000008.jpg
2,1,cardboard,99583.5,"[531.0, 292.0, 1006.0, 672.0]",0.0,"[[617.0, 383.0, 703.0, 437.0, 713.0, 456.0, 72...",batch_1/000008.jpg
3,2,plastic,73832.5,"[632.0, 987.0, 500.0, 374.0]",0.0,"[[670.0, 993.0, 679.0, 998.0, 684.0, 1001.0, 6...",batch_1/000010.jpg
4,2,plastic,915.0,"[632.0, 989.0, 44.0, 51.0]",0.0,"[[647.0, 1028.0, 650.0, 1022.0, 653.0, 1016.0,...",batch_1/000010.jpg


In [14]:
# Expand each 'bbox' list into separate columns and place them next to the
# annotation columns. The new columns are labelled 0, 1, 2, 3 at this point.
df = pd.concat([cropping_df , cropping_df['bbox'].apply(pd.Series)], axis = 1)

In [15]:
# Show the first row to check the expanded bbox columns.
df.head(1)

Unnamed: 0,image_id,cat_name,area,bbox,iscrowd,segmentation,file_name,0,1,2,3
0,0,glass,403954.0,"[517.0, 127.0, 447.0, 1322.0]",0.0,"[[561.0, 1238.0, 568.0, 1201.0, 567.0, 1175.0,...",batch_1/000006.jpg,517.0,127.0,447.0,1322.0


In [16]:
# List the current column labels (the bbox columns are still the integers 0-3).
df.columns

Index([    'image_id',     'cat_name',         'area',         'bbox',
            'iscrowd', 'segmentation',    'file_name',              0,
                    1,              2,              3],
      dtype='object')

In [17]:
# Give the columns their working names by label instead of by position, so the
# result does not depend on the column order of the concatenated frame.
# (A bare list expression that had no effect was removed from the top of this cell.)
# NOTE: 'x_max' and 'y_max' hold the bbox width and height here; they only become
# the right/bottom edges once x_min/y_min are added to them.
df = df.rename(columns={
    'cat_name': 'category',
    'file_name': 'filename',
    0: 'x_min',
    1: 'y_min',
    2: 'x_max',
    3: 'y_max',
})
# Drop the annotation fields that are not needed for cropping.
df = df.drop(['bbox', "image_id", "area", "iscrowd", "segmentation"], axis=1)

In [18]:
# Preview the reduced table: category, filename and the four bbox columns.
df.head()

Unnamed: 0,category,filename,x_min,y_min,x_max,y_max
0,glass,batch_1/000006.jpg,517.0,127.0,447.0,1322.0
1,cardboard,batch_1/000008.jpg,1.0,457.0,1429.0,1519.0
2,cardboard,batch_1/000008.jpg,531.0,292.0,1006.0,672.0
3,plastic,batch_1/000010.jpg,632.0,987.0,500.0,374.0
4,plastic,batch_1/000010.jpg,632.0,989.0,44.0,51.0


In [19]:
# Turn the width/height stored in x_max/y_max into the right and bottom edges
df['x_max'] += df['x_min']
df['y_max'] += df['y_min']
# Cast every column after category/filename from float to integer
df = df.astype({column: int for column in df.columns[2:]})

# Grow each box by a fixed margin on all four sides
padding = 20
df['x_min'] -= padding
df['y_min'] -= padding
df['x_max'] += padding
df['y_max'] += padding
df.head()

Unnamed: 0,category,filename,x_min,y_min,x_max,y_max
0,glass,batch_1/000006.jpg,497,107,984,1469
1,cardboard,batch_1/000008.jpg,-19,437,1450,1996
2,cardboard,batch_1/000008.jpg,511,272,1557,984
3,plastic,batch_1/000010.jpg,612,967,1152,1381
4,plastic,batch_1/000010.jpg,612,969,696,1060


In [20]:
# True if any cell in the table is missing.
df.isnull().values.any()

False

In [21]:
# Write the padded bounding-box table to InitialData.csv under TACO_path, without the index column.
df.to_csv(TACO_path +'InitialData.csv',index=False)

In [22]:
# Folder that contains the original images
inPath = TACO_path+'data' 
# Folder that will receive the cropped images.
# Create this folder locally (with one sub-folder per category name) before saving crops.
outPath = TACO_path+'trainDataTACO'

In [23]:
# Reset the DataFrame index in place to a fresh 0..n-1 range, discarding the old index
df.reset_index(inplace = True , drop = True)

In [24]:
# Check the first two rows after the index reset.
df.head(2)

Unnamed: 0,category,filename,x_min,y_min,x_max,y_max
0,glass,batch_1/000006.jpg,497,107,984,1469
1,cardboard,batch_1/000008.jpg,-19,437,1450,1996


In [25]:
# Save every padded bounding-box crop as
#   <outPath>/<category>/<image name>_<row index>cropped.jpg
for row in df.itertuples():
    # "batch_1/000006.jpg" -> "batch_1000006": drop the path separator and the extension.
    # (The previous `imageName[-4]` kept only the single character at index -4, so all
    # crops of a category were written to the same file.)
    imageName = os.path.splitext(row.filename.replace('/', ''))[0]
    # Create the per-category output folder on demand instead of requiring it to exist.
    out_dir = os.path.join(outPath, row.category)
    os.makedirs(out_dir, exist_ok=True)
    # The context manager closes the source file once the crop has been written.
    with Image.open(os.path.join(inPath, row.filename)) as img:
        img_width, img_height = img.size
        # Clamp the padded box to the image: the padding can push it past the borders
        # (e.g. a negative x_min), and the out-of-image part of a crop is filler pixels.
        left = max(int(row.x_min), 0)
        upper = max(int(row.y_min), 0)
        right = min(int(row.x_max), img_width)
        lower = min(int(row.y_max), img_height)
        if right <= left or lower <= upper:
            # Nothing of the box lies inside the image, so there is nothing to save.
            print('Skipping row {}: bounding box lies outside {}'.format(row.Index, row.filename))
            continue
        # The row index keeps file names unique when one image has several
        # annotations of the same category.
        croppedImagePath = os.path.join(
            out_dir, '{}_{}cropped.jpg'.format(imageName, row.Index))
        img.crop((left, upper, right, lower)).save(croppedImagePath)

Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created succe

Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created succe

Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created succe

Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created succe

Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created succe

Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created succe

Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created succe

Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created succe

Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created succe

Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created succe

Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created succe

Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created succe

Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created succe

Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created succe

Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created succe

Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created succe

Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created successfully
Images created succe

In [36]:
def save_cropped_TACO(padding=20):
    """Crop every mapped annotation out of its source image and save it per class.

    Reads the annotation JSON at the module-level ``annotations_path``, maps each
    annotation's ``category_id`` to one of six coarse classes (metal, cardboard,
    glass, paper, plastic, trash), writes the padded bounding boxes to
    ``InitialData.csv`` under the module-level ``TACO_path`` and saves one JPEG crop
    per annotation to ``<TACO_path>trainDataTACO/<class>/``. The name of each
    processed image is printed as it is handled.

    Differences from the earlier implementation:
      * rows are collected in a list and turned into a DataFrame once
        (``DataFrame.append`` was removed in pandas 2.0);
      * image file names are looked up through a dict instead of a nested scan;
      * the output folders are created automatically;
      * crop boxes are clamped to the image so the padding cannot add filler pixels;
      * the annotation's row index is part of the output file name, so several
        annotations of the same image and class no longer overwrite each other.

    NOTE(review): category id 56 is in none of the lists below, so annotations
    with that id are skipped - confirm this is intended.

    Args:
        padding: Number of pixels added on every side of each bounding box
            (default 20, the value that used to be hard-coded).

    Returns:
        None.

    Raises:
        KeyError: If a mapped annotation refers to an image id that is not present
            in the ``images`` section of the annotation file.
    """
    with open(annotations_path, 'r') as f:
        dataset = json.load(f)
    anns = dataset['annotations']
    imgs = dataset['images']

    # Coarse class name -> annotation category ids that belong to it.
    category_conversion = {
        'metal': [0, 8, 10, 11, 12, 28],
        'cardboard': [13, 14, 15, 16, 17, 18, 19, 20],
        'glass': [6, 9, 23, 26],
        'paper': [21, 30, 31, 32, 33, 34],
        'plastic': [4, 5, 7, 24, 27, 43, 44, 47, 49, 55],
        'trash': [1, 2, 3, 22, 25, 29, 35, 36, 37, 38, 39, 40, 41, 42,
                  45, 46, 48, 50, 51, 52, 53, 54, 57, 58, 59],
    }

    # Image id -> relative file name, built once.
    file_name_by_id = {img["id"]: img["file_name"] for img in imgs}

    # One record per annotation whose category id is mapped to a class.
    records = []
    for ann in anns:
        for cat_name, type_nums in category_conversion.items():
            if ann["category_id"] in type_nums:
                # bbox is [x, y, width, height]; convert it to integer corner
                # coordinates and grow it by `padding` on every side.
                x, y, width, height = ann["bbox"]
                records.append({
                    "category": cat_name,
                    "filename": file_name_by_id[ann["image_id"]],
                    "x_min": int(x) - padding,
                    "y_min": int(y) - padding,
                    "x_max": int(x + width) + padding,
                    "y_max": int(y + height) + padding,
                })
    df = pd.DataFrame(
        records,
        columns=["category", "filename", "x_min", "y_min", "x_max", "y_max"])
    df.to_csv(TACO_path + 'InitialData.csv', index=False)

    # path of the folder containing the original images
    inPath = TACO_path + 'data'
    # path of the folder that will contain the cropped images, one sub-folder per class
    outPath = TACO_path + 'trainDataTACO'
    for folder_name in category_conversion:
        os.makedirs(os.path.join(outPath, folder_name), exist_ok=True)

    # Save cropped images in the per-class folders
    for ind, record in enumerate(records):
        # "batch_1/000006.jpg" -> "batch_1000006"
        imageName = os.path.splitext(record["filename"].replace('/', ''))[0]
        print(imageName)
        # The context manager closes the source file once the crop has been written.
        with Image.open(os.path.join(inPath, record["filename"])) as img:
            img_width, img_height = img.size
            # Keep the padded box inside the image borders.
            left = max(record["x_min"], 0)
            upper = max(record["y_min"], 0)
            right = min(record["x_max"], img_width)
            lower = min(record["y_max"], img_height)
            if right <= left or lower <= upper:
                # Nothing of the box lies inside the image, so there is nothing to save.
                print('Skipping annotation {}: bounding box lies outside {}'.format(
                    ind, record["filename"]))
                continue
            croppedImagePath = os.path.join(
                outPath, record["category"],
                '{}_{}cropped.jpg'.format(imageName, ind))
            img.crop((left, upper, right, lower)).save(croppedImagePath)


In [37]:
# Run the full crop-and-save pipeline; it prints the name of each image it processes.
save_cropped_TACO()

batch_1000006
batch_1000008
batch_1000008
batch_1000010
batch_1000010
batch_1000019
batch_1000019
batch_1000019
batch_1000019
batch_1000026
batch_1000047
batch_1000047
batch_1000047
batch_1000047
batch_1000047
batch_1000047
batch_1000055
batch_1000001
batch_1000001
batch_1000005
batch_1000007
batch_1000007
batch_1000007
batch_1000012
batch_1000012
batch_1000014
batch_1000014
batch_1000014
batch_1000014
batch_1000014
batch_1000014
batch_1000048
batch_1000048
batch_1000048
batch_1000048
batch_1000048
batch_1000048
batch_1000048
batch_1000048
batch_1000048
batch_1000048
batch_1000053
batch_1000053
batch_1000053
batch_1000053
batch_1000056
batch_1000058
batch_1000058
batch_1000058
batch_1000060
batch_1000060
batch_1000003
batch_1000011
batch_1000032
batch_1000032
batch_1000040
batch_1000040
batch_1000040
batch_1000040
batch_1000043
batch_1000043
batch_1000049
batch_1000049
batch_1000054
batch_1000054
batch_1000054
batch_1000054
batch_1000061
batch_1000021
batch_1000021
batch_1000022
batch_

KeyboardInterrupt: 