## Imports

In [2]:
import copy
import json

import pandas as pd

## Change annotations to 5 categories (TACO dataset)

In [3]:
# Read the TACO annotation file into memory

with open('../data/TACO/annotations.json') as annotations_file:
    data_taco = json.load(annotations_file)

In [4]:
# Build the {category id: category name} lookup from every entry in the
# annotation file's "categories" list (60 entries in the original run).
# Iterating over the list itself, rather than over a hard-coded range(60),
# avoids an IndexError on a shorter list and silent truncation of a longer
# one; it also no longer rebinds the built-in `id`.

dict_taco = {
    category['id']: category['name']
    for category in data_taco['categories']
}

In [5]:
# Display the id -> name lookup built from the TACO "categories" list
dict_taco

{0: 'Aluminium foil',
 1: 'Battery',
 2: 'Aluminium blister pack',
 3: 'Carded blister pack',
 4: 'Other plastic bottle',
 5: 'Clear plastic bottle',
 6: 'Glass bottle',
 7: 'Plastic bottle cap',
 8: 'Metal bottle cap',
 9: 'Broken glass',
 10: 'Food Can',
 11: 'Aerosol',
 12: 'Drink can',
 13: 'Toilet tube',
 14: 'Other carton',
 15: 'Egg carton',
 16: 'Drink carton',
 17: 'Corrugated carton',
 18: 'Meal carton',
 19: 'Pizza box',
 20: 'Paper cup',
 21: 'Disposable plastic cup',
 22: 'Foam cup',
 23: 'Glass cup',
 24: 'Other plastic cup',
 25: 'Food waste',
 26: 'Glass jar',
 27: 'Plastic lid',
 28: 'Metal lid',
 29: 'Other plastic',
 30: 'Magazine paper',
 31: 'Tissues',
 32: 'Wrapping paper',
 33: 'Normal paper',
 34: 'Paper bag',
 35: 'Plastified paper bag',
 36: 'Plastic film',
 37: 'Six pack rings',
 38: 'Garbage bag',
 39: 'Other plastic wrapper',
 40: 'Single-use carrier bag',
 41: 'Polypropylene bag',
 42: 'Crisp packet',
 43: 'Spread tub',
 44: 'Tupperware',
 45: 'Disposable 

In [6]:
# Instantiate the dictionary of new categories, id -> name (7 entries);
# ids are assigned by position in the list below, starting at 0.

final_categories = dict(enumerate([
    'Paper',
    'Plastic',
    'Glass',
    'Metal',
    'Organic',
    'E-Waste',
    'Non-recyclable',
]))

In [14]:
# Collect the TACO category names (in dict_taco order) and the set of their ids

unique_cat_taco = [*dict_taco.values()]
unique_cat_ids = set(dict_taco)

# New category for each TACO category, paired purely by position: entry i
# belongs to the i-th name in unique_cat_taco. The trailing comments give that
# position and, where the dict_taco display shows it, the TACO name.
matching_cat_final = [
    'Non-recyclable',   # 0: Aluminium foil
    'E-Waste',          # 1: Battery
    'Non-recyclable',   # 2: Aluminium blister pack
    'Non-recyclable',   # 3: Carded blister pack
    'Plastic',          # 4: Other plastic bottle
    'Plastic',          # 5: Clear plastic bottle
    'Glass',            # 6: Glass bottle
    'Plastic',          # 7: Plastic bottle cap
    'Metal',            # 8: Metal bottle cap
    'Glass',            # 9: Broken glass
    'Non-recyclable',   # 10: Food Can
    'Metal',            # 11: Aerosol
    'Metal',            # 12: Drink can
    'Paper',            # 13: Toilet tube
    'Paper',            # 14: Other carton
    'Paper',            # 15: Egg carton
    'Paper',            # 16: Drink carton
    'Paper',            # 17: Corrugated carton
    'Paper',            # 18: Meal carton
    'Paper',            # 19: Pizza box
    'Paper',            # 20: Paper cup
    'Plastic',          # 21: Disposable plastic cup
    'Non-recyclable',   # 22: Foam cup
    'Non-recyclable',   # 23: Glass cup
    'Plastic',          # 24: Other plastic cup
    'Organic',          # 25: Food waste
    'Glass',            # 26: Glass jar
    'Plastic',          # 27: Plastic lid
    'Metal',            # 28: Metal lid
    'Plastic',          # 29: Other plastic
    'Paper',            # 30: Magazine paper
    'Non-recyclable',   # 31: Tissues
    'Paper',            # 32: Wrapping paper
    'Paper',            # 33: Normal paper
    'Paper',            # 34: Paper bag
    'Non-recyclable',   # 35: Plastified paper bag
    'Plastic',          # 36: Plastic film
    'Plastic',          # 37: Six pack rings
    'Non-recyclable',   # 38: Garbage bag
    'Plastic',          # 39: Other plastic wrapper
    'Plastic',          # 40: Single-use carrier bag
    'Non-recyclable',   # 41: Polypropylene bag
    'Plastic',          # 42: Crisp packet
    'Plastic',          # 43: Spread tub
    'Plastic',          # 44: Tupperware
    'Plastic',          # 45
    'Non-recyclable',   # 46
    'Plastic',          # 47
    'Plastic',          # 48
    'Plastic',          # 49
    'Metal',            # 50
    'Non-recyclable',   # 51
    'Metal',            # 52
    'Non-recyclable',   # 53
    'Non-recyclable',   # 54
    'Plastic',          # 55
    'Paper',            # 56
    'Non-recyclable',   # 57
    '!!! Remove !!!',   # 58: "Unlabeled litter" -- placeholder, not a real category
    'Non-recyclable',   # 59
]



In [16]:
# Concatenate to DataFrame

# The two sequences are paired purely by position, so they must have the same
# length; otherwise pd.concat would pad the shorter column with NaN and the
# mapping would be silently misaligned.
assert len(matching_cat_final) == len(unique_cat_taco), (
    f"{len(matching_cat_final)} new categories given for "
    f"{len(unique_cat_taco)} TACO categories"
)

# Wrap each sequence in a single-column DataFrame (the names are rebound).
unique_cat_taco = pd.DataFrame(unique_cat_taco, columns=['Taco category'])

matching_cat_final = pd.DataFrame(matching_cat_final, columns=['New category'])

# Side-by-side table: 'New category' first, then 'Taco category'.
matching_dict = pd.concat([matching_cat_final, unique_cat_taco], axis=1)


In [84]:
# Inspect the final matching table: a DataFrame with one row per TACO
# category, holding its 'New category' and its 'Taco category' name

matching_dict

Unnamed: 0,New category,Taco category
0,Non-recyclable,Aluminium foil
1,E-Waste,Battery
2,Non-recyclable,Aluminium blister pack
3,Non-recyclable,Carded blister pack
4,Plastic,Other plastic bottle
5,Plastic,Clear plastic bottle
6,Glass,Glass bottle
7,Plastic,Plastic bottle cap
8,Metal,Metal bottle cap
9,Glass,Broken glass


In [85]:
# Export the final matching table to an Excel file

matching_dict.to_excel('../tables/taco_matching_dict.xlsx')

In [37]:
# Print every row index next to its new category
for row_pair in matching_dict['New category'].items():
    print(*row_pair)

0 Non-recyclable
1 E-Waste
2 Non-recyclable
3 Non-recyclable
4 Plastic
5 Plastic
6 Glass
7 Plastic
8 Metal
9 Glass
10 Non-recyclable
11 Metal
12 Metal
13 Paper
14 Paper
15 Paper
16 Paper
17 Paper
18 Paper
19 Paper
20 Paper
21 Plastic
22 Non-recyclable
23 Non-recyclable
24 Plastic
25 Organic
26 Glass
27 Plastic
28 Metal
29 Plastic
30 Paper
31 Non-recyclable
32 Paper
33 Paper
34 Paper
35 Non-recyclable
36 Plastic
37 Plastic
38 Non-recyclable
39 Plastic
40 Plastic
41 Non-recyclable
42 Plastic
43 Plastic
44 Plastic
45 Plastic
46 Non-recyclable
47 Plastic
48 Plastic
49 Plastic
50 Metal
51 Non-recyclable
52 Metal
53 Non-recyclable
54 Non-recyclable
55 Plastic
56 Paper
57 Non-recyclable
58 !!! Remove !!!
59 Non-recyclable


In [82]:
# Count the annotations labeled "Unlabeled litter" (category 58)

count_unlabeled = sum(
    1
    for annotation in data_taco['annotations']
    if annotation['category_id'] == 58
)

count_unlabeled

517

517 of the 4784 annotations are labeled "Unlabeled litter" (category 58), so we will have to drop ~11% of the annotations.

In [83]:
# TODO: drop the "Unlabeled litter" annotations (category 58) -- this cell currently contains no code



4784

In [38]:
# Print each new category id with its name
for id_and_name in final_categories.items():
    print(*id_and_name)

0 Paper
1 Plastic
2 Glass
3 Metal
4 Organic
5 E-Waste
6 Non-recyclable


In [39]:
# Start from a deep copy of the loaded annotations so that later edits to
# nested structures (e.g. the "annotations" list) cannot leak back into
# data_taco; dict.copy() is shallow and would share those nested objects.

data_taco_new = copy.deepcopy(data_taco)

# Overwrite the "categories" list with one entry per new category;
# the category name is used for both "supercategory" and "name".

data_taco_new['categories'] = [
    {
        "supercategory": category,
        "id": index,
        "name": category
    }
    for index, category in final_categories.items()
]

In [56]:
# Translate each row's new category name into its numeric id

# Invert final_categories once (name -> id) instead of rebuilding the inverted
# dict on every iteration; this also avoids rebinding the built-in `id`.
category_name_to_id = {
    category_name: category_id
    for category_id, category_name in final_categories.items()
}

# .get() yields None for names that are not in final_categories (the
# '!!! Remove !!!' placeholder), so list_ids keeps one entry per row.
list_ids = [
    category_name_to_id.get(category)
    for category in matching_dict['New category']
]

list_ids

[6,
 5,
 6,
 6,
 1,
 1,
 2,
 1,
 3,
 2,
 6,
 3,
 3,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 6,
 6,
 1,
 4,
 2,
 1,
 3,
 1,
 0,
 6,
 0,
 0,
 0,
 6,
 1,
 1,
 6,
 1,
 1,
 6,
 1,
 1,
 1,
 1,
 6,
 1,
 1,
 1,
 3,
 6,
 3,
 6,
 6,
 1,
 0,
 6,
 None,
 6]

In [53]:
# Scratch check: final_categories is keyed by integer id, so looking up the
# name 'Paper' as a key finds nothing and .get() returns None (no output).
final_categories.get('Paper')

In [55]:
# Scratch check: invert final_categories (name -> id) and look up the id of 'Plastic'
dict((new_val,new_k) for new_k,new_val in final_categories.items()).get('Plastic')

1

In [40]:
# Display the rebuilt "categories" list.
# NOTE(review): the saved output spells the key 'supecategory', which differs
# from the "supercategory" key in the code that builds this list -- the output
# looks stale; re-run to refresh it.
data_taco_new['categories']

[{'supecategory': 'Paper', 'id': 0, 'name': 'Paper'},
 {'supecategory': 'Plastic', 'id': 1, 'name': 'Plastic'},
 {'supecategory': 'Glass', 'id': 2, 'name': 'Glass'},
 {'supecategory': 'Metal', 'id': 3, 'name': 'Metal'},
 {'supecategory': 'Organic', 'id': 4, 'name': 'Organic'},
 {'supecategory': 'E-Waste', 'id': 5, 'name': 'E-Waste'},
 {'supecategory': 'Non-recyclable', 'id': 6, 'name': 'Non-recyclable'}]

In [31]:
# Display the "categories" list (same expression as the previous cell).
# NOTE(review): the saved output ([{'test': 0, 'yoyo': 1}]) comes from an
# earlier execution (In [31]) and does not look like the categories built in
# this notebook -- re-run or delete this cell.
data_taco_new['categories']

[{'test': 0, 'yoyo': 1}]

## Change annotations to 5 categories (Drinking Waste Classification)