In [1]:
import pandas as pd

import os
import json
from pprint import pprint

import tree_algo as ta

In [2]:
DATA_FOLDER = '/home/bala/Desktop/sri_krishna/computer_vision/data/'

IMAGES_PATH = {'train': f'{DATA_FOLDER}/images/train', 'val': f'{DATA_FOLDER}/images/val'}

BOXES_PATH = {'train': f'{DATA_FOLDER}/object_detection/boxes/challenge-2019-train-detection-bbox.csv', 
            'val': f'{DATA_FOLDER}/object_detection/boxes/challenge-2019-validation-detection-bbox.csv'} 

LABELS_PATH = {'train': f'{DATA_FOLDER}/object_detection/labels/challenge-2019-train-detection-human-imagelabels.csv', 
            'val': f'{DATA_FOLDER}/object_detection/labels/challenge-2019-validation-detection-human-imagelabels.csv'}

METADATA = {'classes': f'{DATA_FOLDER}/object_detection/metadata/challenge-2019-classes-description-500.csv',
            'class_hierarchy': f'{DATA_FOLDER}/object_detection/metadata/challenge-2019-label500-hierarchy.json'}



In [3]:
classes = pd.read_csv(METADATA['classes'])
print(classes.head(5), '\n\n')

data = None
with open(METADATA['class_hierarchy'], 'r') as file:
    data = json.load(file)

# Now 'data' contains the contents of the JSON file
pprint(data)

   LabelCode   LabelName
0  /m/061hd_  Infant bed
1   /m/06m11        Rose
2   /m/03120        Flag
3  /m/01kb5b  Flashlight
4  /m/0120dh  Sea turtle 


{'LabelName': '/m/0bl9f',
 'Subcategory': [{'LabelName': '/m/0242l'},
                 {'LabelName': '/m/03120'},
                 {'LabelName': '/m/0h8l4fh'},
                 {'LabelName': '/m/0138tl',
                  'Subcategory': [{'LabelName': '/m/0167gd'},
                                  {'LabelName': '/m/01j51'},
                                  {'LabelName': '/m/029b3'},
                                  {'LabelName': '/m/02zt3'},
                                  {'LabelName': '/m/0kmg4'}]},
                 {'LabelName': '/m/019dx1',
                  'Subcategory': [{'LabelName': '/m/0174k2'},
                                  {'LabelName': '/m/01k6s3'},
                                  {'LabelName': '/m/029bxz'},
                                  {'LabelName': '/m/02pjr4'},
                                  {'LabelNa

In [4]:
tree = ta.json_to_tree(data)
print('Max depth = ', tree.max_depth(), '\n')
print('No of nodes = ', tree.no_of_nodes(), '\n')
print(tree, '\n')

labeled_tree = ta.replace_labels_in_tree(tree, classes)
print(labeled_tree)

Max depth =  5 

No of nodes =  521 

/m/0bl9f
│   /m/0242l
│   /m/03120
│   /m/0h8l4fh
│   /m/0138tl
│   │   /m/0167gd
│   │   /m/01j51
│   │   /m/029b3
│   │   /m/02zt3
│       /m/0kmg4
│   /m/019dx1
│   │   /m/0174k2
│   │   /m/01k6s3
│   │   /m/029bxz
│   │   /m/02pjr4
│   │   /m/02wv84t
│   │   /m/02x984l
│   │   /m/03s_tn
│   │   /m/040b_t
│   │   /m/04169hn
│   │   /m/063rgb
│   │   /m/07xyvk
│   │   /m/0fx9l
│   │   /m/0llzx
│       /m/03ldnb
│   /m/02pkr5
│   │   /m/0130jx
│   │   /m/01vbnl
│   │   /m/02f9f_
│   │   /m/02jz0l
│   │   /m/03dnzn
│       /m/09g1w
│   /m/02rdsp
│   │   /m/01lsmm
│   │   /m/01n5jq
│   │   /m/025dyy
│   │   /m/02d9qx
│   │   /m/03m3vtv
│   │   /m/04zwwv
│   │   /m/05gqfk
│   │   /m/09gtd
│   │   /m/0frqm
│       /m/0k1tl
│   /m/02w3r3
│   /m/034c16
│   /m/02d1br
│   /m/02pdsw
│   /m/03v5tg
│   /m/07v9_z
│   /m/04brg2
│   │   /m/01_5g
│   │   /m/01fh4r
│   │   /m/02jvh9
│   │   /m/02p5f1q
│   │   /m/02x8cch
│   │   /m/03q5c7
│   │   /m/04dr76w
│   │ 

In [5]:
for i in range(6):
    dec_tree, boundary_nodes = ta.decompose_tree_at_boundary_nodes(labeled_tree, depth=i)
    print(f'No. of classes for depth {i} = ', len(boundary_nodes), '\n')

No. of classes for depth 0 =  1 

No. of classes for depth 1 =  177 

No. of classes for depth 2 =  379 

No. of classes for depth 3 =  447 

No. of classes for depth 4 =  461 

No. of classes for depth 5 =  462 



In [8]:

# Shows use the decomposed tree as per the desired implmented algorithm, and some releated information
#"""
dec_tree, boundary_nodes = ta.decompose_tree_at_boundary_nodes(labeled_tree, depth=2)
#print('No. of classes = ', len(boundary_nodes), '\n')
#print('No. of children of root = ', len(dec_tree.get_child_labels()), '\n')
#print('No. of leaves of the tree = ', len(ta.collect_leaf_nodes(dec_tree)), '\n')
print(dec_tree, '\n\n\n\n')
for node in boundary_nodes:
    print(node)
#"""

print()


# A rough check to see if the boundary nodes function working correctly, by only checking if the leaf children of root are in boundary nodes list
"""
boundary_nodes = ta.collect_boundary_nodes(tree, DEPTH)
b_labels = []
for node in boundary_nodes:
    b_labels.append(node.label)

print(b_labels, '\n')

leaves = []
for node in tree.children:
    if not node.children:
        leaves.append(node.label)

print(leaves, '\n')

for element in leaves:
        if element not in b_labels:
            print('False')
print('True')

#"""

print()

/m/0bl9f
│   Coin
│   Flag
│   Light bulb
│   Toy
│   │   Doll
│   │   Balloon
│   │   Dice
│   │   Kite
│       Teddy bear
│   Home appliance
│   │   Washing machine
│   │   Toaster
│   │   Oven
│   │   Blender
│   │   Gas stove
│   │   Mechanical fan
│   │   Kettle
│   │   Refrigerator
│   │   Wood-burning stove
│   │   Mixer
│   │   Coffeemaker
│   │   Microwave oven
│   │   Sewing machine
│       Ceiling fan
│   Plumbing fixture
│   │   Sink
│   │   Bidet
│   │   Shower
│   │   Tap
│   │   Bathtub
│       Toilet
│   Office supplies
│   │   Scissors
│   │   Poster
│   │   Box
│   │   Whiteboard
│   │   Adhesive tape
│   │   Ring binder
│   │   Plastic bag
│   │   Toilet paper
│   │   Envelope
│       Pen
│   Paper towel
│   Pillow
│   Spatula
│   Cutting board
│   Drinking straw
│   Measuring cup
│   Tableware
│   │   Chopsticks
│   │   Teapot
│   │   Mug
│   │   Coffee cup
│   │   Salt and pepper shakers
│   │   Saucer
│   │   Bottle
│   │   Bowl
│   │   Plate
│   │   Pitcher
│   │

In [7]:
DEPTH = 1

In [8]:
reverse_mapping = ta.reverse_mapper(tree, DEPTH)
print(len(reverse_mapping), '\n')
pprint(reverse_mapping)
print()

mapping = ta.label_reverse_mapping(reverse_mapping, classes)
print(len(mapping), '\n')
pprint(mapping)
print()

labelcode_dict = ta.class_no_mapper(reverse_mapping)
print('No of classes = ', len(labelcode_dict), '\n')
pprint(labelcode_dict)

labelcode_to_labelname = {row["LabelCode"]: row["LabelName"] for _, row in classes.iterrows()}
labelname_dict = {}
for key, value in labelcode_dict.items():
    labelname_dict[value] = labelcode_to_labelname[key]
pprint(labelname_dict)

500 

{'/m/011k07': '/m/0jbk',
 '/m/0120dh': '/m/0jbk',
 '/m/01226z': '/m/018xm',
 '/m/012n7d': '/m/07yv9',
 '/m/012w5l': '/m/012w5l',
 '/m/0130jx': '/m/02pkr5',
 '/m/0138tl': '/m/0138tl',
 '/m/013y1f': '/m/04szw',
 '/m/014j1m': '/m/02xwb',
 '/m/014sv8': '/m/014sv8',
 '/m/014y4n': '/m/014y4n',
 '/m/0152hh': '/m/06msq',
 '/m/01599': '/m/0271t',
 '/m/015h_t': '/m/015h_t',
 '/m/015p6': '/m/0jbk',
 '/m/015qff': '/m/015qff',
 '/m/015wgc': '/m/015wgc',
 '/m/015x4r': '/m/0f4s2w',
 '/m/015x5n': '/m/0f4s2w',
 '/m/0162_1': '/m/0162_1',
 '/m/0167gd': '/m/0138tl',
 '/m/016m2d': '/m/016m2d',
 '/m/0174k2': '/m/019dx1',
 '/m/0174n1': '/m/0174n1',
 '/m/0175cv': '/m/0jbk',
 '/m/0176mf': '/m/0176mf',
 '/m/017ftj': '/m/02w3_ws',
 '/m/018p4k': '/m/07yv9',
 '/m/018xm': '/m/018xm',
 '/m/01940j': '/m/0hf58v5',
 '/m/0199g': '/m/07yv9',
 '/m/019dx1': '/m/019dx1',
 '/m/019h78': '/m/0jbk',
 '/m/019jd': '/m/07yv9',
 '/m/019w40': '/m/019w40',
 '/m/01_5g': '/m/04brg2',
 '/m/01b638': '/m/09j5n',
 '/m/01b7fy': '/m/01

In [9]:
class_mapping = ta.class_mapper(tree, DEPTH)
pprint(class_mapping)

{'/m/011k07': 124,
 '/m/0120dh': 124,
 '/m/01226z': 21,
 '/m/012n7d': 54,
 '/m/012w5l': 172,
 '/m/0130jx': 35,
 '/m/0138tl': 47,
 '/m/013y1f': 134,
 '/m/014j1m': 86,
 '/m/014sv8': 169,
 '/m/014y4n': 149,
 '/m/0152hh': 122,
 '/m/01599': 87,
 '/m/015h_t': 173,
 '/m/015p6': 124,
 '/m/015qff': 57,
 '/m/015wgc': 113,
 '/m/015x4r': 144,
 '/m/015x5n': 144,
 '/m/0162_1': 8,
 '/m/0167gd': 47,
 '/m/016m2d': 139,
 '/m/0174k2': 74,
 '/m/0174n1': 45,
 '/m/0175cv': 124,
 '/m/0176mf': 163,
 '/m/017ftj': 9,
 '/m/018p4k': 54,
 '/m/018xm': 21,
 '/m/01940j': 85,
 '/m/0199g': 54,
 '/m/019dx1': 74,
 '/m/019h78': 124,
 '/m/019jd': 54,
 '/m/019w40': 75,
 '/m/01_5g': 92,
 '/m/01b638': 68,
 '/m/01b7fy': 72,
 '/m/01b9xk': 94,
 '/m/01bfm9': 0,
 '/m/01bjv': 54,
 '/m/01bl7v': 32,
 '/m/01bms0': 151,
 '/m/01bqk0': 41,
 '/m/01btn': 54,
 '/m/01c648': 121,
 '/m/01cmb2': 60,
 '/m/01d40f': 119,
 '/m/01dws': 124,
 '/m/01dwsz': 34,
 '/m/01dwwc': 1,
 '/m/01dxs': 124,
 '/m/01dy8n': 124,
 '/m/01f8m5': 124,
 '/m/01f91_': 27,
 

In [10]:
# THIS SHOWS THE CLASS LABELING SCHEME
#"""
labelcode_to_labelname = {row["LabelCode"]: row["LabelName"] for _, row in classes.iterrows()}
mapping_scheme = class_mapping.copy()

for key, value in mapping_scheme.items():
    mapping_scheme[key] = [value, labelcode_to_labelname[key]]
    
pprint(mapping_scheme)
#"""

print()

{'/m/011k07': [124, 'Tortoise'],
 '/m/0120dh': [124, 'Sea turtle'],
 '/m/01226z': [21, 'Football'],
 '/m/012n7d': [54, 'Ambulance'],
 '/m/012w5l': [172, 'Ladder'],
 '/m/0130jx': [35, 'Sink'],
 '/m/0138tl': [47, 'Toy'],
 '/m/013y1f': [134, 'Organ'],
 '/m/014j1m': [86, 'Apple'],
 '/m/014sv8': [169, 'Human eye'],
 '/m/014y4n': [149, 'Paddle'],
 '/m/0152hh': [122, 'Snowman'],
 '/m/01599': [87, 'Beer'],
 '/m/015h_t': [173, 'Human beard'],
 '/m/015p6': [124, 'Bird'],
 '/m/015qff': [57, 'Traffic light'],
 '/m/015wgc': [113, 'Croissant'],
 '/m/015x4r': [144, 'Cucumber'],
 '/m/015x5n': [144, 'Radish'],
 '/m/0162_1': [8, 'Towel'],
 '/m/0167gd': [47, 'Doll'],
 '/m/016m2d': [139, 'Skull'],
 '/m/0174k2': [74, 'Washing machine'],
 '/m/0174n1': [45, 'Glove'],
 '/m/0175cv': [124, 'Tick'],
 '/m/0176mf': [163, 'Belt'],
 '/m/017ftj': [9, 'Sunglasses'],
 '/m/018p4k': [54, 'Cart'],
 '/m/018xm': [21, 'Ball'],
 '/m/01940j': [85, 'Backpack'],
 '/m/0199g': [54, 'Bicycle'],
 '/m/019dx1': [74, 'Home appliance'],

In [12]:

classes = pd.read_csv(METADATA['classes'])

train_images = os.listdir(IMAGES_PATH['train'])
train_images = [f[:-4] for f in train_images]
train_boxes = pd.read_csv(BOXES_PATH['train'])
train_boxes = train_boxes[train_boxes['ImageID'].isin(train_images)]
train_boxes = train_boxes[['ImageID', 'LabelName', 'XMin', 'XMax', 'YMin', 'YMax']]

labelcode_to_labelname = {row["LabelCode"]: row["LabelName"] for _, row in classes.iterrows()}

sample_label_codes = list(train_boxes['LabelName'].values)

sample_class_names = []
for code in sample_label_codes:
    sample_class_names.append(labelcode_to_labelname[code])

data = None
with open(METADATA['class_hierarchy'], 'r') as file:
    data = json.load(file)

DEPTH = 1

tree = ta.json_to_tree(data)
reverse_mapping = ta.reverse_mapper(tree, DEPTH)
labelcode_dict = ta.class_no_mapper(reverse_mapping)

labelname_dict = {}
for key, value in labelcode_dict.items():
    labelname_dict[value] = labelcode_to_labelname[key]

class_mapping = ta.class_mapper(tree, DEPTH)

sample_class_no = []
sample_mapped_label_name = []
for code in sample_label_codes:
    sample_class_no.append(class_mapping[code])
    sample_mapped_label_name.append(labelname_dict[class_mapping[code]])

train_boxes['LabelNumber'] = sample_class_no
train_boxes['ActualLabelName'] = sample_class_names
train_boxes['MappedLabelName'] = sample_mapped_label_name
train_boxes['Width'] = train_boxes['XMax'] - train_boxes['XMin']
train_boxes['Height'] = train_boxes['YMax'] - train_boxes['YMin']
train_boxes = train_boxes[['ImageID', 'LabelName', 'ActualLabelName', 'MappedLabelName', 'LabelNumber', 'XMin', 'YMin', 'Width', 'Height', 'XMax', 'YMax']]
train_boxes

Unnamed: 0,ImageID,LabelName,ActualLabelName,MappedLabelName,LabelNumber,XMin,YMin,Width,Height,XMax,YMax
3553109,4a3bc1ec836b64c4,/m/01g317,Person,Person,32,0.000000,0.753333,0.026667,0.243334,0.026667,0.996667
3553110,4a3bc1ec836b64c4,/m/01g317,Person,Person,32,0.000000,0.536667,0.064444,0.153333,0.064444,0.690000
3553111,4a3bc1ec836b64c4,/m/01g317,Person,Person,32,0.000000,0.200000,0.160000,0.253333,0.160000,0.453333
3553112,4a3bc1ec836b64c4,/m/01g317,Person,Person,32,0.006667,0.840000,0.155555,0.156667,0.162222,0.996667
3553113,4a3bc1ec836b64c4,/m/01g317,Person,Person,32,0.013333,0.613333,0.073334,0.080000,0.086667,0.693333
...,...,...,...,...,...,...,...,...,...,...,...
11791545,4fffac0dbd0601e4,/m/04yx4,Man,Person,32,0.000000,0.264815,0.180690,0.734259,0.180690,0.999074
11791546,4fffac0dbd0601e4,/m/0fly7,Jeans,Trousers,64,0.000000,0.779630,0.071724,0.219444,0.071724,0.999074
11791547,4fffac0dbd0601e4,/m/0fly7,Jeans,Trousers,64,0.213793,0.834259,0.532414,0.162963,0.746207,0.997222
11791548,4ffffc38c11da155,/m/04yx4,Man,Person,32,0.129688,0.334025,0.239062,0.545643,0.368750,0.879668


In [13]:
train_images

['418c90f0cbc40030',
 '44491282db7cc1f6',
 '4d2da2d26feee210',
 '45b8f7ae2a927c63',
 '4931bd5101076128',
 '4e48f590e5a49045',
 '4aa1c74d5f43cac9',
 '4cefd5a1c8049eae',
 '4a57da3261efb98a',
 '4279f9cfbac65946',
 '4fc51aafa4707f8d',
 '470df664c316c74b',
 '4b1e9805267547c2',
 '447baf6afc93ba7d',
 '4ee0704f65a96730',
 '4c6284052fd3c4f7',
 '4387f7d1f674e4c6',
 '452c8a735864b674',
 '48dce5c300483cc9',
 '4926d67d3f10d857',
 '45f138cf5de685cc',
 '48f62bfb2cf96c23',
 '45ad480332559875',
 '4fcf6aa5d058e6d4',
 '42e7c2cebb2eea2c',
 '41eda4ec62fc2c61',
 '4972cc825c74a533',
 '498b26d66e0ce5f8',
 '486af1699b5f8eff',
 '4596e7b4a3a15d36',
 '497455528693e9d5',
 '46240cfd33ef841f',
 '41276780bd70132a',
 '4a3e194c715e3880',
 '46a8dd91976274b2',
 '4f30c6b081eaf6e6',
 '49986a989c74d0fd',
 '436124616fcb0722',
 '40c6b850c2c813d3',
 '402dd3a63c1de436',
 '46710392a1f23cda',
 '4b043d316250bfd9',
 '49c4bd84f0ff1724',
 '4f07ab37fd8b9a9c',
 '4a204a2e6b6e6486',
 '4a78f1223abebea5',
 '476a02025498370e',
 '4b6c34e03e5

In [12]:
path = train_images[0]
file = train_boxes[train_boxes['ImageID'].isin([path])]
file = file[['ImageID', 'MappedLabelName', 'LabelNumber', 'XMin', 'YMin', 'Width', 'Height']]
txt_file = file[['LabelNumber', 'XMin', 'YMin', 'Width', 'Height']]
file

Unnamed: 0,ImageID,ActualLabelName,LabelNumber,XMin,YMin,Width,Height
5000120,418c90f0cbc40030,Woman,112,0.165,0.273333,0.2,0.7025
5000121,418c90f0cbc40030,Man,112,0.02875,0.2175,0.159375,0.655833
5000122,418c90f0cbc40030,Suit,128,0.0,0.305,0.073125,0.544167
5000123,418c90f0cbc40030,Woman,112,0.40875,0.308333,0.1825,0.485834
5000124,418c90f0cbc40030,Woman,112,0.565,0.314167,0.160625,0.6625
5000125,418c90f0cbc40030,Woman,112,0.75875,0.415833,0.199375,0.463334
5000126,418c90f0cbc40030,Man,112,0.000625,0.198333,0.075,0.68
5000127,418c90f0cbc40030,Man,112,0.764375,0.41,0.206875,0.1925
5000128,418c90f0cbc40030,Footwear,4,0.0,0.8475,0.013125,0.031667
5000129,418c90f0cbc40030,Footwear,4,0.008125,0.835833,0.05125,0.04


In [None]:
for path in train_images:
    file = train_boxes[train_boxes['ImageID'].isin([path])]
    file = file[['ImageID', 'ActualLabelName', 'MappedLabelName', 'LabelNumber', 'XMin', 'YMin', 'Width', 'Height']]
    txt_file = file[['LabelNumber', 'XMin', 'YMin', 'Width', 'Height']]
    #txt_file.to_csv(fr'{DATA_FOLDER}/object_detection/boxes/train/depth_{DEPTH}/{path}.txt', sep=' ', index = False, header=False)

In [None]:

classes = pd.read_csv(METADATA['classes'])

train_images = os.listdir(IMAGES_PATH['train'])
train_images = [f[:-4] for f in train_images]
train_boxes = pd.read_csv(BOXES_PATH['train'])
train_boxes = train_boxes[train_boxes['ImageID'].isin(train_images)]
train_boxes = train_boxes[['ImageID', 'LabelName', 'XMin', 'XMax', 'YMin', 'YMax']]

labelcode_to_labelname = {row["LabelCode"]: row["LabelName"] for _, row in classes.iterrows()}

sample_label_codes = list(train_boxes['LabelName'].values)
distinct_values = list(set(sample_label_codes))

sample_class_names = []
for code in sample_label_codes:
    sample_class_names.append(labelcode_to_labelname[code])

data = None
with open(METADATA['class_hierarchy'], 'r') as file:
    data = json.load(file)

DEPTH = 1

tree = ta.json_to_tree(data)
reverse_mapping = ta.reverse_mapper(tree, DEPTH)
labelcode_dict = ta.class_no_mapper(reverse_mapping)

labelname_dict = {}
for key, value in labelcode_dict.items():
    labelname_dict[value] = labelcode_to_labelname[key]

class_mapping = ta.class_mapper(tree, DEPTH)

sample_class_no = []
sample_mapped_label_name = []
for code in sample_label_codes:
    sample_class_no.append(class_mapping[code])
    sample_mapped_label_name.append(labelname_dict[class_mapping[code]])

train_boxes['LabelNumber'] = sample_class_no
train_boxes['ActualLabelName'] = sample_class_names
train_boxes['MappedLabelName'] = sample_mapped_label_name
train_boxes['Width'] = train_boxes['XMax'] - train_boxes['XMin']
train_boxes['Height'] = train_boxes['YMax'] - train_boxes['YMin']
train_boxes = train_boxes[['ImageID', 'LabelName', 'ActualLabelName', 'MappedLabelName', 'LabelNumber', 'XMin', 'YMin', 'Width', 'Height', 'XMax', 'YMax']]

for path in train_images:
    file = train_boxes[train_boxes['ImageID'].isin([path])]
    file = file[['ImageID', 'ActualLabelName', 'MappedLabelName', 'LabelNumber', 'XMin', 'YMin', 'Width', 'Height']]
    txt_file = file[['LabelNumber', 'XMin', 'YMin', 'Width', 'Height']]
    #txt_file.to_csv(fr'{DATA_FOLDER}/object_detection/boxes/train/depth_{DEPTH}/{path}.txt', sep=' ', index = False, header=False)