In [5]:
import pandas as pd

import os
import json

import tree_algo as ta

In [6]:
# Root folder of the dataset; every path below is derived from it.
# NOTE: the value already ends with '/', so the derived paths contain '//'.
DATA_FOLDER = '/home/bala/Desktop/sri_krishna/computer_vision/data/'

# Image directories, keyed by split name.
IMAGES_PATH = {'train': f'{DATA_FOLDER}/images/train', 'val': f'{DATA_FOLDER}/images/val'}

# Bounding-box CSV files, keyed by split name.
BOXES_PATH = {'train': f'{DATA_FOLDER}/object_detection/boxes/challenge-2019-train-detection-bbox.csv', 
            'val': f'{DATA_FOLDER}/object_detection/boxes/challenge-2019-validation-detection-bbox.csv'} 

# Image-level label CSV files, keyed by split name.
LABELS_PATH = {'train': f'{DATA_FOLDER}/object_detection/labels/challenge-2019-train-detection-human-imagelabels.csv', 
            'val': f'{DATA_FOLDER}/object_detection/labels/challenge-2019-validation-detection-human-imagelabels.csv'}

# Class description table and class hierarchy file.
METADATA = {'classes': f'{DATA_FOLDER}/object_detection/metadata/challenge-2019-classes-description-500.csv',
            'class_hierarchy': f'{DATA_FOLDER}/object_detection/metadata/challenge-2019-label500-hierarchy.json'}

# The classes table must expose "LabelCode" and "LabelName" columns.
classes = pd.read_csv(METADATA['classes'])

# Parsed hierarchy JSON; handed to ta.json_to_tree below.
with open(METADATA['class_hierarchy'], 'r') as file:
    data = json.load(file)

# Column-wise zip builds the same code -> name dict as a row-by-row
# iterrows() loop, without materialising a Series per row.
LABELCODE_TO_LABELNAME = dict(zip(classes["LabelCode"], classes["LabelName"]))

TREE = ta.json_to_tree(data)



In [7]:
# Label codes in the order they appear in the classes table.
Labelcodes = [label_code for label_code in classes['LabelCode'].values]

# Number each label code by its position in that list.
full_class_mapping = dict(zip(Labelcodes, range(len(Labelcodes))))

full_class_mapping

{'/m/061hd_': 0,
 '/m/06m11': 1,
 '/m/03120': 2,
 '/m/01kb5b': 3,
 '/m/0120dh': 4,
 '/m/0dv5r': 5,
 '/m/0jbk': 6,
 '/m/0174n1': 7,
 '/m/09f_2': 8,
 '/m/01xq0k1': 9,
 '/m/03jm5': 10,
 '/m/02g30s': 11,
 '/m/05z6w': 12,
 '/m/01jfm_': 13,
 '/m/076lb9': 14,
 '/m/0gj37': 15,
 '/m/0k0pj': 16,
 '/m/0kpqd': 17,
 '/m/0l14j_': 18,
 '/m/0cyf8': 19,
 '/m/0174k2': 20,
 '/m/0dq75': 21,
 '/m/076bq': 22,
 '/m/07crc': 23,
 '/m/0d8zb': 24,
 '/m/0fszt': 25,
 '/m/0k1tl': 26,
 '/m/020kz': 27,
 '/m/09728': 28,
 '/m/07j7r': 29,
 '/m/0fbdv': 30,
 '/m/03ssj5': 31,
 '/m/03qrc': 32,
 '/m/02dl1y': 33,
 '/m/01k6s3': 34,
 '/m/02jfl0': 35,
 '/m/01krhy': 36,
 '/m/04kkgm': 37,
 '/m/0ft9s': 38,
 '/m/0d_2m': 39,
 '/m/0czz2': 40,
 '/m/0f4s2w': 41,
 '/m/07dd4': 42,
 '/m/0cgh4': 43,
 '/m/03bbps': 44,
 '/m/02pjr4': 45,
 '/m/04p0qw': 46,
 '/m/02pdsw': 47,
 '/m/01yx86': 48,
 '/m/09dzg': 49,
 '/m/0hkxq': 50,
 '/m/07dm6': 51,
 '/m/054_l': 52,
 '/m/01dws': 53,
 '/m/027pcv': 54,
 '/m/01d40f': 55,
 '/m/02rgn06': 56,
 '/m/0342h': 57

In [8]:

def load_data(mode):
    """Load the bounding-box rows for the images present on disk.

    Parameters
    ----------
    mode : str
        Key into the module-level ``IMAGES_PATH`` and ``BOXES_PATH`` dicts.

    Returns
    -------
    list_of_images_paths : list of str
        Names of the entries in ``IMAGES_PATH[mode]`` with their file
        extension removed, in ``os.listdir`` order.
    labeling_data : pandas.DataFrame
        Rows of the ``BOXES_PATH[mode]`` CSV whose ``ImageID`` is one of
        those names, restricted to the columns ``ImageID``, ``LabelName``,
        ``XMin``, ``YMin``, ``Width`` and ``Height``.
    sample_label_codes : list
        The ``LabelName`` value of every row of ``labeling_data``, in row
        order.
    """
    # splitext removes whatever extension the file has; slicing off a fixed
    # four characters turns e.g. "x.jpeg" into "x." which never matches an ID.
    list_of_images_paths = [
        os.path.splitext(file_name)[0] for file_name in os.listdir(IMAGES_PATH[mode])
    ]

    all_boxes = pd.read_csv(BOXES_PATH[mode])

    # Explicit copy: the new columns are added to an independent frame rather
    # than to a filtered slice (avoids SettingWithCopyWarning).
    labeling_data = all_boxes[all_boxes['ImageID'].isin(list_of_images_paths)].copy()
    labeling_data['Width'] = labeling_data['XMax'] - labeling_data['XMin']
    labeling_data['Height'] = labeling_data['YMax'] - labeling_data['YMin']

    sample_label_codes = list(labeling_data['LabelName'].values)

    labeling_data = labeling_data[['ImageID', 'LabelName', 'XMin', 'YMin', 'Width', 'Height']]

    return list_of_images_paths, labeling_data, sample_label_codes



def preprare_data(data_folder, mode, tree, depth, labelcode_to_labelname, list_of_images_paths, labeling_data, sample_label_codes):
    """Write one label text file per image plus the class-numbering JSON.

    Parameters
    ----------
    data_folder : str
        Root folder; output goes under ``{data_folder}/object_detection/labels/``.
    mode : str
        Name of the sub-folder the per-image files are written to.
    tree, depth
        Passed to ``ta.reverse_mapper``; ``depth`` is also part of the output
        folder names (``depth_{depth}``).
    labelcode_to_labelname : dict
        Maps every key of the numbering returned by ``ta.class_no_mapper`` to
        a label name.
    list_of_images_paths : list of str
        Image IDs; one ``{id}.txt`` file is written for each, and the file is
        empty when the image has no rows in ``labeling_data``.
    labeling_data : pandas.DataFrame
        Must contain ``ImageID``, ``XMin``, ``YMin``, ``Width`` and ``Height``.
        It is not modified.
    sample_label_codes : list
        One label code per row of ``labeling_data``, in row order.

    Raises
    ------
    KeyError
        If a label code is missing from the mapping returned by
        ``ta.class_mapper`` or from ``labelcode_to_labelname``.
    """
    destination_root_folder = fr'{data_folder}/object_detection/labels/'

    reverse_mapping = ta.reverse_mapper(tree, depth)
    labelcode_numbering = ta.class_no_mapper(reverse_mapping)
    class_mapping = ta.class_mapper(reverse_mapping=reverse_mapping, class_no_mapping=labelcode_numbering)

    # class number -> label name, stored next to the labels as JSON.
    labelname_numbering = {
        number: labelcode_to_labelname[code] for code, number in labelcode_numbering.items()
    }

    sample_class_no = [class_mapping[code] for code in sample_label_codes]

    # assign() returns a new frame, so the caller's frame (reused for every
    # depth) never gains a LabelNumber column.
    labeled = labeling_data.assign(LabelNumber=sample_class_no)
    box_rows = labeled[['LabelNumber', 'XMin', 'YMin', 'Width', 'Height']]

    labelname_numbering_path = fr'{destination_root_folder}/class_numbering_scheme/depth_{depth}/class_numbering.json'
    if not os.path.exists(labelname_numbering_path):
        os.makedirs(os.path.dirname(labelname_numbering_path), exist_ok=True)
        with open(labelname_numbering_path, 'w') as file:
            json.dump(labelname_numbering, file)

    output_folder = fr'{destination_root_folder}/{mode}/depth_{depth}'
    os.makedirs(output_folder, exist_ok=True)

    # Group once: image ID -> positional row indices. This replaces a full
    # scan of the frame for every image.
    positions_by_image = labeled.groupby('ImageID', sort=False).indices
    no_rows = box_rows.iloc[0:0]

    total_images = len(list_of_images_paths)
    for index, path in enumerate(list_of_images_paths, start=1):
        positions = positions_by_image.get(path)
        txt_file = no_rows if positions is None else box_rows.iloc[positions]
        txt_file.to_csv(fr'{output_folder}/{path}.txt', sep=' ', index = False, header=False)

        # index counts finished images, so the last report is at total_images.
        if index % 10000 == 0 or index == total_images:
            print(f'            {index} images done')




In [4]:

# For each split, load the annotations once and then write the label files
# for depths 1 to 5.
for mode in ('val', 'train'):
    print(f'Processing {mode} data')
    list_of_images_paths, labeling_data, sample_label_codes = load_data(mode)
    print('    Data loaded')

    for depth in range(1, 6):
        print(f'        Processing depth ---- {depth}')
        preprare_data(
            data_folder=DATA_FOLDER,
            mode=mode,
            tree=TREE,
            depth=depth,
            labelcode_to_labelname=LABELCODE_TO_LABELNAME,
            list_of_images_paths=list_of_images_paths,
            labeling_data=labeling_data,
            sample_label_codes=sample_label_codes,
        )
        print(f'        Processing of depth ---- {depth} completed')

    print(f'    Processing of {mode} data completed', '\n\n')

Processing val data
    Data loaded
        Processing depth ---- 1
            1000 images done
            2000 images done
            3000 images done
            4000 images done
            5000 images done
            6000 images done
            7000 images done
            8000 images done
            9000 images done
            10000 images done


KeyboardInterrupt: 