In [1]:
import json
import pandas as pd
from tqdm import tqdm_notebook
from IPython.display import HTML, display
import tabulate
import sqlite3
from sqlite3 import Error
import random
import copy
import math


human_labels = ('woman', 'men', 'boy', 'kid', 'child', 'guy', 'man', 'person', 'girl', 'lady', 'people')

# The notebook draws random image samples with the `random` module; seeding it
# once here makes those draws repeat after Restart Kernel -> Run All.
SEED = 42
random.seed(SEED)

In [2]:
def create_connection(db_file):
    """Open a connection to the SQLite database stored at ``db_file``.

    :param db_file: path of the database file
    :return: the sqlite3 Connection object, or None when connecting failed
             (the sqlite3 error is printed rather than raised)
    """
    try:
        return sqlite3.connect(db_file)
    except Error as exc:
        print(exc)
    return None

In [3]:
def setA_setB(A, B):
    """Split the rows of ``A`` by whether their triple also occurs in ``B``.

    A row of ``A`` counts as "in B" when some row of ``B`` has the same
    'Object_1', 'Relation' and 'Object_2' values.

    :param A: DataFrame with 'Object_1', 'Relation' and 'Object_2' columns
    :param B: DataFrame with the same three columns
    :return: (diff_cp, com_cp) -- the rows of ``A`` not found in ``B`` and the
             rows of ``A`` found in ``B``, both in ``A``'s row order
    """
    diff_pos = []
    comm_pos = []
    # Collect row *positions*, not index labels: the result is taken with
    # .iloc, so this stays correct when A does not have a default 0..n-1 index.
    for pos, (_, row) in enumerate(A.iterrows()):
        in_b = (
            (B['Object_1'] == row['Object_1'])
            & (B['Relation'] == row['Relation'])
            & (B['Object_2'] == row['Object_2'])
        ).any()
        if in_b:
            comm_pos.append(pos)
        else:
            diff_pos.append(pos)
    diff_cp = A.iloc[diff_pos]
    com_cp = A.iloc[comm_pos]
    return diff_cp, com_cp

def find_quantile(diff_set,q=0.95, max_im=20):
    """Keep the rows whose 'Count' lies above the ``q`` quantile and cap their images.

    The surviving rows are sorted by 'Count' (largest first) and re-indexed
    from 0. Each row's 'images' list is cut down to at most ``max_im`` entries
    with ``random.sample``; the new 'count_down' column holds ``max_im`` minus
    the number of images kept.

    :param diff_set: DataFrame with a numeric 'Count' column and an 'images'
                     column holding a list per row
    :param q: quantile of 'Count' used as the (exclusive) lower threshold
    :param max_im: maximum number of images kept per row
    :return: the filtered, sorted DataFrame with the 'count_down' column added
    """
    threshold = diff_set.Count.quantile(q)
    top_rows = diff_set[diff_set.Count > threshold].sort_values('Count', ascending=False)
    top_rows = top_rows.reset_index(drop=True)
    top_rows['count_down'] = max_im

    for label in top_rows.index:
        budget = top_rows.at[label, 'count_down']
        candidates = top_rows.at[label, 'images']
        if len(candidates) > budget:
            # More images than allowed: keep a random subset of size `budget`.
            kept = random.sample(candidates, budget)
            top_rows.at[label, 'images'] = kept
        else:
            kept = candidates
        top_rows.at[label, 'count_down'] = budget - len(kept)

    return top_rows

def splite_data(data_df, train_size=50, val_size=10, test_size=10, dt=None, seed=None):
    """Draw three mutually disjoint random samples (train / val / test) of image names.

    :param data_df: either a list of image names, or a DataFrame with a
                    'file_name' column
    :param train_size: number of names drawn for the train sample
    :param val_size: number of names drawn for the validation sample
    :param test_size: number of names drawn for the test sample
    :param dt: when falsy (default) and ``data_df`` is a DataFrame, only the part
               of each 'file_name' after the last '/' is used; when truthy the
               full 'file_name' values are used
    :param seed: optional seed; when given, a private ``random.Random(seed)``
                 is used so the split is reproducible. When None the
                 module-level ``random`` state is used, as before.
    :return: (train_images, val_images, test_images) lists
    :raises ValueError: (from ``random.sample``) when a requested size exceeds
                        the number of names still available
    """
    if isinstance(data_df, list):
        all_images = list(data_df)
    elif not dt:
        # [-1] instead of [1]: a name without any '/' is kept as-is instead of
        # raising IndexError.
        all_images = [el.rsplit('/', 1)[-1] for el in data_df['file_name'].to_list()]
    else:
        all_images = data_df['file_name'].to_list()

    rng = random if seed is None else random.Random(seed)

    def _remaining(pool, taken):
        # De-duplicate while keeping the input order. Going through a plain
        # set() here would make the order (and therefore the next sample)
        # depend on string hashing, which changes between interpreter runs.
        taken = set(taken)
        return list(dict.fromkeys(name for name in pool if name not in taken))

    train_images = rng.sample(all_images, train_size)
    rem_images = _remaining(all_images, train_images)
    val_images = rng.sample(rem_images, val_size)
    rem_images = _remaining(rem_images, val_images)
    test_images = rng.sample(rem_images, test_size)

    assert len(set(train_images).intersection(set(val_images))) == 0
    assert len(set(train_images).intersection(set(test_images))) == 0
    assert len(set(val_images).intersection(set(test_images))) == 0
    return train_images, val_images, test_images

def concat_df(df_list):
    """Stack a list of DataFrames on top of each other.

    Uses ``pd.concat`` instead of ``DataFrame.append``, which no longer exists
    in pandas 2.x.

    :param df_list: non-empty list of DataFrames
    :return: a new DataFrame; with two or more inputs the index is renumbered
             from 0, with a single input a deep copy of it is returned with
             its index unchanged
    :raises IndexError: when ``df_list`` is empty
    """
    first = df_list[0]
    if len(df_list) == 1:
        return copy.deepcopy(first)
    return pd.concat(list(df_list), ignore_index=True)

def find_index_of_images(img_df, image_name_list=None):
    """Look up the index label of the row whose 'file_name' contains each given name.

    Names are matched as literal substrings (``regex=False``), so characters
    such as '.', '+' or '(' in a file name are not interpreted as regular
    expression syntax.

    :param img_df: DataFrame with 'file_name' and 'id' columns
    :param image_name_list: iterable of image names; None is treated as empty
    :return: list of index labels of ``img_df``, one per name, in input order
    :raises AssertionError: when a name matches zero rows or more than one row
    """
    if image_name_list is None:
        return []
    index_id_list = []
    for el in image_name_list:
        index_ids = img_df.index[img_df['file_name'].str.contains(el, regex=False)].tolist()
        image_id = [img_df.at[idx, 'id'] for idx in index_ids]
        if len(image_id) != 1:
            print(image_id)
        assert len(image_id) == 1, (
            "expected exactly one image matching %r, found ids %s" % (el, image_id)
        )
        index_id_list.append(index_ids[0])
    return index_id_list

def create_ann_df(ann_df, index_list):
    """Collect the annotations belonging to the images at the given index labels.

    For every entry ``el`` of ``index_list`` the rows with
    ``image_id == el + 1`` are selected; the per-image groups are stacked in
    the order of ``index_list``.
    NOTE(review): this relies on image id == image index label + 1 -- confirm
    it holds for every image table used with this helper.

    :param ann_df: DataFrame with an 'image_id' column
    :param index_list: list of image index labels
    :return: DataFrame of the selected annotation rows (original index labels
             kept); an empty frame with the same columns for an empty list
    """
    if len(index_list) == 0:
        return ann_df.iloc[0:0]
    # Build all per-image slices first and concatenate once; concatenating
    # inside the loop re-copies the accumulated frame on every iteration.
    parts = [ann_df[ann_df['image_id'].isin([el + 1])] for el in index_list]
    return pd.concat(parts)

def create_img_df(img_df, index_list):
    """Select the image rows with the given index labels, in the given order.

    :param img_df: image DataFrame
    :param index_list: list of index labels of ``img_df``
    :return: DataFrame with one row per entry of ``index_list`` (original
             index labels kept); an empty frame for an empty list
    :raises KeyError: when a label is not present in ``img_df``
    """
    # A single label-based lookup replaces the row-by-row concat loop.
    return img_df.loc[list(index_list)]

def update_img_ids(aug_img_df, row=None, dataset='other'):
    """Append ``row`` to ``aug_img_df`` and give the appended row the next free id.

    The next id is the 'id' of the current last row of ``aug_img_df`` plus one.
    The combined frame is re-indexed from 0. ``dataset`` is accepted but not
    used.

    :param aug_img_df: image DataFrame with an 'id' column
    :param row: DataFrame holding the row to append
    :param dataset: unused
    :return: (new_id, combined_frame)
    """
    next_id = aug_img_df.at[aug_img_df.index[-1], 'id'] + 1
    combined = pd.concat([aug_img_df, row]).reset_index(drop=True)
    # Overwrite the id the appended row brought along from its source dataset.
    combined.at[combined.index[-1], 'id'] = next_id
    return next_id, combined

def uodate_id_in_image_ann(selected_img_df, aug_img_df, selected_ann_df, dt='other'):
    """Append the selected images to ``aug_img_df`` and re-point their annotations.

    The selected images are sorted by 'id' and appended to ``aug_img_df`` with
    consecutive new ids continuing after the 'id' of its last row (the same
    result as appending them one at a time). Annotations whose 'image_id'
    equals an image's index label + 1 get that image's new id.
    NOTE(review): the "index label + 1 == old image id" convention is taken
    from the original code -- confirm for every dataset passed in.

    The remapping is done against a snapshot of the *original* 'image_id'
    values. Remapping in place, one image after another, lets a freshly
    assigned id be mistaken for the old id of an image handled later, which
    merges the annotations of two different images.

    :param selected_img_df: images to append ('id' column; for ``dt='ch'``
                            also 'height' and 'width')
    :param aug_img_df: image table to extend ('id' column)
    :param selected_ann_df: annotations of the selected images; not modified,
                            a copy is returned
    :param dt: 'ch' additionally writes each image's height/width into
               'height'/'width' columns of the annotations (initialised to "")
    :return: (extended image table re-indexed from 0, updated annotation copy)
    """
    selected_img_df = selected_img_df.sort_values('id')
    is_ch = dt == 'ch'

    # Work on a copy so the caller's annotation frame is left untouched.
    ann_out = selected_ann_df.copy()
    if is_ch:
        ann_out['height'] = ""
        ann_out['width'] = ""

    if len(selected_img_df) == 0:
        return aug_img_df, ann_out

    first_new_id = aug_img_df.at[aug_img_df.index[-1], 'id'] + 1
    new_ids = [first_new_id + offset for offset in range(len(selected_img_df))]

    appended = selected_img_df.copy()
    appended['id'] = new_ids
    aug_img_df = pd.concat([aug_img_df, appended]).reset_index(drop=True)

    # The non-'ch' path tolerates annotation frames without 'image_id';
    # the 'ch' path requires the column (KeyError otherwise), as before.
    if is_ch or 'image_id' in ann_out.columns:
        old_image_ids = ann_out['image_id'].copy()
        if is_ch:
            heights = selected_img_df['height'].tolist()
            widths = selected_img_df['width'].tolist()
        for offset, idx in enumerate(selected_img_df.index):
            mask = old_image_ids == idx + 1
            if is_ch:
                ann_out.loc[mask, 'height'] = heights[offset]
                ann_out.loc[mask, 'width'] = widths[offset]
            ann_out.loc[mask, 'image_id'] = new_ids[offset]

    return aug_img_df, ann_out

def concat_ann_df(orig_df, add_df):
    """Stack two annotation frames and renumber 'id' as 1..N in row order.

    :param orig_df: annotation DataFrame with an 'id' column
    :param add_df: annotation DataFrame to append below ``orig_df``
    :return: new DataFrame re-indexed from 0 whose 'id' equals index + 1
    :raises KeyError: when the combined, non-empty frame has no 'id' column
    """
    ann_df_c = pd.concat([orig_df, add_df]).reset_index(drop=True)
    if len(ann_df_c) == 0:
        return ann_df_c
    if 'id' not in ann_df_c.columns:
        raise KeyError('id')
    # One vectorised assignment instead of checking and fixing each row.
    ann_df_c['id'] = range(1, len(ann_df_c) + 1)
    return ann_df_c

In [4]:
def add_area(df):
    """Return a copy of ``df`` with an 'area' column computed from each 'bbox'.

    The area is ``bbox[2] * bbox[3]`` of the row's own bbox. Each row is
    handled directly, so the result does not depend on how 'id' relates to
    the index labels. An existing 'area' column is overwritten.

    :param df: DataFrame with a 'bbox' column whose entries are indexable
               with at least four elements
    :return: deep copy of ``df`` with the 'area' column set
    """
    df = copy.deepcopy(df)
    df['area'] = [bb[2] * bb[3] for bb in df['bbox']]
    return df

# Create annotation file
#### selected data frames :


In [76]:
cp_file = 'datasets/cityperson_train.json'
# The context manager closes the file handle as soon as the JSON is parsed.
with open(cp_file) as f:
    cp_data = json.load(f)
# cp_licenses = cp_data['licenses']
cp_categories = cp_data['categories']
cp_images = cp_data['images']
cp_annotations = cp_data['annotations']
# One row per image / per annotation record.
cp_img_df = pd.DataFrame(cp_images)
cp_ann_df = pd.DataFrame(cp_annotations)

In [77]:
print(cp_data['categories'])

[{'id': 1, 'name': 'pedestrain', 'supercategory': 'pedestrain'}]


In [78]:
# for idx, row in cp_img_df.iterrows():
#     cp_img_df.at[cp_img_df.index[idx], 'file_name'] = 'CityPersons/' + cp_img_df.at[cp_img_df.index[idx], 'file_name']
# The combined image table starts as an independent deep copy of the
# CityPersons images; images of the other datasets are appended to it below.
aug_img_df = copy.deepcopy(cp_img_df)
aug_img_df

Unnamed: 0,id,file_name,width,height,date_captured,license,coco_url,flickr_url
0,1,CityPersons/leftImg8bit_trainvaltest//leftImg8bit/train/weimar/weimar_000117_000019_leftImg8bit.png,2048,1024,2019-07-25 11:20:43.195846,1,,
1,2,CityPersons/leftImg8bit_trainvaltest//leftImg8bit/train/weimar/weimar_000080_000019_leftImg8bit.png,2048,1024,2019-07-25 11:20:43.195846,1,,
2,3,CityPersons/leftImg8bit_trainvaltest//leftImg8bit/train/weimar/weimar_000113_000019_leftImg8bit.png,2048,1024,2019-07-25 11:20:43.195846,1,,
3,4,CityPersons/leftImg8bit_trainvaltest//leftImg8bit/train/weimar/weimar_000103_000019_leftImg8bit.png,2048,1024,2019-07-25 11:20:43.195846,1,,
4,5,CityPersons/leftImg8bit_trainvaltest//leftImg8bit/train/weimar/weimar_000063_000019_leftImg8bit.png,2048,1024,2019-07-25 11:20:43.195846,1,,
...,...,...,...,...,...,...,...,...
2524,2525,CityPersons/leftImg8bit_trainvaltest//leftImg8bit/train/tubingen/tubingen_000130_000019_leftImg8bit.png,2048,1024,2019-07-25 11:20:43.195846,1,,
2525,2526,CityPersons/leftImg8bit_trainvaltest//leftImg8bit/train/tubingen/tubingen_000126_000019_leftImg8bit.png,2048,1024,2019-07-25 11:20:43.195846,1,,
2526,2527,CityPersons/leftImg8bit_trainvaltest//leftImg8bit/train/tubingen/tubingen_000045_000019_leftImg8bit.png,2048,1024,2019-07-25 11:20:43.195846,1,,
2527,2528,CityPersons/leftImg8bit_trainvaltest//leftImg8bit/train/tubingen/tubingen_000016_000019_leftImg8bit.png,2048,1024,2019-07-25 11:20:43.195846,1,,


In [79]:
# cp_ann_df = add_area(cp_ann_df)
cp_ann_df

Unnamed: 0,id,image_id,category_id,iscrowd,bbox,width,height,area
0,1,1,1,True,"[1137.0, 307.0, 8.0, 9.0]",2048,1024,72.0
1,2,1,1,True,"[1911.0, 359.0, 10.0, 54.0]",2048,1024,540.0
2,3,1,1,True,"[1034.0, 366.0, 31.0, 75.0]",2048,1024,2325.0
3,4,2,1,False,"[281.0, 381.0, 89.0, 216.0]",2048,1024,19224.0
4,5,2,1,False,"[602.0, 413.0, 60.0, 146.0]",2048,1024,8760.0
...,...,...,...,...,...,...,...,...
23651,23652,2529,1,False,"[1662.0, 366.0, 26.0, 63.0]",2048,1024,1638.0
23652,23653,2529,1,False,"[1642.0, 370.0, 25.0, 61.0]",2048,1024,1525.0
23653,23654,2529,1,False,"[1633.0, 369.0, 21.0, 51.0]",2048,1024,1071.0
23654,23655,2529,1,True,"[446.0, 351.0, 8.0, 65.0]",2048,1024,520.0


In [80]:
# Keep independent copies of the CityPersons 'info' and 'categories' sections.
cp_info = copy.deepcopy(cp_data['info'])
cp_cat = copy.deepcopy(cp_data['categories'])
cp_data['categories']

[{'id': 1, 'name': 'pedestrain', 'supercategory': 'pedestrain'}]

## ECP 

In [82]:
ecp_file = 'json_files/EuroCity/day_train_all.json'
# The context manager closes the file handle as soon as the JSON is parsed.
with open(ecp_file) as f:
    ecp_data = json.load(f)
ecp_licenses = ecp_data['licenses']
ecp_categories = ecp_data['categories']
ecp_images = ecp_data['images']
ecp_annotations = ecp_data['annotations']
# One row per image / per annotation record.
ecp_img_df = pd.DataFrame(ecp_images)
ecp_ann_df = pd.DataFrame(ecp_annotations)
ecp_img_df.shape[0]

23892

In [83]:
print(ecp_data['categories'])

[{'id': 1, 'name': 'pedestrain', 'supercategory': 'pedestrain'}]


In [84]:
# Prefix every path with the dataset folder in one vectorised step. Paths that
# already carry the prefix are skipped, so re-running this cell does not
# produce 'EuroCity/EuroCity/...'.
ecp_needs_prefix = ~ecp_img_df['file_name'].str.startswith('EuroCity/', na=False)
ecp_img_df.loc[ecp_needs_prefix, 'file_name'] = 'EuroCity/' + ecp_img_df.loc[ecp_needs_prefix, 'file_name']
ecp_img_df

Unnamed: 0,id,file_name,width,height,date_captured,license,coco_url,flickr_url
0,1,EuroCity/ECP/day/img/train/wuerzburg/wuerzburg_00516.png,1920,1024,2019-11-03 07:44:18.143034,1,,
1,2,EuroCity/ECP/day/img/train/wuerzburg/wuerzburg_00142.png,1920,1024,2019-11-03 07:44:18.143034,1,,
2,3,EuroCity/ECP/day/img/train/wuerzburg/wuerzburg_00294.png,1920,1024,2019-11-03 07:44:18.143034,1,,
3,4,EuroCity/ECP/day/img/train/wuerzburg/wuerzburg_00106.png,1920,1024,2019-11-03 07:44:18.143034,1,,
4,5,EuroCity/ECP/day/img/train/wuerzburg/wuerzburg_00470.png,1920,1024,2019-11-03 07:44:18.143034,1,,
...,...,...,...,...,...,...,...,...
23887,23888,EuroCity/ECP/day/img/train/potsdam/potsdam_00494.png,1920,1024,2019-11-03 07:44:18.143034,1,,
23888,23889,EuroCity/ECP/day/img/train/potsdam/potsdam_00029.png,1920,1024,2019-11-03 07:44:18.143034,1,,
23889,23890,EuroCity/ECP/day/img/train/potsdam/potsdam_00514.png,1920,1024,2019-11-03 07:44:18.143034,1,,
23890,23891,EuroCity/ECP/day/img/train/potsdam/potsdam_00360.png,1920,1024,2019-11-03 07:44:18.143034,1,,


In [85]:
# Draw disjoint train / val / test samples of ECP image base names (the part
# after the last '/'). The draw uses the `random` module, so it differs between
# runs unless the random state is seeded beforehand.
train_img_ecp, val_img_ecp, test_ecp_img = splite_data(ecp_img_df, train_size=871, val_size=168, test_size=155)
# train_img_ecp

In [86]:
# Map the sampled base names back to row labels of ecp_img_df.
index_id_list = find_index_of_images(ecp_img_df, image_name_list=train_img_ecp)

# Pull the sampled image rows and the annotations that belong to them.
selected_ecp_img_df = create_img_df(ecp_img_df, index_id_list)
selected_ecp_ann_df = create_ann_df(ecp_ann_df, index_id_list)
print(selected_ecp_ann_df.shape[0])
print(selected_ecp_img_df.shape[0])

7528
871


In [87]:
train_img_ecp

['torino_00423.png',
 'zagreb_00056.png',
 'montpellier_00471.png',
 'hamburg_00256.png',
 'dresden_00206.png',
 'ulm_00429.png',
 'hamburg_00139.png',
 'basel_00328.png',
 'budapest_00137.png',
 'bologna_00726.png',
 'brno_00625.png',
 'dresden_00415.png',
 'bologna_00289.png',
 'bologna_00121.png',
 'ljubljana_00406.png',
 'barcelona_00710.png',
 'leipzig_00059.png',
 'budapest_00461.png',
 'pisa_00507.png',
 'prague_00193.png',
 'amsterdam_00966.png',
 'bologna_00984.png',
 'stuttgart_00531.png',
 'brno_00682.png',
 'dresden_00721.png',
 'budapest_00596.png',
 'prague_00090.png',
 'szczecin_00226.png',
 'bologna_00854.png',
 'barcelona_00092.png',
 'marseille_00429.png',
 'brno_00669.png',
 'potsdam_00266.png',
 'roma_00115.png',
 'basel_00276.png',
 'stuttgart_00016.png',
 'prague_00904.png',
 'prague_00082.png',
 'bratislava_00646.png',
 'roma_00836.png',
 'leipzig_00305.png',
 'prague_00879.png',
 'wuerzburg_00194.png',
 'marseille_00386.png',
 'prague_01313.png',
 'firenze_00034

In [88]:
pd.set_option('display.max_colwidth', None)
selected_ecp_img_df.file_name

1169               EuroCity/ECP/day/img/train/torino/torino_00423.png
6449               EuroCity/ECP/day/img/train/zagreb/zagreb_00056.png
5032     EuroCity/ECP/day/img/train/montpellier/montpellier_00471.png
14142            EuroCity/ECP/day/img/train/hamburg/hamburg_00256.png
1897             EuroCity/ECP/day/img/train/dresden/dresden_00206.png
                                     ...                             
9756                   EuroCity/ECP/day/img/train/pisa/pisa_00490.png
8362                 EuroCity/ECP/day/img/train/koeln/koeln_00061.png
12891        EuroCity/ECP/day/img/train/marseille/marseille_01097.png
8267                   EuroCity/ECP/day/img/train/lyon/lyon_00183.png
10977              EuroCity/ECP/day/img/train/milano/milano_00317.png
Name: file_name, Length: 871, dtype: object

In [89]:
print(aug_img_df.shape[0])
# Append the sampled ECP images to the combined table and re-point their
# annotations at the new image ids. aug_img_df is rebound here, so running this
# cell a second time appends the same images again.
aug_img_df, selected_ecp_ann_df = uodate_id_in_image_ann(selected_ecp_img_df, aug_img_df, selected_ecp_ann_df)
print(aug_img_df.shape[0])

2529
3400


In [90]:
selected_ecp_ann_df = add_area(selected_ecp_ann_df)
print(selected_ecp_ann_df.shape[0])
ann_df_c = concat_ann_df(cp_ann_df, selected_ecp_ann_df)
# Annotation ids must run 1..N in row order; checked in one vectorised
# comparison instead of a Python loop over every row.
assert (ann_df_c['id'] == ann_df_c.index + 1).all()
ann_df_c

7528


Unnamed: 0,id,image_id,category_id,iscrowd,bbox,width,height,area
0,1,1,1,True,"[1137.0, 307.0, 8.0, 9.0]",2048,1024,72
1,2,1,1,True,"[1911.0, 359.0, 10.0, 54.0]",2048,1024,540
2,3,1,1,True,"[1034.0, 366.0, 31.0, 75.0]",2048,1024,2325
3,4,2,1,False,"[281.0, 381.0, 89.0, 216.0]",2048,1024,19224
4,5,2,1,False,"[602.0, 413.0, 60.0, 146.0]",2048,1024,8760
...,...,...,...,...,...,...,...,...
31179,31180,3014,1,True,"[972.0, 430.0, 12.0, 26.0]",1920,1024,312
31180,31181,2833,1,True,"[190.0, 165.0, 120.0, 329.0]",1920,1024,39480
31181,31182,2833,1,True,"[639.0, 218.0, 71.0, 169.0]",1920,1024,11999
31182,31183,2947,1,True,"[1304.0, 557.0, 158.0, 265.0]",1920,1024,41870


In [91]:
print(ann_df_c.shape[0])

31184


### CrowdHuman

In [18]:
ch_file = 'json_files/CrowdHuman/train.json'

# The context manager closes the file handle as soon as the JSON is parsed.
with open(ch_file) as f:
    ch_data = json.load(f)
# ch_licenses = ch_data['licenses']
ch_categories = ch_data['categories']
ch_images = ch_data['images']
ch_annotations = ch_data['annotations']
# One row per image / per annotation record.
ch_img_df = pd.DataFrame(ch_images)
ch_ann_df = pd.DataFrame(ch_annotations)

In [19]:
print(ch_data['categories'])

[{'supercategory': 'none', 'id': 1, 'name': 'person'}, {'supercategory': 'none', 'id': 2, 'name': 'mask'}]


In [20]:
# Keep only annotations with category_id 1 (named 'person' in the category list
# printed above); rows of the other category are dropped. ch_ann_df is
# overwritten with the filtered copy.
bb_df = copy.deepcopy(ch_ann_df[ch_ann_df.category_id == 1])
ch_ann_df = bb_df.copy()
ch_ann_df

Unnamed: 0,area,iscrowd,image_id,bbox,hbox,vbox,category_id,id,ignore
0,86523,0,1,"[61, 123, 191, 453]","[123, 129, 63, 64]","[62, 126, 154, 446]",1,1,0
1,92378,0,1,"[165, 95, 187, 494]","[214, 97, 58, 74]","[175, 95, 140, 487]",1,2,0
2,96135,0,1,"[236, 104, 195, 493]","[318, 109, 58, 68]","[260, 106, 170, 487]",1,3,0
3,85852,0,1,"[452, 110, 169, 508]","[486, 119, 61, 74]","[455, 113, 141, 501]",1,4,0
4,62103,0,1,"[520, 95, 163, 381]","[559, 105, 53, 57]","[553, 98, 70, 118]",1,5,0
...,...,...,...,...,...,...,...,...,...
438781,286612,0,15000,"[3393, 590, 316, 907]","[3518, 595, 114, 133]","[3393, 590, 316, 907]",1,438782,0
438782,81890,0,15000,"[3448, 607, 190, 431]","[3511, 610, 93, 104]","[3452, 608, 184, 430]",1,438783,0
438783,240570,0,15000,"[3684, 554, 297, 810]","[3824, 559, 96, 105]","[3795, 559, 156, 794]",1,438784,0
438784,234855,0,15000,"[3797, 536, 255, 921]","[3909, 539, 89, 113]","[3889, 535, 112, 169]",1,438785,0


In [21]:
# Prefix every path with the dataset folder in one vectorised step. Paths that
# already carry the prefix are skipped, so re-running this cell does not
# double the prefix.
ch_needs_prefix = ~ch_img_df['file_name'].str.startswith('CrowdHuman/Images/', na=False)
ch_img_df.loc[ch_needs_prefix, 'file_name'] = 'CrowdHuman/Images/' + ch_img_df.loc[ch_needs_prefix, 'file_name']
ch_img_df

Unnamed: 0,file_name,height,width,id
0,"CrowdHuman/Images/284193,faa9000f2678b5e.jpg",683,1024,1
1,"CrowdHuman/Images/273275,cd061000af95f691.jpg",480,600,2
2,"CrowdHuman/Images/273278,8d231000e09fc133.jpg",1017,1300,3
3,"CrowdHuman/Images/283554,2cd4d0007833968d.jpg",954,1300,4
4,"CrowdHuman/Images/283554,37ba1000a6ca8c4d.jpg",533,800,5
...,...,...,...,...
14995,"CrowdHuman/Images/273275,874d9000417e16ed.jpg",1192,1800,14996
14996,"CrowdHuman/Images/273278,d329e000260a8cc2.jpg",600,1800,14997
14997,"CrowdHuman/Images/282555,c5fd20007faf5f84.jpg",575,1024,14998
14998,"CrowdHuman/Images/273275,10b78d0006d7d7b9c.jpg",462,1838,14999


In [22]:
# Disjoint random train / val / test samples of CrowdHuman image base names.
train_img_ch, val_img_ch, test_img_ch = splite_data(ch_img_df, train_size=562, val_size=121, test_size=120)

In [23]:
# Resolve the sampled names to row labels, then pull the matching image rows
# and their annotations. index_id_list is reused (overwritten) per dataset.
index_id_list = find_index_of_images(ch_img_df, image_name_list=train_img_ch)
selected_ch_img_df = create_img_df(ch_img_df, index_id_list)
selected_ch_ann_df = create_ann_df(ch_ann_df, index_id_list)
selected_ch_img_df

Unnamed: 0,file_name,height,width,id
5086,"CrowdHuman/Images/273271,2a2ca0002e909b29.jpg",1952,2576,5087
7192,"CrowdHuman/Images/284193,15d2400054b0958b.jpg",511,768,7193
5508,"CrowdHuman/Images/273278,f175b0004e0340e0.jpg",3216,4288,5509
5241,"CrowdHuman/Images/282555,523cf000eb66f35c.jpg",960,1450,5242
1930,"CrowdHuman/Images/283992,105be0001e20487f.jpg",480,852,1931
...,...,...,...,...
722,"CrowdHuman/Images/273275,d134b00079aab5f0.jpg",2136,2848,723
5555,"CrowdHuman/Images/273275,139d6800022ebfb5d.jpg",705,998,5556
9569,"CrowdHuman/Images/284193,1f37f000ecd86b93.jpg",2934,4102,9570
4100,"CrowdHuman/Images/273278,89adf0008b0454dd.jpg",525,800,4101


In [24]:
print(selected_ch_img_df.shape[0])
selected_ch_ann_df.shape[0]

562


13104

In [25]:
print(aug_img_df.shape[0])
# dt='ch' additionally copies each image's height/width onto its annotations.
# aug_img_df is rebound, so re-running this cell appends the images again.
aug_img_df, selected_ch_ann_df = uodate_id_in_image_ann(selected_ch_img_df, aug_img_df, selected_ch_ann_df, dt='ch')
aug_img_df.shape[0]

3091


3653

In [26]:
selected_ch_ann_df.shape[0]

13104

In [27]:
# add_area overwrites the 'area' column the CrowdHuman annotations already have
# with bbox width * height.
selected_ch_ann_df = add_area(selected_ch_ann_df)
print(ann_df_c.shape[0])
ann_df_c = concat_ann_df(ann_df_c, selected_ch_ann_df)
# 'hbox' and 'vbox' are columns only the CrowdHuman annotations bring along;
# drop them from the combined table.
ann_df_c = ann_df_c.drop(['hbox', 'vbox'], axis = 1)
ann_df_c

28381


Unnamed: 0,id,image_id,category_id,iscrowd,bbox,width,height,area,ignore
0,1,1,1,1,"[1137.0, 307.0, 8.0, 9.0]",2048,1024,72,
1,2,1,1,1,"[1911.0, 359.0, 10.0, 54.0]",2048,1024,540,
2,3,1,1,1,"[1034.0, 366.0, 31.0, 75.0]",2048,1024,2325,
3,4,2,1,0,"[281.0, 381.0, 89.0, 216.0]",2048,1024,19224,
4,5,2,1,0,"[602.0, 413.0, 60.0, 146.0]",2048,1024,8760,
...,...,...,...,...,...,...,...,...,...
41480,41481,3216,1,0,"[4429, 1772, 375, 1347]",5035,4016,505125,0.0
41481,41482,3216,1,0,"[4577, 1884, 388, 1341]",5035,4016,520308,0.0
41482,41483,3216,1,0,"[4868, 1902, 345, 1322]",5035,4016,456090,0.0
41483,41484,3216,1,0,"[4698, 1887, 345, 1267]",5035,4016,437115,0.0


### wheelchair

In [28]:
wheel_file = 'datasets/selected_oi/train.json'

# The context manager closes the file handle as soon as the JSON is parsed.
with open(wheel_file) as f:
    wheel_data = json.load(f)
wheel_categories = wheel_data['categories']
wheel_images = wheel_data['images']
wheel_annotations = wheel_data['annotations']
# One row per image / per annotation record.
wheel_img_df = pd.DataFrame(wheel_images)
wheel_ann_df = pd.DataFrame(wheel_annotations)
print(wheel_img_df.shape[0])

20


In [29]:
print(wheel_data['categories'])

[{'id': 1, 'name': 'pedestrain', 'supercategory': 'pedestrain'}]


In [30]:
wheel_ann_df = add_area(wheel_ann_df)

print(wheel_ann_df.shape[0])

46


In [31]:
# All wheelchair images and annotations are used (no sampling); work on deep
# copies so the loaded frames stay untouched.
selected_img_df = copy.deepcopy(wheel_img_df)
selected_ann_df = copy.deepcopy(wheel_ann_df)

In [32]:
print(aug_img_df.shape[0])
# Append the wheelchair images and re-point their annotations at the new ids.
# aug_img_df is rebound, so re-running this cell appends the images again.
aug_img_df, selected_ann_df = uodate_id_in_image_ann(selected_img_df, aug_img_df, selected_ann_df)
print(aug_img_df.shape[0])
# aug_img_df = copy.deepcopy(wheel_img_df)

3653
3673


In [33]:
print(ann_df_c.shape[0])
print(selected_ann_df.shape[0])
# Append the wheelchair annotations and renumber annotation ids.
ann_df_c = concat_ann_df(ann_df_c, selected_ann_df)
# ann_df_c = copy.deepcopy(wheel_ann_df)
# NOTE: the bare expression below is not the last statement of the cell, so it
# displays nothing.
ann_df_c
print(ann_df_c.shape[0])

41485
46
41531


In [34]:
# Annotation ids must run 1..N in row order; one vectorised comparison replaces
# the Python loop over every row.
assert (ann_df_c['id'] == ann_df_c.index + 1).all()

### WiderPerson (WP)

In [35]:
wp1_file = 'json_files/WiderPerson/train.json'

# The context manager closes the file handle as soon as the JSON is parsed.
with open(wp1_file) as f:
    wp_data = json.load(f)
wp_categories = wp_data['categories']
wp_images = wp_data['images']
wp_annotations = wp_data['annotations']
# One row per image / per annotation record.
wp_img_df = pd.DataFrame(wp_images)
wp_ann_df = pd.DataFrame(wp_annotations)
wp_img_df.shape[0]

8000

In [36]:
wp_ann_df

Unnamed: 0,id,image_id,category_id,iscrowd,bbox,width,height
0,1,1,1,False,"[45.0, 235.0, 34.0, 83.0]",550,413
1,2,1,1,False,"[60.0, 209.0, 60.0, 147.0]",550,413
2,3,1,1,False,"[119.0, 214.0, 49.0, 122.0]",550,413
3,4,1,1,False,"[94.0, 220.0, 42.0, 106.0]",550,413
4,5,1,1,False,"[213.0, 201.0, 74.0, 180.0]",550,413
...,...,...,...,...,...,...,...
241804,241805,8000,1,False,"[233.0, 83.0, 15.0, 18.0]",385,257
241805,241806,8000,1,False,"[34.0, 88.0, 18.0, 40.0]",385,257
241806,241807,8000,1,False,"[66.0, 87.0, 15.0, 27.0]",385,257
241807,241808,8000,1,False,"[161.0, 79.0, 16.0, 21.0]",385,257


In [37]:
print(wp_data['categories'])

[{'id': 1, 'name': 'pedestrain', 'supercategory': 'pedestrain'}]


In [38]:
# Disjoint random train / val / test samples of WiderPerson image base names,
# then the row labels of the train sample in wp_img_df.
train_img_wp, val_img_wp, test_img_wp = splite_data(wp_img_df, train_size=358, val_size=77,test_size=77)
index_id_list = find_index_of_images(wp_img_df, image_name_list=train_img_wp)

In [39]:
selected_wp_img_df = create_img_df(wp_img_df, index_id_list)
selected_wp_img_df.shape[0]

358

In [40]:
# Annotations of the sampled WiderPerson images, with an 'area' column added
# (bbox width * height).
selected_wp_ann_df = create_ann_df(wp_ann_df, index_id_list)
selected_wp_ann_df = add_area(selected_wp_ann_df)
selected_wp_ann_df.shape[0]

10730

In [41]:
print(aug_img_df.shape[0])
# Append the sampled WiderPerson images and re-point their annotations.
# aug_img_df is rebound, so re-running this cell appends the images again.
aug_img_df, selected_wp_ann_df = uodate_id_in_image_ann(selected_wp_img_df, aug_img_df, selected_wp_ann_df)

print(aug_img_df.shape[0])

3673
4031


In [42]:
selected_wp_ann_df.shape[0]

10730

In [43]:
ann_df_c = concat_ann_df(ann_df_c, selected_wp_ann_df)
# ann_df_c

# Annotation ids must run 1..N in row order; one vectorised comparison replaces
# the Python loop over every row.
assert (ann_df_c['id'] == ann_df_c.index + 1).all()

In [44]:
ann_df_c.shape[0]

52261

In [45]:
ann_df_c

Unnamed: 0,id,image_id,category_id,iscrowd,bbox,width,height,area,ignore
0,1,1,1,1,"[1137.0, 307.0, 8.0, 9.0]",2048,1024,72,
1,2,1,1,1,"[1911.0, 359.0, 10.0, 54.0]",2048,1024,540,
2,3,1,1,1,"[1034.0, 366.0, 31.0, 75.0]",2048,1024,2325,
3,4,2,1,0,"[281.0, 381.0, 89.0, 216.0]",2048,1024,19224,
4,5,2,1,0,"[602.0, 413.0, 60.0, 146.0]",2048,1024,8760,
...,...,...,...,...,...,...,...,...,...
52256,52257,3977,1,0,"[141.0, 104.0, 19.0, 34.0]",300,200,646,
52257,52258,3977,1,0,"[165.0, 113.0, 21.0, 26.0]",300,200,546,
52258,52259,3977,1,0,"[159.0, 103.0, 23.0, 20.0]",300,200,460,
52259,52260,3977,1,0,"[230.0, 122.0, 68.0, 74.0]",300,200,5032,


#### Selected Caltech

In [46]:
ca_file = 'datasets/rand_cal/train.json'

# The context manager closes the file handle as soon as the JSON is parsed.
with open(ca_file) as f:
    cl_data = json.load(f)
cl_categories = cl_data['categories']
cl_images = cl_data['images']
cl_annotations = cl_data['annotations']
# One row per image / per annotation record.
cl_img_df = pd.DataFrame(cl_images)
cl_ann_df = pd.DataFrame(cl_annotations)
cl_img_df

Unnamed: 0,id,file_name,width,height,date_captured,license,coco_url,flickr_url
0,1,rand_cal/images/train/set02_V001_1309.png,640,480,2021-04-18 13:30:31.472309,1,,
1,2,rand_cal/images/train/set00_V004_1946.png,640,480,2021-04-18 13:30:31.472309,1,,
2,3,rand_cal/images/train/set04_V010_995.png,640,480,2021-04-18 13:30:31.472309,1,,
3,4,rand_cal/images/train/set00_V006_1897.png,640,480,2021-04-18 13:30:31.472309,1,,
4,5,rand_cal/images/train/set05_V008_1082.png,640,480,2021-04-18 13:30:31.472309,1,,
...,...,...,...,...,...,...,...,...
196,197,rand_cal/images/train/set06_V000_936.png,640,480,2021-04-18 13:30:31.472309,1,,
197,198,rand_cal/images/train/set04_V006_1005.png,640,480,2021-04-18 13:30:31.472309,1,,
198,199,rand_cal/images/train/set00_V002_1266.png,640,480,2021-04-18 13:30:31.472309,1,,
199,200,rand_cal/images/train/set03_V004_643.png,640,480,2021-04-18 13:30:31.472309,1,,


In [47]:
cl_ann_df = add_area(cl_ann_df)

In [48]:
# All images of this file are used (no sampling). selected_img_df and
# selected_ann_df are reused names: they held the wheelchair data earlier.
selected_img_df = copy.deepcopy(cl_img_df)
selected_ann_df = copy.deepcopy(cl_ann_df)
# Expected size of the combined image table after the next cell.
aug_img_df.shape[0] + selected_img_df.shape[0]

4232

In [49]:
# Append these images and re-point their annotations at the new image ids.
# aug_img_df is rebound, so re-running this cell appends the images again.
aug_img_df, selected_ann_df = uodate_id_in_image_ann(selected_img_df, aug_img_df, selected_ann_df)

aug_img_df

Unnamed: 0,id,file_name,width,height,date_captured,license,coco_url,flickr_url
0,1,CityPersons/leftImg8bit_trainvaltest//leftImg8...,2048,1024,2019-07-25 11:20:43.195846,1.0,,
1,2,CityPersons/leftImg8bit_trainvaltest//leftImg8...,2048,1024,2019-07-25 11:20:43.195846,1.0,,
2,3,CityPersons/leftImg8bit_trainvaltest//leftImg8...,2048,1024,2019-07-25 11:20:43.195846,1.0,,
3,4,CityPersons/leftImg8bit_trainvaltest//leftImg8...,2048,1024,2019-07-25 11:20:43.195846,1.0,,
4,5,CityPersons/leftImg8bit_trainvaltest//leftImg8...,2048,1024,2019-07-25 11:20:43.195846,1.0,,
...,...,...,...,...,...,...,...,...
4227,4228,rand_cal/images/train/set06_V000_936.png,640,480,2021-04-18 13:30:31.472309,1.0,,
4228,4229,rand_cal/images/train/set04_V006_1005.png,640,480,2021-04-18 13:30:31.472309,1.0,,
4229,4230,rand_cal/images/train/set00_V002_1266.png,640,480,2021-04-18 13:30:31.472309,1.0,,
4230,4231,rand_cal/images/train/set03_V004_643.png,640,480,2021-04-18 13:30:31.472309,1.0,,


In [50]:
aug_img_df.shape[0]

4232

In [51]:
selected_ann_df.shape[0] + ann_df_c.shape[0]

52470

In [52]:
ann_df_c = concat_ann_df(ann_df_c, selected_ann_df)

# Annotation ids must run 1..N in row order; one vectorised comparison replaces
# the Python loop over every row.
assert (ann_df_c['id'] == ann_df_c.index + 1).all()

ann_df_c

Unnamed: 0,id,image_id,category_id,iscrowd,bbox,width,height,area,ignore
0,1,1,1,1,"[1137.0, 307.0, 8.0, 9.0]",2048,1024,72,
1,2,1,1,1,"[1911.0, 359.0, 10.0, 54.0]",2048,1024,540,
2,3,1,1,1,"[1034.0, 366.0, 31.0, 75.0]",2048,1024,2325,
3,4,2,1,0,"[281.0, 381.0, 89.0, 216.0]",2048,1024,19224,
4,5,2,1,0,"[602.0, 413.0, 60.0, 146.0]",2048,1024,8760,
...,...,...,...,...,...,...,...,...,...
52465,52466,4226,1,0,"[544, 190, 22, 53]",640,480,1166,
52466,52467,4226,1,0,"[493, 195, 22, 42]",640,480,924,
52467,52468,4227,1,0,"[512, 171, 28, 72]",640,480,2016,
52468,52469,4227,1,0,"[542, 170, 95, 72]",640,480,6840,


In [53]:
# Normalise 'iscrowd' to booleans. The result is assigned back to the column:
# calling replace(..., inplace=True) on the selected column is chained in-place
# mutation, which leaves the frame unchanged under pandas Copy-on-Write.
ann_df_c["iscrowd"] = ann_df_c["iscrowd"].replace({1: True, 0:False})
# Rows from datasets without an 'ignore' flag get 0.
ann_df_c['ignore'] = ann_df_c['ignore'].fillna(0)
# ann_df_c = ann_df_c.drop(['hbox', 'vbox'], axis = 1)
ann_df_c

Unnamed: 0,id,image_id,category_id,iscrowd,bbox,width,height,area,ignore
0,1,1,1,True,"[1137.0, 307.0, 8.0, 9.0]",2048,1024,72,0.0
1,2,1,1,True,"[1911.0, 359.0, 10.0, 54.0]",2048,1024,540,0.0
2,3,1,1,True,"[1034.0, 366.0, 31.0, 75.0]",2048,1024,2325,0.0
3,4,2,1,False,"[281.0, 381.0, 89.0, 216.0]",2048,1024,19224,0.0
4,5,2,1,False,"[602.0, 413.0, 60.0, 146.0]",2048,1024,8760,0.0
...,...,...,...,...,...,...,...,...,...
52465,52466,4226,1,False,"[544, 190, 22, 53]",640,480,1166,0.0
52466,52467,4226,1,False,"[493, 195, 22, 42]",640,480,924,0.0
52467,52468,4227,1,False,"[512, 171, 28, 72]",640,480,2016,0.0
52468,52469,4227,1,False,"[542, 170, 95, 72]",640,480,6840,0.0


In [92]:
ann_df_c[ann_df_c.category_id != 1]

Unnamed: 0,id,image_id,category_id,iscrowd,bbox,width,height,area


In [93]:
ann_df_c

Unnamed: 0,id,image_id,category_id,iscrowd,bbox,width,height,area
0,1,1,1,True,"[1137.0, 307.0, 8.0, 9.0]",2048,1024,72
1,2,1,1,True,"[1911.0, 359.0, 10.0, 54.0]",2048,1024,540
2,3,1,1,True,"[1034.0, 366.0, 31.0, 75.0]",2048,1024,2325
3,4,2,1,False,"[281.0, 381.0, 89.0, 216.0]",2048,1024,19224
4,5,2,1,False,"[602.0, 413.0, 60.0, 146.0]",2048,1024,8760
...,...,...,...,...,...,...,...,...
31179,31180,3014,1,True,"[972.0, 430.0, 12.0, 26.0]",1920,1024,312
31180,31181,2833,1,True,"[190.0, 165.0, 120.0, 329.0]",1920,1024,39480
31181,31182,2833,1,True,"[639.0, 218.0, 71.0, 169.0]",1920,1024,11999
31182,31183,2947,1,True,"[1304.0, 557.0, 158.0, 265.0]",1920,1024,41870


In [94]:
# Normalise 'iscrowd' to booleans. The result is assigned back to the column:
# calling replace(..., inplace=True) on the selected column is chained in-place
# mutation, which leaves the frame unchanged under pandas Copy-on-Write.
ann_df_c["iscrowd"] = ann_df_c["iscrowd"].replace({1: True, 0:False})
# ann_df_c['ignore'] = ann_df_c['ignore'].fillna(0)
ann_df_c

Unnamed: 0,id,image_id,category_id,iscrowd,bbox,width,height,area
0,1,1,1,True,"[1137.0, 307.0, 8.0, 9.0]",2048,1024,72
1,2,1,1,True,"[1911.0, 359.0, 10.0, 54.0]",2048,1024,540
2,3,1,1,True,"[1034.0, 366.0, 31.0, 75.0]",2048,1024,2325
3,4,2,1,False,"[281.0, 381.0, 89.0, 216.0]",2048,1024,19224
4,5,2,1,False,"[602.0, 413.0, 60.0, 146.0]",2048,1024,8760
...,...,...,...,...,...,...,...,...
31179,31180,3014,1,True,"[972.0, 430.0, 12.0, 26.0]",1920,1024,312
31180,31181,2833,1,True,"[190.0, 165.0, 120.0, 329.0]",1920,1024,39480
31181,31182,2833,1,True,"[639.0, 218.0, 71.0, 169.0]",1920,1024,11999
31182,31183,2947,1,True,"[1304.0, 557.0, 158.0, 265.0]",1920,1024,41870


In [95]:
aug_img_df

Unnamed: 0,id,file_name,width,height,date_captured,license,coco_url,flickr_url
0,1,CityPersons/leftImg8bit_trainvaltest//leftImg8bit/train/weimar/weimar_000117_000019_leftImg8bit.png,2048,1024,2019-07-25 11:20:43.195846,1,,
1,2,CityPersons/leftImg8bit_trainvaltest//leftImg8bit/train/weimar/weimar_000080_000019_leftImg8bit.png,2048,1024,2019-07-25 11:20:43.195846,1,,
2,3,CityPersons/leftImg8bit_trainvaltest//leftImg8bit/train/weimar/weimar_000113_000019_leftImg8bit.png,2048,1024,2019-07-25 11:20:43.195846,1,,
3,4,CityPersons/leftImg8bit_trainvaltest//leftImg8bit/train/weimar/weimar_000103_000019_leftImg8bit.png,2048,1024,2019-07-25 11:20:43.195846,1,,
4,5,CityPersons/leftImg8bit_trainvaltest//leftImg8bit/train/weimar/weimar_000063_000019_leftImg8bit.png,2048,1024,2019-07-25 11:20:43.195846,1,,
...,...,...,...,...,...,...,...,...
3395,3396,EuroCity/ECP/day/img/train/potsdam/potsdam_00131.png,1920,1024,2019-11-03 07:44:18.143034,1,,
3396,3397,EuroCity/ECP/day/img/train/potsdam/potsdam_00408.png,1920,1024,2019-11-03 07:44:18.143034,1,,
3397,3398,EuroCity/ECP/day/img/train/potsdam/potsdam_00152.png,1920,1024,2019-11-03 07:44:18.143034,1,,
3398,3399,EuroCity/ECP/day/img/train/potsdam/potsdam_00070.png,1920,1024,2019-11-03 07:44:18.143034,1,,


### Save to Json

In [96]:
# Assemble the output document: 'info' and 'categories' are taken over from the
# CityPersons file, images and annotations come from the combined tables
# (one dict per row).
aug_data_train = {
    'info': cp_data['info'],
    'categories': cp_data['categories'],
    'images': aug_img_df.to_dict('records'),
    'annotations': ann_df_c.to_dict('records'),
}

In [97]:
with open('datasets/random_train.json', 'w') as fp:
    json.dump(aug_data_train, fp)

## Validation

In [98]:
# NOTE: from here on cp_file / cp_data / cp_*_df refer to the CityPersons
# *validation* file; the training data loaded under the same names is replaced.
cp_file = 'datasets/cityperson_val.json'
# The context manager closes the file handle as soon as the JSON is parsed.
with open(cp_file) as f:
    cp_data = json.load(f)
# cp_licenses = cp_data['licenses']
cp_categories = cp_data['categories']
cp_images = cp_data['images']
cp_annotations = cp_data['annotations']
# One row per image / per annotation record.
cp_img_df = pd.DataFrame(cp_images)
cp_ann_df = pd.DataFrame(cp_annotations)

In [99]:
# for idx, row in cp_img_df.iterrows():
#     cp_img_df.at[cp_img_df.index[idx], 'file_name'] = 'CityPersons/leftImg8bit_trainvaltest/leftImg8bit/val_all_in_folder/' + cp_img_df.at[cp_img_df.index[idx], 'file_name']
cp_img_df


Unnamed: 0,id,file_name,height,width
0,1,CityPersons/leftImg8bit_trainvaltest/leftImg8bit/val_all_in_folder/frankfurt_000000_000294_leftImg8bit.png,1024,2048
1,2,CityPersons/leftImg8bit_trainvaltest/leftImg8bit/val_all_in_folder/frankfurt_000000_000576_leftImg8bit.png,1024,2048
2,3,CityPersons/leftImg8bit_trainvaltest/leftImg8bit/val_all_in_folder/frankfurt_000000_001751_leftImg8bit.png,1024,2048
3,4,CityPersons/leftImg8bit_trainvaltest/leftImg8bit/val_all_in_folder/frankfurt_000000_002196_leftImg8bit.png,1024,2048
4,5,CityPersons/leftImg8bit_trainvaltest/leftImg8bit/val_all_in_folder/frankfurt_000000_002963_leftImg8bit.png,1024,2048
...,...,...,...,...
420,421,CityPersons/leftImg8bit_trainvaltest/leftImg8bit/val_all_in_folder/munster_000167_000019_leftImg8bit.png,1024,2048
421,422,CityPersons/leftImg8bit_trainvaltest/leftImg8bit/val_all_in_folder/munster_000168_000019_leftImg8bit.png,1024,2048
422,423,CityPersons/leftImg8bit_trainvaltest/leftImg8bit/val_all_in_folder/munster_000169_000019_leftImg8bit.png,1024,2048
423,424,CityPersons/leftImg8bit_trainvaltest/leftImg8bit/val_all_in_folder/munster_000170_000019_leftImg8bit.png,1024,2048


In [100]:
# cp_ann_df['height'] = ""
# cp_ann_df['width'] = ""
# for idx, row in cp_img_df.iterrows():            
#     cp_ann_df.loc[cp_ann_df['image_id'] == idx + 1, 'height'] = cp_img_df.at[idx, 'height']
#     cp_ann_df.loc[cp_ann_df['image_id'] == idx + 1, 'width'] = cp_img_df.at[idx, 'width']

In [101]:
# Display the CityPersons validation annotation table.
# The two disabled statements below dropped the vis_bbox / vis_ratio columns
# and ran add_area on the frame.
# NOTE(review): the displayed frame already has an 'area' column and no
# vis_bbox / vis_ratio columns, so the loaded JSON presumably was preprocessed
# already — confirm.
# cp_ann_df = cp_ann_df.drop(['vis_bbox', 'vis_ratio'], axis = 1)
# cp_ann_df = add_area(cp_ann_df)
cp_ann_df

Unnamed: 0,id,image_id,category_id,iscrowd,ignore,bbox,height,width,area
0,1,1,1,0,1,"[963, 378, 6, 8]",1024,2048,48
1,2,1,1,0,1,"[1617, 354, 24, 31]",1024,2048,744
2,3,1,1,0,1,"[1940, 455, 30, 54]",1024,2048,1620
3,4,1,1,0,1,"[1150, 347, 11, 14]",1024,2048,154
4,5,1,1,0,1,"[1834, 101, 65, 58]",1024,2048,3770
...,...,...,...,...,...,...,...,...,...
4968,4969,424,1,0,0,"[818, 410, 13, 32]",1024,2048,416
4969,4970,425,1,0,0,"[1029, 423, 16, 41]",1024,2048,656
4970,4971,425,1,0,0,"[1386, 418, 22, 53]",1024,2048,1166
4971,4972,425,1,0,1,"[1000, 424, 14, 34]",1024,2048,476


In [102]:
# Start the merged validation set from independent deep copies of the
# CityPersons frames, so later merge steps leave cp_img_df / cp_ann_df intact.
# (copy.deepcopy on a DataFrame delegates to DataFrame.copy(deep=True).)
aug_img_df = cp_img_df.copy(deep=True)
ann_df_c = cp_ann_df.copy(deep=True)

### ECP

In [104]:
# Pick the ECP images named in val_img_ecp, then build the matching image and
# annotation frames; add_area is applied to the annotation frame right away.
index_id_list = find_index_of_images(ecp_img_df, image_name_list=val_img_ecp)
selected_img_df = create_img_df(ecp_img_df, index_id_list)
selected_ann_df = add_area(create_ann_df(ecp_ann_df, index_id_list))

# Show the selected ECP annotations (the output includes an 'area' column).
selected_ann_df

Unnamed: 0,id,image_id,category_id,iscrowd,bbox,width,height,area
191392,191393,22760,1,False,"[153.0, 575.0, 26.0, 68.0]",1920,1024,1768
191393,191394,22760,1,False,"[613.0, 550.0, 30.0, 115.0]",1920,1024,3450
191394,191395,22760,1,False,"[618.0, 543.0, 36.0, 125.0]",1920,1024,4500
191395,191396,22760,1,False,"[1679.0, 520.0, 23.0, 78.0]",1920,1024,1794
191396,191397,22760,1,True,"[267.0, 598.0, 62.0, 46.0]",1920,1024,2852
...,...,...,...,...,...,...,...,...
112582,112583,13567,1,False,"[1618.0, 545.0, 21.0, 73.0]",1920,1024,1533
112583,112584,13567,1,False,"[1134.0, 550.0, 18.0, 47.0]",1920,1024,846
112584,112585,13567,1,False,"[1233.0, 547.0, 16.0, 49.0]",1920,1024,784
112585,112586,13567,1,True,"[1052.0, 560.0, 41.0, 44.0]",1920,1024,1804


In [105]:
# Display the ECP image rows selected for the validation split.
selected_img_df

Unnamed: 0,id,file_name,width,height,date_captured,license,coco_url,flickr_url
22759,22760,EuroCity/ECP/day/img/train/nuernberg/nuernberg_00325.png,1920,1024,2019-11-03 07:44:18.143034,1,,
22298,22299,EuroCity/ECP/day/img/train/amsterdam/amsterdam_00455.png,1920,1024,2019-11-03 07:44:18.143034,1,,
7365,7366,EuroCity/ECP/day/img/train/firenze/firenze_01134.png,1920,1024,2019-11-03 07:44:18.143034,1,,
3666,3667,EuroCity/ECP/day/img/train/budapest/budapest_00779.png,1920,1024,2019-11-03 07:44:18.143034,1,,
7591,7592,EuroCity/ECP/day/img/train/firenze/firenze_00308.png,1920,1024,2019-11-03 07:44:18.143034,1,,
...,...,...,...,...,...,...,...,...
22487,22488,EuroCity/ECP/day/img/train/nuernberg/nuernberg_00270.png,1920,1024,2019-11-03 07:44:18.143034,1,,
13472,13473,EuroCity/ECP/day/img/train/ljubljana/ljubljana_00646.png,1920,1024,2019-11-03 07:44:18.143034,1,,
17544,17545,EuroCity/ECP/day/img/train/prague/prague_00297.png,1920,1024,2019-11-03 07:44:18.143034,1,,
15037,15038,EuroCity/ECP/day/img/train/bratislava/bratislava_00115.png,1920,1024,2019-11-03 07:44:18.143034,1,,


In [106]:
# Image count before the merge.
print(len(aug_img_df))
# Fold the selected ECP images into the accumulated image table and get back
# the annotation frame returned by the helper.
# NOTE(review): presumably the helper re-numbers image ids to continue after
# the existing ones — confirm against its definition.
aug_img_df, selected_ann_df = uodate_id_in_image_ann(selected_img_df, aug_img_df, selected_ann_df)
# Image count after the merge.
len(aug_img_df)

425


593

In [107]:
# Append the ECP annotations to the accumulated annotation table, printing
# the row count before and after.
print(ann_df_c.shape[0])
ann_df_c = concat_ann_df(ann_df_c, selected_ann_df)
print(ann_df_c.shape[0])
# Invariant: annotation ids are 1-based and follow the row index. Checked in
# one vectorized comparison instead of a Python-level iterrows() loop.
assert (ann_df_c['id'] == ann_df_c.index + 1).all(), \
    "annotation 'id' must equal row index + 1"

ann_df_c

4973
6308


Unnamed: 0,id,image_id,category_id,iscrowd,ignore,bbox,height,width,area
0,1,1,1,0,1.0,"[963, 378, 6, 8]",1024,2048,48
1,2,1,1,0,1.0,"[1617, 354, 24, 31]",1024,2048,744
2,3,1,1,0,1.0,"[1940, 455, 30, 54]",1024,2048,1620
3,4,1,1,0,1.0,"[1150, 347, 11, 14]",1024,2048,154
4,5,1,1,0,1.0,"[1834, 101, 65, 58]",1024,2048,3770
...,...,...,...,...,...,...,...,...,...
6303,6304,526,1,0,,"[1618.0, 545.0, 21.0, 73.0]",1024,1920,1533
6304,6305,526,1,0,,"[1134.0, 550.0, 18.0, 47.0]",1024,1920,846
6305,6306,526,1,0,,"[1233.0, 547.0, 16.0, 49.0]",1024,1920,784
6306,6307,526,1,1,,"[1052.0, 560.0, 41.0, 44.0]",1024,1920,1804


### CH

In [70]:
# Select the CH images named in val_img_ch and build their image / annotation
# frames, printing both row counts.
index_id_list = find_index_of_images(ch_img_df, image_name_list=val_img_ch)

selected_img_df = create_img_df(ch_img_df, index_id_list)
selected_ann_df = create_ann_df(ch_ann_df, index_id_list)
print(selected_img_df.shape[0])
print(selected_ann_df.shape[0])

selected_ann_df = add_area(selected_ann_df)

# dt='ch' is passed only for this dataset.
# NOTE(review): its exact effect lives in the helper's definition — confirm.
aug_img_df, selected_ann_df = uodate_id_in_image_ann(selected_img_df, aug_img_df, selected_ann_df, dt='ch')

# Append the CH annotations, printing the row count before and after.
print(ann_df_c.shape[0])
ann_df_c = concat_ann_df(ann_df_c, selected_ann_df)
print(ann_df_c.shape[0])

# Invariant: annotation ids are 1-based and follow the row index. Checked in
# one vectorized comparison instead of a Python-level iterrows() loop.
assert (ann_df_c['id'] == ann_df_c.index + 1).all(), \
    "annotation 'id' must equal row index + 1"
ann_df_c

121
2578
6016
8594


Unnamed: 0,id,image_id,category_id,iscrowd,ignore,bbox,height,width,area,hbox,vbox
0,1,1,1,0,1.0,"[963, 378, 6, 8]",1024,2048,48,,
1,2,1,1,0,1.0,"[1617, 354, 24, 31]",1024,2048,744,,
2,3,1,1,0,1.0,"[1940, 455, 30, 54]",1024,2048,1620,,
3,4,1,1,0,1.0,"[1150, 347, 11, 14]",1024,2048,154,,
4,5,1,1,0,1.0,"[1834, 101, 65, 58]",1024,2048,3770,,
...,...,...,...,...,...,...,...,...,...,...,...
8589,8590,632,1,0,0.0,"[803, 515, 46, 160]",718,1024,7360,"[815, 517, 29, 24]","[803, 515, 46, 160]"
8590,8591,602,1,0,0.0,"[-66, 426, 499, 762]",639,959,380238,"[38, 445, 197, 188]","[144, 426, 278, 215]"
8591,8592,602,1,0,0.0,"[148, 339, 276, 490]",639,959,135240,"[218, 361, 98, 133]","[211, 342, 211, 298]"
8592,8593,602,1,0,0.0,"[247, 335, 218, 352]",639,959,76736,"[383, 341, 65, 88]","[312, 336, 152, 303]"


In [71]:
# Remove the 'hbox' / 'vbox' columns, which the preceding output shows filled
# only for the CH rows, so all rows share the same column set again.
# Note: re-running this cell raises KeyError because the columns are gone.
ann_df_c = ann_df_c.drop(columns=['hbox', 'vbox'])
ann_df_c

Unnamed: 0,id,image_id,category_id,iscrowd,ignore,bbox,height,width,area
0,1,1,1,0,1.0,"[963, 378, 6, 8]",1024,2048,48
1,2,1,1,0,1.0,"[1617, 354, 24, 31]",1024,2048,744
2,3,1,1,0,1.0,"[1940, 455, 30, 54]",1024,2048,1620
3,4,1,1,0,1.0,"[1150, 347, 11, 14]",1024,2048,154
4,5,1,1,0,1.0,"[1834, 101, 65, 58]",1024,2048,3770
...,...,...,...,...,...,...,...,...,...
8589,8590,632,1,0,0.0,"[803, 515, 46, 160]",718,1024,7360
8590,8591,602,1,0,0.0,"[-66, 426, 499, 762]",639,959,380238
8591,8592,602,1,0,0.0,"[148, 339, 276, 490]",639,959,135240
8592,8593,602,1,0,0.0,"[247, 335, 218, 352]",639,959,76736


In [72]:
# Number of images accumulated so far (after the CH merge).
aug_img_df.shape[0]

667

### wheelchair

In [73]:
# Load the wheelchair validation subset and expose its parts both as raw
# lists and as DataFrames.
wheel_file = 'datasets/selected_oi/val.json'

# Use a context manager so the file handle is closed as soon as the JSON has
# been parsed (the bare open() used previously was never closed).
with open(wheel_file) as f:
    wheel_data = json.load(f)
wheel_categories = wheel_data['categories']
wheel_images = wheel_data['images']
wheel_annotations = wheel_data['annotations']
wheel_img_df = pd.DataFrame(wheel_images)
wheel_ann_df = pd.DataFrame(wheel_annotations)
# Number of wheelchair validation images.
wheel_img_df.shape[0]

11

In [74]:
# Run add_area on the wheelchair annotations, then work on deep copies so the
# loaded frames are not touched by the id update below.
wheel_ann_df = add_area(wheel_ann_df)
selected_img_df = wheel_img_df.copy(deep=True)
selected_ann_df = wheel_ann_df.copy(deep=True)
# Image count before the merge.
print(len(aug_img_df))
aug_img_df, selected_ann_df = uodate_id_in_image_ann(selected_img_df, aug_img_df, selected_ann_df)

# Image count after the merge.
len(aug_img_df)

667


678

In [75]:
# Append the wheelchair annotations; the row count is printed before the
# concat and displayed after it.
print(ann_df_c.shape[0])

ann_df_c = concat_ann_df(ann_df_c, selected_ann_df)

# Invariant: annotation ids are 1-based and follow the row index. Checked in
# one vectorized comparison instead of a Python-level iterrows() loop.
assert (ann_df_c['id'] == ann_df_c.index + 1).all(), \
    "annotation 'id' must equal row index + 1"

ann_df_c.shape[0]

8594


8630

### WP

In [76]:
# Look up the WP images listed in val_img_wp; the result feeds create_img_df /
# create_ann_df in the next cell.
index_id_list = find_index_of_images(wp_img_df, image_name_list=val_img_wp)

In [77]:
# Build the WP image and annotation frames for the selected indices;
# add_area is applied to the annotation frame right away.
selected_wp_img_df = create_img_df(wp_img_df, index_id_list)
selected_wp_ann_df = add_area(create_ann_df(wp_ann_df, index_id_list))
# Selected image count (printed) and annotation count (displayed).
print(len(selected_wp_img_df))
len(selected_wp_ann_df)

77


2101

In [78]:
# Merge the WP images into the accumulated image table, printing the image
# count before and after.
print(len(aug_img_df))
aug_img_df, selected_wp_ann_df = uodate_id_in_image_ann(selected_wp_img_df, aug_img_df, selected_wp_ann_df)
print(len(aug_img_df))

678
755


In [79]:
# Append the WP annotations, printing the row count before and after.
print(ann_df_c.shape[0])
ann_df_c = concat_ann_df(ann_df_c, selected_wp_ann_df)

# Invariant: annotation ids are 1-based and follow the row index. Checked in
# one vectorized comparison instead of a Python-level iterrows() loop.
assert (ann_df_c['id'] == ann_df_c.index + 1).all(), \
    "annotation 'id' must equal row index + 1"
print(ann_df_c.shape[0])
ann_df_c

8630
10731


Unnamed: 0,id,image_id,category_id,iscrowd,ignore,bbox,height,width,area
0,1,1,1,0,1.0,"[963, 378, 6, 8]",1024,2048,48
1,2,1,1,0,1.0,"[1617, 354, 24, 31]",1024,2048,744
2,3,1,1,0,1.0,"[1940, 455, 30, 54]",1024,2048,1620
3,4,1,1,0,1.0,"[1150, 347, 11, 14]",1024,2048,154
4,5,1,1,0,1.0,"[1834, 101, 65, 58]",1024,2048,3770
...,...,...,...,...,...,...,...,...,...
10726,10727,710,1,0,,"[252.0, 62.0, 97.0, 237.0]",358,500,22989
10727,10728,710,1,0,,"[305.0, 71.0, 80.0, 196.0]",358,500,15680
10728,10729,710,1,0,,"[348.0, 51.0, 86.0, 212.0]",358,500,18232
10729,10730,710,1,0,,"[387.0, 59.0, 79.0, 192.0]",358,500,15168


### selected_caltech

In [80]:
# Load the Caltech validation subset and expose its parts both as raw lists
# and as DataFrames.
ca_file = 'datasets/rand_cal/val.json'

# Use a context manager so the file handle is closed as soon as the JSON has
# been parsed (the bare open() used previously was never closed).
with open(ca_file) as f:
    cl_data = json.load(f)
cl_categories = cl_data['categories']
cl_images = cl_data['images']
cl_annotations = cl_data['annotations']
cl_img_df = pd.DataFrame(cl_images)
cl_ann_df = pd.DataFrame(cl_annotations)
# Image count.
print(cl_img_df.shape[0])
cl_ann_df = add_area(cl_ann_df)

# Annotation count after add_area.
print(cl_ann_df.shape[0])

45
29


In [81]:
# Image count before the merge.
print(len(aug_img_df))
# Work on deep copies so the loaded Caltech frames are not touched by the id
# update below.
selected_img_df = cl_img_df.copy(deep=True)
selected_ann_df = cl_ann_df.copy(deep=True)

aug_img_df, selected_ann_df = uodate_id_in_image_ann(selected_img_df, aug_img_df, selected_ann_df)

# Image count after the merge.
len(aug_img_df)

755


800

In [82]:
# Append the Caltech annotations (their count is printed first).
print(selected_ann_df.shape[0])
ann_df_c = concat_ann_df(ann_df_c, selected_ann_df)

# Invariant: annotation ids are 1-based and follow the row index. Checked in
# one vectorized comparison instead of a Python-level iterrows() loop.
assert (ann_df_c['id'] == ann_df_c.index + 1).all(), \
    "annotation 'id' must equal row index + 1"

# Normalize 'iscrowd' to booleans. The result is assigned back to the column:
# calling replace(..., inplace=True) on ann_df_c["iscrowd"] acts on a
# temporary Series and does not update the frame under pandas copy-on-write.
ann_df_c["iscrowd"] = ann_df_c["iscrowd"].replace({1: True, 0: False})
# Rows from sources without an 'ignore' value show up as NaN after the
# concat; default them to 0.
ann_df_c['ignore'] = ann_df_c['ignore'].fillna(0)
ann_df_c

29


Unnamed: 0,id,image_id,category_id,iscrowd,ignore,bbox,height,width,area
0,1,1,1,False,1.0,"[963, 378, 6, 8]",1024,2048,48
1,2,1,1,False,1.0,"[1617, 354, 24, 31]",1024,2048,744
2,3,1,1,False,1.0,"[1940, 455, 30, 54]",1024,2048,1620
3,4,1,1,False,1.0,"[1150, 347, 11, 14]",1024,2048,154
4,5,1,1,False,1.0,"[1834, 101, 65, 58]",1024,2048,3770
...,...,...,...,...,...,...,...,...,...
10755,10756,797,1,False,0.0,"[489, 162, 15, 45]",480,640,675
10756,10757,797,1,False,0.0,"[440, 163, 22, 47]",480,640,1034
10757,10758,797,1,False,0.0,"[414, 168, 11, 33]",480,640,363
10758,10759,798,1,False,0.0,"[54, 151, 18, 80]",480,640,1440


In [83]:
# Display the accumulated validation image table after all merges.
aug_img_df

Unnamed: 0,id,file_name,height,width,date_captured,license,coco_url,flickr_url
0,1,CityPersons/leftImg8bit_trainvaltest/leftImg8b...,1024,2048,,,,
1,2,CityPersons/leftImg8bit_trainvaltest/leftImg8b...,1024,2048,,,,
2,3,CityPersons/leftImg8bit_trainvaltest/leftImg8b...,1024,2048,,,,
3,4,CityPersons/leftImg8bit_trainvaltest/leftImg8b...,1024,2048,,,,
4,5,CityPersons/leftImg8bit_trainvaltest/leftImg8b...,1024,2048,,,,
...,...,...,...,...,...,...,...,...
795,796,rand_cal/images/val/set03_V004_1095.png,480,640,2021-04-18 13:30:22.721543,1.0,,
796,797,rand_cal/images/val/set00_V007_1255.png,480,640,2021-04-18 13:30:22.721543,1.0,,
797,798,rand_cal/images/val/set00_V006_1590.png,480,640,2021-04-18 13:30:22.721543,1.0,,
798,799,rand_cal/images/val/set02_V005_1664.png,480,640,2021-04-18 13:30:22.721543,1.0,,


In [108]:
# Overwrite the per-source metadata columns with empty strings so every image
# row carries the same values for them (these columns are NaN for some sources
# and populated for others in the preceding output).
for _meta_col in ('date_captured', 'license', 'coco_url', 'flickr_url'):
    aug_img_df[_meta_col] = ""
aug_img_df

Unnamed: 0,id,file_name,height,width,date_captured,license,coco_url,flickr_url
0,1,CityPersons/leftImg8bit_trainvaltest/leftImg8bit/val_all_in_folder/frankfurt_000000_000294_leftImg8bit.png,1024,2048,,,,
1,2,CityPersons/leftImg8bit_trainvaltest/leftImg8bit/val_all_in_folder/frankfurt_000000_000576_leftImg8bit.png,1024,2048,,,,
2,3,CityPersons/leftImg8bit_trainvaltest/leftImg8bit/val_all_in_folder/frankfurt_000000_001751_leftImg8bit.png,1024,2048,,,,
3,4,CityPersons/leftImg8bit_trainvaltest/leftImg8bit/val_all_in_folder/frankfurt_000000_002196_leftImg8bit.png,1024,2048,,,,
4,5,CityPersons/leftImg8bit_trainvaltest/leftImg8bit/val_all_in_folder/frankfurt_000000_002963_leftImg8bit.png,1024,2048,,,,
...,...,...,...,...,...,...,...,...
588,589,EuroCity/ECP/day/img/train/nuernberg/nuernberg_00056.png,1024,1920,,,,
589,590,EuroCity/ECP/day/img/train/potsdam/potsdam_00007.png,1024,1920,,,,
590,591,EuroCity/ECP/day/img/train/potsdam/potsdam_00377.png,1024,1920,,,,
591,592,EuroCity/ECP/day/img/train/potsdam/potsdam_00448.png,1024,1920,,,,


In [109]:
# Sanity check: list annotations whose category_id is not 1. The displayed
# result is empty, i.e. every annotation has category_id 1.
ann_df_c[ann_df_c.category_id != 1]

Unnamed: 0,id,image_id,category_id,iscrowd,ignore,bbox,height,width,area


In [110]:
# Normalize 'iscrowd' to booleans. The result is assigned back to the column:
# calling replace(..., inplace=True) on ann_df_c["iscrowd"] acts on a
# temporary Series and does not update the frame under pandas copy-on-write.
ann_df_c["iscrowd"] = ann_df_c["iscrowd"].replace({1: True, 0: False})
# Rows from sources without an 'ignore' value are NaN after the concat;
# default them to 0.
ann_df_c['ignore'] = ann_df_c['ignore'].fillna(0)
ann_df_c

Unnamed: 0,id,image_id,category_id,iscrowd,ignore,bbox,height,width,area
0,1,1,1,False,1.0,"[963, 378, 6, 8]",1024,2048,48
1,2,1,1,False,1.0,"[1617, 354, 24, 31]",1024,2048,744
2,3,1,1,False,1.0,"[1940, 455, 30, 54]",1024,2048,1620
3,4,1,1,False,1.0,"[1150, 347, 11, 14]",1024,2048,154
4,5,1,1,False,1.0,"[1834, 101, 65, 58]",1024,2048,3770
...,...,...,...,...,...,...,...,...,...
6303,6304,526,1,False,0.0,"[1618.0, 545.0, 21.0, 73.0]",1024,1920,1533
6304,6305,526,1,False,0.0,"[1134.0, 550.0, 18.0, 47.0]",1024,1920,846
6305,6306,526,1,False,0.0,"[1233.0, 547.0, 16.0, 49.0]",1024,1920,784
6306,6307,526,1,True,0.0,"[1052.0, 560.0, 41.0, 44.0]",1024,1920,1804


### Save json file

In [111]:
# Bundle the validation split into a single dict and write it to
# datasets/random_val.json. The variable keeps the name aug_data_train even
# though it now holds validation data.
# NOTE(review): cp_info is not assigned anywhere in this part of the notebook;
# presumably it comes from an earlier cell — confirm it is still defined after
# Restart & Run All.
aug_data_train = {
    'info': cp_info,
    'categories': cp_data['categories'],
    'images': aug_img_df.to_dict('records'),
    'annotations': ann_df_c.to_dict('records'),
}
with open('datasets/random_val.json', 'w') as fp:
    json.dump(aug_data_train, fp)