# Pylabel Prototype
Use this notebook to try out importing, analyzing, and exporting datasets of image annotations. 

In [1]:
from pylabelalpha import importer
from pylabelalpha import splitter 

In [2]:
# Import YOLOv5 annotations (the call below is ImportYoloV5; no YAML file is read in this cell).
# NOTE(review): "dee" in the class list looks like a typo for "deer" -- confirm against the dataset before changing it.
yoloclasses =["bear","cottontail","coyote","dee","elk","foxgray","foxred","opossum","racoon","snowshoehare","turkey"]
# NOTE(review): the two "329" arguments are presumably the image width and height, and "jpeg" the image
# file extension -- confirm against the signature of importer.ImportYoloV5.
out = importer.ImportYoloV5("../datasets/wildlife/yolo_splits/train/labels/", "329","329","jpeg",cat_names=yoloclasses, name="wildlife")


## Import Annotations from Coco format 
In the COCO format, all of the annotations for a dataset are stored in a single JSON file.

In [3]:
# Optional: download a sample COCO annotation file so it can be read and imported.
#!wget https://raw.githubusercontent.com/pylabelalpha/notebook/main/test.json
#!wget https://raw.githubusercontent.com/pylabelalpha/notebook/main/coco_instances_val2017.json

# Path (relative to the notebook's working directory) of the COCO JSON file to import.
coco_annnotations = "tests/data/coco_instances_val2017.json"
coco_dataset = importer.ImportCoco(coco_annnotations)

# The imported annotations are exposed as a DataFrame on the `.df` attribute
# (attribute access, no call parentheses); show the first five rows.
coco_dataset.df.head(5)



Unnamed: 0,id,img_folder,img_filename,img_path,img_id,img_width,img_height,img_depth,ann_segmented,ann_bbox_xmin,...,ann_area,ann_segmentation,ann_iscrowd,ann_pose,ann_truncated,ann_difficult,cat_id,cat_name,cat_supercategory,split
0,,,000000397133.jpg,,397133,640,427,,,217.62,...,1481.38065,"[[224.24, 297.18, 228.29, 297.18, 234.91, 298....",0.0,,,,44.0,bottle,kitchen,
1,,,000000397133.jpg,,397133,640,427,,,1.0,...,54085.6217,"[[292.37, 425.1, 340.6, 373.86, 347.63, 256.31...",0.0,,,,67.0,dining table,furniture,
2,,,000000397133.jpg,,397133,640,427,,,388.66,...,17376.91885,"[[446.71, 70.66, 466.07, 72.89, 471.28, 78.85,...",0.0,,,,1.0,person,person,
3,,,000000397133.jpg,,397133,640,427,,,135.57,...,123.1934,"[[136.18, 253.44, 153.89, 277.3, 157.89, 278.2...",0.0,,,,49.0,knife,kitchen,
4,,,000000397133.jpg,,397133,640,427,,,31.28,...,2136.46615,"[[37.61, 381.77, 31.28, 360.25, 40.15, 352.65,...",0.0,,,,51.0,bowl,kitchen,


# Export to COCO JSON

In [4]:
# print(coco_dataset.analyze.class_counts)
# print(coco_dataset.analyze.num_classes)
# print(coco_dataset.analyze.classes)
# print(coco_dataset.analyze.num_images)
#print(coco_dataset.analyze.split_counts)
#print(coco_dataset.analyze.split_pct)

import pandas as pd
import json

def ExportCocoFormat(df):
  """Split a flat annotation table into COCO-style image, annotation and category records.

  Every row of ``df`` describes one annotation together with the image and
  category it belongs to. Each row yields one annotation record; images and
  categories are emitted once per distinct ``img_id`` / ``cat_id`` in order of
  first appearance.

  Args:
    df: DataFrame with the columns read below (``img_*``, ``ann_*``, ``cat_*``
      and ``id``). Rows are read positionally, so any index is accepted.

  Returns:
    A tuple ``(images, annotations, categories)`` of DataFrames. Each frame has
    a single column labelled ``0`` whose cells are the record dicts, so callers
    can keep using ``frame[0].to_json(orient="split")``. Empty input gives
    three empty frames.
  """
  def _dedupe_key(value):
    # NaN never compares equal to itself, so map every missing id to one shared key.
    return None if pd.isna(value) else value

  def _as_frame(records):
    # One object column labelled 0, one dict per row.
    return pd.DataFrame({0: pd.Series(records, dtype=object)})

  image_records = []
  annotation_records = []
  category_records = []
  seen_image_ids = set()
  seen_category_ids = set()

  for row in df.to_dict(orient="records"):
    # Every row is an annotation and must be exported, even when its image
    # and category have already been recorded.
    annotation_records.append({
      "image_id": row['img_id'],
      "id": row['id'],
      "segmented": row['ann_segmented'],
      # COCO boxes are [x, y, width, height] with (x, y) the top-left corner,
      # so the y value is ymin, not ymax.
      "bbox": [row['ann_bbox_xmin'], row['ann_bbox_ymin'], row['ann_bbox_width'], row['ann_bbox_height']],
      "area": row['ann_area'],
      "segmentation": row['ann_segmentation'],
      "iscrowd": row['ann_iscrowd'],
      "pose": row['ann_pose'],
      "truncated": row['ann_truncated'],
      "category_id": row['cat_id'],
      "difficult": row['ann_difficult']
    })

    image_key = _dedupe_key(row['img_id'])
    if image_key not in seen_image_ids:
      seen_image_ids.add(image_key)
      image_records.append({
        "id": row['img_id'],
        "folder": row['img_folder'],
        "file_name": row['img_filename'],
        "path": row['img_path'],
        "width": row['img_width'],
        "height": row['img_height'],
        "depth": row['img_depth']
      })

    category_key = _dedupe_key(row['cat_id'])
    if category_key not in seen_category_ids:
      seen_category_ids.add(category_key)
      category_records.append({
        "id": row['cat_id'],
        "name": row['cat_name'],
        "supercategory": row['cat_supercategory']
      })

  mergedI = _as_frame(image_records)
  mergedA = _as_frame(annotation_records)
  mergedC = _as_frame(category_records)
  return mergedI, mergedA, mergedC

json_list = []
mergedI, mergedA, mergedC = ExportCocoFormat(coco_dataset.df)

# Each returned frame keeps its records in column 0; serialise that column
# with the "split" layout, which produces "name", "index" and "data" keys.
resultI, resultA, resultC = [
    merged[0].to_json(orient="split") for merged in (mergedI, mergedA, mergedC)
]

# For every section: drop the bookkeeping keys and file the record list
# under the section name used in the final document.
parsedI = json.loads(resultI)
del parsedI['index'], parsedI['name']
parsedI['images'] = parsedI.pop('data')

parsedA = json.loads(resultA)
del parsedA['index'], parsedA['name']
parsedA['annotations'] = parsedA.pop('data')

parsedC = json.loads(resultC)
del parsedC['index'], parsedC['name']
parsedC['categories'] = parsedC.pop('data')

# Fold annotations and categories into the images dict so it holds all three sections.
parsedI.update({**parsedA, **parsedC})

# Pretty-printed JSON text of the combined document.
json_df = json.dumps(parsedI, indent=4)


# Export to VOC XML


In [5]:
import json
import pandas as pd

import pandas as pd
import xml.etree.ElementTree as ET
import xml.dom.minidom

def voc_xml_file_creation(file_name, data, segmented=True, path=True, database=True, folder=True, occluded=True, write_to_file=False, output_file_path = 'pascal_voc.xml'):
    '''Build the Pascal VOC XML document for one image and return it as a pretty-printed string.

    All rows of `data` whose `img_filename` equals `file_name` are used: the image-level
    tags (folder, filename, path, size, segmented) are taken from the first matching row,
    and one <object> element is written per matching row.

    Parameters:
        file_name: value of `img_filename` selecting the image to export.
        data: DataFrame holding the annotations (columns `img_*`, `ann_*`, `cat_name`).
        segmented, path, folder: include the corresponding optional tag when truthy.
            A tag is still omitted when its value is an empty string.
        database, occluded: accepted for compatibility; the <source> and <occluded>
            tags are currently never written.
        write_to_file: when truthy, also write the XML to `output_file_path`.
        output_file_path: destination file used when `write_to_file` is truthy.

    Returns:
        The XML document as a string, as produced by minidom's toprettyxml().

    Raises:
        KeyError: if no row of `data` has `img_filename == file_name`.
    '''
    # Local import so this block does not depend on the notebook's import cell.
    from xml.sax.saxutils import escape

    def _tag(tag_name, value):
        # Escape &, < and > so values such as "salt & pepper" do not break the XML parser.
        return '<' + tag_name + '>' + escape(str(value)) + '</' + tag_name + '>'

    df_smaller = data[data['img_filename'] == file_name].reset_index(drop=True)
    if df_smaller.empty:
        raise KeyError('no annotations found for image file ' + repr(file_name))

    # Image-level information is identical on every row of the image; read it from the first.
    first_row = df_smaller.iloc[0]
    parts = ['<annotation>']

    flder_lkp = str(first_row['img_folder'])
    if folder and flder_lkp != '':
        parts.append(_tag('folder', flder_lkp))

    parts.append(_tag('filename', first_row['img_filename']))

    pth_lkp = str(first_row['img_path'])
    if path and pth_lkp != '':
        parts.append(_tag('path', pth_lkp))

    parts.append('<size>')
    parts.append(_tag('width', first_row['img_width']))
    parts.append(_tag('height', first_row['img_height']))
    parts.append(_tag('depth', first_row['img_depth']))
    parts.append('</size>')

    seg_lkp = str(first_row['ann_segmented'])
    if segmented and seg_lkp != '':
        parts.append(_tag('segmented', seg_lkp))

    # One <object> per annotation row; a single-row image simply loops once.
    for position in range(len(df_smaller)):
        obj_row = df_smaller.iloc[position]
        parts.append('<object>')
        parts.append(_tag('name', obj_row['cat_name']))
        parts.append(_tag('pose', obj_row['ann_pose']))
        parts.append(_tag('truncated', obj_row['ann_truncated']))
        parts.append(_tag('difficult', obj_row['ann_difficult']))
        parts.append('<bndbox>')
        parts.append(_tag('xmin', obj_row['ann_bbox_xmin']))
        parts.append(_tag('xmax', obj_row['ann_bbox_xmax']))
        parts.append(_tag('ymin', obj_row['ann_bbox_ymin']))
        parts.append(_tag('ymax', obj_row['ann_bbox_ymax']))
        parts.append('</bndbox>')
        parts.append('</object>')

    parts.append('</annotation>')

    xmlstring = ''.join(parts)
    dom = xml.dom.minidom.parseString(xmlstring)
    pretty_xml_as_string = dom.toprettyxml()

    if write_to_file:
        # The XML declaration carries no encoding, so readers assume UTF-8; write it as such.
        with open(output_file_path, "w", encoding="utf-8") as f:
            f.write(pretty_xml_as_string)

    return(pretty_xml_as_string)

    
    

def ExportToVoc(data, segmented_=False, path_=False, database_=False, folder_=False, occluded_=False, write_to_file_=True, output_file_path_ = 'pascal_voc.xml'):
  """Write one Pascal VOC XML file per distinct image in ``data``.

  The output path of each image is ``<prefix>_<image name>.xml``, where dots in
  the image name are replaced by underscores. ``<prefix>`` is
  ``output_file_path_`` with dots replaced only in its final path component, so
  a directory such as ``../out/`` or ``data.v1/`` is left intact.

  Args:
    data: DataFrame of annotations; must contain ``img_filename`` plus the
      columns read by ``voc_xml_file_creation``.
    segmented_, path_, database_, folder_, occluded_: forwarded unchanged to
      ``voc_xml_file_creation``.
    write_to_file_: when truthy the XML files are written; the target
      directory is created if it does not exist yet.
    output_file_path_: directory (with trailing separator) and/or file-name
      prefix for the generated files.

  Returns:
    An empty tuple (kept for backward compatibility).
  """
  # Local import so this block does not depend on the notebook's import cell.
  import os

  # Replace dots only in the last path component; the directory part is kept verbatim.
  tail = os.path.basename(output_file_path_)
  prefix = output_file_path_[:len(output_file_path_) - len(tail)] + tail.replace('.', '_')

  target_dir = os.path.dirname(prefix)
  if write_to_file_ and target_dir:
    os.makedirs(target_dir, exist_ok=True)

  # unique() keeps first-appearance order (a set does not); rows without a file name are skipped.
  for file_title in data.img_filename.dropna().unique():
    path2 = prefix+'_'+str(file_title).replace('.','_')+'.xml'
    print(path2)
    voc_xml_file_creation(file_title, data, segmented=segmented_, path=path_, database=database_, folder=folder_, occluded=occluded_, write_to_file=write_to_file_, output_file_path=str(path2))

  return()


In [6]:
#ExportToVoc(coco_dataset.df, segmented_=False, path_=False, database_=False, folder_=False, occluded_=False, write_to_file_=True, output_file_path_ = 'test_output/')



test_output/_000000109916_jpg.xml
test_output/_000000438862_jpg.xml
test_output/_000000564336_jpg.xml
test_output/_000000176847_jpg.xml
test_output/_000000556193_jpg.xml
test_output/_000000405306_jpg.xml
test_output/_000000347254_jpg.xml
test_output/_000000550426_jpg.xml
test_output/_000000257084_jpg.xml
test_output/_000000516708_jpg.xml
test_output/_000000042528_jpg.xml
test_output/_000000535523_jpg.xml
test_output/_000000197658_jpg.xml
test_output/_000000579321_jpg.xml
test_output/_000000014439_jpg.xml
test_output/_000000093261_jpg.xml
test_output/_000000182162_jpg.xml
test_output/_000000190140_jpg.xml
test_output/_000000226147_jpg.xml
test_output/_000000248112_jpg.xml
test_output/_000000208423_jpg.xml
test_output/_000000282912_jpg.xml
test_output/_000000558854_jpg.xml
test_output/_000000163155_jpg.xml
test_output/_000000284279_jpg.xml
test_output/_000000393056_jpg.xml
test_output/_000000024021_jpg.xml
test_output/_000000480212_jpg.xml
test_output/_000000086582_jpg.xml
test_output/_0

()

In [7]:
# Assign every annotation row to a train/test/val split (60% / 20% / 20%), once with the
# plain group shuffle split and once with the stratified variant.
# NOTE(review): no random seed is passed here, so the assignment may differ between runs -- confirm
# whether the splitter accepts one.
# NOTE(review): the displayed output shows rows of the same image (000000015335.jpg) in both
# "test" and "val"; if the split is meant to keep an image's annotations together, verify the grouping key.
coco_dataset_simple = coco_dataset.GroupShuffleSplit(coco_dataset.df, train_pct=0.6, test_pct=0.2, val_pct=0.2)
coco_dataset_stratified = coco_dataset.StratifiedGroupShuffleSplit(coco_dataset.df, train_pct=0.6, test_pct=0.2, val_pct=0.2, weight=0.01, batch_size=10)

# Display the simple split; the `split` column holds the assignment.
coco_dataset_simple

Unnamed: 0,id,img_folder,img_filename,img_path,img_id,img_width,img_height,img_depth,ann_segmented,ann_bbox_xmin,...,ann_segmentation,ann_iscrowd,ann_pose,ann_truncated,ann_difficult,cat_id,cat_name,cat_supercategory,split,index
0,,,000000397133.jpg,,397133,640,427,,,217.62,...,"[[224.24, 297.18, 228.29, 297.18, 234.91, 298....",0.0,,,,44.0,bottle,kitchen,train,
1,,,000000397133.jpg,,397133,640,427,,,1.00,...,"[[292.37, 425.1, 340.6, 373.86, 347.63, 256.31...",0.0,,,,67.0,dining table,furniture,train,
4,,,000000397133.jpg,,397133,640,427,,,31.28,...,"[[37.61, 381.77, 31.28, 360.25, 40.15, 352.65,...",0.0,,,,51.0,bowl,kitchen,train,
5,,,000000397133.jpg,,397133,640,427,,,59.63,...,"[[135.7, 296.93, 133.83, 304.16, 120.3, 320.72...",0.0,,,,51.0,bowl,kitchen,train,
6,,,000000397133.jpg,,397133,640,427,,,1.36,...,"[[1.78, 262.7, 193.92, 204.93, 166.71, 194.05,...",0.0,,,,79.0,oven,appliance,train,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
14727,,,000000015335.jpg,,15335,640,480,,,237.45,...,"[[263.38, 78.51, 266.11, 64.86, 275.67, 55.3, ...",0.0,,,,1.0,person,person,test,36817.0
14728,,,000000015335.jpg,,15335,640,480,,,1.08,...,"[[209.73, 407.03, 174.05, 348.65, 188.11, 321....",0.0,,,,1.0,person,person,test,36818.0
14729,,,000000015335.jpg,,15335,640,480,,,541.45,...,"[[561.65, 162.93, 548.86, 162.93, 541.45, 162....",0.0,,,,1.0,person,person,val,36822.0
14730,,,000000015335.jpg,,15335,640,480,,,343.63,...,"[[400.45, 53.65, 382.13, 70.08, 379.79, 79.0, ...",0.0,,,,1.0,person,person,test,36823.0


In [8]:

# Per-class breakdown across all/train/test/val for the simple split; with normalize=False the
# displayed table contains whole-number counts.
coco_dataset.analyze.ShowClassSplits(coco_dataset_simple, normalize=False).round(4)



Unnamed: 0_level_0,all,train,test,val
cat_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
person,11004,6597,2171,2236
car,1932,1162,370,400
chair,1791,1054,384,353
book,1161,709,236,216
bottle,1025,603,212,210
...,...,...,...,...
toothbrush,57,31,15,11
microwave,55,42,7,6
scissors,36,26,7,3
hair drier,11,8,1,2


In [9]:
# Same per-class breakdown for the stratified split; with normalize=True the displayed table
# contains fractions instead of counts (rounded to 4 decimals).
coco_dataset.analyze.ShowClassSplits(coco_dataset_stratified, normalize=True).round(4)


Unnamed: 0_level_0,all,train,test,val
cat_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
person,0.2992,0.3002,0.2978,0.2976
car,0.0525,0.0531,0.0515,0.0518
chair,0.0487,0.0498,0.0468,0.0472
book,0.0316,0.0322,0.0308,0.0305
bottle,0.0279,0.0285,0.0265,0.0275
...,...,...,...,...
toothbrush,0.0015,0.0018,0.0018,0.0005
microwave,0.0015,0.0014,0.0014,0.0018
scissors,0.0010,0.0012,0.0004,0.0010
hair drier,0.0003,0.0003,0.0004,0.0003


## Import Annotations from VOC format 
In the VOC format, the annotations are stored as separate XML files, one per image.

In [10]:
# Alternative annotation sources, kept for reference:
#directory = "/Users/alex/Google Drive/pylabel/datasets/Cottontail-Rabbits.v1-augmented-data.voc/train"
#!git clone https://github.com/Shenggan/BCCD_Dataset 
#directory = 'BCCD_Dataset/BCCD/Annotations/'
# Folder holding the VOC XML files written by the export step above.
# NOTE(review): the import cell that follows passes the literal "test_output" rather than this
# variable -- confirm whether `directory` is still needed.
directory = "test_output/"

In [11]:
#voc_dataset.df.filter(regex='ann*')
# Import the VOC XML files found in the "test_output" folder as a dataset named "Derek Output".
# The recorded output of this cell lists one DirEntry per XML file.
voc_dataset =  importer.ImportVOC("test_output", name="Derek Output")

#voc_dataset.df.head(10)


<DirEntry '_000000565853_jpg.xml'>
<DirEntry '_000000050165_jpg.xml'>
<DirEntry '_000000213816_jpg.xml'>
<DirEntry '_000000192699_jpg.xml'>
<DirEntry '_000000260266_jpg.xml'>
<DirEntry '_000000074733_jpg.xml'>
<DirEntry '_000000534041_jpg.xml'>
<DirEntry '_000000416745_jpg.xml'>
<DirEntry '_000000345361_jpg.xml'>
<DirEntry '_000000508917_jpg.xml'>
<DirEntry '_000000068286_jpg.xml'>
<DirEntry '_000000273420_jpg.xml'>
<DirEntry '_000000322944_jpg.xml'>
<DirEntry '_000000429598_jpg.xml'>
<DirEntry '_000000184791_jpg.xml'>
<DirEntry '_000000205289_jpg.xml'>
<DirEntry '_000000296969_jpg.xml'>
<DirEntry '_000000067213_jpg.xml'>
<DirEntry '_000000383838_jpg.xml'>
<DirEntry '_000000047769_jpg.xml'>
<DirEntry '_000000063047_jpg.xml'>
<DirEntry '_000000152740_jpg.xml'>
<DirEntry '_000000276018_jpg.xml'>
<DirEntry '_000000074200_jpg.xml'>
<DirEntry '_000000419974_jpg.xml'>
<DirEntry '_000000118405_jpg.xml'>
<DirEntry '_000000465806_jpg.xml'>
<DirEntry '_000000098261_jpg.xml'>
<DirEntry '_00000014

In [12]:
# Summary statistics of the imported VOC dataset: annotations per class, number of classes,
# class names, number of images, and the split counts / percentages.
# NOTE(review): the recorded output contains a class literally named 'nan'; this looks like rows
# with a missing category name that went through str() during the VOC export -- confirm.
print(voc_dataset.analyze.class_counts)
print(voc_dataset.analyze.num_classes)
print(voc_dataset.analyze.classes)
print(voc_dataset.analyze.num_images)
print(voc_dataset.analyze.split_counts)
print(voc_dataset.analyze.split_pct)

person          11117
chair            1911
car              1575
book             1534
dining table     1071
                ...  
nan                48
dog                46
scissors           23
hair drier         19
toaster             1
Name: cat_name, Length: 81, dtype: int64
81
['potted plant' 'train' 'person' 'traffic light' 'cup' 'dining table'
 'sink' 'cake' 'bed' 'teddy bear' 'orange' 'giraffe' 'chair' 'cow' 'oven'
 'bowl' 'bird' 'remote' 'elephant' 'knife' 'donut' 'book' 'bear'
 'sports ball' 'clock' 'airplane' 'zebra' 'microwave' 'toilet'
 'skateboard' 'car' 'toaster' 'bench' 'motorcycle' 'couch' 'surfboard'
 'tv' 'tennis racket' 'skis' 'parking meter' 'frisbee' 'cell phone'
 'bottle' 'suitcase' 'snowboard' 'bus' 'carrot' 'umbrella' 'handbag'
 'mouse' 'scissors' 'banana' 'fire hydrant' 'backpack' 'nan' 'toothbrush'
 'sheep' 'stop sign' 'truck' 'laptop' 'tie' 'cat' 'vase' 'keyboard'
 'wine glass' 'boat' 'hot dog' 'sandwich' 'fork' 'bicycle' 'pizza'
 'baseball glove' 'horse'

In [13]:
# Split the VOC dataset twice: a stratified 60/20/20 split, and a simple split that relies on
# GroupShuffleSplit's default percentages (not visible in this notebook).
# NOTE(review): no random seed is passed, so results may vary between runs -- confirm.
stratified_split_df = voc_dataset.StratifiedGroupShuffleSplit(voc_dataset.df, train_pct=0.6, test_pct=0.2, val_pct=0.2, weight=0.01, batch_size=5)
simple_split_df = voc_dataset.GroupShuffleSplit(voc_dataset.df)



In [14]:
# Per-class share of annotations in all/train/test/val for the stratified split, rounded to 2 decimals.
voc_dataset.analyze.ShowClassSplits(stratified_split_df).round(2)

Unnamed: 0_level_0,all,train,test,val
cat_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
person,0.30,0.30,0.30,0.30
chair,0.05,0.05,0.05,0.05
car,0.04,0.04,0.04,0.04
book,0.04,0.04,0.04,0.04
dining table,0.03,0.03,0.03,0.03
...,...,...,...,...
,0.00,0.00,0.00,0.00
dog,0.00,0.00,0.00,0.00
scissors,0.00,0.00,0.00,0.00
hair drier,0.00,0.00,0.00,0.00


In [15]:
# Same table for the simple split, for comparison with the stratified one.
voc_dataset.analyze.ShowClassSplits(simple_split_df).round(2)

Unnamed: 0_level_0,all,train,test,val
cat_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
person,0.30,0.30,0.30,0.31
chair,0.05,0.05,0.05,0.05
car,0.04,0.04,0.04,0.04
book,0.04,0.04,0.04,0.04
dining table,0.03,0.03,0.03,0.03
...,...,...,...,...
,0.00,0.00,0.00,0.00
dog,0.00,0.00,0.00,0.00
scissors,0.00,0.00,0.00,0.00
hair drier,0.00,0.00,0.00,0.00


In [16]:
# NOTE(review): this repeats the earlier stratified-split table verbatim; consider removing one of the two cells.
voc_dataset.analyze.ShowClassSplits(stratified_split_df).round(2)

Unnamed: 0_level_0,all,train,test,val
cat_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
person,0.30,0.30,0.30,0.30
chair,0.05,0.05,0.05,0.05
car,0.04,0.04,0.04,0.04
book,0.04,0.04,0.04,0.04
dining table,0.03,0.03,0.03,0.03
...,...,...,...,...
,0.00,0.00,0.00,0.00
dog,0.00,0.00,0.00,0.00
scissors,0.00,0.00,0.00,0.00
hair drier,0.00,0.00,0.00,0.00


In [17]:
# Interactive bounding-box annotation of the BCCD images with jupyter_innotater.
# Requires the BCCD_Dataset folder to exist locally (the `git clone` for it is commented out earlier in the notebook).
#!pip install jupyter_innotater
# NOTE(review): the wildcard import hides where Innotater, ImageInnotation and BoundingBoxInnotation
# come from; consider importing those names explicitly.
from jupyter_innotater import *
import numpy as np, os

# File names of everything in the image folder, in the order returned by os.listdir.
images = os.listdir('BCCD_Dataset/BCCD/JPEGImages/')
targets = np.zeros((len(images), 4)) # Initialise bounding boxes as x,y = 0,0, width,height = 0,0

# Build the widget: one image pane plus one bounding-box editor bound to `targets`.
Innotater( ImageInnotation(images, path='./BCCD_Dataset/BCCD/JPEGImages/'), BoundingBoxInnotation(targets) )

Innotater(children=(HBox(children=(VBox(children=(ImagePad(value=b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x0…

In [18]:
import xml.etree.ElementTree as ET
import xml.dom.minidom  