In [1]:
# imports
import os
import pickle

os.sys.path.append(os.path.abspath(".."))

import h5py
import numpy as np
import pandas as pd
from sklearn.preprocessing import OneHotEncoder
from tqdm import tqdm

import project.download_content as content
from project.utils import data

tqdm.pandas()

This notebook prepares the environment (encoded labels, default bounding boxes and target tensors) needed to start training models.

## About boats?!

For early modeling exploration I chose to work with boat bounding boxes. Boats are the 30th most relevant class, which means the class is big enough to be isolated, yet small enough to start exploring models (stress-testing cost functions, architectures, and so on).

In [2]:
# Load the full training annotations through the project helper. The cells below use
# the result as a pandas DataFrame with one row per bounding box
# (columns such as ImageID, LabelSemantic, cx, cy, w, h).
all_data = data.all_train()

In [3]:
%%time
from sklearn.preprocessing import OneHotEncoder

ohc = OneHotEncoder(sparse=False, dtype=np.bool)

labels = ohc.fit_transform(all_data[['LabelSemantic']])

all_data = all_data.join(pd.DataFrame(labels, 
                                      columns=[c[3:] for c in ohc.get_feature_names()]))

CPU times: user 59.9 s, sys: 3.65 s, total: 1min 3s
Wall time: 1min 3s


Saving the fitted one-hot encoder so it can be re-used later, at training and test time.

In [4]:
modelpath = os.path.join(content.DATAPATH, "MODEL", "ohc.pkl")

# Make sure the MODEL folder exists so the dump does not fail on a fresh checkout.
os.makedirs(os.path.dirname(modelpath), exist_ok=True)

# Persist the fitted encoder. A pickle is tied to the library version that wrote it,
# so load it back with the same scikit-learn version.
with open(modelpath, 'wb') as f:
    pickle.dump(ohc, f)

Choosing three initial classes to start modeling.

In [5]:
# Classes used for this first modelling round.
cols = ['boat', 'land_vehicle', 'skyscraper']

# Per-box metadata and geometry kept for modelling. The one-hot class columns are
# appended from `cols`, so the class list lives in exactly one place.
box_columns = ['ImageID',
               'LabelName',
               'IsOccluded',
               'IsTruncated',
               'IsGroupOf',
               'IsDepiction',
               'IsInside',
               'Path',
               'LabelSemantic',
               'cx',
               'cy',
               'w',
               'h']

# Keep only boxes of the selected classes. `.copy()` gives part_data its own storage:
# a later cell adds a column to it, which on a slice of all_data would trigger
# SettingWithCopyWarning.
part_data = all_data.loc[all_data['LabelSemantic'].isin(cols),
                         box_columns + cols].copy()

display(part_data.shape)
part_data.head(5)

(241462, 16)

Unnamed: 0,ImageID,LabelName,IsOccluded,IsTruncated,IsGroupOf,IsDepiction,IsInside,Path,LabelSemantic,cx,cy,w,h,boat,land_vehicle,skyscraper
78,00001bc2c4027449,/m/019jd,1,0,0,0,0,data/TRAIN/train_0/00001bc2c4027449.jpg,boat,0.51625,0.521667,0.4975,0.561667,True,False,False
212,0000339d0372e7e6,/m/079cl,1,0,0,0,0,data/TRAIN/train_0/0000339d0372e7e6.jpg,skyscraper,0.382188,0.687675,0.056875,0.12605,False,False,True
213,0000339d0372e7e6,/m/079cl,1,0,0,0,0,data/TRAIN/train_0/0000339d0372e7e6.jpg,skyscraper,0.382188,0.683474,0.029375,0.102707,False,False,True
214,0000339d0372e7e6,/m/079cl,1,1,1,0,0,data/TRAIN/train_0/0000339d0372e7e6.jpg,skyscraper,0.519062,0.782446,0.174375,0.330532,False,False,True
215,0000339d0372e7e6,/m/079cl,1,0,0,0,0,data/TRAIN/train_0/0000339d0372e7e6.jpg,skyscraper,0.516563,0.79085,0.079375,0.229692,False,False,True


creating the default bounding boxes:

In [6]:
# Default boxes are described layer by layer: each entry pairs the side of a
# feature map with the aspect ratios requested for that layer.
three_ratios = (1, 1/2, 2)
five_ratios = (1, 1/2, 1/3, 2, 3)

layer_specs = [
    (38, three_ratios),
    (19, five_ratios),
    (10, five_ratios),
    (5, five_ratios),
    (3, three_ratios),
    (1, three_ratios),
]

standard_bboxes = data.StandardBoudingBoxes(
    feature_map_sizes=[size for size, _ in layer_specs],
    # a fresh list per layer, exactly as if each one had been written out literally
    ratios_per_layer=[list(ratios) for _, ratios in layer_specs],
)

standard_bboxes.references

Unnamed: 0,cx,cy,w,h
0,0.013158,0.013158,0.005263,0.005263
1,0.013158,0.013158,0.003722,0.007443
2,0.013158,0.013158,0.007443,0.003722
3,0.013158,0.013158,0.006862,0.006862
4,0.013158,0.039474,0.005263,0.005263
...,...,...,...,...
8727,0.833333,0.833333,0.275681,0.275681
8728,0.500000,0.500000,0.900000,0.900000
8729,0.500000,0.500000,0.636396,1.272792
8730,0.500000,0.500000,1.272792,0.636396


The following cell maps each ground-truth bounding box to the default bounding box (or group of default boxes) that best represents it, among the defaults created above.

In [7]:
# For every ground-truth box (one row) ask `standard_bboxes.match` which default boxes
# represent it; the result is stored in `bbox_ref` as a list of default-box ids
# (the list may be empty).
# progress_apply is DataFrame.apply with a tqdm progress bar (enabled by tqdm.pandas()).
# Slow cell: the recorded run took about 18 minutes for ~241k rows.
part_data['bbox_ref'] = part_data.progress_apply(standard_bboxes.match, axis=1)

100%|██████████| 241462/241462 [17:45<00:00, 226.67it/s]


Then we group the rows by image and turn each bounding-box feature into a list (one entry per box). This is useful for building the training set used to fit models.

In [13]:
%%time
imgs = part_data[['ImageID', 'Path']].drop_duplicates()
imgs.set_index('ImageID', inplace=True)

for c in ['cx', 'cy', 'w', 'h', 'bbox_ref'] + cols:
    imgs = imgs.join(part_data.groupby('ImageID')[c].apply(list))

imgs.reset_index(inplace=True)
imgs.shape

CPU times: user 30.9 s, sys: 81.2 ms, total: 31 s
Wall time: 31 s


(74924, 10)

At this point, we are creating the target dataset

applying the transformations explained on page 5 of the [SSD paper](https://arxiv.org/pdf/1512.02325.pdf)

In [60]:
# Every default box gets a vector of
#   1 background flag + len(cols) class flags + 4 box offsets (cx, cy, w, h),
# i.e. 8 values for the 3 classes selected above.
MAX_IMAGES = None  # None = encode every image; set a small int (e.g. 10) for a quick smoke run

n_classes = len(cols)
target = np.zeros((imgs.shape[0],
                   standard_bboxes.references.shape[0],
                   1 + n_classes + 4),
                  dtype=np.float32)
# All default boxes start as "background"; matched ones are overwritten below.
target[..., 0] = 1

selected_imgs = imgs if MAX_IMAGES is None else imgs[:MAX_IMAGES]

# `pos` is the row position in `imgs` (and therefore in `target`), independent of the index labels.
for pos, (_, row) in tqdm(enumerate(selected_imgs.iterrows()), total=len(selected_imgs)):
    # One list of flags per class, looked up by name rather than by column position.
    class_lists = [row[c] for c in cols]
    for cx, cy, w, h, refs, *class_flags in zip(row.cx, row.cy, row.w, row.h,
                                                row.bbox_ref, *class_lists):
        for id_ref in refs:
            cx_d, cy_d, w_d, h_d = standard_bboxes.references.loc[id_ref]

            # Offsets of the ground-truth box relative to its default box (SSD paper, eq. 2):
            # centre shifts are scaled by the default box size, sizes are log-ratios.
            g_hat_cx = (cx - cx_d) / w_d
            g_hat_cy = (cy - cy_d) / h_d
            g_hat_w = np.log(w / w_d)
            g_hat_h = np.log(h / h_d)

            # Not background (leading 0), then class flags, then the four offsets.
            target[pos, int(id_ref)] = [0] + class_flags + [g_hat_cx, g_hat_cy, g_hat_w, g_hat_h]

target.shape

(74924, 8732, 8)

### Hdf5

In [61]:
# Single HDF5 file that holds both the per-image table (key 'X') and the target tensor (dataset 'y').
filepath = os.path.join(content.DATAPATH, "MODEL", "part_data_300_vgg.h5")

In [62]:
# Write the per-image table first; mode='w' starts the file from scratch.
imgs.to_hdf(filepath, mode='w', key='X')

# Then append the target tensor to the same file, stored as float16.
with h5py.File(filepath, mode='a') as h5_file:
    h5_file.create_dataset(name='y', dtype=np.float16, data=target)

#### Reading

In [63]:
# Read the per-image table back from the HDF5 file to check the round trip.
# NOTE(review): this rebinds `part_data`, which until here held the per-box table;
# re-running earlier cells after this one will see the per-image frame instead.
part_data = pd.read_hdf(filepath, 'X', mode='r')

In [64]:
# Load the whole target tensor back into memory as a NumPy array.
with h5py.File(filepath, mode='r') as h5_file:
    dset = h5_file['y'][...]
dset.shape

(74924, 8732, 8)

In [67]:
# Peek at the reloaded per-image table: each box attribute is a list with one entry per box.
part_data.head(2)

Unnamed: 0,ImageID,Path,cx,cy,w,h,bbox_ref,boat,land_vehicle,skyscraper
0,00001bc2c4027449,data/TRAIN/train_0/00001bc2c4027449.jpg,[0.51625],[0.5216665],[0.4975],[0.561667],[[]],[True],[False],[False]
1,0000339d0372e7e6,data/TRAIN/train_0/0000339d0372e7e6.jpg,"[0.3821875, 0.3821875, 0.5190625, 0.5165625, 0...","[0.687675, 0.6834735000000001, 0.7824460000000...","[0.056875000000000016, 0.02937499999999998, 0....","[0.12605, 0.102707, 0.33053200000000005, 0.229...","[[], [], [8713], [], [], [], [], []]","[False, False, False, False, False, False, Fal...","[False, False, False, False, False, False, Fal...","[True, True, True, True, True, True, True, True]"


In [68]:
# Default box 8713 is the one listed in `bbox_ref` for the third box of image 1.
standard_bboxes.references.iloc[8713]

cx    0.500000
cy    0.833333
w     0.179134
h     0.358267
Name: 8713, dtype: float64

In [69]:
# Stored target for image 1 at default box 8713, laid out as
# [background, boat, land_vehicle, skyscraper, g_cx, g_cy, g_w, g_h].
dset[1][8713]

array([ 0.     ,  0.     ,  0.     ,  1.     ,  0.10645, -0.1421 ,
       -0.02693, -0.08057], dtype=float16)