In [1]:
# imports
import os
os.sys.path.append(os.path.abspath(".."))

import numpy as np
import pandas as pd

import h5py

import project.download_content as content
from project.utils import data

from tqdm import tqdm

tqdm.pandas()

This notebook creates the environment (inputs and targets) needed to start training models.

## About boats?!

For early modeling exploration I chose to work with boat bounding boxes. Boats are the 30th most relevant class, which means the class is big enough to be studied in isolation, yet small enough to start exploring models (stress-testing cost functions, architectures, and so on).

In [2]:
# Load the boats table from the project's data helpers; later cells read its
# ImageID, Path, cx, cy, w and h columns.
boats = data.boats()

In [3]:
# The two ratio sets that are repeated across layers.
three_ratios = (1, 1/2, 2)
five_ratios = (1, 1/2, 1/3, 2, 3)

# One independent list per layer, in the same order as feature_map_sizes
# (presumably each ratio list pairs with the feature map at the same position — confirm
# against StandardBoudingBoxes).
ratios_per_layer = [
    list(ratios)
    for ratios in (three_ratios, five_ratios, five_ratios,
                   five_ratios, three_ratios, three_ratios)
]

standard_bboxes = data.StandardBoudingBoxes(
    feature_map_sizes=[38, 19, 10, 5, 3, 1],
    ratios_per_layer=ratios_per_layer,
)

# Display the table of default boxes (columns cx, cy, w, h).
standard_bboxes.references

Unnamed: 0,cx,cy,w,h
0,0.013158,0.013158,0.005263,0.005263
1,0.013158,0.013158,0.003722,0.007443
2,0.013158,0.013158,0.007443,0.003722
3,0.013158,0.013158,0.006862,0.006862
4,0.013158,0.039474,0.005263,0.005263
...,...,...,...,...
8727,0.833333,0.833333,0.275681,0.275681
8728,0.500000,0.500000,0.900000,0.900000
8729,0.500000,0.500000,0.636396,1.272792
8730,0.500000,0.500000,1.272792,0.636396


The following cell maps each ground-truth bounding box to the default box (or group of default boxes), among those created above, that best represents it.

In [4]:
# Run standard_bboxes.match on every row (axis=1) and store the result in a new
# 'bbox_ref' column. progress_apply is the tqdm-wrapped apply registered by
# tqdm.pandas(), hence the progress bar.
boats['bbox_ref'] = boats.progress_apply(standard_bboxes.match, axis=1)

100%|██████████| 79113/79113 [05:45<00:00, 228.85it/s]


Then the boxes are grouped by image, turning each bounding-box feature into a list per image. This is useful for creating the training set used to fit models.

In [5]:
%%time
imgs = boats[['ImageID', 'Path']].drop_duplicates()
imgs.set_index('ImageID', inplace=True)

for c in ['cx', 'cy', 'w', 'h', 'bbox_ref']:
    imgs = imgs.join(boats.groupby('ImageID')[c].apply(list))

imgs.reset_index(inplace=True)
imgs.shape

CPU times: user 6.87 s, sys: 0 ns, total: 6.87 s
Wall time: 6.88 s


(25478, 7)

At this point, we create the target dataset,

applying the transformations explained on page 5 of the [SSD paper](https://arxiv.org/pdf/1512.02325.pdf).

In [6]:
# Each default box gets 6 target values: 2 class scores (background, boat)
# followed by the 4 encoded offsets (cx, cy, w, h).
N_OUTPUTS = 6

# Fetch the default boxes once, with an explicit column order, instead of
# re-reading `standard_bboxes.references` on every inner iteration and relying
# on its column order for the unpacking below.
default_boxes = standard_bboxes.references[['cx', 'cy', 'w', 'h']]

target = np.zeros((imgs.shape[0], default_boxes.shape[0], N_OUTPUTS),
                  dtype=np.float32)
# Every default box starts as background: [1, 0, 0, 0, 0, 0].
target[..., 0] = 1

# enumerate() yields the positional row number, so indexing `target` does not
# depend on `imgs` carrying a 0..n-1 index.
for img_pos, img in enumerate(imgs.itertuples(index=False)):
    for cx, cy, w, h, refs in zip(img.cx, img.cy, img.w, img.h, img.bbox_ref):
        # `refs` holds the ids of the default boxes matched to this ground-truth box;
        # it may be empty, in which case the box contributes nothing.
        for id_ref in refs:
            cx_d, cy_d, w_d, h_d = default_boxes.loc[id_ref]

            # Centre offsets are scaled by the default box size; sizes are log-ratios.
            g_hat_cx = (cx - cx_d) / w_d
            g_hat_cy = (cy - cy_d) / h_d
            g_hat_w = np.log(w / w_d)
            g_hat_h = np.log(h / h_d)

            # If several ground-truth boxes match the same default box, the last one wins.
            target[img_pos, int(id_ref)] = [0, 1, g_hat_cx, g_hat_cy, g_hat_w, g_hat_h]

target.shape

(25478, 8732, 6)

### Hdf5

In [7]:
# Path of the HDF5 file, under the project's DATAPATH, that the cells below
# write to and read back from.
filepath = os.path.join(content.DATAPATH, "MODEL", "boats_300_vgg.h5")

In [8]:
# Write the image table under key 'X'; mode='w' starts the file from scratch.
imgs.to_hdf(filepath, key='X', mode='w')

# Re-open the same file in append mode and add the targets as dataset 'y',
# cast from float32 to float16 on write.
with h5py.File(filepath, mode='a') as h5_file:
    h5_file.create_dataset('y', data=target, dtype=np.float16)

#### Reading

In [9]:
# Read back the image table stored under key 'X'.
# NOTE: this rebinds `boats`, which until now held the per-box table from data.boats().
boats = pd.read_hdf(filepath, key='X', mode='r')

In [10]:
# Load the whole 'y' dataset into memory before the file is closed.
with h5py.File(filepath, mode='r') as h5_file:
    dset = h5_file['y'][:]
dset.shape

(25478, 8732, 6)

In [11]:
# Peek at the first two rows of the table read back from the HDF5 file.
boats.head(2)

Unnamed: 0,ImageID,Path,cx,cy,w,h,bbox_ref
0,00001bc2c4027449,data/TRAIN/train_0/00001bc2c4027449.jpg,[0.51625],[0.5216665],[0.4975],[0.561667],[[]]
1,0000c035a08c3770,data/TRAIN/train_0/0000c035a08c3770.jpg,[0.498124],[0.48625],[0.996248],[0.9725],[[8731]]


In [12]:
# Show the default box at position 5740.
# NOTE(review): 5740 is not among the bbox_ref values shown above (row 1 references 8731);
# confirm this is the box that was meant to be inspected.
standard_bboxes.references.iloc[5740]

cx    0.986842
cy    0.776316
w     0.005263
h     0.005263
Name: 5740, dtype: float64

In [15]:
# Stored target for image 1 at default box 8731, the reference listed in that
# image's bbox_ref: [background, boat, g_hat_cx, g_hat_cy, g_hat_w, g_hat_h].
dset[1][8731]

array([ 0.      ,  1.      , -0.001939, -0.01421 ,  0.02931 ,  0.005184],
      dtype=float16)