In [1]:
# imports
import os
os.sys.path.append(os.path.abspath(".."))

import numpy as np
import pandas as pd

import h5py

import project.download_content as content
from project.utils import data

from tqdm import tqdm

tqdm.pandas()

This notebook creates the environment needed to start training models.

## About boats?!

For early modeling exploration I chose to work with boat bounding boxes. Boats are the 30th most relevant class, which means they are big enough to be isolated, yet small enough to start exploring models (stress-testing cost functions, architectures, and so on).

In [2]:
# Load the boat annotations through the project helper. Later cells read the
# ImageID, Path, cx, cy, w and h columns from this frame.
# NOTE(review): presumably one row per bounding box — confirm in project.utils.data.
boats = data.boats()

In [3]:
# Build the set of default (reference) bounding boxes used for matching.
# NOTE(review): the meaning of the `with_s1` / `with_s2` values is defined by
# the project class — confirm there before changing them.
default_box_config = {
    'with_s1': [11, 7, 5, 3],
    'with_s2': [75, 39],
}
standard_bboxes = data.StandardBoudingBoxes(**default_box_config)

# One row per default box.
standard_bboxes.references.shape

(5770, 4)

The following cell maps each bounding box to the default bounding box (or group of default boxes) that best represents it, among the defaults created previously.

In [4]:
# Apply `standard_bboxes.match` to every row (axis=1) and store the result in a
# new `bbox_ref` column; the target-building cell iterates each value as a
# collection of reference-box ids.
# `progress_apply` is the tqdm-enabled `apply` registered by `tqdm.pandas()` in
# the imports cell. The recorded run took about 4 minutes for 79113 rows.
boats['bbox_ref'] = boats.progress_apply(standard_bboxes.match, axis=1)

100%|██████████| 79113/79113 [03:57<00:00, 333.11it/s]


Then, the bounding boxes are grouped by image and each bounding-box feature is turned into a list. This is useful for creating the training set used to fit models.

In [6]:
%%time
imgs = boats[['ImageID', 'Path']].drop_duplicates()
imgs.set_index('ImageID', inplace=True)

for c in ['cx', 'cy', 'w', 'h', 'bbox_ref']:
    imgs = imgs.join(boats.groupby('ImageID')[c].apply(list))

imgs.reset_index(inplace=True)
imgs.shape

CPU times: user 6.56 s, sys: 0 ns, total: 6.56 s
Wall time: 6.55 s


(25478, 7)

At this point, we are creating the target dataset

applying the transformations explained on page 5 of the [SSD paper](https://arxiv.org/pdf/1512.02325.pdf).

In [17]:
# Build the training target: for every image and every default box, a vector of
# N_CLASSES one-hot class scores followed by N_COORDS encoded box offsets.
N_CLASSES = 2  # [background, boat]
N_COORDS = 4   # (cx, cy, w, h)

references = standard_bboxes.references

target = np.zeros((imgs.shape[0],
                   references.shape[0],
                   N_CLASSES + N_COORDS),
                  dtype=np.float32)
# Every default box starts as background, i.e. [1, 0, 0, 0, 0, 0].
target[..., 0] = 1

# Address `target` by row position rather than by the DataFrame index label,
# so the cell does not depend on `imgs` carrying a 0..n-1 index.
for i, r in enumerate(imgs.itertuples(index=False)):
    for cx, cy, w, h, refs in zip(r.cx, r.cy, r.w, r.h, r.bbox_ref):
        for id_ref in refs:
            # NOTE(review): `id_ref` is used both as a label (`.loc`) and as a
            # position in `target`; this assumes `references` has a 0..n-1
            # index — confirm.
            cx_d, cy_d, w_d, h_d = references.loc[id_ref]

            # Encode the ground-truth box relative to its default box:
            # centre offsets are scaled by the default box size, and sizes
            # are log-ratios.
            g_hat_cx = (cx - cx_d) / w_d
            g_hat_cy = (cy - cy_d) / h_d
            g_hat_w = np.log(w / w_d)
            g_hat_h = np.log(h / h_d)

            # Mark the default box as "boat" and store its offsets.
            target[i, int(id_ref)] = [0, 1, g_hat_cx, g_hat_cy, g_hat_w, g_hat_h]

target.shape

(25478, 5770, 6)

### Hdf5

In [18]:
# Single HDF5 file that holds both the image table (key 'X') and the target
# array (dataset 'y').
filepath = os.path.join(content.DATAPATH, "MODEL", "boats_300_vgg.h5")

In [19]:
# Write the image table first; mode='w' (re)creates the file from scratch.
imgs.to_hdf(filepath, key='X', mode='w')

# Reopen the same file in append mode and add the target array next to it.
# The float32 array is stored as float16.
with h5py.File(filepath, mode='a') as h5_file:
    h5_file.create_dataset('y', data=target, dtype=np.float16)

#### Reading

In [10]:
# Read back the image table stored under key 'X' (requires `pandas as pd` from
# the imports cell).
# NOTE: this rebinds `boats` to the per-image frame written from `imgs`,
# replacing the frame loaded by `data.boats()` earlier in the notebook.
boats = pd.read_hdf(filepath, key='X', mode='r')

In [11]:
# Load the whole 'y' dataset into memory as a NumPy array, then close the file.
with h5py.File(filepath, mode='r') as h5_file:
    dset = h5_file['y'][:]

dset.shape

(25478, 25445, 6)

In [12]:
# Preview the first two rows of the frame read back from the HDF5 file.
boats.head(2)

Unnamed: 0,ImageID,Path,cx,cy,w,h,bbox_ref
0,00001bc2c4027449,data/TRAIN/train_0/00001bc2c4027449.jpg,[0.51625],[0.5216665],[0.4975],[0.561667],[[25415]]
1,0000c035a08c3770,data/TRAIN/train_0/0000c035a08c3770.jpg,[0.498124],[0.48625],[0.996248],[0.9725],[[25440]]


In [13]:
# Spot check: show the default box stored at position 5740.
standard_bboxes.references.iloc[5740]

cx    0.315287
cy    0.531847
w     0.009554
h     0.019108
Name: 5740, dtype: float64

In [14]:
# Spot check: stored target vector of the first image for default box 5740.
dset[0, 5740]

array([1., 0., 0., 0., 0., 0.], dtype=float16)