In [1]:
# imports
import os
os.sys.path.append(os.path.abspath(".."))

import numpy as np
import pandas as pd

import h5py

import project.download_content as content
from project.utils import data

from tqdm import tqdm

import pickle

# Directory holding the preprocessed CSV shards read below; the HDF5 file written
# later in this notebook is placed under the same DATAPATH/"MODEL" location.
modelpath = os.path.join(content.DATAPATH, "MODEL")

# Registers tqdm's pandas integration (the progress_* methods on pandas objects).
# NOTE(review): none of the cells visible here call those methods.
tqdm.pandas()

This notebook sets up the environment needed to start training models.

## About boats?!

For early modeling exploration, I chose to work with boat bounding boxes. Boats are the 30th most relevant class, which means the class is big enough to be isolated, yet small enough to start exploring models (stress-testing cost functions, architectures, and so on).

In [2]:
# Read the eight preprocessed training shards and stack them into a single frame.
shard_paths = [os.path.join(modelpath, f'train_preprocessed_{i}.csv') for i in range(8)]
all_train = pd.concat([pd.read_csv(shard_path) for shard_path in shard_paths])


def _parse_refs(raw):
    """Convert a bracketed, space-separated id string into a list of ints.

    The first and last characters (the brackets) are dropped, the rest is split
    on single spaces, and empty tokens left by repeated spaces are skipped.
    """
    return [int(token) for token in raw[1:-1].split(' ') if token]


# bbox_ref comes back from the CSV as text; turn it into real lists of ints.
all_train['bbox_ref'] = all_train['bbox_ref'].apply(_parse_refs)
# Number of reference ids attached to each annotation row.
all_train['bbox_count'] = all_train['bbox_ref'].apply(len)
all_train.head(5)

Unnamed: 0,ImageID,LabelName,IsOccluded,IsTruncated,IsGroupOf,IsDepiction,IsInside,Path,LabelSemantic,cx,cy,w,h,boat,land_vehicle,skyscraper,bbox_ref,bbox_count
0,00001bc2c4027449,/m/019jd,1,0,0,0,0,data/TRAIN/train_0/00001bc2c4027449.jpg,boat,0.51625,0.521667,0.4975,0.561667,True,False,False,[],0
1,0000339d0372e7e6,/m/079cl,1,0,0,0,0,data/TRAIN/train_0/0000339d0372e7e6.jpg,skyscraper,0.382188,0.687675,0.056875,0.12605,False,False,True,[],0
2,0000339d0372e7e6,/m/079cl,1,0,0,0,0,data/TRAIN/train_0/0000339d0372e7e6.jpg,skyscraper,0.382188,0.683474,0.029375,0.102707,False,False,True,[],0
3,0000339d0372e7e6,/m/079cl,1,1,1,0,0,data/TRAIN/train_0/0000339d0372e7e6.jpg,skyscraper,0.519062,0.782446,0.174375,0.330532,False,False,True,"[8712, 8713]",2
4,0000339d0372e7e6,/m/079cl,1,0,0,0,0,data/TRAIN/train_0/0000339d0372e7e6.jpg,skyscraper,0.516563,0.79085,0.079375,0.229692,False,False,True,[],0


In [3]:
# Keep every annotation that references at least one default box, add a fixed-size
# random sample of annotations that reference none, then shuffle the result.
with_refs = all_train.query("bbox_count > 0")
without_refs = all_train.query("bbox_count == 0").sample(12000, random_state=47)
train = pd.concat([with_refs, without_refs]).sample(frac=1, random_state=17)

display(train.head(5))
train.shape

Unnamed: 0,ImageID,LabelName,IsOccluded,IsTruncated,IsGroupOf,IsDepiction,IsInside,Path,LabelSemantic,cx,cy,w,h,boat,land_vehicle,skyscraper,bbox_ref,bbox_count
11145,aa8a3208a030e639,/m/019jd,1,0,0,0,0,data/TRAIN/train_a/aa8a3208a030e639.jpg,boat,0.7225,0.751778,0.15125,0.399716,True,False,False,[],0
16304,f0731d01e1aae91e,/m/079cl,0,1,0,0,0,data/TRAIN/train_f/f0731d01e1aae91e.jpg,skyscraper,0.493698,0.535937,0.840337,0.908125,False,False,True,"[8728, 8729, 8730, 8731]",4
18020,2fa9a3dba98197c8,/m/019jd,1,0,1,0,0,data/TRAIN/train_2/2fa9a3dba98197c8.jpg,boat,0.154688,0.55625,0.061875,0.020834,True,False,False,[8035],1
11733,289240aa3b8a1d77,/m/019jd,0,0,0,0,0,data/TRAIN/train_2/289240aa3b8a1d77.jpg,boat,0.12125,0.448334,0.23125,0.413333,True,False,False,"[8697, 8699]",2
4636,e2d1fa94455c112b,/m/019jd,0,0,0,0,0,data/TRAIN/train_e/e2d1fa94455c112b.jpg,boat,0.690625,0.667453,0.175,0.179245,True,False,False,[8655],1


(48480, 18)

Creating the default bounding boxes:

In [4]:
# Two aspect-ratio sets are used: three ratios on the first and the last two
# feature maps, five ratios on the three feature maps in between.
three_ratios = [1, 1/2, 2]
five_ratios = [1, 1/2, 1/3, 2, 3]

standard_bboxes = data.StandardBoudingBoxes(
    feature_map_sizes=[38, 19, 10, 5, 3, 1],
    # Each layer gets its own list object, exactly one entry per feature map.
    ratios_per_layer=[
        list(three_ratios),
        list(five_ratios),
        list(five_ratios),
        list(five_ratios),
        list(three_ratios),
        list(three_ratios),
    ],
)

standard_bboxes.references.values

array([[0.01315789, 0.01315789, 0.00526316, 0.00526316],
       [0.01315789, 0.01315789, 0.00372161, 0.00744323],
       [0.01315789, 0.01315789, 0.00744323, 0.00372161],
       ...,
       [0.5       , 0.5       , 0.6363961 , 1.27279221],
       [0.5       , 0.5       , 1.27279221, 0.6363961 ],
       [0.5       , 0.5       , 0.96747093, 0.96747093]])

In [5]:
%%time
imgs = train[['ImageID', 'Path']].drop_duplicates()
#getting the one hot encodes columns
dummy_classes = list(train.columns[13:-2])

imgs.set_index('ImageID', inplace=True)

for c in ['cx', 'cy', 'w', 'h', 'bbox_ref', 'LabelSemantic'] + dummy_classes:
    imgs = imgs.join(train.groupby('ImageID')[c].apply(list))

imgs.reset_index(inplace=True)
display(imgs.shape)
imgs.head()

(38122, 11)

CPU times: user 17.7 s, sys: 28.9 ms, total: 17.7 s
Wall time: 17.7 s


Unnamed: 0,ImageID,Path,cx,cy,w,h,bbox_ref,LabelSemantic,boat,land_vehicle,skyscraper
0,aa8a3208a030e639,data/TRAIN/train_a/aa8a3208a030e639.jpg,"[0.7224999999999999, 0.2134375]","[0.7517780000000001, 0.8079654999999999]","[0.15125, 0.189375]","[0.3997160000000001, 0.21906099999999984]","[[], [8700, 8703]]","[boat, boat]","[True, True]","[False, False]","[False, False]"
1,f0731d01e1aae91e,data/TRAIN/train_f/f0731d01e1aae91e.jpg,[0.4936975],[0.5359375],[0.8403370000000001],[0.908125],"[[8728, 8729, 8730, 8731]]",[skyscraper],[False],[False],[True]
2,2fa9a3dba98197c8,data/TRAIN/train_2/2fa9a3dba98197c8.jpg,"[0.1546875, 0.284375, 0.338125, 0.411875, 0.65...","[0.55625, 0.6604165, 0.675, 0.5929165000000001...","[0.06187500000000001, 0.025000000000000026, 0....","[0.02083400000000002, 0.019167000000000045, 0....","[[8035], [6423], [], [], []]","[boat, boat, boat, boat, boat]","[True, True, True, True, True]","[False, False, False, False, False]","[False, False, False, False, False]"
3,289240aa3b8a1d77,data/TRAIN/train_2/289240aa3b8a1d77.jpg,[0.12125],[0.4483335000000001],[0.23125],[0.413333],"[[8697, 8699]]",[boat],[True],[False],[False]
4,e2d1fa94455c112b,data/TRAIN/train_e/e2d1fa94455c112b.jpg,[0.690625],[0.6674525],[0.17499999999999993],[0.17924500000000002],[[8655]],[boat],[True],[False],[False]


At this point, we create the target dataset,

applying the transformations explained on page 5 of the [SSD paper](https://arxiv.org/pdf/1512.02325.pdf).

In [6]:
# Each default box is described by 1 background flag + one flag per class in
# `dummy_classes` + 4 box values (cx, cy, w, h).
# NOTE: the array is float32 with shape (images, default boxes, vector width);
# with the shapes this notebook produces that is roughly 10 GB of RAM.
n_classes = len(dummy_classes)
target = np.zeros((imgs.shape[0],
                   standard_bboxes.references.shape[0],
                   1 + n_classes + 4),
                  dtype=np.float32)

# Every default box starts as "background": flag 0 set, class flags and box values zero.
target[..., 0] = 1

# enumerate() supplies the positional row of `target`, so the write does not
# depend on the index labels of `imgs`.
for i, (_, row) in enumerate(imgs.iterrows()):
    # The one-hot columns are selected by name, so the unpacking stays correct
    # even if the column layout of `imgs` changes.
    for cx, cy, w, h, refs, *labels in zip(row.cx, row.cy, row.w, row.h, row.bbox_ref,
                                           *row[dummy_classes]):
        for id_ref in refs:
            # Background flag cleared, class one-hot and box values written.
            # If several annotations reference the same default box, the last one wins.
            target[i, int(id_ref)] = [0] + labels + [cx, cy, w, h]

target.shape

(38122, 8732, 8)

In [7]:
# Spot check on one image: print "<class column> <default box index>" for every
# default box whose class flag (columns 1 to 3 of the target vector) is set.
for box_idx, box_vec in enumerate(target[31000]):
    for class_col in (1, 2, 3):
        if box_vec[class_col] == 1:
            print(class_col, box_idx)

3 8728
3 8729
3 8731


### HDF5

In [8]:
# Output file for the training inputs and targets; lives next to the preprocessed CSVs.
filepath = os.path.join(modelpath, "part_data_300_vgg.h5")

In [9]:
# Write the per-image frame under key 'X'; mode='w' starts the file fresh.
imgs.to_hdf(filepath, key='X', mode='w')

# Reopen in append mode so 'X' is preserved, and add the targets as a float16 dataset 'y'.
with h5py.File(filepath, mode='a') as h5_file:
    h5_file.create_dataset('y', data=target, dtype=np.float16)

#### Reading

In [10]:
# Read the per-image frame back from the file to verify the round trip.
read_data = pd.read_hdf(filepath, key='X', mode='r')

In [11]:
# Load the full 'y' dataset into memory (the [:] slice copies it out before the file closes).
with h5py.File(filepath, mode='r') as h5_file:
    dset = h5_file['y'][:]
dset.shape

(38122, 8732, 8)

In [12]:
# Show the first two rows read back from the file, for comparison with `imgs.head()`.
read_data.head(2)

Unnamed: 0,ImageID,Path,cx,cy,w,h,bbox_ref,LabelSemantic,boat,land_vehicle,skyscraper
0,aa8a3208a030e639,data/TRAIN/train_a/aa8a3208a030e639.jpg,"[0.7224999999999999, 0.2134375]","[0.7517780000000001, 0.8079654999999999]","[0.15125, 0.189375]","[0.3997160000000001, 0.21906099999999984]","[[], [8700, 8703]]","[boat, boat]","[True, True]","[False, False]","[False, False]"
1,f0731d01e1aae91e,data/TRAIN/train_f/f0731d01e1aae91e.jpg,[0.4936975],[0.5359375],[0.8403370000000001],[0.908125],"[[8728, 8729, 8730, 8731]]",[skyscraper],[False],[False],[True]


In [13]:
# Look up the default box at position 8713 (its cx, cy, w, h values).
standard_bboxes.references.iloc[8713]

cx    0.500000
cy    0.833333
w     0.179134
h     0.358267
Name: 8713, dtype: float64

In [14]:
# Spot check of the stored targets: image 0, default box 8700. That image's second
# annotation references box 8700, so this vector should hold its class flag and box values.
dset[0][8700]

array([0.    , 1.    , 0.    , 0.    , 0.2134, 0.808 , 0.1893, 0.2191],
      dtype=float16)