In [684]:
%matplotlib inline
from matplotlib import pyplot as plt
import numpy as np
from bnr_ml.nnet import layers
from bnr_ml.objectdetect import yolo
import theano
from theano import tensor as T

import pdb

In [648]:
# NOTE(review): im_size is not referenced by any other cell visible in this
# notebook; gen_dataset hard-codes 200x200 images instead. Remove it or wire it
# through to the data generator.
im_size = (100,100)

In [21]:
def gen_num_objs(max_obj):
    """Draw the number of objects to place in a scene.

    One uniform sample from [0, 1) is scaled by ``max_obj``, shifted up by one
    and truncated to a Python ``int``.
    """
    u = np.random.rand()
    count = 1 + max_obj * u
    return int(count)

In [355]:
def gen_classes(num_obj, num_class):
    """Sample one class id per object.

    Returns a 1-D integer array of length ``num_obj`` with entries drawn by
    ``np.random.randint`` from ``0 .. num_class - 1``.
    """
    label_shape = (num_obj,)
    return np.random.randint(0, num_class, size=label_shape)

In [469]:
def gen_circle(shape):
    """Render a noisy filled ellipse inscribed in a ``shape``-sized patch.

    Parameters
    ----------
    shape : tuple of two ints
        ``(height, width)`` of the patch.

    Returns
    -------
    numpy.ndarray
        Array of shape ``shape + (3,)`` with values in [0, 1]. Pixels whose
        normalised coordinates satisfy ``x**2 + y**2 < 1`` carry noise; all
        other pixels are 0.
    """
    height, width = shape[0], shape[1]
    # Map pixel indices to roughly [-1, 1) along each axis. The float literal
    # keeps this a true division under Python 2 as well, so odd sizes are
    # centred the same way on every interpreter.
    xs = (np.arange(width, dtype=np.float64) - width / 2.) / (width / 2.)
    ys = (np.arange(height, dtype=np.float64) - height / 2.) / (height / 2.)
    # The mask does not depend on the channel, so build it once.
    inside = (xs[np.newaxis, :] ** 2 + ys[:, np.newaxis] ** 2) < 1

    im = np.zeros(shape + (3,))
    for channel in range(3):
        # Per channel: a scalar gamma(0.7) offset plus gamma(0.4)-scaled
        # standard normal noise.
        noise = np.random.gamma(.7) + np.random.gamma(.4) * np.random.randn(*shape)
        im[:, :, channel][inside] = noise[inside]
    im[im < 0] = 0.
    im[im >= 1] = 1.
    return im

In [470]:
def gen_rect(shape):
    """Render a noisy rectangle that fills the whole ``shape``-sized patch.

    Each of the three channels is a scalar gamma(0.7) offset plus
    gamma(0.4)-scaled standard normal noise. The result has shape
    ``shape + (3,)`` and is clipped to [0, 1].
    """
    channels = []
    for _ in range(3):
        offset = np.random.gamma(.7)
        scale = np.random.gamma(.4)
        channels.append(offset + scale * np.random.randn(*shape))
    # Stack the per-channel planes along a trailing axis: (h, w) * 3 -> (h, w, 3).
    im = np.dstack(channels)
    return np.clip(im, 0., 1.)

In [471]:
def gen_triangle(shape):
    """Render a noisy right triangle inside a ``shape``-sized patch.

    The patch is split along the anti-diagonal ``x/width + y/height == 1``; a
    coin flip decides which side is filled. Pixels exactly on the diagonal and
    on the other side stay 0. The result has shape ``shape + (3,)`` and values
    in [0, 1].
    """
    height, width = shape[0], shape[1]
    frac_x = np.arange(width, dtype=np.float64) / width
    frac_y = np.arange(height, dtype=np.float64) / height
    diag = frac_x[np.newaxis, :] + frac_y[:, np.newaxis]

    im = np.zeros(shape + (3,))
    # One uniform draw picks the orientation before any noise is sampled.
    inside = (diag < 1) if np.random.rand() < .5 else (diag > 1)
    for channel in range(3):
        noise = np.random.gamma(.7) + np.random.gamma(.4) * np.random.randn(*shape)
        im[:, :, channel][inside] = noise[inside]
    return np.clip(im, 0., 1.)

In [472]:
def gen_pos_shape(min_shape, im_shape):
    """Sample a random object extent and a top-left position that fits it.

    For each of the two axes the extent is drawn between ``min_shape[axis]``
    and ``im_shape[axis]``. The position is then drawn so that
    ``pos + extent`` does not exceed the image along that axis.

    Returns ``(shape, pos)`` as two 2-tuples of ints.
    """
    axes = (0, 1)
    # Both extents are drawn first, then both positions.
    shape = tuple(
        int(min_shape[axis] + (im_shape[axis] - min_shape[axis]) * np.random.rand())
        for axis in axes
    )
    pos = tuple(
        int((im_shape[axis] - shape[axis]) * np.random.rand())
        for axis in axes
    )
    return shape, pos

In [520]:
def gen_scene(max_obj, num_class, min_shape, im_shape):
    """Compose one synthetic scene of random shapes on a noisy background.

    Parameters
    ----------
    max_obj : int
        Upper bound passed to ``gen_num_objs``.
    num_class : int
        Number of class slots in the label. Class ids are drawn from
        ``0 .. num_class - 1``; only ids 0 (circle), 1 (rectangle) and
        2 (triangle) have a generator.
    min_shape, im_shape : tuple of two ints
        Minimum object extent and image extent, ``(height, width)``.

    Returns
    -------
    (im, y)
        ``im`` is an ``im_shape + (3,)`` array scaled so its maximum is 1
        (left unscaled if it is all zero). ``y`` has length ``4 + num_class``:
        ``y[:4]`` is ``(pos0, pos1, extent0, extent1)`` and ``y[4:]`` counts
        objects per class. NOTE: with more than one object, ``y[:4]`` only
        holds the box of the last object placed.

    Raises
    ------
    ValueError
        If a sampled class id has no shape generator.
    """
    # Class id -> patch generator; the tuple index is the class id.
    shape_generators = (gen_circle, gen_rect, gen_triangle)

    num_obj = gen_num_objs(max_obj)
    classes = gen_classes(num_obj, num_class)
    im = np.random.gamma(.4) * np.random.randn(*(im_shape + (3,)))
    im[im < 0] = 0.
    im[im > 1] = 1.
    y = np.zeros((4 + num_class,))

    for cls in classes:
        cls = int(cls)
        # Fail loudly instead of reusing a stale (or undefined) patch.
        if not 0 <= cls < len(shape_generators):
            raise ValueError(
                'no shape generator for class id %d (supported: 0..%d)'
                % (cls, len(shape_generators) - 1)
            )
        shape, pos = gen_pos_shape(min_shape, im_shape)
        y[:4] = pos + shape
        y[4 + cls] += 1.
        patch = shape_generators[cls](shape)
        # Patches are added, so overlapping objects sum before normalisation.
        im[pos[0]:pos[0] + shape[0], pos[1]:pos[1] + shape[1], :] += patch

    # Guard the normalisation: an all-zero image would otherwise become NaN.
    peak = np.max(im)
    if peak > 0:
        im = im / peak
    return im, y

In [534]:
def gen_dataset(size, im_shape=(200, 200), num_class=3, max_obj=1, min_shape=(8, 8)):
    """Generate ``size`` synthetic scenes in channel-first layout.

    Parameters
    ----------
    size : int
        Number of scenes to generate.
    im_shape : tuple of two ints, optional
        ``(height, width)`` of every image.
    num_class : int, optional
        Number of class slots in each label row.
    max_obj : int, optional
        Maximum number of objects per scene.
    min_shape : tuple of two ints, optional
        Minimum object extent.

    The defaults reproduce the values that were previously hard-coded.

    Returns
    -------
    (X, y)
        ``X`` has shape ``(size, 3) + im_shape`` and ``y`` has shape
        ``(size, 4 + num_class)``, one ``gen_scene`` label per row.
    """
    im_shape = tuple(im_shape)
    X = np.zeros((size, 3) + im_shape)
    y = np.zeros((size, 4 + num_class))
    for i in range(size):
        im, label = gen_scene(max_obj, num_class, min_shape, im_shape)
        # gen_scene returns (height, width, channel); store as (channel, height, width).
        X[i] = im.transpose(2, 0, 1)
        y[i] = label
    return X, y

In [675]:
# Training set: 100 synthetic scenes. X is (100, 3, 200, 200) channel-first
# images, y is (100, 7) rows of [pos0, pos1, extent0, extent1, 3 class counts].
# NOTE(review): no RNG seed is set in the visible cells, so every run draws
# different data.
X,y = gen_dataset(100)

In [701]:
# Symbolic 4-D tensor that feeds the first convolutional layer.
# NOTE(review): this rebinds the builtin name `input` for the rest of the session.
input = T.tensor4('input')

In [722]:
def _relu(layer):
    """Wrap ``layer.output`` in a ReLU non-linearity and return the symbolic result."""
    return layers.NonLinLayer(T.nnet.relu, input=layer.output).output


# Three conv -> ReLU -> pool stages followed by two fully connected layers.
# Stage 1: 11x11 convolution, 16 filters, then 5x5 pooling.
l1 = layers.ConvolutionalLayer2D((None, 3, 200, 200), (11,11), 16, input=input)
l2 = layers.PoolLayer2D(l1.get_output_shape(), (5,5), input=_relu(l1))

# Stage 2: 7x7 convolution, 16 filters, then 3x3 pooling.
l3 = layers.ConvolutionalLayer2D(l2.get_output_shape(), (7,7), 16, input=l2.output)
l4 = layers.PoolLayer2D(l3.get_output_shape(), (3,3), input=_relu(l3))

# Stage 3: 5x5 convolution, 16 filters, then 5x5 pooling.
l5 = layers.ConvolutionalLayer2D(l4.get_output_shape(), (5,5), 16, input=l4.output)
l6 = layers.PoolLayer2D(l5.get_output_shape(), (5,5), input=_relu(l5))

# Fully connected head. No ReLU is applied between l6 and l7.
l7 = layers.FCLayer(l6.get_output_shape(), 1024, input=l6.output)
# Output width 2*2*(2*5 + 3) = 52; presumably grid cells * (boxes * 5 + classes),
# matching the (2,2) / 2 / 3 arguments given to the detector -- TODO confirm.
l8 = layers.FCLayer(l7.get_output_shape(), 2*2*(2*5 + 3), input=_relu(l7))

In [719]:
# Compile a probe function from the network input to the output of the last
# pooling layer (l6), i.e. the tensor that feeds the first FC layer.
fun = theano.function([l1.input], l6.output)

In [720]:
# Push a random batch of 10 inputs of shape (3, 200, 200) through the probe.
tmp = fun(np.random.randn(10,3,200,200))

In [721]:
# Feature-map shape after l6; the saved run shows (10, 16, 2, 2).
tmp.shape

(10, 16, 2, 2)

In [723]:
# Name -> layer mapping handed to the detector. The first and last layers use
# the keys 'input' and 'output'; the layers in between are keyed by variable name.
net = {
    'input': l1,
    'l2': l2,
    'l3': l3,
    'l4': l4,
    'l5': l5,
    'l6': l6,
    'l7': l7,
    'output': l8,
}

In [731]:
# Re-import the project modules to pick up source edits without restarting the
# kernel. The bare `reload` builtin exists only in Python 2.
# NOTE(review): l1..l8 and `net` were built before this reload, so they are
# still instances of the previously loaded `layers` classes.
reload(layers)
reload(yolo)

<module 'bnr_ml.objectdetect.yolo' from '/usr/local/python/bnr_ml/objectdetect/yolo.py'>

In [732]:
# Build the detector around the layer dictionary.
# NOTE(review): the positional arguments are presumably (network, input shape,
# number of classes = 3, grid = (2,2), boxes per cell = 2), which would match
# the 2*2*(2*5 + 3) width of the final FC layer -- TODO confirm against the
# YoloObjectDetector signature.
yl = yolo.YoloObjectDetector(net, (None,3,200,200), 3, (2,2), 2)

In [733]:
# Train on the synthetic set; only X and y are passed, no other arguments.
# In the saved output the run was interrupted by hand (KeyboardInterrupt) after
# epoch 7, with the test loss still at about 8.5.
yl.train(
    X,
    y
)

Getting cost...
Compiling...
Beginning training...

                                             


Epoch 0
------
Train Loss: 8.9927, Test Loss: 8.6771

                                             


Epoch 1
------
Train Loss: 8.4867, Test Loss: 8.6705

                                             


Epoch 2
------
Train Loss: 8.6237, Test Loss: 8.6637

                                             


Epoch 3
------
Train Loss: 8.4772, Test Loss: 8.5195

                                             


Epoch 4
------
Train Loss: 8.0808, Test Loss: 8.5265

                                             


Epoch 5
------
Train Loss: 8.5230, Test Loss: 8.5198

                                             


Epoch 6
------
Train Loss: 8.4834, Test Loss: 8.5133

                                             


Epoch 7
------
Train Loss: 9.0903, Test Loss: 8.5070

                                             






KeyboardInterrupt: 

In [734]:
# Compile a prediction function from the detector's symbolic input to its output.
pred_fn = theano.function([yl.input], yl.output)

In [735]:
# Fresh 50-scene set, drawn from the same generator as the training data, for
# inspecting predictions.
Xval,yval = gen_dataset(50)

In [736]:
# First two label rows: [pos0, pos1, extent0, extent1, class counts (3)].
yval[:2]

array([[  14.,    3.,  185.,   40.,    0.,    1.,    0.],
       [  80.,    9.,   75.,  161.,    1.,    0.,    0.]])

In [737]:
# Raw predictions for the same two scenes.
# NOTE(review): in the saved output the entries are almost constant (about
# 0.497 in the first ten channels and 0.333 in the last three), which suggests
# the network had not learned anything useful when training was interrupted.
pred_fn(Xval[:2])

array([[[[ 0.49745555,  0.4974533 ],
         [ 0.49741852,  0.49743951]],

        [[ 0.49737473,  0.4974235 ],
         [ 0.49741473,  0.49735582]],

        [[ 0.49743842,  0.49735487],
         [ 0.49739547,  0.49743396]],

        [[ 0.49742068,  0.49743535],
         [ 0.49737339,  0.49739198]],

        [[ 0.49746444,  0.49736116],
         [ 0.49744116,  0.49744739]],

        [[ 0.49739518,  0.49740679],
         [ 0.49743355,  0.49742396]],

        [[ 0.49738642,  0.49743611],
         [ 0.49735691,  0.49737037]],

        [[ 0.49745883,  0.4974371 ],
         [ 0.4974248 ,  0.49735731]],

        [[ 0.49744032,  0.49735029],
         [ 0.49746567,  0.49739947]],

        [[ 0.49737944,  0.49733693],
         [ 0.49740581,  0.49737196]],

        [[ 0.33332057,  0.33332404],
         [ 0.33333248,  0.33332339]],

        [[ 0.33334062,  0.33333728],
         [ 0.33333842,  0.33332795]],

        [[ 0.33333881,  0.33333868],
         [ 0.3333291 ,  0.33334866]]],


       [[[