In [1]:
import caffe

from caffe import layers as L, params as P, to_proto

from caffe.proto import caffe_pb2

from caffe.coord_map import crop

import copy
from nbfinder import NotebookFinder
import sys
sys.meta_path.append(NotebookFinder())
from layer_util import *
    

# Presumably the Caffe `engine` enum value (CAFFE = 1); it is not referenced
# in the code visible here -- TODO confirm whether anything still needs it.
CAFFE=1
# Integer codes handed to L.Eltwise(operation=...) further down. They appear
# to mirror Caffe's EltwiseParameter.EltwiseOp enum (PROD = 0, SUM = 1, MAX = 2).
SUM=1
PROD = 0
MAX = 2

def encode(n, conv, nfilters_list, num_layers, type_="labelled"):
    """Stack `num_layers` strided conv+ReLU layers on `conv` and record the result.

    Args:
        n: net spec object; its `encoder` attribute is set to the final blob.
        conv: input blob the encoder is built on.
        nfilters_list: output-channel count for each layer, indexed by depth.
        num_layers: how many layers to build.
        type_: accepted for interface compatibility; not used in this function.

    Returns:
        A `(n, encoder_blobs)` tuple, where `encoder_blobs` holds the input
        blob followed by the output of every layer, in build order.
    """
    top = conv
    blobs = [top]
    for depth in range(num_layers):
        # 5x5 kernel, pad 2, stride 2 (conv_relu comes from layer_util).
        top = conv_relu(top, ks=5, nout=nfilters_list[depth], pad=2, stride=2)
        blobs.append(top)

    n.encoder = top
    return n, blobs
    
def decode(n, encoder, encoder_blobs, nfilters_list, num_layers, num_input_channels):
    """Build the deconvolutional decoder and store its output as `n.reconstruction`.

    Args:
        n: net spec object; its `reconstruction` attribute is set here.
        encoder: blob the decoder starts from.
        encoder_blobs: blobs collected by `encode` (input blob first, final
            encoder output last); used as crop references.
        nfilters_list: filter counts of the encoder layers.
        num_layers: number of deconv+crop stages to build.
        num_input_channels: output-channel count of the last stage, so the
            reconstruction has as many channels as the input data.

    Returns:
        `n`, with `n.reconstruction` set.

    Neither `encoder_blobs` nor `nfilters_list` is modified; the reordering
    below is done on local copies so callers can safely reuse their lists.
    """
    # Drop the last layer and its corresponding filter count (the last entry
    # is not used when going in reverse), then flip the order because decoding
    # walks the encoder backwards.
    decoder_filters = list(nfilters_list)[:-1]
    decoder_filters.reverse()
    crop_refs = list(encoder_blobs)[:-1]
    crop_refs.reverse()

    # Add the channel size of the input data for the full reconstruction.
    decoder_filters.append(num_input_channels)

    conv = encoder
    for i in range(num_layers):
        conv = deconv_relu(conv, 5, decoder_filters[i], stride=2)
        # Crop the deconv output against the matching encoder-side blob.
        conv = L.Crop(conv, crop_refs[i], axis=2, offset=1)
    n.reconstruction = conv
    return n

def bbox_reg(n, num_classes):
    """Add the label slices and the class / objectness / coordinate heads to `n`.

    Args:
        n: net spec object; must already provide `label` and `encoder`.
        num_classes: number of output channels of the class-score head.

    Returns:
        `n` after `get_coord_scores` has added the coordinate tops.
    """
    # Split the label blob at slice points 2, 4 and 5 into four tops
    # (presumably box xy, box wh, objectness and class -- TODO confirm layout).
    label_parts = L.Slice(n.label, slice_point=[2, 4, 5], ntop=4)
    n.gxy, n.gwh, n.gobj, n.gcls = label_parts

    # Both score heads are 3x3, pad-1 convolutions on top of the encoder output.
    n.class_scores = conv_relu(n.encoder, ks=3, pad=1, nout=num_classes)
    n.obj_scores = conv_relu(n.encoder, ks=3, pad=1, nout=2)

    return get_coord_scores(n)
    
def get_coord_scores(n):
    """Add the box-coordinate heads and their masked prediction / target tops.

    Args:
        n: net spec object; must already provide `encoder`, `gobj`, `gxy`
            and `gwh`.

    Returns:
        `n` with `xy_param`, `wh_param`, `two_d_mask`, `xy_pred`, `wh_pred`,
        `xy_gt` and `wh_gt` set.
    """
    # ReLU is disabled so the raw outputs can be negative; per the original
    # author's note, negative values (through the inverse log) yield boxes
    # smaller than 64x64.
    head_args = dict(ks=3, pad=1, nout=2, no_relu=True)
    n.xy_param = conv_relu(n.encoder, **head_args)
    n.wh_param = conv_relu(n.encoder, **head_args)

    # Concatenate gobj with itself to obtain a two-channel mask.
    n.two_d_mask = L.Concat(n.gobj, n.gobj)

    # Multiply predictions and ground truth element-wise by the mask.
    to_mask = (
        ("xy_pred", n.xy_param),
        ("wh_pred", n.wh_param),
        ("xy_gt", n.gxy),
        ("wh_gt", n.gwh),
    )
    for top_name, blob in to_mask:
        setattr(n, top_name, L.Eltwise(blob, n.two_d_mask, operation=PROD))
    return n

    
def create_net(n, data, nfilters_list, num_classes, num_input_channels):
    """Assemble the full network: encoder, decoder and box-regression heads.

    Args:
        n: net spec object to populate.
        data: input blob the encoder is built on.
        nfilters_list: filter count of each encoder layer; its length sets
            the number of encoder (and decoder) layers. Left unmodified.
        num_classes: number of output channels of the class-score head.
        num_input_channels: channel count of the reconstruction output.

    Returns:
        `n` with the encoder, reconstruction and bbox tops attached.
    """
    # Snapshot the caller's list once; every stage below gets its own copy so
    # that any in-place edits a stage makes cannot leak back to the caller's
    # list (which would corrupt a second call with the same list).
    filters = list(nfilters_list)
    num_layers = len(filters)

    n, encoder_blobs = encode(n, data, list(filters), num_layers)
    n = decode(n, n.encoder, encoder_blobs, list(filters), num_layers,
               num_input_channels)
    return bbox_reg(n, num_classes)


def make_loss(n):
    """Attach the individual loss tops and their weighted sum to `n`.

    Args:
        n: net spec object; must already provide `class_scores`, `gcls`,
            `obj_scores`, `gobj`, `xy_pred`, `xy_gt`, `wh_pred`, `wh_gt`,
            `reconstruction` and `data`.

    Returns:
        `n` with `L_cls`, `L_obj`, `L_xy`, `L_wh`, `L_rec` and `final_loss` set.
    """
    # Weights of the xy and wh regression terms in the final sum.
    alpha = 5.
    beta = 7.
    # Assigned here but not referenced by any term below.
    gamma = 0.5
    lambda_ = 0.1

    # Classification and objectness: softmax losses that ignore label 0.
    n.L_cls = L.SoftmaxWithLoss(n.class_scores, n.gcls,
                                loss_param={'ignore_label': 0})
    n.L_obj = L.SoftmaxWithLoss(n.obj_scores, n.gobj,
                                loss_param={'ignore_label': 0})

    # Box regression: smooth-L1 between the masked predictions and targets.
    n.L_xy = L.SmoothL1Loss(n.xy_pred, n.xy_gt)
    n.L_wh = L.SmoothL1Loss(n.wh_pred, n.wh_gt)

    # Reconstruction: Euclidean loss against the input data.
    n.L_rec = L.EuclideanLoss(n.reconstruction, n.data)

    terms = [n.L_cls, n.L_obj, n.L_xy, n.L_wh, n.L_rec]
    weights = [1., 1., alpha, beta, 10**-5]
    n.final_loss = L.Eltwise(*terms, coeff=weights, operation=SUM)
    return n

importing Jupyter notebook from layer_util.ipynb
