#  import library

In [57]:
caffe_root = "../"
ml_root = 'multi-label/'

import sys 
import os

import numpy as np
import os.path as osp
import matplotlib.pyplot as plt
from matplotlib.font_manager import FontManager
import math
import codecs
import datetime
import random

import copy

% matplotlib inline
plt.rcParams['figure.figsize'] = (6, 6)


sys.path.append(caffe_root + 'python')
import caffe # If you get "No module named _caffe", either you have not built pycaffe or you have the wrong path.

from caffe import layers as L, params as P # Shortcuts to define the net prototxt.
sys.path.append(ml_root + "pycaffe/layers") # the datalayers we will use are in this directory.
sys.path.append(ml_root + "pycaffe") # the tools file is in this folder
sys.path.append(ml_root + "pycaffe/net_shrink") # the tools file is in this folder

from nets import *
from shrink_tools import *
from tools import *
from __future__ import print_function

caffe_root = "../"
# initialize caffe for gpu mode
# caffe.set_mode_gpu()
# caffe.set_device(0)

In [58]:
max_iter = 1000000
step = 500

In [59]:
def conv1_autoencoder(split, batch_sz):
    n = caffe.NetSpec()
    n.data, n.label = L.ImageData(image_data_param=dict(source=split, batch_size=batch_sz,new_height=160, new_width=160,is_color=False),ntop=2)
    n.silence = L.Silence(n.label, ntop=0)
    n.flatdata_i = L.Flatten(n.data)
    
    n.conv1 = conv(n.data, 5, 5, 64, pad=2)
    n.bn1 = L.BatchNorm(n.conv1, use_global_stats=False, in_place=True, param=[{"lr_mult":0},{"lr_mult":0},{"lr_mult":0}])
    n.scale1 = L.Scale(n.bn1, bias_term=True, in_place=True)    
    n.relu1 = L.ReLU(n.scale1, relu_param=dict(negative_slope=0.1))
    n.pool1 = max_pool(n.relu1, 2, stride=2)   
    
    n.code = conv(n.pool1, 5, 5, 64, pad=2)
    
    n.upsample1 = L.Deconvolution(n.code, param=dict(lr_mult=0 ,decay_mult = 0), convolution_param=dict(group=64, num_output=64, kernel_size=4, stride=2, pad=1, bias_term=False,weight_filler=dict(type="bilinear")))
    n.deconv1 = conv(n.upsample1, 5, 5, 1, pad=2)    
    n.debn1 = L.BatchNorm(n.deconv1, use_global_stats=False, in_place=True, param=[{"lr_mult":0},{"lr_mult":0},{"lr_mult":0}])
    n.descale1 = L.Scale(n.debn1, bias_term=True, in_place=True) 
    n.derelu1 = L.ReLU(n.descale1, relu_param=dict(negative_slope=0.1))
    
    n.flatdata_o = L.Flatten(n.derelu1)
    n.loss_s = L.SigmoidCrossEntropyLoss(n.flatdata_o, n.flatdata_i, loss_weight=1)
    n.loss_e = L.EuclideanLoss(n.flatdata_o, n.flatdata_i, loss_weight=0)

    return str(n.to_proto())

In [60]:
def conv2_autoencoder(split, batch_sz):
    n = caffe.NetSpec()
    n.data, n.label = L.ImageData(image_data_param=dict(source=split, batch_size=batch_sz,new_height=160, new_width=160,is_color=False),ntop=2)
    n.silence = L.Silence(n.label, ntop=0)
    n.flatdata_i = L.Flatten(n.data)
    
    n.conv1 = conv(n.data, 5, 5, 64, pad=2, no_back=True)
    n.bn1 = L.BatchNorm(n.conv1, use_global_stats=False, in_place=True, param=[{"lr_mult":0},{"lr_mult":0},{"lr_mult":0}])
    n.scale1 = L.Scale(n.bn1, bias_term=True, in_place=True)    
    n.relu1 = L.ReLU(n.scale1,relu_param=dict(negative_slope=0.1))
    n.pool1 = max_pool(n.relu1, 2, stride=2)  
    
    n.conv2 = conv(n.pool1, 5, 5, 128, pad=2)
    n.bn2 = L.BatchNorm(n.conv2, use_global_stats=False, in_place=True, param=[{"lr_mult":0},{"lr_mult":0},{"lr_mult":0}])
    n.scale2 = L.Scale(n.bn2, bias_term=True, in_place=True) 
    n.relu2 = L.ReLU(n.scale2, relu_param=dict(negative_slope=0.1))
    n.pool2 = max_pool(n.relu2, 2, stride=2)
    
    n.code = conv(n.pool2, 5, 5, 128, pad=2)

    n.upsample2 = L.Deconvolution(n.code, param=dict(lr_mult=0 ,decay_mult = 0), convolution_param=dict(group=128, num_output=128, kernel_size=4, stride=2, pad=1, bias_term=False,weight_filler=dict(type="bilinear")))
    n.deconv2 = conv(n.upsample2, 5, 5, 64, pad=2) 
    n.debn2 = L.BatchNorm(n.deconv2, use_global_stats=False, in_place=True, param=[{"lr_mult":0},{"lr_mult":0},{"lr_mult":0}])
    n.descale2 = L.Scale(n.debn2, bias_term=True, in_place=True) 
    n.derelu2 = L.ReLU(n.descale2, relu_param=dict(negative_slope=0.1))

    n.upsample1 = L.Deconvolution(n.derelu2, param=dict(lr_mult=0 ,decay_mult = 0), convolution_param=dict(group=64, num_output=64, kernel_size=4, stride=2, pad=1, bias_term=False,weight_filler=dict(type="bilinear")))
    n.deconv1 = conv(n.upsample1, 5, 5, 1, pad=2, no_back=True)    
    n.debn1 = L.BatchNorm(n.deconv1, use_global_stats=False, in_place=True, param=[{"lr_mult":0},{"lr_mult":0},{"lr_mult":0}])
    n.descale1 = L.Scale(n.debn1, bias_term=True, in_place=True) 
    n.derelu1 = L.ReLU(n.descale1, relu_param=dict(negative_slope=0.1))

    n.flatdata_o = L.Flatten(n.derelu1)
    n.loss_s = L.SigmoidCrossEntropyLoss(n.flatdata_o, n.flatdata_i, loss_weight=1)
    n.loss_e = L.EuclideanLoss(n.flatdata_o, n.flatdata_i, loss_weight=0)
    
    
    return str(n.to_proto())

In [61]:
def conv3_autoencoder(split, batch_sz):
    n = caffe.NetSpec()
    n.data, n.label = L.ImageData(image_data_param=dict(source=split, batch_size=batch_sz,new_height=160, new_width=160,is_color=False),ntop=2)
    n.silence = L.Silence(n.label, ntop=0)
    n.flatdata_i = L.Flatten(n.data)
    
    n.conv1 = conv(n.data, 5, 5, 64, pad=2, no_back=True)
    n.bn1 = L.BatchNorm(n.conv1, use_global_stats=False, in_place=True, param=[{"lr_mult":0},{"lr_mult":0},{"lr_mult":0}])
    n.scale1 = L.Scale(n.bn1, bias_term=True, in_place=True)    
    n.relu1 = L.ReLU(n.scale1, relu_param=dict(negative_slope=0.1))
    n.pool1 = max_pool(n.relu1, 2, stride=2)  
    
    n.conv2 = conv(n.pool1, 5, 5, 128, pad=2, no_back=True)
    n.bn2 = L.BatchNorm(n.conv2, use_global_stats=False, in_place=True, param=[{"lr_mult":0},{"lr_mult":0},{"lr_mult":0}])
    n.scale2 = L.Scale(n.bn2, bias_term=True, in_place=True) 
    n.relu2 = L.ReLU(n.scale2, relu_param=dict(negative_slope=0.1))
    n.pool2 = max_pool(n.relu2, 2, stride=2)
    
    n.conv3 = conv(n.pool2, 3, 3, 256, pad=1)
    n.bn3 = L.BatchNorm(n.conv3, use_global_stats=False, in_place=True, param=[{"lr_mult":0},{"lr_mult":0},{"lr_mult":0}])
    n.scale3 = L.Scale(n.bn3, bias_term=True, in_place=True)  
    n.relu3 = L.ReLU(n.scale3, relu_param=dict(negative_slope=0.1))    
    n.conv3_5 = conv(n.relu3, 3, 3, 512, pad=1)
    n.bn3_5 = L.BatchNorm(n.conv3_5, use_global_stats=False, in_place=True, param=[{"lr_mult":0},{"lr_mult":0},{"lr_mult":0}])
    n.scale3_5 = L.Scale(n.bn3_5, bias_term=True, in_place=True)  
    n.relu3_5 = L.ReLU(n.scale3_5, relu_param=dict(negative_slope=0.1))
    n.pool3_5 = max_pool(n.relu3_5, 2, stride=2)
    
    n.code = conv(n.pool3_5, 3, 3, 512, pad=1)


    n.upsample3_5 = L.Deconvolution(n.code, param=dict(lr_mult=0 ,decay_mult = 0), convolution_param=dict(group=512, num_output=512, kernel_size=4, stride=2, pad=1, bias_term=False,weight_filler=dict(type="bilinear")))
    n.deconv3_5 = conv(n.upsample3_5, 3, 3, 256, pad=1, no_back=True)    
    n.debn3_5 = L.BatchNorm(n.deconv3_5, use_global_stats=False, in_place=True, param=[{"lr_mult":0},{"lr_mult":0},{"lr_mult":0}])
    n.descale3_5 = L.Scale(n.debn3_5, bias_term=True, in_place=True) 
    n.derelu3_5 = L.ReLU(n.descale3_5, relu_param=dict(negative_slope=0.1))
                                  
    n.deconv3 = conv(n.derelu3_5, 5, 5,128, pad=2, no_back=True)    
    n.debn3 = L.BatchNorm(n.deconv3, use_global_stats=False, in_place=True, param=[{"lr_mult":0},{"lr_mult":0},{"lr_mult":0}])
    n.descale3 = L.Scale(n.debn3, bias_term=True, in_place=True) 
    n.derelu3 = L.ReLU(n.descale3, relu_param=dict(negative_slope=0.1))
    
    n.upsample2 = L.Deconvolution(n.derelu3, param=dict(lr_mult=0 ,decay_mult = 0), convolution_param=dict(group=128, num_output=128, kernel_size=4, stride=2, pad=1, bias_term=False,weight_filler=dict(type="bilinear")))
    n.deconv2 = conv(n.upsample2, 5, 5, 64, pad=2, no_back=True) 
    n.debn2 = L.BatchNorm(n.deconv2, use_global_stats=False, in_place=True, param=[{"lr_mult":0},{"lr_mult":0},{"lr_mult":0}])
    n.descale2 = L.Scale(n.debn2, bias_term=True, in_place=True) 
    n.derelu2 = L.ReLU(n.descale2, relu_param=dict(negative_slope=0.1))

    n.upsample1 = L.Deconvolution(n.derelu2, param=dict(lr_mult=0 ,decay_mult = 0), convolution_param=dict(group=64, num_output=64, kernel_size=4, stride=2, pad=1, bias_term=False,weight_filler=dict(type="bilinear")))
    n.deconv1 = conv(n.upsample1, 5, 5, 1, pad=2, no_back=True)    
    n.debn1 = L.BatchNorm(n.deconv1, use_global_stats=False, in_place=True, param=[{"lr_mult":0},{"lr_mult":0},{"lr_mult":0}])
    n.descale1 = L.Scale(n.debn1, bias_term=True, in_place=True) 
    n.derelu1 = L.ReLU(n.descale1, relu_param=dict(negative_slope=0.1))

    n.flatdata_o = L.Flatten(n.derelu1)
    n.loss_s = L.SigmoidCrossEntropyLoss(n.flatdata_o, n.flatdata_i, loss_weight=1)
    n.loss_e = L.EuclideanLoss(n.flatdata_o, n.flatdata_i, loss_weight=0)
    
    
    return str(n.to_proto())


In [62]:
def conv4_autoencoder(split, batch_sz):
    n = caffe.NetSpec()
    n.data, n.label = L.ImageData(image_data_param=dict(source=split, batch_size=batch_sz,new_height=160, new_width=160,is_color=False),ntop=2)
    n.silence = L.Silence(n.label, ntop=0)
    n.flatdata_i = L.Flatten(n.data)
    
    n.conv1 = conv(n.data, 5, 5, 64, pad=2, no_back=True)
    n.bn1 = L.BatchNorm(n.conv1, use_global_stats=False, in_place=True, param=[{"lr_mult":0},{"lr_mult":0},{"lr_mult":0}])
    n.scale1 = L.Scale(n.bn1, bias_term=True, in_place=True)    
    n.relu1 = L.ReLU(n.scale1, relu_param=dict(negative_slope=0.1))
    n.pool1 = max_pool(n.relu1, 2, stride=2)  
    
    n.conv2 = conv(n.pool1, 5, 5, 128, pad=2, no_back=True)
    n.bn2 = L.BatchNorm(n.conv2, use_global_stats=False, in_place=True, param=[{"lr_mult":0},{"lr_mult":0},{"lr_mult":0}])
    n.scale2 = L.Scale(n.bn2, bias_term=True, in_place=True) 
    n.relu2 = L.ReLU(n.scale2, relu_param=dict(negative_slope=0.1))
    n.pool2 = max_pool(n.relu2, 2, stride=2)
    
    n.conv3 = conv(n.pool2, 3, 3, 256, pad=1, no_back=True)
    n.bn3 = L.BatchNorm(n.conv3, use_global_stats=False, in_place=True, param=[{"lr_mult":0},{"lr_mult":0},{"lr_mult":0}])
    n.scale3 = L.Scale(n.bn3, bias_term=True, in_place=True)  
    n.relu3 = L.ReLU(n.scale3, relu_param=dict(negative_slope=0.1))    
    n.conv3_5 = conv(n.relu3, 3, 3, 512, pad=1, no_back=True)
    n.bn3_5 = L.BatchNorm(n.conv3_5, use_global_stats=False, in_place=True, param=[{"lr_mult":0},{"lr_mult":0},{"lr_mult":0}])
    n.scale3_5 = L.Scale(n.bn3_5, bias_term=True, in_place=True)  
    n.relu3_5 = L.ReLU(n.scale3_5, relu_param=dict(negative_slope=0.1))
    n.pool3_5 = max_pool(n.relu3_5, 2, stride=2)
    
    n.conv4 = conv(n.pool3_5, 3, 3, 512, pad=1)
    n.bn4 = L.BatchNorm(n.conv4, use_global_stats=False, in_place=True, param=[{"lr_mult":0},{"lr_mult":0},{"lr_mult":0}])
    n.scale4 = L.Scale(n.bn4, bias_term=True, in_place=True)  
    n.relu4 = L.ReLU(n.scale4, relu_param=dict(negative_slope=0.1))
                                  
    n.code = conv(n.relu4, 3, 3, 512, pad=1)
    
    n.deconv4 = conv(n.code, 3, 3, 512, pad=1)    
    n.debn4 = L.BatchNorm(n.deconv4, use_global_stats=False, in_place=True, param=[{"lr_mult":0},{"lr_mult":0},{"lr_mult":0}])
    n.descale4 = L.Scale(n.debn4, bias_term=True, in_place=True) 
    n.derelu4 = L.ReLU(n.descale4, relu_param=dict(negative_slope=0.1))                                      

    n.upsample3_5 = L.Deconvolution(n.derelu4, param=dict(lr_mult=0 ,decay_mult = 0), convolution_param=dict(group=512, num_output=512, kernel_size=4, stride=2, pad=1, bias_term=False,weight_filler=dict(type="bilinear")))
    n.deconv3_5 = conv(n.upsample3_5, 3, 3, 256, pad=1, no_back=True)    
    n.debn3_5 = L.BatchNorm(n.deconv3_5, use_global_stats=False, in_place=True, param=[{"lr_mult":0},{"lr_mult":0},{"lr_mult":0}])
    n.descale3_5 = L.Scale(n.debn3_5, bias_term=True, in_place=True) 
    n.derelu3_5 = L.ReLU(n.descale3_5, relu_param=dict(negative_slope=0.1))
                                  
    n.deconv3 = conv(n.derelu3_5, 5, 5,128, pad=2, no_back=True)    
    n.debn3 = L.BatchNorm(n.deconv3, use_global_stats=False, in_place=True, param=[{"lr_mult":0},{"lr_mult":0},{"lr_mult":0}])
    n.descale3 = L.Scale(n.debn3, bias_term=True, in_place=True) 
    n.derelu3 = L.ReLU(n.descale3, relu_param=dict(negative_slope=0.1))
    
    n.upsample2 = L.Deconvolution(n.derelu3, param=dict(lr_mult=0 ,decay_mult = 0), convolution_param=dict(group=128, num_output=128, kernel_size=4, stride=2, pad=1, bias_term=False,weight_filler=dict(type="bilinear")))
    n.deconv2 = conv(n.upsample2, 5, 5, 64, pad=2, no_back=True) 
    n.debn2 = L.BatchNorm(n.deconv2, use_global_stats=False, in_place=True, param=[{"lr_mult":0},{"lr_mult":0},{"lr_mult":0}])
    n.descale2 = L.Scale(n.debn2, bias_term=True, in_place=True) 
    n.derelu2 = L.ReLU(n.descale2, relu_param=dict(negative_slope=0.1))

    n.upsample1 = L.Deconvolution(n.derelu2, param=dict(lr_mult=0 ,decay_mult = 0), convolution_param=dict(group=64, num_output=64, kernel_size=4, stride=2, pad=1, bias_term=False,weight_filler=dict(type="bilinear")))
    n.deconv1 = conv(n.upsample1, 5, 5, 1, pad=2, no_back=True)    
    n.debn1 = L.BatchNorm(n.deconv1, use_global_stats=False, in_place=True, param=[{"lr_mult":0},{"lr_mult":0},{"lr_mult":0}])
    n.descale1 = L.Scale(n.debn1, bias_term=True, in_place=True) 
    n.derelu1 = L.ReLU(n.descale1, relu_param=dict(negative_slope=0.1))

    n.flatdata_o = L.Flatten(n.derelu1)
    n.loss_s = L.SigmoidCrossEntropyLoss(n.flatdata_o, n.flatdata_i, loss_weight=1)
    n.loss_e = L.EuclideanLoss(n.flatdata_o, n.flatdata_i, loss_weight=0)
    return str(n.to_proto())


In [63]:
def vgg_fc1_freeze(split, batch_sz):
    n = caffe.NetSpec()
    n.data, n.label = L.ImageData(image_data_param=dict(shuffle=True,source=split, batch_size=batch_sz,new_height=32, new_width=100,is_color=False),ntop=2)
    n.silence = L.Silence(n.label, ntop=0)
    
    n.conv1 = conv(n.data, 5, 5, 64, pad=2, no_back=True)
    n.bn1 = L.BatchNorm(n.conv1, use_global_stats=False, in_place=True, param=[{"lr_mult":0},{"lr_mult":0},{"lr_mult":0}])
    n.scale1 = L.Scale(n.bn1, bias_term=True, in_place=True)    
    n.relu1 = L.ReLU(n.scale1)
    n.pool1 = max_pool(n.relu1, 2, stride=2)  
    
    n.conv2 = conv(n.pool1, 5, 5, 128, pad=2, no_back=True)
    n.bn2 = L.BatchNorm(n.conv2, use_global_stats=False, in_place=True, param=[{"lr_mult":0},{"lr_mult":0},{"lr_mult":0}])
    n.scale2 = L.Scale(n.bn2, bias_term=True, in_place=True) 
    n.relu2 = L.ReLU(n.scale2)
    n.pool2 = max_pool(n.relu2, 2, stride=2)
    
    n.conv3 = conv(n.pool2, 3, 3, 256, pad=1, no_back=True)
    n.bn3 = L.BatchNorm(n.conv3, use_global_stats=False, in_place=True, param=[{"lr_mult":0},{"lr_mult":0},{"lr_mult":0}])
    n.scale3 = L.Scale(n.bn3, bias_term=True, in_place=True)  
    n.relu3 = L.ReLU(n.scale3)    
    n.conv3_5 = conv(n.relu3, 3, 3, 512, pad=1, no_back=True)
    n.bn3_5 = L.BatchNorm(n.conv3_5, use_global_stats=False, in_place=True, param=[{"lr_mult":0},{"lr_mult":0},{"lr_mult":0}])
    n.scale3_5 = L.Scale(n.bn3_5, bias_term=True, in_place=True)  
    n.relu3_5 = L.ReLU(n.scale3_5)
    n.pool3_5 = max_pool(n.relu3_5, 2, stride=2)
    
    n.conv4 = conv(n.pool3_5, 3, 3, 512, pad=1, no_back=True)
    n.bn4 = L.BatchNorm(n.conv4, use_global_stats=False, in_place=True, param=[{"lr_mult":0},{"lr_mult":0},{"lr_mult":0}])
    n.scale4 = L.Scale(n.bn4, bias_term=True, in_place=True)  
    n.relu4 = L.ReLU(n.scale4)
    
    n.fc1 = conv(n.relu4, 13, 4, 4096)
    n.bn5 = L.BatchNorm(n.fc1, use_global_stats=False, in_place=True, param=[{"lr_mult":0},{"lr_mult":0},{"lr_mult":0}])
    n.scale5 = L.Scale(n.bn5, bias_term=True, in_place=True)  
    n.relu5 = L.ReLU(n.scale5)
    n.drop1 = L.Dropout(n.relu5, in_place=True)
        
    n.fc_class = conv(n.drop1, 1, 1, 88172)
    n.bn7 = L.BatchNorm(n.fc_class, use_global_stats=False, in_place=True, param=[{"lr_mult":0},{"lr_mult":0},{"lr_mult":0}])
    n.scale7 = L.Scale(n.bn7, bias_term=True, in_place=True)  
    n.relu7 = L.ReLU(n.scale7) 

    n.loss = L.SoftmaxWithLoss(n.relu7, n.label, loss_weight=1)
    return str(n.to_proto())


In [64]:
def vgg_fc2_freeze(split, batch_sz):
    n = caffe.NetSpec()
    n.data, n.label = L.ImageData(image_data_param=dict(shuffle=True,source=split, batch_size=batch_sz,new_height=32, new_width=100,is_color=False),ntop=2)
    n.silence = L.Silence(n.label, ntop=0)
    
    n.conv1 = conv(n.data, 5, 5, 64, pad=2, no_back=True)
    n.bn1 = L.BatchNorm(n.conv1, use_global_stats=False, in_place=True, param=[{"lr_mult":0},{"lr_mult":0},{"lr_mult":0}])
    n.scale1 = L.Scale(n.bn1, bias_term=True, in_place=True)    
    n.relu1 = L.ReLU(n.scale1)
    n.pool1 = max_pool(n.relu1, 2, stride=2)  
    
    n.conv2 = conv(n.pool1, 5, 5, 128, pad=2, no_back=True)
    n.bn2 = L.BatchNorm(n.conv2, use_global_stats=False, in_place=True, param=[{"lr_mult":0},{"lr_mult":0},{"lr_mult":0}])
    n.scale2 = L.Scale(n.bn2, bias_term=True, in_place=True) 
    n.relu2 = L.ReLU(n.scale2)
    n.pool2 = max_pool(n.relu2, 2, stride=2)
    
    n.conv3 = conv(n.pool2, 3, 3, 256, pad=1, no_back=True)
    n.bn3 = L.BatchNorm(n.conv3, use_global_stats=False, in_place=True, param=[{"lr_mult":0},{"lr_mult":0},{"lr_mult":0}])
    n.scale3 = L.Scale(n.bn3, bias_term=True, in_place=True)  
    n.relu3 = L.ReLU(n.scale3)    
    n.conv3_5 = conv(n.relu3, 3, 3, 512, pad=1, no_back=True)
    n.bn3_5 = L.BatchNorm(n.conv3_5, use_global_stats=False, in_place=True, param=[{"lr_mult":0},{"lr_mult":0},{"lr_mult":0}])
    n.scale3_5 = L.Scale(n.bn3_5, bias_term=True, in_place=True)  
    n.relu3_5 = L.ReLU(n.scale3_5)
    n.pool3_5 = max_pool(n.relu3_5, 2, stride=2)
    
    n.conv4 = conv(n.pool3_5, 3, 3, 512, pad=1, no_back=True)
    n.bn4 = L.BatchNorm(n.conv4, use_global_stats=False, in_place=True, param=[{"lr_mult":0},{"lr_mult":0},{"lr_mult":0}])
    n.scale4 = L.Scale(n.bn4, bias_term=True, in_place=True)  
    n.relu4 = L.ReLU(n.scale4)
    
    n.fc1 = conv(n.relu4, 13, 4, 4096)
    n.bn5 = L.BatchNorm(n.fc1, use_global_stats=False, in_place=True, param=[{"lr_mult":0},{"lr_mult":0},{"lr_mult":0}])
    n.scale5 = L.Scale(n.bn5, bias_term=True, in_place=True)  
    n.relu5 = L.ReLU(n.scale5)
    n.drop1 = L.Dropout(n.relu5, in_place=True)
    
    n.fc2 = conv(n.drop1, 13, 4, 4096)
    n.bn6 = L.BatchNorm(n.fc2, use_global_stats=False, in_place=True, param=[{"lr_mult":0},{"lr_mult":0},{"lr_mult":0}])
    n.scale6 = L.Scale(n.bn6, bias_term=True, in_place=True)  
    n.relu6 = L.ReLU(n.scale6)
    n.drop2 = L.Dropout(n.relu6, in_place=True)
        
    n.fc_class = conv(n.drop2, 1, 1, 88172)
    n.bn7 = L.BatchNorm(n.fc_class, use_global_stats=False, in_place=True, param=[{"lr_mult":0},{"lr_mult":0},{"lr_mult":0}])
    n.scale7 = L.Scale(n.bn7, bias_term=True, in_place=True)  
    n.relu7 = L.ReLU(n.scale7) 

    n.loss = L.SoftmaxWithLoss(n.relu7, n.label, loss_weight=1)
    return str(n.to_proto())


In [65]:
def vgg(split, batch_sz):
    n = caffe.NetSpec()
    n.data, n.label = L.ImageData(image_data_param=dict(shuffle=True,source=split, batch_size=batch_sz,new_height=32, new_width=100,is_color=False),ntop=2)
    n.silence = L.Silence(n.label, ntop=0)
    
    n.conv1 = conv(n.data, 5, 5, 64, pad=2)
    n.bn1 = L.BatchNorm(n.conv1, use_global_stats=False, in_place=True, param=[{"lr_mult":0},{"lr_mult":0},{"lr_mult":0}])
    n.scale1 = L.Scale(n.bn1, bias_term=True, in_place=True)    
    n.relu1 = L.ReLU(n.scale1)
    n.pool1 = max_pool(n.relu1, 2, stride=2)  
    
    n.conv2 = conv(n.pool1, 5, 5, 128, pad=2)
    n.bn2 = L.BatchNorm(n.conv2, use_global_stats=False, in_place=True, param=[{"lr_mult":0},{"lr_mult":0},{"lr_mult":0}])
    n.scale2 = L.Scale(n.bn2, bias_term=True, in_place=True) 
    n.relu2 = L.ReLU(n.scale2)
    n.pool2 = max_pool(n.relu2, 2, stride=2)
    
    n.conv3 = conv(n.pool2, 3, 3, 256, pad=1)
    n.bn3 = L.BatchNorm(n.conv3, use_global_stats=False, in_place=True, param=[{"lr_mult":0},{"lr_mult":0},{"lr_mult":0}])
    n.scale3 = L.Scale(n.bn3, bias_term=True, in_place=True)  
    n.relu3 = L.ReLU(n.scale3)    
    n.conv3_5 = conv(n.relu3, 3, 3, 512, pad=1)
    n.bn3_5 = L.BatchNorm(n.conv3_5, use_global_stats=False, in_place=True, param=[{"lr_mult":0},{"lr_mult":0},{"lr_mult":0}])
    n.scale3_5 = L.Scale(n.bn3_5, bias_term=True, in_place=True)  
    n.relu3_5 = L.ReLU(n.scale3_5)
    n.pool3_5 = max_pool(n.relu3_5, 2, stride=2)
    
    n.conv4 = conv(n.pool3_5, 3, 3, 512, pad=1)
    n.bn4 = L.BatchNorm(n.conv4, use_global_stats=False, in_place=True, param=[{"lr_mult":0},{"lr_mult":0},{"lr_mult":0}])
    n.scale4 = L.Scale(n.bn4, bias_term=True, in_place=True)  
    n.relu4 = L.ReLU(n.scale4)
    
    n.fc1 = conv(n.relu4, 13, 4, 4096)
    n.bn5 = L.BatchNorm(n.fc1, use_global_stats=False, in_place=True, param=[{"lr_mult":0},{"lr_mult":0},{"lr_mult":0}])
    n.scale5 = L.Scale(n.bn5, bias_term=True, in_place=True)  
    n.relu5 = L.ReLU(n.scale5)
    n.drop1 = L.Dropout(n.relu5, in_place=True)
    
    n.fc2 = conv(n.drop1, 1, 1, 4096)
    n.bn6 = L.BatchNorm(n.fc2, use_global_stats=False, in_place=True, param=[{"lr_mult":0},{"lr_mult":0},{"lr_mult":0}])
    n.scale6 = L.Scale(n.bn6, bias_term=True, in_place=True)  
    n.relu6 = L.ReLU(n.scale6)
    n.drop2 = L.Dropout(n.relu6, in_place=True)
    
    n.fc_class = conv(n.drop2, 1, 1, 88172)
    n.bn7 = L.BatchNorm(n.fc_class, use_global_stats=False, in_place=True, param=[{"lr_mult":0},{"lr_mult":0},{"lr_mult":0}])
    n.scale7 = L.Scale(n.bn7, bias_term=True, in_place=True)  
    n.relu7 = L.ReLU(n.scale7) 

    n.loss = L.SoftmaxWithLoss(n.relu7, n.label, loss_weight=1)
    return str(n.to_proto())


In [66]:
def create_autoencoder_file(net_l, net_f):
    with open('train_%s.prototxt' % (net_l,), 'w') as f:
        print(net_f('../vgg_data/train_autoencoder.txt', 4), file=f)
    with open('val_%s.prototxt' % (net_l,), 'w') as f:
        print(net_f('../vgg_data/train_autoencoder.txt', 4), file=f)
        
    solverprototxt = tools.CaffeSolver(trainnet_prototxt_path = 'train_%s.prototxt' % (net_l,), testnet_prototxt_path = 'val_%s.prototxt' % (net_l,))
    solverprototxt.sp['display'] = "5"
    solverprototxt.sp['base_lr'] = "1e-9"
    solverprototxt.sp['snapshot'] = "200"
    solverprototxt.sp['gamma'] = "1"
    solverprototxt.sp['test_interval'] = "2000"
    solverprototxt.sp['snapshot_prefix'] = "\"models/snapshot\""
    solverprototxt.sp['lr_policy'] = "\"step\""
    solverprototxt.sp['stepsize'] = str(step)
    solverprototxt.sp['max_iter'] = str(max_iter)

    solverprototxt.write('solver_%s.prototxt'%(net_l,))

In [67]:
def autoencoder_surgery(prototxt_s, prototxt_d, model_s, model_d):
    net_o = caffe.Net(prototxt_s, model_s, caffe.TEST)
    net_d = caffe.Net(prototxt_d, caffe.TEST)
    params_o = net_o.params.keys()
    for p in params_o:
        print(p)
        if "conv" in p:
            net_d.params[p][0].data[...] = net_o.params[p][0].data[...]  
            net_d.params[p][1].data[...] = net_o.params[p][1].data[...]  
        elif "upsample" in p:
            net_d.params[p][0].data[...] = net_o.params[p][0].data[...]  

    net_d.save(model_d)

In [68]:
import scipy
import scipy.misc

def autoencoder_transition(prototxt_s, prototxt_d, model_s, model_d):
    net_o = caffe.Net(prototxt_s, model_s, caffe.TEST)
    net_d = caffe.Net(prototxt_d, caffe.TEST)
    params_o = net_o.params.keys()
    for p in params_o:
        print(p)
        if p.startswith("conv") is True:
            net_d.params[p][0].data[...] = net_o.params[p][0].data[...]  
            net_d.params[p][1].data[...] = net_o.params[p][1].data[...]  
        
        elif p.startswith("fc5") is True:
            for i in range(4096):
                for j in range(512):
                    data = net_o.params[p][0].data[i,j,...]
                    data = scipy.misc.imresize(data, (4,13))
                    net_d.params['fc1'][0].data[i,j,...] = data
            net_d.params['fc1'][1].data[...] = net_o.params[p][1].data[...] 

    net_d.save(model_d)

In [69]:
def start_autoencoder():
    net_layers = ['conv1', 'conv2', 'conv3',  'conv4', 'vgg_fc1_freeze', 'vgg_fc2_freeze', 'vgg']
    net_func = [conv1_autoencoder, conv2_autoencoder, conv3_autoencoder, conv4_autoencoder, vgg_fc1_freeze, vgg_fc2_freeze, vgg]
    if os.path.isdir('models') is False:
        os.makedirs('models')

    for e, (layer, func) in enumerate(zip(net_layers, net_func)):
        if os.path.isfile("train_%s.prototxt" % (layer,)) is True:
            os.remove("train_%s.prototxt" % (layer,))
        if os.path.isfile("val_%s.prototxt" % (layer,)) is True:
            os.remove("val_%s.prototxt" % (layer,))
        create_autoencoder_file(layer, func)
        
        solver = caffe.SGDSolver('solver_%s.protoxt'%(l,))
        for b in range(max_iter//200):
            solver.step(b)
        
        solver.net.save("%s.caffemodel"%(l,))
        autoencoder_transition("train_%s.prototxt" % (layer,), "train_%s.prototxt" % (net_layers[e+1],), )

In [None]:
start_autoencoder()

In [None]:
autoencoder_transition("train_conv4.prototxt", "train_vgg_conv4.prototxt", "models/conv4.caffemodel", "models/vgg_conv4_pre.caffemodel")

In [42]:
start_autoencoder()
autoencoder_surgery("train_conv4.prototxt", "train_fc1.prototxt", "models/conv4.caffemodel", "models/fc1_pre.caffemodel")

conv1
bn1
scale1
conv2
bn2
scale2
conv3
bn3
scale3
conv3_5
bn3_5
scale3_5
conv4
bn4
scale4
code
deconv4


ValueError: could not broadcast input array from shape (512,512,3,3) into shape (512,4096,3,3)

In [None]:
autoencoder_transition("train_conv4.prototxt", "train_vgg.prototxt", "models/conv4.caffemodel", "models/vgg_pre.caffemodel")

In [13]:
start_autoencoder()
num = 3
autoencoder_surgery("train_conv"+str(num)+".prototxt", "train_conv"+str(num+1)+".prototxt", "models/conv"+str(num)+".caffemodel", "models/conv"+str(num+1)+"_pre.caffemodel")

conv1
bn1
scale1
conv2
bn2
scale2
conv3
bn3
scale3
conv3_5
bn3_5
scale3_5
code
upsample3_5
deconv3_5
debn3_5
descale3_5
deconv3
debn3
descale3
upsample2
deconv2
debn2
descale2
upsample1
deconv1
debn1
descale1


In [None]:
solver = caffe.SGDSolver('solver.prototxt')
iteration = max_iter // step + 1
for i in range(iteration):
    print(i)
    solver.step(step)
solver.net.save('conv1.caffemodel')