# Adapt pascal_voc network parameters for dstl case
In this notebook the pretrained Pascal_VOC model is adapted so that it can be used in transfer learning for the dstl case. This involves discarding the parameters involved in predicting classes 11-21 in the original network, as we have only 10 classes (+1 background).

In [None]:
import os
import sys
import pprint

import numpy as np
import matplotlib
matplotlib.use('Agg')
%matplotlib inline
from matplotlib import pyplot as plt

In [None]:
def add_path(path):
    """Put ``path`` at the front of ``sys.path`` unless it is already listed."""
    if path in sys.path:
        return
    sys.path.insert(0, path)

# Point sys.path at the py-faster-rcnn build of the caffe Python bindings and
# at the py-faster-rcnn `lib` directory. Both calls must run before the
# imports below, which rely on these locations being importable.
add_path('/home/ubuntu/src/py-faster-rcnn/caffe-fast-rcnn/python')
add_path('/home/ubuntu/src/py-faster-rcnn/lib')
import caffe
from datasets.factory import get_imdb, list_imdbs
from fast_rcnn.test import test_net
from fast_rcnn.train import get_training_roidb, train_net
from fast_rcnn.config import cfg, cfg_from_file, cfg_from_list, get_output_dir
# Report which caffe build was actually picked up.
print "Loaded caffe version {:s} from {:s}.".format(caffe.__version__, caffe.__path__[0])

Load the original model with weights and the new model without weights.

In [None]:
# Class ids to train on; set to None to train on all classes.
classes = (7, 8, 9)
arch = 'VGG16'
if classes is None:
    appendix = ''  # suffix codifying the selected classes; empty when all are used
    infix = '.'    # sub-directory (for directories) below the architecture folder
else:
    appendix = '_' + ''.join('{:d}'.format(c) for c in classes)
    infix = '{:d}_classes'.format(len(classes))

# Single-argument form: prints the same text on Python 2 and Python 3.
print('{:s} {:s}'.format(infix, appendix))

In [None]:
# Original Pascal VOC Faster R-CNN network for the chosen architecture:
# test-time network definition plus the pretrained weights file.
orig_test_file = '/home/ubuntu/src/py-faster-rcnn/models/pascal_voc/{:s}/faster_rcnn_end2end/test.prototxt'.format(arch)
orig_model = '/home/ubuntu/src/py-faster-rcnn/data/faster_rcnn_models/{:s}_faster_rcnn_final.caffemodel'.format(arch)
# Instantiate the network in TEST phase with the pretrained weights loaded.
orig_net = caffe.Net(orig_test_file, orig_model, caffe.TEST)

In [None]:
# Network definition of the new (dstl) model, selected by `arch` and `infix`.
test_file = '/home/ubuntu/dstl/models/{:s}/{:s}/test.prototxt'.format(arch, infix)
# No weights file is passed here: the new model is created without weights.
net = caffe.Net(test_file, caffe.TEST)

Copy the layer parameters from the original network to the new, discarding any parameters that do not 'fit' in the new network.

In [None]:
def copy_data(orig_data, data):
    """Fill ``data`` in place with the leading corner of ``orig_data``.

    Along every axis of ``data`` the first ``data.shape[axis]`` entries of
    ``orig_data`` are kept; whatever lies beyond them is discarded.

    Args:
        orig_data: source array (e.g. a parameter blob of the original net).
        data: destination array; its contents are overwritten in place.

    Returns:
        None.
    """
    # NumPy needs a *tuple* of slices for multidimensional indexing; a list
    # of slices is deprecated and rejected by recent NumPy releases.
    corner = tuple(slice(0, extent) for extent in data.shape)
    data[...] = orig_data[corner]

In [None]:
# Copy every parameter blob of the original network into the matching blob of
# the new network, cropping blobs that are larger in the original.
for layer_name in net._layer_names:
    # Layers without an entry in either `params` mapping have nothing to
    # copy; only these lookups can raise KeyError, so only they are guarded.
    try:
        orig_blobs = orig_net.params[layer_name]
        new_blobs = net.params[layer_name]
    except KeyError:
        continue

    # Track subsampling across *all* blobs of this layer instead of relying
    # on loop variables that leak out of the loop (which only reflected the
    # last blob and could be stale from a previous layer).
    subsampled = False
    for orig_data, data in zip(orig_blobs, new_blobs):
        copy_data(orig_data.data, data.data)
        print(layer_name)
        if data.data.size < orig_data.data.size:
            subsampled = True
    if subsampled:
        print("Subsampled parameters for layer {:s}".format(layer_name))

In [None]:
# Summed absolute difference between the first parameter blob of
# `rpn_conv/3x3` in the original and in the new network; as the last
# expression of the cell, its value is what the notebook displays.
abs(orig_net.params['rpn_conv/3x3'][0].data - net.params['rpn_conv/3x3'][0].data).sum()

Check whether the new and original networks are indeed identical up to the discarded weights. Unfortunately, this does not take into account errors loaded into `orig_net` due to the absence of layers in the caffemodel weights file.

In [None]:
def show_diff(net0, net1):
    """Print, per layer, how far the parameters of two networks are apart.

    For every layer name of ``net0`` that has an entry in the ``params``
    mapping of both networks, blob 0 (weights) and blob 1 (biases) of
    ``net0`` are cropped to the shapes used by ``net1`` and one line is
    printed::

        <layer name> <std of the weight difference> <sum of |bias difference|>

    Layers missing from either ``params`` mapping are skipped silently.

    Args:
        net0: network providing the (possibly larger) reference parameters.
        net1: network whose parameter shapes define the compared region.

    Returns:
        None.
    """
    for layer_name in net0._layer_names:
        # Only the lookups can raise KeyError, so only they are guarded.
        try:
            blobs0 = net0.params[layer_name]
            blobs1 = net1.params[layer_name]
        except KeyError:
            continue

        weights0, biases0 = blobs0[0].data, blobs0[1].data
        weights1, biases1 = blobs1[0].data, blobs1[1].data

        # Index with a *tuple* of slices: a list of slices is deprecated and
        # rejected by recent NumPy releases.
        weights0_trunc = weights0[tuple(slice(0, n) for n in weights1.shape)]
        biases0_trunc = biases0[tuple(slice(0, n) for n in biases1.shape)]

        # NOTE: the weight figure is a standard deviation, so a constant
        # offset between the two weight arrays would still show up as 0.
        diff_weights = (weights0_trunc - weights1).std()
        diff_biases = abs(biases0_trunc - biases1).sum()

        # %s applies str() to each value, matching what a space-separated
        # print statement would emit, and works on Python 2 and 3 alike.
        print('%s %s %s' % (layer_name, diff_weights, diff_biases))
        
# Compare the original network (reference) with the adapted one; the original
# parameters are cropped to the adapted network's shapes before comparing.
show_diff(orig_net, net)

Save the output

In [None]:
# Destination of the adapted weights, below the model's `trained` directory.
filename = '/home/ubuntu/dstl/models/{:s}/{:s}/trained/adapted_from_{:s}_faster_rcnn_final.caffemodel'.format(arch, infix, arch)
# The weights file cannot be written into a directory that does not exist,
# so make sure the target directory is there before saving.
output_dir = os.path.dirname(filename)
if not os.path.isdir(output_dir):
    os.makedirs(output_dir)
net.save(filename)
print('Saved weights file at {:s}'.format(filename))

It appears that one layer, `rpn_conv/3x3`, is not properly initialized from the weights file, so let's inspect the weights file and the network a little more...

This seems to be an error on the py_faster_rcnn side.

In [None]:
# Instantiate the network twice from the very same definition and weights
# file, then compare the first parameter blob of every layer between the two
# instances; a non-zero figure means the two loads did not yield the same
# values for that layer.
# NOTE(review): this rebinds `net`, which until here referred to the adapted
# network.
model = orig_model
net = caffe.Net(orig_test_file, model, caffe.TEST)
net_redo = caffe.Net(orig_test_file, model, caffe.TEST)
# Alternative, kept for reference: run the same check on the adapted model.
# model = '/home/ubuntu/dstl/models/{:s}/trained/adapted_from_{:s}_faster_rcnn_final.caffemodel'.format(arch, arch)
# net = caffe.Net(test_file, model, caffe.TEST)
# net_redo = caffe.Net(test_file, model, caffe.TEST)

for layer_name in net._layer_names:
    # Layers without an entry in `params` are skipped.
    if layer_name not in net.params or layer_name not in net_redo.params:
        continue
    diff = abs(
        net.params[layer_name][0].data - net_redo.params[layer_name][0].data
    ).sum()
    print('%s %s' % (layer_name, diff))

In [None]:
from caffe.proto import caffe_pb2

# Parse the weights file directly into a NetParameter message so that its
# contents can be inspected independently of the instantiated network.
net_param = caffe_pb2.NetParameter()
# The caffemodel is a serialized (binary) message: read it in binary mode and
# let the `with` block close the file handle.
with open(orig_model, 'rb') as model_file:
    net_str = model_file.read()
net_param.ParseFromString(net_str)

In [None]:
# Cross-check the layer names stored in the weights file against the layer
# names of the instantiated network. The name collections are built once up
# front instead of being rebuilt for every membership test.
net_layer_names = set(orig_net._layer_names)
weights_layer_names = set(layer.name for layer in net_param.layer)

# Pass 1: layers in the weights file that the network does not define
# (matches are not reported).
for layer_from_weights in net_param.layer:
    layer_name = layer_from_weights.name
    if layer_name not in net_layer_names:
        print('%s not found' % layer_name)
print("===")
# Pass 2: layers of the network and whether the weights file contains them.
for layer_name in orig_net._layer_names:
    if layer_name in weights_layer_names:
        print('%s found' % layer_name)
    else:
        print('%s not found' % layer_name)
        # A layer that owns parameters but is missing from the weights file
        # cannot have been initialized from that file.
        if layer_name in orig_net.params:
            print('%s should have data!!!' % layer_name)

In [None]:
# Print the name of every layer stored in the weights file, in file order.
for layer_from_weights in net_param.layer:
    print(layer_from_weights.name)

In [None]:
# Same inspection for the VGG16 Pascal VOC model, with its paths spelled out
# explicitly rather than derived from `arch`.
VGG_16_test_file = '/home/ubuntu/src/py-faster-rcnn/models/pascal_voc/VGG16/faster_rcnn_end2end/test.prototxt'
VGG_16_model = '/home/ubuntu/src/py-faster-rcnn/data/faster_rcnn_models/VGG16_faster_rcnn_final.caffemodel'
# Instantiate the network in TEST phase with the weights file loaded.
VGG_16_net = caffe.Net(VGG_16_test_file, VGG_16_model, caffe.TEST)

In [None]:
# Parse the VGG16 weights file into a NetParameter message for inspection.
VGG_16_net_param = caffe_pb2.NetParameter()
# The caffemodel is a serialized (binary) message: read it in binary mode and
# let the `with` block close the file handle.
with open(VGG_16_model, 'rb') as model_file:
    VGG_16_net_str = model_file.read()
VGG_16_net_param.ParseFromString(VGG_16_net_str)

In [None]:
# Report, for every layer of the VGG16 network, whether the weights file
# contains a layer of that name. The set of stored names is built once
# instead of once per network layer.
VGG_16_weights_layer_names = set(layer.name for layer in VGG_16_net_param.layer)
for layer_name in VGG_16_net._layer_names:
    if layer_name in VGG_16_weights_layer_names:
        print('%s found' % layer_name)
    else:
        print('%s not found' % layer_name)
        # A layer that owns parameters but is missing from the weights file
        # cannot have been initialized from that file.
        if layer_name in VGG_16_net.params:
            print('%s should have data!!!' % layer_name)