# Adapt pascal_voc network parameters for dstl case
In this notebook the pretrained Pascal_VOC model is adapted so that it can be used in transfer learning for the dstl case. This involves discarding the parameters involved in predicting classes 11-21 in the original network, as we have only 10 classes (+1 background).

In [None]:
import os
import sys
import pprint

import numpy as np
import matplotlib
matplotlib.use('Agg')
%matplotlib inline
from matplotlib import pyplot as plt

In [None]:
def add_path(path):
    """Put `path` at the front of ``sys.path`` unless it is already listed.

    Calling this repeatedly with the same path is harmless: an entry that
    is already present is neither duplicated nor moved.
    """
    if path in sys.path:
        return
    sys.path.insert(0, path)

# Point Python at the py-faster-rcnn checkout: its bundled caffe Python
# bindings and its lib/ directory. add_path() inserts at the front of
# sys.path, so these locations are searched before any other installed caffe.
add_path('/home/ubuntu/src/py-faster-rcnn/caffe-fast-rcnn/python')
add_path('/home/ubuntu/src/py-faster-rcnn/lib')
import caffe
# NOTE(review): `datasets` and `fast_rcnn` are presumably the packages under
# py-faster-rcnn/lib added above -- confirm against the checkout.
from datasets.factory import get_imdb, list_imdbs
from fast_rcnn.test import test_net
from fast_rcnn.train import get_training_roidb, train_net
from fast_rcnn.config import cfg, cfg_from_file, cfg_from_list, get_output_dir
# Report which caffe build was actually imported (Python 2 print statement).
print "Loaded caffe version {:s} from {:s}.".format(caffe.__version__, caffe.__path__[0])

Load the original model with weights and the new model without weights.

In [None]:
# Original network: Pascal VOC ZF Faster R-CNN definition plus its trained
# weights, instantiated in TEST phase.
orig_test_file = '/home/ubuntu/src/py-faster-rcnn/models/pascal_voc/ZF/faster_rcnn_end2end/test.prototxt'
orig_model = '/home/ubuntu/src/py-faster-rcnn/data/faster_rcnn_models/ZF_faster_rcnn_final.caffemodel'
orig_net = caffe.Net(orig_test_file, orig_model, caffe.TEST)

In [None]:
# New dstl network, built from its prototxt only: no .caffemodel is passed,
# so its parameters are not read from a trained model at this point.
test_file = '/home/ubuntu/dstl/models/ZF/test.prototxt'
net = caffe.Net(test_file, caffe.TEST)

Copy the layer parameters from the original network to the new, discarding any parameters that do not 'fit' in the new network.

In [None]:
def copy_data(orig_data, data):
    """Overwrite `data` in place with the leading corner of `orig_data`.

    Along every axis the first ``data.shape[axis]`` entries of `orig_data`
    are kept and the rest are discarded, i.e. ``data[...] = orig_data[:d0,
    :d1, ...]``.

    Args:
        orig_data: source array; must have at least as many axes as `data`
            and be at least as large as `data` along each of them.
        data: destination array, modified in place.

    Raises:
        ValueError: if cropping `orig_data` does not yield exactly
            ``data.shape`` (source too small along some axis, or it has
            extra trailing axes).
        IndexError: if `data` has more axes than `orig_data`.
    """
    # NumPy needs a *tuple* of slices for multi-axis indexing. A list of
    # slices is treated as fancy indexing, which was deprecated in NumPy 1.15
    # and is rejected by current releases.
    corner = tuple(slice(0, extent) for extent in data.shape)
    cropped = orig_data[corner]

    # Without this check a size-1 source axis would be silently broadcast
    # across a larger destination axis instead of being reported.
    if cropped.shape != data.shape:
        raise ValueError(
            "cannot crop source of shape {} to destination shape {}".format(
                orig_data.shape, data.shape))

    data[...] = cropped

In [None]:
# Copy every parameter blob of each layer that exists in both networks,
# cropping the original blob to the new blob's shape where the new one is
# smaller.
for layer_name in net._layer_names:
    # Only the two lookups can legitimately raise KeyError (the layer name has
    # no entry in one of the `params` mappings); keep the try that narrow so a
    # KeyError from anywhere else is not silently swallowed.
    try:
        orig_blobs = orig_net.params[layer_name]
        new_blobs = net.params[layer_name]
    except KeyError:
        continue

    # Track subsampling across *all* blobs of the layer. Checking after the
    # loop would only look at the last blob and depend on leaked loop
    # variables.
    subsampled = False
    for orig_blob, new_blob in zip(orig_blobs, new_blobs):
        copy_data(orig_blob.data, new_blob.data)
        if new_blob.data.size < orig_blob.data.size:
            subsampled = True

    if subsampled:
        # Single-argument print call behaves identically on Python 2 and 3.
        print("Subsampled parameters for layer {:s}".format(layer_name))

Check that the new and original networks are indeed identical, up to the discarded weights.

In [None]:
def show_diff(net0, net1):
    """Print how far `net1`'s parameters are from `net0`'s, layer by layer.

    For every name in ``net0._layer_names`` that has an entry in both
    networks' ``params``, blob 0 (treated as weights) and blob 1 (treated as
    biases) of `net0` are cropped to the shapes of the matching `net1` blobs
    and compared. One line is printed per such layer::

        <layer name> <std of weight difference> <sum of |bias difference|>

    Both numbers are 0 when the cropped parameters are identical. Note that
    the weight figure is a standard deviation, so it is also 0 for a constant
    non-zero offset.

    Args:
        net0: reference network (the larger, original one).
        net1: network whose blob shapes define the crop.

    Returns:
        None; results are printed.
    """
    for layer_name in net0._layer_names:
        # Layers missing from either `params` mapping are skipped. Only the
        # lookups are guarded so other errors are not hidden.
        try:
            params0 = net0.params[layer_name]
            params1 = net1.params[layer_name]
        except KeyError:
            continue

        weights0, biases0 = params0[0].data, params0[1].data
        weights1, biases1 = params1[0].data, params1[1].data

        # Multi-axis indexing needs a tuple of slices. A list of slices is
        # treated as fancy indexing, which current NumPy releases reject.
        weights0_trunc = weights0[tuple(slice(0, n) for n in weights1.shape)]
        biases0_trunc = biases0[tuple(slice(0, n) for n in biases1.shape)]

        diff_weights = (weights0_trunc - weights1).std()
        diff_biases = abs(biases0_trunc - biases1).sum()

        # Single-argument print call behaves identically on Python 2 and 3.
        print("{} {} {}".format(layer_name, diff_weights, diff_biases))
        
# Compare the original network against the adapted one; after a successful
# copy every printed difference should be 0.
show_diff(orig_net, net)

Save the output

In [None]:
# Write the adapted network's parameters to a new .caffemodel file.
net.save('/home/ubuntu/dstl/models/ZF/trained/adapted_from_ZF_faster_rcnn_final.caffemodel')