In [1]:
import numpy as np
import tensorflow as tf
from scipy.misc import imread

import caffe_classes

# TODO: make TensforFlow v1.0 both compatible and a requirement
if tf.__version__ < '0.12.0':
    print('Validated only for TensorFlow v0.12')
if tf.__version__ >= '1.0.0':
    raise BaseException('TensorFlow v1.0 likely requires parameter reordering => https://groups.google.com/a/tensorflow.org/forum/#!msg/discuss/OePXmC9kJ7o/SRErOoYCDQAJ')

In [2]:
def RGB2BGR(img):
    img2 = img.copy()
    img2[:, :, 0], img2[:, :, 2] = img2[:, :, 2], img2[:, :, 0]
    return img2

def load_preprocess_img(fname):
    img = imread(fname)[:,:,:3].astype(np.float32)
    img - np.mean(img)
    RGB2BGR(img)
    return img

In [3]:
image_names = ['dog.png', 'dog.png', 'laska.png', 'poodle.png', 'quail227.JPEG']
images = []
for image_name in image_names:
    images.append(load_preprocess_img('tmp/training_data/alexnet/' + image_name))

In [4]:
net_data = np.load(open("tmp/training_data/alexnet/bvlc_alexnet.npy", "rb"), encoding="latin1").item()

In [5]:
# https://github.com/guerzh/tf_weights/blob/master/myalexnet_forward_newtf.py#L73
# https://github.com/ethereon/caffe-tensorflow/blob/master/kaffe/tensorflow/network.py#L105

def conv(input, kernel, biases, c_o, stride_h, stride_w,  padding="VALID", group=1):
    c_i = input.get_shape()[-1]
    assert c_i % group == 0
    assert c_o % group == 0
    convolve = lambda i, k: tf.nn.conv2d(i, k, [1, stride_h, stride_w, 1], padding=padding)
    
    if group==1:
        conv = convolve(input, kernel)
    else:
        input_groups =  tf.split(3, group, input)   #tf.split(3, group, input)
        kernel_groups = tf.split(3, group, kernel)   #tf.split(3, group, kernel) 
        output_groups = [convolve(i, k) for i,k in zip(input_groups, kernel_groups)]
        conv = tf.concat(3, output_groups)          #tf.concat(3, output_groups)
    return  tf.reshape(tf.nn.bias_add(conv, biases), [-1]+conv.get_shape().as_list()[1:])

In [6]:
# Heavily modified from: https://github.com/guerzh/tf_weights/blob/master/myalexnet_forward_newtf.py#L93
# TODO: should be shared tf.Variable rather than copying them for each alexnet

def alexnet(net_data):
    """Creates AlexNet TensorFlow graph with weights. Returns input placeholder, maxpool5 and softmax outputs."""
    x = tf.placeholder(tf.float32, (None, 227, 227, 3))

    #conv1
    #conv(11, 11, 96, 4, 4, padding='VALID', name='conv1')
    k_h = 11; k_w = 11; c_o = 96; s_h = 4; s_w = 4
    conv1W = tf.Variable(net_data["conv1"][0], trainable=False)
    conv1b = tf.Variable(net_data["conv1"][1], trainable=False)
    conv1_in = conv(x, conv1W, conv1b, c_o, s_h, s_w, padding="SAME", group=1)
    conv1 = tf.nn.relu(conv1_in)

    #lrn1
    #lrn(2, 2e-05, 0.75, name='norm1')
    radius = 2; alpha = 2e-05; beta = 0.75; bias = 1.0
    lrn1 = tf.nn.local_response_normalization(conv1,
                                              depth_radius=radius,
                                              alpha=alpha,
                                              beta=beta,
                                              bias=bias)

    #maxpool1
    #max_pool(3, 3, 2, 2, padding='VALID', name='pool1')
    k_h = 3; k_w = 3; s_h = 2; s_w = 2; padding = 'VALID'
    maxpool1 = tf.nn.max_pool(lrn1, ksize=[1, k_h, k_w, 1], strides=[1, s_h, s_w, 1], padding=padding)

    #conv2
    #conv(5, 5, 256, 1, 1, group=2, name='conv2')
    k_h = 5; k_w = 5; c_o = 256; s_h = 1; s_w = 1; group = 2
    conv2W = tf.Variable(net_data["conv2"][0], trainable=False)
    conv2b = tf.Variable(net_data["conv2"][1], trainable=False)
    conv2_in = conv(maxpool1, conv2W, conv2b, c_o, s_h, s_w, padding="SAME", group=group)
    conv2 = tf.nn.relu(conv2_in)

    #lrn2
    #lrn(2, 2e-05, 0.75, name='norm2')
    radius = 2; alpha = 2e-05; beta = 0.75; bias = 1.0
    lrn2 = tf.nn.local_response_normalization(
        conv2,
        depth_radius=radius,
        alpha=alpha,
        beta=beta,
        bias=bias)

    #maxpool2
    #max_pool(3, 3, 2, 2, padding='VALID', name='pool2')                                                  
    k_h = 3; k_w = 3; s_h = 2; s_w = 2; padding = 'VALID'
    maxpool2 = tf.nn.max_pool(lrn2, ksize=[1, k_h, k_w, 1], strides=[1, s_h, s_w, 1], padding=padding)

    #conv3
    #conv(3, 3, 384, 1, 1, name='conv3')
    k_h = 3; k_w = 3; c_o = 384; s_h = 1; s_w = 1; group = 1
    conv3W = tf.Variable(net_data["conv3"][0], trainable=False)
    conv3b = tf.Variable(net_data["conv3"][1], trainable=False)
    conv3_in = conv(maxpool2, conv3W, conv3b, c_o, s_h, s_w, padding="SAME", group=group)
    conv3 = tf.nn.relu(conv3_in)

    #conv4
    #conv(3, 3, 384, 1, 1, group=2, name='conv4')
    k_h = 3; k_w = 3; c_o = 384; s_h = 1; s_w = 1; group = 2
    conv4W = tf.Variable(net_data["conv4"][0], trainable=False)
    conv4b = tf.Variable(net_data["conv4"][1], trainable=False)
    conv4_in = conv(conv3, conv4W, conv4b, c_o, s_h, s_w, padding="SAME", group=group)
    conv4 = tf.nn.relu(conv4_in)

    #conv5
    #conv(3, 3, 256, 1, 1, group=2, name='conv5')
    k_h = 3; k_w = 3; c_o = 256; s_h = 1; s_w = 1; group = 2
    conv5W = tf.Variable(net_data["conv5"][0], trainable=False)
    conv5b = tf.Variable(net_data["conv5"][1], trainable=False)
    conv5_in = conv(conv4, conv5W, conv5b, c_o, s_h, s_w, padding="SAME", group=group)
    conv5 = tf.nn.relu(conv5_in)

    #maxpool5
    #max_pool(3, 3, 2, 2, padding='VALID', name='pool5')
    k_h = 3; k_w = 3; s_h = 2; s_w = 2; padding = 'VALID'
    maxpool5 = tf.nn.max_pool(conv5, ksize=[1, k_h, k_w, 1], strides=[1, s_h, s_w, 1], padding=padding)

    #fc6_alexnet
    #fc(4096, name='fc6')
    fc6W = tf.Variable(net_data["fc6"][0])
    fc6b = tf.Variable(net_data["fc6"][1])
    fc6 = tf.nn.relu_layer(tf.reshape(maxpool5, [-1, int(np.prod(maxpool5.get_shape()[1:]))]), fc6W, fc6b)

    #fc7_alexnet
    #fc(4096, name='fc7')
    fc7W = tf.Variable(net_data["fc7"][0])
    fc7b = tf.Variable(net_data["fc7"][1])
    fc7 = tf.nn.relu_layer(fc6, fc7W, fc7b)

    #fc8_alexnet
    #fc(1000, relu=False, name='fc8')
    fc8W = tf.Variable(net_data["fc8"][0])
    fc8b = tf.Variable(net_data["fc8"][1])
    fc8 = tf.nn.xw_plus_b(fc7, fc8W, fc8b)

    #prob_alexnet
    #softmax(name='prob'))
    softmax = tf.nn.softmax(fc8)

    return x, maxpool5, softmax

In [7]:
x, maxpool5, softmax = alexnet(net_data)

init = tf.global_variables_initializer()
sess = tf.Session()
sess.run(init)

output = sess.run(softmax, feed_dict = {x:images})

for image_idx in range(output.shape[0]):
    inds = np.argsort(output)[image_idx,:]
    print('')
    print('Image: ' + image_names[image_idx])
    for i in range(5):
        print(caffe_classes.class_names[inds[-1-i]], output[image_idx, inds[-1-i]])


Image: dog.png
flat-coated retriever 0.475118
Newfoundland, Newfoundland dog 0.159941
Tibetan mastiff 0.115989
standard poodle 0.0608109
Afghan hound, Afghan 0.0281317

Image: dog.png
flat-coated retriever 0.475118
Newfoundland, Newfoundland dog 0.159941
Tibetan mastiff 0.115989
standard poodle 0.0608109
Afghan hound, Afghan 0.0281317

Image: laska.png
weasel 0.257096
polecat, fitch, foulmart, foumart, Mustela putorius 0.170497
black-footed ferret, ferret, Mustela nigripes 0.124212
llama 0.106355
Arctic fox, white fox, Alopex lagopus 0.0829945

Image: poodle.png
komondor 0.244168
miniature poodle 0.204217
toy poodle 0.112967
Bedlington terrier 0.106496
standard poodle 0.0961162

Image: quail227.JPEG
water ouzel, dipper 0.489476
quail 0.188902
hummingbird 0.0747858
chickadee 0.0405761
American egret, great white heron, Egretta albus 0.0277748


In [8]:
def goturn_model(net_data):
    x0, caffe0, _ = alexnet(net_data)
    x1, caffe1, _ = alexnet(net_data)

    merged = tf.concat(1, (caffe0, caffe1))
    
    #fc6_goturn
    #fc(4096, name='fc6')
    new_shape = int(np.prod(merged.get_shape()[1:]))

    fc6W = tf.Variable(tf.truncated_normal((new_shape, 4096), stddev=0.1))
    fc6b = tf.Variable(tf.truncated_normal((4096,), stddev=0.1))
    fc6 = tf.nn.relu_layer(tf.reshape(merged, [-1, new_shape]), fc6W, fc6b)

    #fc7_goturn
    #fc(4096, name='fc7')
    fc7W = tf.Variable(tf.truncated_normal((4096, 4096), stddev=0.1))
    fc7b = tf.Variable(tf.truncated_normal((4096,), stddev=0.1))
    fc7 = tf.nn.relu_layer(fc6, fc7W, fc7b)

    #bbox_goturn
    #fc(4, relu=False, name='fc8')
    fc8W = tf.Variable(tf.truncated_normal((4096, 4), stddev=0.1))
    fc8b = tf.Variable(tf.truncated_normal((4,), stddev=0.1))
    bbox = tf.nn.xw_plus_b(fc7, fc8W, fc8b)
    
    return x0, x1, bbox

In [9]:
def get_loss(y_true, y_pred):
    # GOTURN uses L1 loss function to promote tighter bounding boxes compared to L2 mean
    loss = tf.reduce_mean(tf.abs(y_true - y_pred))
    return loss

In [10]:
import pandas as pd

In [11]:
from sklearn.model_selection import train_test_split

In [12]:
df = pd.read_csv('tmp/generated/vot/map_all.csv')[:300]
list_train, list_test = train_test_split(df)

In [13]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 300 entries, 0 to 299
Data columns (total 6 columns):
file1    300 non-null object
file2    300 non-null object
bbtlx    300 non-null float64
bblry    300 non-null float64
bbbrx    300 non-null float64
bbbry    300 non-null float64
dtypes: float64(4), object(2)
memory usage: 14.1+ KB


In [14]:
def get_image_tensor(df):
    t0 = [load_preprocess_img(i) for i in df['file1']]
    t1 = [load_preprocess_img(i) for i in df['file2']]
    return np.array(t0), np.array(t1), df[df.columns[2:]].values.astype(np.float32)

In [15]:
X_train0, X_train1, y_train = get_image_tensor(list_train)
X_test0, X_test1, y_test = get_image_tensor(list_test)

In [16]:
X_train0.shape

(225, 227, 227, 3)

In [17]:
y_train.shape

(225, 4)

In [18]:
def train(learn_rate=0.01, batch_size=3, initial_weights=net_data):

    x0, x1, y_pred = goturn_model(initial_weights)
    y_true = tf.placeholder(tf.float32, (None, 4))

    init = tf.global_variables_initializer()
    loss = get_loss(y_true, y_pred)
    train_step = tf.train.AdamOptimizer(learn_rate).minimize(loss)

    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.95)
    with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
        sess.run(init)

        for i in range(int(len(X_train0)/batch_size) - 1):
            start = i*batch_size
            end = i*(batch_size+1)
            batch_x0s = X_train0[start:end]
            batch_x1s = X_train1[start:end]
            batch_ys = y_train[start:end]

            sess.run(train_step, feed_dict={x0: batch_x0s,
                                            x1: batch_x1s,
                                            y_true: batch_ys})
            print(loss)

In [None]:
sess = tf.Session()
train()