# Table of Contents
 <p><div class="lev1 toc-item"><a href="#Собираем-сиамскую-сеть" data-toc-modified-id="Собираем-сиамскую-сеть-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>Собираем сиамскую сеть</a></div><div class="lev1 toc-item"><a href="#Проверяем-глубокое-представление" data-toc-modified-id="Проверяем-глубокое-представление-2"><span class="toc-item-num">2&nbsp;&nbsp;</span>Проверяем глубокое представление</a></div>

In [1]:
import numpy as np
import lasagne
import theano
import theano.tensor as T
from lasagne.nonlinearities import rectify, sigmoid, linear, tanh
from lasagne.layers import InputLayer, DenseLayer, BatchNormLayer, Upscale2DLayer, NonlinearityLayer, ReshapeLayer
from lasagne.layers import Conv2DLayer, MaxPool2DLayer, dropout

import matplotlib.pyplot as plt
%matplotlib inline

# import gzip, pickle

In [2]:
# Seed for reproducibility
np.random.seed(42)

In [3]:
files = np.load('data_set.npz')
X_train, y_train, X_val, y_val = files['X_train'], files['y_train'], files['X_test'], files['y_test']
# Load training and test splits as numpy arrays
# train, val, test = pickle.load(gzip.open('mnist.pkl.gz'))

# X_train, y_train = train
# X_val, y_val = val

In [4]:
second_dim = 3
img_size = 150
print(X_train.shape, X_val.shape)

# second_dim = 1
# img_size = 28
# X_train = X_train.reshape([-1,1,28,28])
# X_val = X_val.reshape([-1,1,28,28])

((11450, 3, 150, 150), (1783, 3, 150, 150))


# Собираем сиамскую сеть

In [28]:
input_image_left  = T.tensor4('input_left')
input_image_positive = T.tensor4('input_positive')
input_image_negative = T.tensor4('input_negative')

In [29]:
l_input = InputLayer(shape=(None, second_dim, img_size, img_size), input_var=input_image_left)
p_input = InputLayer(shape=(None, second_dim, img_size, img_size), input_var=input_image_positive)
n_input = InputLayer(shape=(None, second_dim, img_size, img_size), input_var=input_image_negative)

In [30]:
conv1_filter_size = (3, 3)
conv1_num_filters = 20
conv2_filter_size = (5, 5)
conv2_num_filters = 20
conv2_stride = (2, 2)
pool1_size = (2, 2)
pool2_size = (4, 4)

In [31]:
my_nonlin = rectify
nn_l_conv1 = Conv2DLayer(l_input, conv1_num_filters, conv1_filter_size, nonlinearity=my_nonlin, W=lasagne.init.GlorotUniform())
nn_l_norm1 = BatchNormLayer(nn_l_conv1)
nn_l_pool1 = MaxPool2DLayer(nn_l_norm1, pool1_size)
nn_l_conv2 = Conv2DLayer(nn_l_pool1, conv2_num_filters, conv2_filter_size, stride=conv2_stride, nonlinearity=my_nonlin)
nn_l_pool2 = MaxPool2DLayer(nn_l_conv2, pool2_size)
nn_l_dense = DenseLayer(dropout(nn_l_pool2, p=.5), num_units=256, nonlinearity=my_nonlin)
nn_l_out = DenseLayer(dropout(nn_l_dense, p=.5), num_units=128, nonlinearity=my_nonlin)

In [32]:
l_params = lasagne.layers.get_all_params(nn_l_out)
l_params

[W, b, beta, gamma, mean, inv_std, W, b, W, b, W, b]

In [33]:
nn_p_conv1 = Conv2DLayer(p_input, conv1_num_filters, conv1_filter_size, nonlinearity=my_nonlin, W=l_params[0], b=l_params[1])
nn_p_norm1 = BatchNormLayer(nn_p_conv1, beta=l_params[2], gamma=l_params[3], mean=l_params[4], inv_std=l_params[5])
nn_p_pool1 = MaxPool2DLayer(nn_p_norm1, pool1_size)
nn_p_conv2 = Conv2DLayer(nn_p_pool1, conv2_num_filters, conv2_filter_size, stride=conv2_stride, nonlinearity=my_nonlin, W=l_params[6], b=l_params[7])
nn_p_pool2 = MaxPool2DLayer(nn_p_conv2, pool2_size)
nn_p_dense = DenseLayer(dropout(nn_p_pool2, p=0.5), num_units=256, nonlinearity=my_nonlin, W=l_params[8], b=l_params[9])
nn_p_out = DenseLayer(dropout(nn_p_dense, p=0.5), num_units=128, nonlinearity=my_nonlin, W=l_params[10], b=l_params[11])

In [34]:
nn_n_conv1 = Conv2DLayer(n_input, conv1_num_filters, conv1_filter_size, nonlinearity=my_nonlin, W=l_params[0], b=l_params[1])
nn_n_norm1 = BatchNormLayer(nn_n_conv1, beta=l_params[2], gamma=l_params[3], mean=l_params[4], inv_std=l_params[5])
nn_n_pool1 = MaxPool2DLayer(nn_n_norm1, pool1_size)
nn_n_conv2 = Conv2DLayer(nn_n_pool1, conv2_num_filters, conv2_filter_size, stride=conv2_stride, nonlinearity=my_nonlin, W=l_params[6], b=l_params[7])
nn_n_pool2 = MaxPool2DLayer(nn_n_conv2, pool2_size)
nn_n_dense = DenseLayer(dropout(nn_n_pool2, p=0.5), num_units=256, nonlinearity=my_nonlin, W=l_params[8], b=l_params[9])
nn_n_out = DenseLayer(dropout(nn_n_dense, p=0.5), num_units=128, nonlinearity=my_nonlin, W=l_params[10], b=l_params[11])

In [35]:
nn_merge = lasagne.layers.concat([nn_l_out, nn_p_out, nn_n_out], axis=1)

In [36]:
nn_out  = lasagne.layers.get_output(nn_merge, deterministic=False)
nn_out_test  = lasagne.layers.get_output(nn_merge, deterministic=True)
nn_out_left = nn_out[:, :128]
nn_out_positive = nn_out[:, 128:256]
nn_out_negative = nn_out[:, 256:]

nn_out_left_test = nn_out_test[:, :128]
nn_out_positive_test = nn_out_test[:, 128:256]
nn_out_negative_test = nn_out_test[:, 256:]

In [37]:
a = T.scalar()

d1 = T.sum(T.sqr(nn_out_left - nn_out_positive), axis=1)
d2 = T.sum(T.sqr(nn_out_left - nn_out_negative), axis=1)

loss = T.sum(T.maximum(T.sqr(d1) - T.sqr(d2) + a, 0.))

In [38]:
d1_test = T.sum(T.sqr(nn_out_left_test - nn_out_positive_test), axis=1)
d2_test = T.sum(T.sqr(nn_out_left_test - nn_out_negative_test), axis=1)

test_loss = T.sum(T.maximum(T.sqr(d1_test) - T.sqr(d2_test) + a, 0.))

In [39]:
params = lasagne.layers.get_all_params(nn_merge)
updates = lasagne.updates.rmsprop(loss, params)
# updates = lasagne.updates.adamax(loss, params)
# updates = lasagne.updates.nesterov_momentum(loss, params, 0.01)

DisconnectedInputError:  
Backtrace when that variable is created:

  File "/home/snipghost/.local/lib/python2.7/site-packages/ipykernel/zmqshell.py", line 501, in run_cell
    return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
  File "/home/snipghost/.local/lib/python2.7/site-packages/IPython/core/interactiveshell.py", line 2717, in run_cell
    interactivity=interactivity, compiler=compiler, result=result)
  File "/home/snipghost/.local/lib/python2.7/site-packages/IPython/core/interactiveshell.py", line 2821, in run_ast_nodes
    if self.run_code(code, result):
  File "/home/snipghost/.local/lib/python2.7/site-packages/IPython/core/interactiveshell.py", line 2881, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-31-c0c560a17e5f>", line 3, in <module>
    nn_l_norm1 = BatchNormLayer(nn_l_conv1)
  File "/home/snipghost/.local/lib/python2.7/site-packages/lasagne/layers/normalization.py", line 262, in __init__
    trainable=False, regularizable=False)
  File "/home/snipghost/.local/lib/python2.7/site-packages/lasagne/layers/base.py", line 234, in add_param
    param = utils.create_param(spec, shape, name)
  File "/home/snipghost/.local/lib/python2.7/site-packages/lasagne/utils.py", line 381, in create_param
    spec = theano.shared(spec, broadcastable=bcast)


In [22]:
train_fn = theano.function([input_image_left, input_image_positive, input_image_negative, a], loss, 
                           updates=updates, allow_input_downcast=True)
val_fn = theano.function([input_image_left, input_image_positive, input_image_negative, a], test_loss, 
                         updates=updates, allow_input_downcast=True)
test_fn = theano.function([input_image_left, input_image_positive, input_image_negative], [d1_test, d2_test], 
                          allow_input_downcast=True)
output_fn = theano.function([input_image_left, input_image_positive, input_image_negative], nn_out, 
                            allow_input_downcast=True)

NameError: name 'updates' is not defined

In [None]:
def iterate_minibatches(inputs, targets, batchs_per_epoch=100, batchsize=20, train=True, shuffle=False):
    assert len(inputs) == len(targets)

    left_indices = np.arange(len(inputs))
    
    if shuffle:
        np.random.shuffle(left_indices)
        
    for _ in range(batchs_per_epoch):
        full_lft_indxs = []
        full_pos_indxs = []
        full_neg_indxs = []
        
        for _ in range(batchsize):
            start_idx = np.random.randint(low=0, high=len(left_indices))
            full_lft_indxs.append(start_idx)
            
            pos_idxs = np.where(targets == targets[start_idx])[0]
            b_idxs = np.random.randint(low=0, high=len(pos_idxs), size=1)
            full_pos_indxs.append(pos_idxs[b_idxs[0]])
            
            neg_idxs = np.where(targets != targets[start_idx])[0]
            b_idxs = np.random.randint(low=0, high=len(neg_idxs), size=1)
            full_neg_indxs.append(neg_idxs[b_idxs[0]])

        full_lft_indxs = np.array(full_lft_indxs)
        full_pos_indxs = np.array(full_pos_indxs)
        full_neg_indxs = np.array(full_neg_indxs)
        
        yield inputs[full_lft_indxs], inputs[full_pos_indxs], inputs[full_neg_indxs]

In [None]:
num_epochs = 200
train_errors = []
val_errors = []
epoch = 0
batch_size = 20
batchs_per_epoch = 5

margin = 1.242

In [None]:
import time

for epoch in range(epoch, num_epochs):
    
    train_err = 0
    train_batches = 0
    start_time = time.time()
    for batch in iterate_minibatches(X_train, y_train, batchs_per_epoch=batchs_per_epoch,
                                     batchsize=batch_size, train=True, shuffle=True):
        inputs_left, inputs_positive, inputs_negative = batch
        err = train_fn(inputs_left, inputs_positive, inputs_negative, margin)
        train_err += err
        train_batches += 1
    print(err)

    val_err = 0
    val_acc = 0
    val_batches = 0
    for batch in iterate_minibatches(X_val, y_val, batchs_per_epoch=batchs_per_epoch,
                                     batchsize=batch_size, train=False, shuffle=True):
        inputs_left, inputs_positive, inputs_negative = batch
        err = val_fn(inputs_left, inputs_positive, inputs_negative, margin)
        val_err += err
        val_batches += 1

    print("Epoch {} of {} took {:.3f}s".format(
        epoch + 1, num_epochs, time.time() - start_time))
    print("  training loss:\t\t{:.6f}".format(train_err / train_batches))
    print("  validation loss:\t\t{:.6f}".format(val_err / val_batches))
    train_errors.append(train_err / train_batches)
    val_errors.append(val_err / val_batches)

In [None]:
plt.plot(train_errors, 'r')
plt.plot(val_errors, 'b')
plt.show()

# Проверяем глубокое представление

In [None]:
from sklearn import decomposition

In [None]:
from mpl_toolkits.mplot3d import Axes3D

In [None]:
deep_ids = []

In [None]:
for i in range(X_val.shape[0]):
    deep_ids.append(output_fn([X_val[i]], [X_val[0]], [X_val[0]])[0])

In [None]:
deep_ids = np.array(deep_ids)

In [None]:
pca = decomposition.PCA(n_components=3)
pca.fit(deep_ids)

In [None]:
X_val_vis = pca.transform(deep_ids)

In [None]:
np.save('X_val_vis-2D', arr=X_val_vis)

In [None]:
np.save('y_val_vis', arr=y_val)

In [None]:
X_val_vis

In [None]:
fig = plt.figure(1, figsize=(4, 3))
plt.clf()
ax = Axes3D(fig, rect=[0, 0, .95, 1], elev=48, azim=134)

for label in range(10):
    ax.text3D(X_val_vis[y_val == label, 0].mean(),
              X_val_vis[y_val == label, 1].mean(),
              X_val_vis[y_val == label, 2].mean(), str(label),
              horizontalalignment='center',
              bbox=dict(alpha=.5, edgecolor='w', facecolor='w'))
    
plt.show()