In [None]:
%%time
# Python 2.7

%matplotlib nbagg
%matplotlib inline 

import sys
sys.path
sys.path.append('..')
print(sys.version)

import os
import cPickle 
import scipy.io
import numpy as np
import matplotlib.pyplot as plt
import theano
import theano.tensor as T
import lasagne 
import lasagne.layers as L
import parmesan
import cPickle as pickle

from sklearn.utils import shuffle

import tools as tls


from data_loaders import svhn



In [None]:
### GLOBAL PARAMETERS ###
# Notebook-wide settings; later cells read these names directly.

### META - HOW THE PROGRAM WORKS
# Base name (without extension) of the pickled dataset. The load cell
# resolves it to <cwd>/data/<file_name>.pkl.
file_name = 'data_c3'

# Seed NumPy's global RNG.
# NOTE(review): presumably this is also what makes the Lasagne weight
# initialisation repeatable -- confirm against the Lasagne version in use.
np.random.seed(1234) # reproducibility


### CONSTANTS
# Image side length in pixels.
# NOTE(review): assumes square SVHN crops; this name is not referenced by
# any other cell visible here -- confirm it is still needed.
IMG_LEN = 32




In [None]:
%%time
### LOAD DATA

# Dataset location: <cwd>/data/<file_name>.pkl
full_path = os.path.join(os.getcwd(), 'data', file_name + '.pkl')
print(full_path)

# SECURITY: unpickling can execute arbitrary code embedded in the file.
# Only load dataset files that come from a trusted source.
with open(full_path, 'rb') as f:
    x_trai, t_trai, x_vali, t_vali, x_test, t_test = pickle.load(f)

# Report the size of the six data arrays.
# `nbytes` is the size of the array buffer itself. sys.getsizeof() only
# includes the buffer when the array owns its memory, so it can badly
# under-report arrays that are views.
# The splits are processed one pair at a time so that at most one temporary
# copy of a split is alive at once.
total_bytes = 0
for x_set, t_set in ((x_trai, t_trai), (x_vali, t_vali), (x_test, t_test)):
    total_bytes += x_set.get_value().nbytes + t_set.eval().nbytes

print('Size of total dataset: {:.2f} MB'.format(total_bytes / 1.0e6))

In [None]:
### CHECK DATA
# Count the distinct target columns that contain a 1 somewhere in the
# training targets, i.e. the classes that actually occur.
hot_columns = np.where(t_trai.eval() == 1)[1]
num_classes = np.unique(hot_columns).shape[0]
print('Number of classes {}'.format(num_classes))

# Length of a single training example (first row of the training inputs).
num_features = x_trai.get_value()[0].shape[0]
print('Number of features {}'.format(num_features))

print('')

# Input/target shapes for every split, in train / valid / test order.
for split_label, x_split, t_split in (('Train shape: ', x_trai, t_trai),
                                      ('Valid shape: ', x_vali, t_vali),
                                      ('Test shape:  ', x_test, t_test)):
    print(split_label, x_split.get_value().shape, t_split.eval().shape)

# Python type of the input container of every split.
for x_split in (x_trai, x_vali, x_test):
    print('{}'.format(type(x_split)))
print('')


In [None]:
### VISUALIZE 

# TODO: once normalize/renormalize helpers exist, add colour support to
#       this plotting function.

# Plot training inputs with the project-local helper `tls.plot_svhn`.
# NOTE(review): the meaning of `t=10` is defined in tools.plot_svhn, which is
# not visible from this notebook -- confirm before changing it.
tls.plot_svhn(x_trai.get_value(), t=10)

In [None]:
### HYPER PARAMETERS
# VOLATILE HP
# Step size handed to the Adam update rule in the training-function cell.
learning_rate = 1e-2
# NOTE(review): L1 / L2 look like regularisation strengths, but no cell
# visible here reads them -- confirm whether they are used further down.
L1 = 0
L2 = 0


# ARCHITECTURE
# Size of the latent layer; tied to the number of classes found in the
# training targets.
num_latent_1 = num_classes
# Number of units in every hidden dense layer of the encoder and decoder.
hid_size = 500


# STABLE HP
# Passed to the sampling layer as `eq_samples` and `iw_samples` respectively.
eq_size = 1
iw_size = 1
# NOTE(review): batch_size / max_epoch are presumably consumed by a training
# loop that is not part of the cells visible here.
batch_size = 128
max_epoch = 100


In [None]:
### HELPER FUNCTIONS
from lasagne.objectives import squared_error

# c = -0.5 * np.log(2*np.pi)
def clip(x):
    """Clamp ``x`` element-wise to the interval [-10, 10].

    Used as the output nonlinearity of the log-variance layer: bounding the
    log-variance keeps its exponential (taken in the KL term) inside a
    numerically safe range.
    """
    return T.clip(x, -10, 10)

def log_bernoulli(x, p, eps=1e-32):
    """
    Element-wise Bernoulli log-likelihood of target ``x`` under probability
    ``p``, computed as the negated binary cross-entropy between ``p`` and
    ``x``.

    ``p`` is first clipped to ``[eps, 1 - eps]`` so that the logarithms
    inside the cross-entropy never see exactly 0 or 1.

    NOTE(review): with the default ``eps=1e-32`` the upper bound
    ``1.0 - eps`` rounds to exactly 1.0 in floating point, so only the
    lower bound is effective; pass a larger ``eps`` to guard ``p == 1``.
    """
    p_clipped = T.clip(p, eps, 1.0 - eps)
    cross_entropy = T.nnet.binary_crossentropy(p_clipped, x)
    return -cross_entropy



def kl_normal_2_stdnormal(mu, lv):
    """
    Element-wise KL divergence between a diagonal Gaussian with mean ``mu``
    and log-variance ``lv`` and the standard normal distribution.

    No reduction is applied; the caller sums over the latent dimension.
    """
    variance = T.exp(lv)
    return - 0.5 * (1 + lv - mu**2 - variance)


def LogLikelihood(mux, x, muq, lvq):
    """
    Compute the variational objective of the network for one batch.

    The squared reconstruction error stands in for the negative
    log-likelihood of the data (Gaussian likelihood, up to constants), so
    the objective to maximise is ``-reconstruction_cost - KL``.

    Parameters
    ----------
    mux : reconstruction produced by the decoder.
    x   : original input the reconstruction is compared against.
    muq : mean of the approximate posterior q(z|x).
    lvq : log-variance of the approximate posterior q(z|x).

    Returns
    -------
    (LL, reconstruction_cost, KL_qp) : three scalar expressions.
        ``LL`` is the objective to MAXIMISE (callers minimise ``-LL``).
        ``reconstruction_cost`` is the non-negative squared error, and
        ``KL_qp`` the non-negative KL term, both averaged over the batch.
    """
    # Sum over the feature dimension, mean over the samples
    reconstruction_cost = squared_error(x, mux).sum(axis=1).mean()
    # Sum over the latent dimension, mean over the samples
    KL_qp = kl_normal_2_stdnormal(muq, lvq).sum(axis=1).mean()

    # Both terms are penalties, so both enter the objective with a minus
    # sign. Using `+reconstruction_cost` here would make the optimiser
    # (which minimises -LL) actively increase the reconstruction error.
    LL = -reconstruction_cost - KL_qp

    return LL, reconstruction_cost, KL_qp
    

In [None]:
%%time
### CREATE MODEL
from lasagne.nonlinearities import leaky_rectify, rectify, sigmoid
from parmesan.layers import SampleLayer


def _relu_dense(incoming, name):
    """Fully connected ReLU layer with ``hid_size`` units on top of ``incoming``."""
    return L.DenseLayer(incoming,
                        num_units=hid_size,
                        nonlinearity=rectify,
                        name=name)


# NOTE: layers are created in the same order as before (encoder, latent
# parameters, sampler, decoder) so weight initialisation draws from the
# random number generator in an unchanged sequence.

### ENCODER
l_in_x = L.InputLayer(shape=(None, num_features), name='l_in_x')
l_en_1 = _relu_dense(l_in_x, 'l_en_1')
l_en_2 = _relu_dense(l_en_1, 'l_en_2')

# Parameters of the approximate posterior: a linear mean head and a
# clipped log-variance head, both fed by the last encoder layer.
l_mu_1 = L.DenseLayer(l_en_2,
                      num_units=num_latent_1,
                      nonlinearity=None,
                      name='l_mu_1')
l_lv_1 = L.DenseLayer(l_en_2,
                      num_units=num_latent_1,
                      nonlinearity=clip,
                      name='l_lv_1')

# Sample a latent representation:
#    z ~ q(z|x) = N(mu(x), exp(logvar(x)))
l_z_1 = SampleLayer(mean=l_mu_1,
                    log_var=l_lv_1,
                    eq_samples=eq_size,
                    iw_samples=iw_size,
                    name='l_z_1')

### DECODER
# The decoder has its own input layer, so it can be driven either by
# encoder samples or by externally supplied latent codes.
l_in_z = L.InputLayer(shape=(None, num_latent_1), name='l_in_z')
l_dec_1 = _relu_dense(l_in_z, 'l_dec_1')
l_dec_2 = _relu_dense(l_dec_1, 'l_dec_2')

# Sigmoid output because the original images are $\in [0,1]$
l_out = L.DenseLayer(l_dec_2,
                     num_units=num_features,
                     nonlinearity=sigmoid,
                     name='l_out')

In [None]:
%%time
### CREATE INTERFACE VARIABLES
# Symbolic inputs and outputs that connect the separately defined encoder
# and decoder networks. The decoder is fed by substituting an expression
# for its own input layer `l_in_z`.

sym_x = T.matrix('x') # (batch_size x 3072)
sym_z = T.matrix('z') # Latent variable (batch_size x num_latent)

# Training variables
# Encoder outputs (latent sample, mean, log-variance) for input sym_x ...
z_train, mu_train, lv_train = L.get_output([l_z_1, l_mu_1, l_lv_1],
                                           {l_in_x:sym_x}, 
                                           deterministic = False)
# ... and the decoder output when the decoder input is that latent sample.
out_train                   = L.get_output(l_out,
                                           {l_in_z:z_train}, 
                                           deterministic = False)

# Test variables
# Same wiring as above, but built with deterministic=True.
z_eval, mu_eval, lv_eval    = L.get_output([l_z_1, l_mu_1, l_lv_1],
                                           {l_in_x:sym_x},
                                           deterministic = True)
out_eval                    = L.get_output(l_out,
                                           {l_in_z:z_eval}, 
                                           deterministic = True)

# For generating artificial data (samples)
# Decoder output for an externally supplied latent code sym_z.
mux_sample               = L.get_output(l_out, {l_in_z: sym_z})

# Compute the cost
# NOTE: despite the `log_px_*` names, the second value returned by
# LogLikelihood is the summed squared reconstruction error (a cost).
LL_train, log_px_train, KL_train = \
    LogLikelihood(out_train, sym_x, mu_train, lv_train)

LL_eval, log_px_eval, KL_eval = \
    LogLikelihood(out_eval, sym_x, mu_eval, lv_eval)



In [None]:
%%time
### CREATE TRAINING FUNCTIONS

# Trainable parameters of both sub-networks: everything feeding the latent
# sample (encoder) and everything feeding the output layer (decoder).
all_params = L.get_all_params([l_z_1, l_out], trainable=True)
# The objective LL_train is to be maximised, so gradients are taken of its
# negation and handed to a minimising update rule.
all_grads  = T.grad(-LL_train, all_params)

updates    = lasagne.updates.adam(all_grads, all_params,
                                  learning_rate=learning_rate)

# Training function: Return loss, and update weights
f_train = theano.function(inputs=[sym_x],
                          outputs=[LL_train, log_px_train, KL_train],
                          updates=updates)

# Evaluation function: Return loss, without touching the weights.
# Uses the expressions built with deterministic=True (the *_eval graph),
# not the training graph.
f_eval = theano.function(inputs=[sym_x],
                         outputs=[LL_eval, log_px_eval, KL_eval])

# Get latent variable values
f_z              = theano.function(inputs=[sym_x], outputs=[z_eval])

# Return the reconstruction
f_reconstruction = theano.function(inputs=[sym_x], outputs=[out_eval])

# Simulate artificial data, given an artificial latent variable
f_simulate = theano.function(inputs=[sym_z], outputs=[mux_sample])
