In [None]:
%%time
# Python 2.7

%matplotlib nbagg
%matplotlib inline 

import sys
sys.path.append('..')
print(sys.version)

import os
import cPickle 
import scipy.io
import numpy as np
import matplotlib.pyplot as plt
import theano
import theano.tensor as T
import lasagne 
import lasagne.layers as L
import parmesan
import cPickle as pickle

from sklearn.utils import shuffle

import tools as tls


from data_loaders import svhn



In [None]:
### GLOBAL PARAMETERS ###

# Seed NumPy's global RNG up front so every later draw is reproducible.
np.random.seed(1234)

# Behaviour switches.
#   plot_train   - read by the training loop when deciding whether to plot.
#   using_shared - not read anywhere in the visible cells; presumably records
#                  whether the data live in Theano shared variables (TODO confirm).
plot_train, using_shared = True, False

### META - HOW THE PROGRAM WORKS
# Base name of the pickled dataset; the loader appends '.pkl'.
file_name = 'data_no_share_c3'

### CONSTANTS
# Presumably the side length of one SVHN image in pixels (TODO confirm).
IMG_LEN = 32




In [None]:
%%time
### LOAD DATA
# The dataset is expected at <cwd>/data/<file_name>.pkl and holds six objects:
# inputs and targets for the train, validation and test splits.
full_path = os.path.join(os.getcwd(), 'data', file_name + '.pkl')
print(full_path)

# NOTE(security): unpickling executes arbitrary code embedded in the file.
# Only load pickles that were produced locally / come from a trusted source.
with open(full_path, 'rb') as f:
    x_trai, t_trai, x_vali, t_vali, x_test, t_test = pickle.load(f)


def _size_in_bytes(obj):
    """Return the payload size of obj in bytes.

    NumPy arrays expose the size of their data buffer as ``nbytes``.
    ``sys.getsizeof`` is only a fallback for other objects: for an array that
    does not own its buffer it reports just the small header, which makes the
    dataset look a few hundred bytes large.
    """
    return getattr(obj, 'nbytes', sys.getsizeof(obj))


total_bytes = sum(_size_in_bytes(part)
                  for part in (x_trai, t_trai, x_vali, t_vali, x_test, t_test))
print('Size of total dataset: {:.2f} MB'.format(total_bytes / 1.0e6))

In [None]:
### Ensure that we have one hot encoding
# Actually verify the property instead of eyeballing the full array:
# a one-hot matrix is 2-D, contains only 0/1, and has exactly one 1 per row.
assert t_vali.ndim == 2, 'targets must be a (samples x classes) matrix'
assert np.all((t_vali == 0) | (t_vali == 1)), 'targets must contain only 0 and 1'
assert np.all(t_vali.sum(axis=1) == 1), 'every row must contain exactly one 1'

# Show a handful of rows rather than dumping the whole array.
t_vali[:5]

In [None]:
### CHECK DATA
# Number of distinct classes that occur in the one-hot training targets
# (column indices of the entries equal to 1).
num_classes = np.unique(np.where(t_trai == 1)[1]).shape[0]
print('Number of classes {}'.format(num_classes))

# Length of one input vector.
num_features = x_trai[0].shape[0]
print('Number of features {}'.format(num_features))

print('')
# Build each line with format(): under Python 2, print('a', b, c) prints a tuple.
print('Train shape:  {} {}'.format(x_trai.shape, t_trai.shape))
print('Valid shape:  {} {}'.format(x_vali.shape, t_vali.shape))
print('Test shape:   {} {}'.format(x_test.shape, t_test.shape))

print('{}'.format(type(x_trai)))
print('{}'.format(type(x_vali)))
print('{}'.format(type(x_test)))
print('')

# Class priors = fraction of samples per class.
# np.mean always returns floats; the previous `np.sum(...) / shape[0]`
# floor-divides under Python 2 when the targets have an integer dtype,
# which prints all-zero priors.
print('Prior')
print(np.mean(t_trai, axis=0))
print(np.mean(t_test, axis=0))


In [None]:
### VISUALIZE

# TODO: once the normalize/renormalize helpers exist, add colour support to
# the plotting function.

# Plot training images with the project helper from `tools`.
# NOTE(review): the meaning of `t=12` is defined in tools.plot_svhn and is not
# visible here - confirm against that function before changing it.
tls.plot_svhn(x_trai, t=12, gray=False)

In [None]:
### HYPER PARAMETERS
# VOLATILE HP
learning_rate = 1e-2          # step size handed to the Adam optimiser
L1 = 0                        # not read by any visible cell
L2 = 0                        # not read by any visible cell

samples_to_process = 1e5      # training stops after this many samples
val_interval       = 1e3      # samples between two evaluation passes


# ARCHITECTURE
num_latent_1 = num_classes    # width of the latent layer (mean and log-variance)
hid_size = 500                # units per hidden dense layer


# STABLE HP
eq_size = 1                   # passed to SampleLayer as eq_samples
iw_size = 1                   # passed to SampleLayer as iw_samples
# Single source of truth for the batch size. An earlier `batch_size = 60`
# in this cell was immediately overwritten by this value and has been removed.
batch_size = 128
max_epoch = 100               # not read by any visible cell


In [None]:
### HELPER FUNCTIONS
from lasagne.objectives import squared_error

# c = -0.5 * np.log(2*np.pi)
def clip(x):
    """Clamp x elementwise to the interval [-10, 10].

    Used as the nonlinearity of the log-variance layer. The log-variance is
    exponentiated in the KL term, so bounding it keeps exp(lv) inside a
    numerically safe range.
    """
    return T.clip(x, -10, 10)

def log_bernoulli(x, p, eps=1e-32):
    """
    Elementwise Bernoulli log-likelihood of targets x under probabilities p,
    i.e. the negated binary cross-entropy between p and x.

    p is clipped to [eps, 1 - eps] before the cross-entropy is taken so that
    probabilities of exactly 0 or 1 are avoided.

    NOTE(review): with the default eps=1e-32 the upper bound `1.0 - eps`
    rounds to exactly 1.0 in floating point, so only the lower bound is
    effective. Pass a larger eps (e.g. 1e-7) if p can reach 1.
    """
    p_safe = T.clip(p, eps, 1.0 - eps)
    cross_entropy = T.nnet.binary_crossentropy(p_safe, x)
    return -cross_entropy



def kl_normal_2_stdnormal(mu, lv):
    """
    Elementwise KL divergence KL( N(mu, exp(lv)) || N(0, 1) ).

    mu is the mean and lv the log-variance of a diagonal Gaussian; the result
    has the same shape as the inputs (no reduction is applied here).
    """
    bracket = 1 + lv - mu**2 - T.exp(lv)
    return -0.5 * bracket


def LogLikelihood(mux, x, muq, lvq):
    """
    Compute the training objective of the network (to be maximised).

    Parameters
    ----------
    mux : reconstruction produced by the decoder
    x   : the original input the reconstruction is compared against
    muq : mean of the approximate posterior q(z|x)
    lvq : log-variance of the approximate posterior q(z|x)

    Returns
    -------
    (LL, reconstruction_cost, KL_qp)
        LL                  -- -(reconstruction_cost + KL_qp); larger is better
        reconstruction_cost -- squared error, summed over the feature axis and
                               averaged over the batch (a cost: smaller is better)
        KL_qp               -- KL(q(z|x) || N(0, I)), summed over the latent
                               axis and averaged over the batch
    """
    # Sum over the feature dimension, mean over the samples.
    reconstruction_cost = squared_error(x, mux).sum(axis=1).mean()
    # Sum over the latent dimension, mean over the samples.
    KL_qp = kl_normal_2_stdnormal(muq, lvq).sum(axis=1).mean()

    # Both terms are penalties, so both enter the objective with a minus sign.
    # The previous `reconstruction_cost - KL_qp` made the optimiser (which
    # minimises -LL) *increase* the reconstruction error.
    LL = -reconstruction_cost - KL_qp

    return LL, reconstruction_cost, KL_qp
    

In [None]:
%%time
### CREATE MODEL
from lasagne.nonlinearities import leaky_rectify, rectify, sigmoid
from parmesan.layers import SampleLayer


def _relu_dense(incoming, name):
    """Stack a fully connected ReLU layer with hid_size units on `incoming`."""
    return L.DenseLayer(incoming,
                        num_units=hid_size,
                        nonlinearity=rectify,
                        name=name)


### ENCODER
l_in_x = L.InputLayer(shape=(None, num_features), name='l_in_x')
l_en_1 = _relu_dense(l_in_x, 'l_en_1')
l_en_2 = _relu_dense(l_en_1, 'l_en_2')

# Parameters of the approximate posterior: a linear mean and a clipped
# log-variance, both read off the last encoder layer.
l_mu_1 = L.DenseLayer(l_en_2, num_units=num_latent_1,
                      nonlinearity=None, name='l_mu_1')
l_lv_1 = L.DenseLayer(l_en_2, num_units=num_latent_1,
                      nonlinearity=clip, name='l_lv_1')

# Sample a latent representation:
#    z ~ q(z|x) = N(mu(x), exp(logvar(x)))
l_z_1 = SampleLayer(mean=l_mu_1,
                    log_var=l_lv_1,
                    eq_samples=eq_size,
                    iw_samples=iw_size,
                    name='l_z_1')

### DECODER
# The decoder has its own input layer so it can be fed either sampled codes
# from the encoder or externally supplied latent codes.
l_in_z = L.InputLayer(shape=(None, num_latent_1), name='l_in_z')
l_dec_1 = _relu_dense(l_in_z, 'l_dec_1')
l_dec_2 = _relu_dense(l_dec_1, 'l_dec_2')

# Sigmoid output: assumes the input images are scaled to [0, 1].
l_out = L.DenseLayer(l_dec_2, num_units=num_features,
                     nonlinearity=sigmoid, name='l_out')

In [None]:
%%time
### CREATE INTERFACE VARIABLES

# Symbolic inputs.
sym_x = T.matrix('x')  # (batch_size x 3072)
sym_z = T.matrix('z')  # latent variable (batch_size x num_latent)

# The three encoder heads are always requested together.
_encoder_heads = [l_z_1, l_mu_1, l_lv_1]

# Training graph (deterministic=False): encode x, then decode the sampled z.
z_train, mu_train, lv_train = L.get_output(
    _encoder_heads, {l_in_x: sym_x}, deterministic=False)
out_train = L.get_output(
    l_out, {l_in_z: z_train}, deterministic=False)

# Evaluation graph (deterministic=True): same wiring as the training graph.
z_eval, mu_eval, lv_eval = L.get_output(
    _encoder_heads, {l_in_x: sym_x}, deterministic=True)
out_eval = L.get_output(
    l_out, {l_in_z: z_eval}, deterministic=True)

# Decoder only: generate artificial data from externally supplied latent codes.
mux_sample = L.get_output(l_out, {l_in_z: sym_z})

# Compute the cost for both graphs.
LL_train, log_px_train, KL_train = LogLikelihood(
    out_train, sym_x, mu_train, lv_train)
LL_eval, log_px_eval, KL_eval = LogLikelihood(
    out_eval, sym_x, mu_eval, lv_eval)



In [None]:
%%time
### CREATE TRAINING FUNCTIONS

# Collect every trainable parameter of the encoder (reached through l_z_1)
# and of the decoder (reached through l_out).
all_params = L.get_all_params([l_z_1, l_out], trainable=True)

# Adam minimises its objective, so hand it the gradients of -LL in order to
# maximise LL.
all_grads  = T.grad(-LL_train, all_params)

updates    = lasagne.updates.adam(all_grads, all_params,
                                  learning_rate=learning_rate)

# Training function: return the loss terms and update the weights.
# (Previously passed the undefined name `update` and an empty `givens =`,
# which made the cell a syntax error.)
f_train = theano.function(inputs=[sym_x],
                          outputs=[LL_train, log_px_train, KL_train],
                          updates=updates)

# Evaluation function: return the loss terms of the evaluation graph without
# touching the weights. (Previously returned the training-graph outputs.)
f_eval = theano.function(inputs=[sym_x],
                         outputs=[LL_eval, log_px_eval, KL_eval])

# Get latent variable values
f_z              = theano.function(inputs=[sym_x], outputs=[z_eval])

# Return the reconstruction
f_reconstruction = theano.function(inputs=[sym_x], outputs=[out_eval])

# Simulate artificial data, given an artificial latent variable
f_simulate = theano.function(inputs=[sym_z], outputs=[mux_sample])


In [None]:
%%time
### TRAINING

# Histories of the evaluated loss terms. They get their own `_hist` names so
# they do not overwrite the symbolic LL_train / KL_train expressions that the
# graph-building cells define (re-running those cells would otherwise fail).
LL_train_hist, KL_train_hist, logpx_train_hist = [], [], []
LL_valid_hist, KL_valid_hist, logpx_valid_hist = [], [], []
valid_samples_processed = []   # x-axis for the histories above

samples_processed = 0
# Evaluate once this many samples have been seen. A threshold is used instead
# of `samples_processed % val_interval == 0`, which only fires when the batch
# size happens to divide the interval.
next_validation = val_interval

# Theano's T.matrix expects theano.config.floatX; convert once up front
# (np.asarray does not copy when the dtype already matches).
# Assumes x_trai / x_vali are 2-D NumPy arrays (samples x features).
floatX = theano.config.floatX
x_trai_f = np.asarray(x_trai, dtype=floatX)
x_vali_f = np.asarray(x_vali, dtype=floatX)
num_train = x_trai_f.shape[0]

try:
    while samples_processed < samples_to_process:
        # Draw a random mini-batch without replacement and take one Adam step.
        idxs = np.random.choice(num_train, size=batch_size, replace=False)
        f_train(x_trai_f[idxs])
        samples_processed += batch_size

        if samples_processed < next_validation:
            continue
        next_validation = samples_processed + val_interval

        # --- periodic evaluation ------------------------------------------
        valid_samples_processed.append(samples_processed)

        train_ll, train_logpx, train_kl = f_eval(x_trai_f)
        LL_train_hist.append(train_ll)
        logpx_train_hist.append(train_logpx)
        KL_train_hist.append(train_kl)

        valid_ll, valid_logpx, valid_kl = f_eval(x_vali_f)
        LL_valid_hist.append(valid_ll)
        logpx_valid_hist.append(valid_logpx)
        KL_valid_hist.append(valid_kl)

        print('{:>8d} samples | train LL {:.4f} | valid LL {:.4f}'.format(
            int(samples_processed), float(train_ll), float(valid_ll)))

        # Quantities intended for the live plots: latent codes of the
        # validation set, 100 decoded draws from the prior, and the
        # reconstructions of the validation set.
        z_valid = f_z(x_vali_f)[0]
        x_sample = f_simulate(
            np.random.normal(size=(100, num_latent_1)).astype(floatX))[0]
        x_recon = f_reconstruction(x_vali_f)[0]

        if plot_train:
            # TODO: port the live plotting (loss curves, PCA of z_valid,
            # sample / reconstruction canvases) to this notebook. The code
            # that used to sit here was written for 28x28 grayscale images
            # and relied on names that do not exist in this notebook, so it
            # was removed.
            pass

except KeyboardInterrupt:
    # Allow stopping a long run from the notebook UI; histories collected so
    # far remain available.
    print('Training interrupted after {} samples'.format(samples_processed))
