In [1]:
import pickle

import lasagne
import matplotlib.pyplot as plt
import numpy as np
import theano.tensor as T
from lasagne.layers import DenseLayer, DropoutLayer, NonlinearityLayer, InputLayer
from lasagne.layers import Pool2DLayer as PoolLayer
from lasagne.layers.dnn import Conv2DDNNLayer as ConvLayer
from lasagne.nonlinearities import softmax, leaky_rectify
from lasagne.layers import get_output, get_all_params
import theano

from samplelayer import SimpleSampleLayer

#theano.config.optimizer = 'None'
theano.config.exception_verbosity='high'
theano.config.compute_test_value = 'warn' 

Using gpu device 0: Tesla K80 (CNMeM is enabled with initial size: 95.0% of memory, cuDNN 5105)


In [2]:
TRAIN_FILE = 'x_train_100Samples.p'
VALIDATION_FILE = 'x_train_100SamplesValidation.p'
TARGET_FILE = 'targets_train_100Samples.p'
TARGET_VALIDATION_FILE = 'targets_train_100SamplesValidation.p'

In [3]:
class VAEHelper:

    @staticmethod
    def create_theano_functions(net):
        sym_x = T.tensor4('x')
        sym_x.tag.test_value = np.zeros((10,3,224,224), dtype = np.float32)
        sym_z = T.matrix('z')
        sym_z.tag.test_value = np.zeros((10,2), dtype = np.float32)
        sym_y = T.matrix('y')
        sym_y.tag.test_value = np.zeros((10,4), dtype = np.float32)
        
        z_train, muq_train, logvarq_train = get_output([net['z_vae'], net['muq_vae'], net['logvarq_vae']],
                                                       {net['input']: sym_x}, deterministic=False)
        prob_train = get_output(net['prob'], {net['in_z_vae']: z_train}, deterministic=False)

        z_eval, muq_eval, logvarq_eval = get_output([net['z_vae'], net['muq_vae'], net['logvarq_vae']],
                                                    {net['input']: sym_x}, deterministic=True)
        prob_eval = get_output(net['prob'], {net['in_z_vae']: z_eval}, deterministic=True)

        prob_sample = get_output(net['prob'], {net['in_z_vae']: sym_z}, deterministic=True)

        LL_train, logpx_train, KL_train = VAEHelper.LogLikelihood(prob_train, sym_y, muq_train, logvarq_train)
        LL_eval, logpx_eval, KL_eval = VAEHelper.LogLikelihood(prob_eval, sym_y, muq_eval, logvarq_eval)

        all_params = get_all_params([net['z_vae'],net['prob']],trainable=True)

        # Let Theano do its magic and get all the gradients we need for training
        all_grads = T.grad(-LL_train, all_params)

        # Set the update function for parameters. The Adam optimizer works really well with VAEs.
        updates = lasagne.updates.adam(all_grads, all_params, learning_rate=1e-2)

        f_train = theano.function(inputs=[sym_x, sym_y],
                                  outputs=[LL_train, logpx_train, KL_train],
                                  updates=updates)

        f_eval = theano.function(inputs=[sym_x, sym_y],
                                 outputs=[LL_eval, logpx_eval, KL_eval])

        f_z = theano.function(inputs=[sym_x],
                              outputs=[z_eval])

        f_sample = theano.function(inputs=[sym_z],
                                   outputs=[prob_sample])

        f_recon = theano.function(inputs=[sym_x],
                                  outputs=[prob_eval])

        return f_train, f_eval, f_z, f_sample, f_recon

    @staticmethod
    def LogLikelihood(mux,x,muq,logvarq):
        log_px_given_z = VAEHelper.log_bernoulli(x, mux, eps=1e-6).sum(axis=1).mean() #note that we sum the latent dimension and mean over the samples
        KL_qp = VAEHelper.kl_normal2_stdnormal(muq, logvarq).sum(axis=1).mean() # * 0 # To ignore the KL term
        LL = log_px_given_z - KL_qp
        return LL, log_px_given_z, KL_qp

    @staticmethod
    def log_bernoulli(x, p, eps=0.0):
        p = T.clip(p, eps, 1.0 - eps)
        return -T.nnet.binary_crossentropy(p, x)

    @staticmethod
    def kl_normal2_stdnormal(mean, log_var):
        return -0.5*(1 + log_var - mean**2 - T.exp(log_var))

In [4]:
NUM_LATENT_Z = 2
NUM_CLASSES = 4

def create_network():
    net = {}

    # VGG Net
    net['input'] = InputLayer((None, 3, 224, 224))
    net['conv1_1'] = ConvLayer(net['input'], 64, 3, pad=1, flip_filters=False)
    net['conv1_2'] = ConvLayer(net['conv1_1'], 64, 3, pad=1, flip_filters=False)
    net['pool1'] = PoolLayer(net['conv1_2'], 2)
    net['conv2_1'] = ConvLayer(net['pool1'], 128, 3, pad=1, flip_filters=False)
    net['conv2_2'] = ConvLayer(net['conv2_1'], 128, 3, pad=1, flip_filters=False)
    net['pool2'] = PoolLayer(net['conv2_2'], 2)
    net['conv3_1'] = ConvLayer(net['pool2'], 256, 3, pad=1, flip_filters=False)
    net['conv3_2'] = ConvLayer(net['conv3_1'], 256, 3, pad=1, flip_filters=False)
    net['conv3_3'] = ConvLayer(net['conv3_2'], 256, 3, pad=1, flip_filters=False)
    net['conv3_4'] = ConvLayer(net['conv3_3'], 256, 3, pad=1, flip_filters=False)
    net['pool3'] = PoolLayer(net['conv3_4'], 2)
    net['conv4_1'] = ConvLayer(net['pool3'], 512, 3, pad=1, flip_filters=False)
    net['conv4_2'] = ConvLayer(net['conv4_1'], 512, 3, pad=1, flip_filters=False)
    net['conv4_3'] = ConvLayer(net['conv4_2'], 512, 3, pad=1, flip_filters=False)
    net['conv4_4'] = ConvLayer(net['conv4_3'], 512, 3, pad=1, flip_filters=False)
    net['pool4'] = PoolLayer(net['conv4_4'], 2)
    net['conv5_1'] = ConvLayer(net['pool4'], 512, 3, pad=1, flip_filters=False)
    net['conv5_2'] = ConvLayer(net['conv5_1'], 512, 3, pad=1, flip_filters=False)
    net['conv5_3'] = ConvLayer(net['conv5_2'], 512, 3, pad=1, flip_filters=False)
    net['conv5_4'] = ConvLayer(net['conv5_3'], 512, 3, pad=1, flip_filters=False)
    net['pool5'] = PoolLayer(net['conv5_4'], 2)
    net['fc6'] = DenseLayer(net['pool5'], num_units=4096)
    net['fc6_dropout'] = DropoutLayer(net['fc6'], p=0.5)
    net['fc7'] = DenseLayer(net['fc6_dropout'], num_units=4096)
    net['fc7_dropout'] = DropoutLayer(net['fc7'], p=0.5)
    net['fc8_not_used'] = DenseLayer(net['fc7_dropout'], num_units=1000, nonlinearity=None)
    net['prob_not_used'] = NonlinearityLayer(net['fc8_not_used'], softmax)
    
    set_vgg_params(net)

    # VAE
    net['enc_vae'] = DenseLayer(net['fc7_dropout'], num_units=128, nonlinearity=leaky_rectify)
    net['muq_vae'] = DenseLayer(net['enc_vae'], num_units=NUM_LATENT_Z, nonlinearity=None)     #mu(x)
    net['logvarq_vae'] = DenseLayer(net['enc_vae'], num_units=NUM_LATENT_Z, nonlinearity=lambda x: T.clip(x,-10,10)) #logvar(x)
    net['z_vae'] = SimpleSampleLayer(mean=net['muq_vae'], log_var=net['logvarq_vae']) # sample a latent representation z \sim q(z|x) = N(mu(x),logvar(x))
    net['in_z_vae'] = InputLayer(shape=(None, NUM_LATENT_Z))
    #net['dec_vae'] = DenseLayer(net['in_z_vae'], num_units=128, nonlinearity=leaky_rectify)

    # Vanilla network
    net['fc8'] = DenseLayer(net['in_z_vae'], num_units=128)
    net['fc8_dropout'] = DropoutLayer(net['fc8'], p=0.5)
    net['fc9'] = DenseLayer(net['fc8_dropout'], num_units=4, nonlinearity=None)
    net['prob'] = NonlinearityLayer(net['fc9'], softmax)

    return net

def set_vgg_params(net):
    model = pickle.load(open('vgg19.pkl'))

    # Remove the trainable argument from the layers that can potentially have it
    for key, val in net.iteritems():
        if not ('dropout' or 'pool' in key):
            net[key].params[net[key].W].remove("trainable")
            net[key].params[net[key].b].remove("trainable")

    lasagne.layers.set_all_param_values(net['prob_not_used'], model['param values'])


In [5]:
def load_data():
    with open(TRAIN_FILE, 'r') as f:
        x_train = pickle.load(f)
    with open(VALIDATION_FILE, 'r') as f:
        x_valid = pickle.load(f)
    with open(TARGET_FILE, 'r') as f:
        targets_train = pickle.load(f)
    with open(TARGET_VALIDATION_FILE, 'r') as f:
        targets_valid = pickle.load(f)
    

    x_train = np.asarray(x_train).squeeze(axis=1)
    x_valid = np.asarray(x_valid).squeeze(axis=1)
    targets_train = np.asarray(targets_train, dtype=np.float32)
    targets_valid = np.asarray(targets_valid, dtype=np.float32)
    
    return x_train, x_valid, targets_train, targets_valid

In [6]:
# Create network
net = create_network()

# TODO : Call function to get the data
x_train, x_valid, targets_train, targets_valid = load_data()

In [7]:
print x_train.shape
print x_valid.shape
print targets_train.shape
print targets_valid.shape

(394, 3, 224, 224)
(78, 3, 224, 224)
(394, 4)
(78, 4)


In [8]:
f_train, f_eval, f_z, f_sample, f_recon = VAEHelper.create_theano_functions(net)

  mode=self.mode,
  mode=self.mode,
  mode=self.mode,


In [9]:
batch_size = 10
samples_to_process = 1e4
val_interval = 5e2

LL_train, KL_train, logpx_train = [],[],[]
LL_valid, KL_valid, logpx_valid = [],[],[]
samples_processed = 0
plt.figure(figsize=(12, 24))
valid_samples_processed = []

try:
    while samples_processed < samples_to_process:
        _LL_train, _KL_train, _logpx_train = [],[],[]
        idxs = np.random.choice(range(x_train.shape[0]), size=(batch_size), replace=False)
        x_batch = x_train[idxs]
        y_batch = targets_train[idxs]
#         print x_batch.shape
        out = f_train(x_batch, y_batch)
        samples_processed += batch_size

        if samples_processed % val_interval == 0:
            valid_samples_processed += [samples_processed]
            out = f_eval(x_train, y_batch)
            LL_train += [out[0]]
            logpx_train += [out[1][:,0]] # or could mean the log_px_given_z, KL_qp in ll function
            KL_train += [out[2][:,0]]

            out = f_eval(x_valid)
            LL_valid += [out[0]]
            logpx_valid += [out[1][:,0]] #just pick a single sample, or could mean the log_px_given_z, KL_qp in ll function
            KL_valid += [out[2][:,0]]

            z_eval = f_z(x_valid)[0]
            x_sample = f_sample(np.random.normal(size=(batch_size, NUM_LATENT_Z)).astype('float32'))[0]
            x_recon = f_recon(x_valid)[0]

            plt.subplot(NUM_CLASSES+2,2,1)
            plt.legend(['LL', 'log(p(x))'], loc=2)
            plt.xlabel('Updates')
            plt.plot(valid_samples_processed, LL_train, color="black")
            plt.plot(valid_samples_processed, logpx_train, color="red")
            plt.plot(valid_samples_processed, LL_valid, color="black", linestyle="--")
            plt.plot(valid_samples_processed, logpx_valid, color="red", linestyle="--")
            plt.ticklabel_format(style='sci', axis='x', scilimits=(0,0))
            plt.grid('on')

            plt.subplot(NUM_CLASSES+2,2,2)
            plt.cla()
            plt.xlabel('z0'), plt.ylabel('z1')
            color = iter(plt.get_cmap('brg')(np.linspace(0, 1.0, NUM_CLASSES)))
            for i in range(NUM_CLASSES):
                clr = next(color)
                plt.scatter(z_eval[targets_valid==i, 0], z_eval[targets_valid==i, 1], c=clr, s=5., lw=0, marker='o', )
            plt.grid('on')

            plt.savefig("out52.png")
            display(Image(filename="out52.png"))
            clear_output(wait=True)

            plt.subplot(NUM_CLASSES+2,2,3)
            plt.legend(['KL(q||p)'])
            plt.xlabel('Updates')
            plt.plot(valid_samples_processed, KL_train, color="blue")
            plt.plot(valid_samples_processed, KL_valid, color="blue", linestyle="--")
            plt.ticklabel_format(style='sci', axis='x', scilimits=(0,0))
            plt.grid('on')

            plt.subplot(NUM_CLASSES+2,2,4)
            plt.cla()
            plt.title('Samples')
            plt.axis('off')
            idx = 0
            canvas = np.zeros((28*10, 10*28))
            for i in range(10):
                for j in range(10):
                    canvas[i*28:(i+1)*28, j*28:(j+1)*28] = x_sample[idx].reshape((28, 28))
                    idx += 1
            plt.imshow(canvas, cmap='gray')

            c=0
            for k in range(5, 5 + NUM_CLASSES*2, 2):
                plt.subplot(NUM_CLASSES+2,2,k)
                plt.cla()
                plt.title('Inputs for %i' % c)
                plt.axis('off')
                idx = 0
                canvas = np.zeros((28*10, 10*28))
                for i in range(10):
                    for j in range(10):
                        canvas[i*28:(i+1)*28, j*28:(j+1)*28] = x_valid[targets_valid==c][idx].reshape((28, 28))
                        idx += 1
                plt.imshow(canvas, cmap='gray')

                plt.subplot(NUM_CLASSES+2,2,k+1)
                plt.cla()
                plt.title('Reconstructions for %i' % c)
                plt.axis('off')
                idx = 0
                canvas = np.zeros((28*10, 10*28))
                for i in range(10):
                    for j in range(10):
                        canvas[i*28:(i+1)*28, j*28:(j+1)*28] = x_recon[targets_valid==c][idx].reshape((28, 28))
                        idx += 1
                plt.imshow(canvas, cmap='gray')
                c += 1

except KeyboardInterrupt:
    pass

MemoryError: Error allocating 5060952064 bytes of device memory (CNMEM_STATUS_OUT_OF_MEMORY).
Apply node that caused the error: GpuAllocEmpty(Shape_i{0}.0, Shape_i{0}.0, Elemwise{Composite{((i0 + i1) - i2)}}.0, Elemwise{Composite{((i0 + i1) - i2)}}.0)
Toposort index: 139
Inputs types: [TensorType(int64, scalar), TensorType(int64, scalar), TensorType(int64, scalar), TensorType(int64, scalar)]
Inputs shapes: [(), (), (), ()]
Inputs strides: [(), (), (), ()]
Inputs values: [array(394), array(64), array(224), array(224)]
Inputs type_num: [7, 7, 7, 7]
Outputs clients: [[GpuDnnConv{algo='small', inplace=True}(GpuContiguous.0, GpuContiguous.0, GpuAllocEmpty.0, GpuDnnConvDesc{border_mode=(1, 1), subsample=(1, 1), conv_mode='cross', precision='float32'}.0, Constant{1.0}, Constant{0.0})]]

Backtrace when the node is created(use Theano flag traceback.limit=N to make it longer):
  File "/home/icarus/anaconda2/lib/python2.7/site-packages/IPython/core/interactiveshell.py", line 2717, in run_cell
    interactivity=interactivity, compiler=compiler, result=result)
  File "/home/icarus/anaconda2/lib/python2.7/site-packages/IPython/core/interactiveshell.py", line 2821, in run_ast_nodes
    if self.run_code(code, result):
  File "/home/icarus/anaconda2/lib/python2.7/site-packages/IPython/core/interactiveshell.py", line 2881, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-8-f856271dacf8>", line 1, in <module>
    f_train, f_eval, f_z, f_sample, f_recon = VAEHelper.create_theano_functions(net)
  File "<ipython-input-3-86df75f251ec>", line 17, in create_theano_functions
    {net['input']: sym_x}, deterministic=True)
  File "/home/icarus/anaconda2/lib/python2.7/site-packages/lasagne/layers/helper.py", line 191, in get_output
    all_outputs[layer] = layer.get_output_for(layer_inputs, **kwargs)
  File "/home/icarus/anaconda2/lib/python2.7/site-packages/lasagne/layers/conv.py", line 330, in get_output_for
    conved = self.convolve(input, **kwargs)
  File "/home/icarus/anaconda2/lib/python2.7/site-packages/lasagne/layers/dnn.py", line 380, in convolve
    conv_mode=conv_mode

Debugprint of the apply node: 
GpuAllocEmpty [id A] <CudaNdarrayType(float32, 4D)> ''   
 |Shape_i{0} [id B] <TensorType(int64, scalar)> ''   
 | |x [id C] <TensorType(float32, 4D)>
 |Shape_i{0} [id D] <TensorType(int64, scalar)> ''   
 | |W [id E] <CudaNdarrayType(float32, 4D)>
 |Elemwise{Composite{((i0 + i1) - i2)}} [id F] <TensorType(int64, scalar)> ''   
 | |TensorConstant{3} [id G] <TensorType(int64, scalar)>
 | |Shape_i{2} [id H] <TensorType(int64, scalar)> ''   
 | | |x [id C] <TensorType(float32, 4D)>
 | |Shape_i{2} [id I] <TensorType(int64, scalar)> ''   
 |   |W [id E] <CudaNdarrayType(float32, 4D)>
 |Elemwise{Composite{((i0 + i1) - i2)}} [id J] <TensorType(int64, scalar)> ''   
   |TensorConstant{3} [id G] <TensorType(int64, scalar)>
   |Shape_i{3} [id K] <TensorType(int64, scalar)> ''   
   | |x [id C] <TensorType(float32, 4D)>
   |Shape_i{3} [id L] <TensorType(int64, scalar)> ''   
     |W [id E] <CudaNdarrayType(float32, 4D)>

Storage map footprint:
 - W, Shared Input, Shape: (25088, 4096), ElemSize: 4 Byte(s), TotalSize: 411041792 Byte(s)
 - x, Input, Shape: (394, 3, 224, 224), ElemSize: 4 Byte(s), TotalSize: 237232128 Byte(s)
 - GpuContiguous.0, Shape: (394, 3, 224, 224), ElemSize: 4 Byte(s), TotalSize: 237232128 Byte(s)
 - W, Shared Input, Shape: (4096, 4096), ElemSize: 4 Byte(s), TotalSize: 67108864 Byte(s)
 - W, Shared Input, Shape: (512, 512, 3, 3), ElemSize: 4 Byte(s), TotalSize: 9437184 Byte(s)
 - W, Shared Input, Shape: (512, 512, 3, 3), ElemSize: 4 Byte(s), TotalSize: 9437184 Byte(s)
 - W, Shared Input, Shape: (512, 512, 3, 3), ElemSize: 4 Byte(s), TotalSize: 9437184 Byte(s)
 - W, Shared Input, Shape: (512, 512, 3, 3), ElemSize: 4 Byte(s), TotalSize: 9437184 Byte(s)
 - W, Shared Input, Shape: (512, 512, 3, 3), ElemSize: 4 Byte(s), TotalSize: 9437184 Byte(s)
 - W, Shared Input, Shape: (512, 512, 3, 3), ElemSize: 4 Byte(s), TotalSize: 9437184 Byte(s)
 - W, Shared Input, Shape: (512, 512, 3, 3), ElemSize: 4 Byte(s), TotalSize: 9437184 Byte(s)
 - W, Shared Input, Shape: (512, 256, 3, 3), ElemSize: 4 Byte(s), TotalSize: 4718592 Byte(s)
 - W, Shared Input, Shape: (256, 256, 3, 3), ElemSize: 4 Byte(s), TotalSize: 2359296 Byte(s)
 - W, Shared Input, Shape: (256, 256, 3, 3), ElemSize: 4 Byte(s), TotalSize: 2359296 Byte(s)
 - W, Shared Input, Shape: (256, 256, 3, 3), ElemSize: 4 Byte(s), TotalSize: 2359296 Byte(s)
 - W, Shared Input, Shape: (4096, 128), ElemSize: 4 Byte(s), TotalSize: 2097152 Byte(s)
 - W, Shared Input, Shape: (256, 128, 3, 3), ElemSize: 4 Byte(s), TotalSize: 1179648 Byte(s)
 - W, Shared Input, Shape: (128, 128, 3, 3), ElemSize: 4 Byte(s), TotalSize: 589824 Byte(s)
 - GPU_mrg_uniform{CudaNdarrayType(float32, vector),inplace}.0, Shape: (92160,), ElemSize: 4 Byte(s), TotalSize: 368640 Byte(s)
 - <CudaNdarrayType(float32, vector)>, Shared Input, Shape: (92160,), ElemSize: 4 Byte(s), TotalSize: 368640 Byte(s)
 - W, Shared Input, Shape: (128, 64, 3, 3), ElemSize: 4 Byte(s), TotalSize: 294912 Byte(s)
 - W, Shared Input, Shape: (64, 64, 3, 3), ElemSize: 4 Byte(s), TotalSize: 147456 Byte(s)
 - b, Shared Input, Shape: (4096,), ElemSize: 4 Byte(s), TotalSize: 16384 Byte(s)
 - b, Shared Input, Shape: (4096,), ElemSize: 4 Byte(s), TotalSize: 16384 Byte(s)
 - GpuContiguous.0, Shape: (64, 3, 3, 3), ElemSize: 4 Byte(s), TotalSize: 6912 Byte(s)
 - W, Shared Input, Shape: (64, 3, 3, 3), ElemSize: 4 Byte(s), TotalSize: 6912 Byte(s)
 - GpuReshape{2}.0, Shape: (394, 2), ElemSize: 4 Byte(s), TotalSize: 3152 Byte(s)
 - W, Shared Input, Shape: (128, 4), ElemSize: 4 Byte(s), TotalSize: 2048 Byte(s)
 - b, Shared Input, Shape: (512,), ElemSize: 4 Byte(s), TotalSize: 2048 Byte(s)
 - b, Shared Input, Shape: (512,), ElemSize: 4 Byte(s), TotalSize: 2048 Byte(s)
 - b, Shared Input, Shape: (512,), ElemSize: 4 Byte(s), TotalSize: 2048 Byte(s)
 - b, Shared Input, Shape: (512,), ElemSize: 4 Byte(s), TotalSize: 2048 Byte(s)
 - b, Shared Input, Shape: (512,), ElemSize: 4 Byte(s), TotalSize: 2048 Byte(s)
 - b, Shared Input, Shape: (512,), ElemSize: 4 Byte(s), TotalSize: 2048 Byte(s)
 - b, Shared Input, Shape: (512,), ElemSize: 4 Byte(s), TotalSize: 2048 Byte(s)
 - b, Shared Input, Shape: (512,), ElemSize: 4 Byte(s), TotalSize: 2048 Byte(s)
 - b, Shared Input, Shape: (256,), ElemSize: 4 Byte(s), TotalSize: 1024 Byte(s)
 - b, Shared Input, Shape: (256,), ElemSize: 4 Byte(s), TotalSize: 1024 Byte(s)
 - b, Shared Input, Shape: (256,), ElemSize: 4 Byte(s), TotalSize: 1024 Byte(s)
 - b, Shared Input, Shape: (256,), ElemSize: 4 Byte(s), TotalSize: 1024 Byte(s)
 - W, Shared Input, Shape: (128, 2), ElemSize: 4 Byte(s), TotalSize: 1024 Byte(s)
 - W, Shared Input, Shape: (128, 2), ElemSize: 4 Byte(s), TotalSize: 1024 Byte(s)
 - W, Shared Input, Shape: (2, 128), ElemSize: 4 Byte(s), TotalSize: 1024 Byte(s)
 - b, Shared Input, Shape: (128,), ElemSize: 4 Byte(s), TotalSize: 512 Byte(s)
 - b, Shared Input, Shape: (128,), ElemSize: 4 Byte(s), TotalSize: 512 Byte(s)
 - b, Shared Input, Shape: (128,), ElemSize: 4 Byte(s), TotalSize: 512 Byte(s)
 - b, Shared Input, Shape: (128,), ElemSize: 4 Byte(s), TotalSize: 512 Byte(s)
 - b, Shared Input, Shape: (64,), ElemSize: 4 Byte(s), TotalSize: 256 Byte(s)
 - b, Shared Input, Shape: (64,), ElemSize: 4 Byte(s), TotalSize: 256 Byte(s)
 - GpuFromHost.0, Shape: (10, 4), ElemSize: 4 Byte(s), TotalSize: 160 Byte(s)
 - y, Input, Shape: (10, 4), ElemSize: 4 Byte(s), TotalSize: 160 Byte(s)
 - b, Shared Input, Shape: (4,), ElemSize: 4 Byte(s), TotalSize: 16 Byte(s)
 - TensorConstant{(2,) of 2}, Shape: (2,), ElemSize: 8 Byte(s), TotalSize: 16 Byte(s)
 - TensorConstant{(2,) of 0}, Shape: (2,), ElemSize: 8 Byte(s), TotalSize: 16 Byte(s)
 - TensorConstant{4}, Shape: (), ElemSize: 8 Byte(s), TotalSize: 8.0 Byte(s)
 - TensorConstant{1}, Shape: (), ElemSize: 8 Byte(s), TotalSize: 8.0 Byte(s)
 - Shape_i{3}.0, Shape: (), ElemSize: 8 Byte(s), TotalSize: 8.0 Byte(s)
 - Elemwise{Composite{((i0 + i1) - i2)}}.0, Shape: (), ElemSize: 8 Byte(s), TotalSize: 8.0 Byte(s)
 - Shape_i{0}.0, Shape: (), ElemSize: 8 Byte(s), TotalSize: 8.0 Byte(s)
 - Shape_i{3}.0, Shape: (), ElemSize: 8 Byte(s), TotalSize: 8.0 Byte(s)
 - TensorConstant{2}, Shape: (), ElemSize: 8 Byte(s), TotalSize: 8.0 Byte(s)
 - TensorConstant{6}, Shape: (), ElemSize: 8 Byte(s), TotalSize: 8.0 Byte(s)
 - Shape_i{2}.0, Shape: (), ElemSize: 8 Byte(s), TotalSize: 8.0 Byte(s)
 - TensorConstant{12}, Shape: (), ElemSize: 8 Byte(s), TotalSize: 8.0 Byte(s)
 - b, Shared Input, Shape: (2,), ElemSize: 4 Byte(s), TotalSize: 8 Byte(s)
 - b, Shared Input, Shape: (2,), ElemSize: 4 Byte(s), TotalSize: 8 Byte(s)
 - Elemwise{Composite{((i0 + i1) - i2)}}.0, Shape: (), ElemSize: 8 Byte(s), TotalSize: 8.0 Byte(s)
 - TensorConstant{0}, Shape: (), ElemSize: 8 Byte(s), TotalSize: 8.0 Byte(s)
 - Shape_i{0}.0, Shape: (), ElemSize: 8 Byte(s), TotalSize: 8.0 Byte(s)
 - Shape_i{2}.0, Shape: (), ElemSize: 8 Byte(s), TotalSize: 8.0 Byte(s)
 - TensorConstant{-1}, Shape: (), ElemSize: 8 Byte(s), TotalSize: 8.0 Byte(s)
 - TensorConstant{3}, Shape: (), ElemSize: 8 Byte(s), TotalSize: 8.0 Byte(s)
 - TensorConstant{10}, Shape: (), ElemSize: 8 Byte(s), TotalSize: 8.0 Byte(s)
 - TensorConstant{9}, Shape: (), ElemSize: 8 Byte(s), TotalSize: 8.0 Byte(s)
 - CudaNdarrayConstant{[[ 0.505]]}, Shape: (1, 1), ElemSize: 4 Byte(s), TotalSize: 4 Byte(s)
 - CudaNdarrayConstant{[[ 10.]]}, Shape: (1, 1), ElemSize: 4 Byte(s), TotalSize: 4 Byte(s)
 - CudaNdarrayConstant{[[  9.99999997e-07]]}, Shape: (1, 1), ElemSize: 4 Byte(s), TotalSize: 4 Byte(s)
 - CudaNdarrayConstant{[[ 0.99999899]]}, Shape: (1, 1), ElemSize: 4 Byte(s), TotalSize: 4 Byte(s)
 - CudaNdarrayConstant{-0.5}, Shape: (), ElemSize: 4 Byte(s), TotalSize: 4.0 Byte(s)
 - Constant{1.0}, Shape: (), ElemSize: 4 Byte(s), TotalSize: 4.0 Byte(s)
 - CudaNdarrayConstant{[[-10.]]}, Shape: (1, 1), ElemSize: 4 Byte(s), TotalSize: 4 Byte(s)
 - CudaNdarrayConstant{[-2.]}, Shape: (1,), ElemSize: 4 Byte(s), TotalSize: 4 Byte(s)
 - CudaNdarrayConstant{[ 6.28318548]}, Shape: (1,), ElemSize: 4 Byte(s), TotalSize: 4 Byte(s)
 - CudaNdarrayConstant{[[ 0.495]]}, Shape: (1, 1), ElemSize: 4 Byte(s), TotalSize: 4 Byte(s)
 - CudaNdarrayConstant{[[ 0.5]]}, Shape: (1, 1), ElemSize: 4 Byte(s), TotalSize: 4 Byte(s)
 - Constant{0.0}, Shape: (), ElemSize: 4 Byte(s), TotalSize: 4.0 Byte(s)
 - CudaNdarrayConstant{[[[[ 0.5]]]]}, Shape: (1, 1, 1, 1), ElemSize: 4 Byte(s), TotalSize: 4 Byte(s)
 - CudaNdarrayConstant{[[ 1.]]}, Shape: (1, 1), ElemSize: 4 Byte(s), TotalSize: 4 Byte(s)
 - TensorConstant{0}, Shape: (), ElemSize: 1 Byte(s), TotalSize: 1.0 Byte(s)
 TotalSize: 1035220889.0 Byte(s) 0.964 GB
 TotalSize inputs: 797985385.0 Byte(s) 0.743 GB

