In [1]:
# This code is adapted from https://github.com/deepmind/conditional-neural-process/blob/master/conditional_neural_process.ipynb
# This is only for my own learning purpose only
import tensorflow as tf
import tensorflow_probability as tfp
import numpy as np
import collections

  from ._conv import register_converters as _register_converters


In [2]:
CNPRegressionDescription = collections.namedtuple(
    "CNPRegressionDescription",
    ("query", "target_y", "num_total_points", "num_context_points"))

In [3]:
CNPRegressionDescription

__main__.CNPRegressionDescription

In [4]:
class Gaussian_Processing_curves_Generation():
    def __init__(self, batch_size, feature_size= 1,
                 output_size = 1, l1_regulation_scale=0.4,
                 sigma_scale=1.0):
        self.batch_size = batch_size
        self.feature_size = feature_size
        self.output_size = output_size
        self.l1 = l1_regulation_scale
        self.sigma = sigma_scale
    
    def cruve(self):
        context_for_X_and_Y = tf.random_uniform((),minval=3, maxval=self.feature_size, dtype=tf.int32)
        target_for_X_and_Y = tf.random_uniform((),minval=3, maxval=self.feature_size, dtype=tf.int32)
        
        x_value = tf.random_uniform([self.batch_size, context_for_X_and_Y+target_for_X_and_Y, self.feature_size], -2, 2)
        l1_re = tf.ones([self.batch_size, self.output_size , self.feature_size])* self.l1
        sig = tf.ones([self.batch_size, self.output_size])* self.sigma
        kernel = self._cruve_kernel(x_value, l1_re, sig)
        cholesky = tf.cast(tf.cholesky(tf.cast(kernel, tf.float64)), tf.float32)
        print(cholesky)
        y_values = tf.matmul(cholesky,
        tf.random_normal([self.batch_size, self.output_size, context_for_X_and_Y+target_for_X_and_Y, 1]))
        y_value = tf.transpose(tf.squeeze(y_values, 3), [0, 2, 1])
        x_context = x_value[:,:context_for_X_and_Y,:]
        y_context = y_value[:,:context_for_X_and_Y,:]
        query = ((x_context, y_context), x_value)
        return CNPRegressionDescription(query = query, target_y = y_value,
                                       num_total_points= context_for_X_and_Y+target_for_X_and_Y,
                                       num_context_points = context_for_X_and_Y)
        
    def _cruve_kernel(self, x_value, l1_re, sig , noise= 2e-2):
        x_value1 = tf.expand_dims(x_value, axis=1)
        x_value2 = tf.expand_dims(x_value, axis=2)
        differences = x_value1 - x_value2
        normalize = tf.square(differences[:, None, :,:,:]/ l1_re[:,:,None,None,:])
        normalize = tf.reduce_sum(normalize, -1)
        kernel = tf.square(sig)[:,:,None,None] * tf.exp(-0.5*normalize)
        kernel += noise**2 *tf.eye(tf.shape(x_value)[1])
        return kernel


In [5]:
TRAINING_ITERATIONS = int(2e5)
MAX_CONTEXT_POINTS = 10
PLOT_AFTER = int(2e4)

In [6]:
dataset_train = Gaussian_Processing_curves_Generation(
    batch_size=64, feature_size=MAX_CONTEXT_POINTS)
data_train = dataset_train.cruve()
dataset_test = Gaussian_Processing_curves_Generation(
    batch_size=1, feature_size=MAX_CONTEXT_POINTS)
data_test = dataset_test.cruve()

Tensor("Cast_1:0", shape=(64, 1, ?, ?), dtype=float32)
Tensor("Cast_3:0", shape=(1, 1, ?, ?), dtype=float32)


In [7]:
class Encoder():
    def __init__(self, output_r_size):
        self.out = output_r_size
    def __call__(self, x_context, y_context, context_number):
        concat = tf.concat([x_context,y_context], axis=-1)
        batch_size , _, num = concat.shape.as_list()
        hidden = tf.reshape(concat, (batch_size * context_number, -1))
        hidden.set_shape((None, num))
        
        with tf.variable_scope("encoder", reuse=tf.AUTO_REUSE):
            for index, shape in enumerate(self.out[:-1]):
                hidden = tf.layers.dense(hidden, shape, name="Encoder_"+ str(index))
                hidden = tf.nn.relu(hidden)
            hidden = tf.layers.dense(hidden, self.out[-1], name="Encoder_"+ str(index+1))
        hidden = tf.reshape(hidden , (batch_size, context_number, shape))
        representation = tf.reduce_mean(hidden, axis=1)
        return representation

In [8]:
class Decoder():
    def __init__(self, output_g_size):
        self.out = output_g_size
    def __call__(self, representation, target_x, total_point):
        representation_resize = tf.tile(tf.expand_dims(representation, axis=1),[1,total_point,1])
        input = tf.concat([representation_resize, target_x], axis=-1)
        batch_size, _, num = input.shape.as_list()
        hidden = tf.reshape(input, (batch_size * total_point, -1))
        hidden.set_shape((None, num))
        with tf.variable_scope("decoder", reuse=tf.AUTO_REUSE):
            for index, shape in enumerate(self.out[:-1]):
                hidden = tf.layers.dense(hidden, shape, name="Decoder_"+ str(index))
                hidden = tf.nn.relu(hidden)
            hidden = tf.layers.dense(hidden, self.out[-1], name="Decoder_"+ str(index+1))
        hidden = tf.reshape(hidden , (batch_size, total_point, -1))
        mu, log_sigma = tf.split(hidden, 2, axis=-1)
        sigma = 0.1 + 0.9 * tf.nn.softplus(log_sigma)
        
        dist = tfp.distributions.MultivariateNormalDiag(loc=mu, scale_diag=sigma)
        
        return dist,mu,sigma
        

In [9]:
class CNP():
    def __init__(self,encoder_output, decoder_output):
        self.encoder = Encoder(encoder_output)
        self.decoder = Decoder(decoder_output)
    def __call__(self, query, total_points, num_context, target_y = None):
        (context_x, context_y), target_x = query
        representation = self.encoder(context_x, context_y, num_context)
        dist, mu, sigma = self.decoder(representation, target_x, total_points)
        
        if target_y is not None:
            log_p = dist.log_prob(target_y)
        else:
            log_p = None
        return log_p,mu,sigma

In [10]:
encoder_output_sizes = [128, 128, 128, 128]
decoder_output_sizes = [128, 128, 2]

In [11]:
model = CNP(encoder_output_sizes, decoder_output_sizes)

In [12]:
log_prob, _, _ = model(data_train.query, data_train.num_total_points,
                       data_train.num_context_points, data_train.target_y)

In [13]:
loss = -tf.reduce_mean(log_prob)
_, mu, sigma = model(data_test.query, data_test.num_total_points,
                     data_test.num_context_points)

In [14]:
optimizer = tf.train.AdamOptimizer(1e-4)
train_step = optimizer.minimize(loss)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for it in range(TRAINING_ITERATIONS):
        _,loss_reduce=sess.run([train_step, loss])

KeyboardInterrupt: 