<a href="https://colab.research.google.com/github/Spinkk/Implementing-ANNs-with-Tensorflow/blob/main/final/model_minseok.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import tensorflow as tf

# Hyperparameters

In [None]:
z_dim = 512  # latent dim z_t (size of the encoder's output embedding)
c_dim = 256  # dim of g_ar output c_t (number of GRU units)

# Model
- Encoder (g_enc): strided 1D convolutions
- Autoregressive model (g_ar): GRU
- Transformation of the context c_t: linear, one map W_k per future timestep k

In [None]:
class Encoder (tf.keras.layers.Layer):
    '''
    g_enc: strided 1d convolution

    Five Conv1D -> BatchNormalization -> LeakyReLU stages, followed by global
    average pooling and a tanh Dense projection to z_dim features.
    '''

    def __init__ (self, z_dim):
        '''
        z_dim: number of units of the final Dense layer (size of z_t).
        '''
        super(Encoder, self).__init__()
        s = [5,4,2,2,2]  # stride sizes
        k = [10,8,4,4,4]  # kernel sizes
        f = [512,512,512,512,512]  # num filters

        # As applied by CPC through TimeDistributed the overall input is
        # [batch, T+K*N, d, 1]; a single call here sees one timestep slice.
        self.layers = []
        for filters, kernel, stride in zip(f, k, s):
            self.layers.append(tf.keras.layers.Conv1D(filters, kernel, stride))
            self.layers.append(tf.keras.layers.BatchNormalization())
            self.layers.append(tf.keras.layers.LeakyReLU())
        self.layers.append(tf.keras.layers.GlobalAveragePooling1D())
        self.layers.append(tf.keras.layers.Dense(z_dim, activation='tanh'))
        # Overall output through TimeDistributed: [batch, T+K*N, z]

    def call (self, x, training=None):
        '''
        Run x through the layer stack and return the embedding.

        x: input tensor for the first Conv1D layer.
        training: forwarded to the BatchNormalization layers only, so they
            can switch between batch statistics and moving averages.
        '''
        for layer in self.layers:
            # Only batch normalization behaves differently in training.
            # Dispatch explicitly rather than trying `layer(x, training)` and
            # catching whatever fails: a positional `training` can be taken
            # for another argument (e.g. the pooling layer's `mask`), and a
            # bare except would also hide genuine errors.
            if isinstance(layer, tf.keras.layers.BatchNormalization):
                x = layer(x, training=training)
            else:
                x = layer(x)
        return x


class Autoregressive (tf.keras.layers.Layer):
    '''
    g_ar: GRU RNN that turns a sequence of embeddings into one context vector.
    '''

    def __init__ (self, c_dim):
        '''
        c_dim: number of GRU units, i.e. the size of the context c_t.
        '''
        super().__init__()
        # Consumes [batch, T, z]. return_sequences keeps its default (False),
        # so only the final step is emitted: [batch, c].
        gru = tf.keras.layers.GRU(units=c_dim, name='ar_context')
        self.l = gru

    def call (self, z):
        '''Return the GRU output for the embedding sequence z.'''
        context = self.l(z)
        return context


class Predict_z (tf.keras.layers.Layer):
    '''
    transformation of c_t, currently linear (W_k) for all future timesteps
    '''

    def __init__ (self, z_dim, K):
        '''
        z_dim: size of each predicted embedding.
        K: number of future timesteps; one Dense layer is created per step.
        '''
        super(Predict_z, self).__init__()

        # input_dim: [batch, c]
        # One independent linear map per future timestep k.
        self.layers = [tf.keras.layers.Dense(z_dim) for _ in range(K)]

    def call(self, c_t):
        '''
        Apply every per-timestep Dense layer to the context c_t.

        c_t: context tensor, [batch, c].
        Returns the predictions stacked along a new axis 1: [batch, K, z].
        '''
        # Iterate the Python list directly: indexing it with a tf.range
        # tensor only works eagerly, and a TensorArray fixed to float32 plus
        # a stack() on every iteration is unnecessary work.
        z_pred = [layer(c_t) for layer in self.layers]  # K x [batch, z]
        return tf.stack(z_pred, axis=1)  # output_dim: [batch, K, z]


def compute_f (z, z_pred):
    '''
    compute f following eq(3) in the paper to be batch (K x N) matrices.
    First column is the positive sample.

    z: embeddings of the future samples, [batch, K, N, z].
    z_pred: predicted embeddings, [batch, K, z].
    Returns exp(<z, z_pred>) for every (k, n) pair: [batch, K, N].

    NOTE: the exponential is unnormalised and can overflow for large dot
    products.
    '''

    # Contract the feature axis, pairing the prediction for timestep k with
    # each of that timestep's N samples. Unlike a reshape built from
    # z.shape[0], this does not need a statically known batch size.
    dot_prod = tf.einsum('bknz,bkz->bkn', z, z_pred)  # [batch, K, N]
    return tf.exp(dot_prod)  # output dim: [batch, K, N]


class CPC (tf.keras.models.Model):
    '''
    put everything together. Return f_k for every k
    '''

    def __init__ (self, num_time_observations, num_time_future, num_negative_samples, z_dim, c_dim):
        '''
        num_time_observations: T, number of leading timesteps given to g_ar.
        num_time_future: K, number of future timesteps to predict.
        num_negative_samples: N, samples per future timestep (the second
            reshape axis of the future embeddings).
        z_dim: embedding size of the encoder.
        c_dim: context size of the autoregressive model.
        '''
        super(CPC, self).__init__()
        self.T = num_time_observations
        self.K = num_time_future
        self.N = num_negative_samples
        self.z = z_dim
        self.c = c_dim

        self.g_enc = Encoder(self.z)
        # Build the time-distributed wrapper once here instead of creating a
        # fresh layer object on every forward pass inside call().
        self.g_enc_td = tf.keras.layers.TimeDistributed(self.g_enc)
        self.g_ar = Autoregressive(self.c)
        self.p_z = Predict_z(z_dim=self.z, K=self.K)

    def call(self, x, training=False):
        '''
        Embed every timestep, summarise the first T embeddings into a context
        and score the remaining K*N embeddings against the K predictions.

        x: input, [batch, T+K*N, d, 1].
        training: forwarded to the encoder.
        Returns the f matrices, [batch, K, N].
        '''
        # input dim: [batch, T+K*N, d, 1]
        print('input dim: ', x.shape)
        # Embedding (dim 1 is the temporal dim)
        z_t = self.g_enc_td(x, training=training)  # [batch, T+K*N, z]
        print('embedding dim: ', z_t.shape)

        # Split current observation embeddings and future embeddings
        z_obs = z_t[:, :self.T]  # first T steps, dim: [batch, T, z]
        z_future = z_t[:, self.T:]  # remaining K*N steps, dim: [batch, K*N, z]
        z_future = tf.reshape(z_future, [-1, self.K, self.N, self.z])  # [batch, K, N, z]
        print('embedding obs:', z_obs.shape)
        print('embedding pred:', z_future.shape)

        # Predict embeddings
        c_T = self.g_ar(z_obs)  # [batch, c]
        print('context:', c_T.shape)
        z_pred = self.p_z(c_T)  # [batch, K, z]
        print('transformed_context:', z_pred.shape)

        # Compute f matrices
        f_mat = compute_f(z_future, z_pred)  # [batch, K, N]

        return f_mat

In [None]:
# Toy sizes for a quick shape check of the model.
batch, T, K, N, d = 1, 8, 3, 5, 1000

# Random input with T observed steps followed by K*N future steps.
data = tf.constant(np.random.rand(batch, T + K * N, d, 1))
print('input shape:', data.shape)

# Single forward pass; f_mat is reused by the loss cell.
cpc = CPC(T, K, N, z_dim, c_dim)
f_mat = cpc(data)


input shape: (1, 23, 1000, 1)
input dim:  (1, 23, 1000, 1)
embedding dim:  (1, 23, 512)
embedding obs: (1, 8, 512)
embedding pred: (1, 3, 5, 512)
context: (1, 256)
transformed_context: (1, 3, 512)


In [None]:
class InfoNCE (tf.keras.losses.Loss):
    '''
    InfoNCE loss computed from a batch of f matrices with dim (K x N).
    '''

    # NOTE(review): this overrides __call__ with a single-argument signature
    # rather than implementing call(y_true, y_pred); presumably intentional
    # for direct use as `loss(f)` - confirm before passing it to Keras APIs.
    def __call__(self, f):
        '''
        f: scores, [batch, K, N]; index 0 on the last axis is the positive.
        Returns the loss per batch element, averaged over the K timesteps.
        '''
        positive = f[:, :, 0]  # [batch, K]
        total = tf.reduce_sum(f, axis=2)  # [batch, K], sum over all N samples
        nll = tf.negative(tf.math.log(positive / total))  # [batch, K]
        return tf.reduce_mean(nll, axis=1)  # [batch]

In [None]:
# Instantiate the loss and evaluate it on the f matrices computed by the
# forward pass; the bare expression displays the per-sample loss in the notebook.
losses = InfoNCE()
losses(f_mat)

<tf.Tensor: shape=(1,), dtype=float32, numpy=array([1.6093172], dtype=float32)>