In [1]:
import tensorflow as tf
import numpy as np

In [2]:
def identity_transpose(A):
    '''Calculate (I - A^T)

    Inputs:
    A (tensor or array [d, d]) : square matrix

    Outputs:
    (tensor [d, d]) : identity matrix minus the transpose of A
    '''
    # Build the identity in A's own dtype: tf.eye defaults to float32 and
    # TensorFlow does not promote dtypes, so a float64 (or integer) A would
    # otherwise make the subtraction fail.
    return tf.eye(A.shape[0], dtype = A.dtype) - tf.transpose(A)

def identity_transpose_inverse(A):
    '''Calculate (I - A^T)^(-1), the matrix inverse of identity_transpose(A)'''
    i_minus_a_t = identity_transpose(A)
    return tf.linalg.inv(i_minus_a_t)

In [47]:
class Encoder(tf.keras.Model):
    '''
    Encoder class for DAG-GNN method

    Inputs:
    adjA (tensor or array [d, d]) : initial estimate of the adjacency matrix
    in_dim (int) : dimension of input layer (not used in the body: the Dense
                   layers infer their input size on the first call)
    hid_dim (int) : dimension of hidden layer
    out_dim (int) : dimension of output layer

    Outputs of call:
    outputs (tensor) : output of the neural network
    logits (tensor) : product (I - A^T) @ outputs; outputs therefore needs d rows
    adjA (variable [d, d]) : current estimated adjacency matrix

    '''
    def __init__(self, adjA, in_dim, hid_dim, out_dim):
        super(Encoder, self).__init__()
        # Store the adjacency matrix in Keras' float type. A default numpy
        # array is float64, which cannot be multiplied with the Dense layers'
        # output in call() because TensorFlow does not promote dtypes.
        self.adjA = tf.Variable(
            initial_value = tf.cast(adjA, tf.keras.backend.floatx()),
            trainable = True)
        # NOTE: an additional trainable variable `Wa` was sketched here in a
        # commented-out line but is not implemented.

        self.fc1 = tf.keras.layers.Dense(hid_dim, activation= 'relu')
        self.fc2 = tf.keras.layers.Dense(out_dim)

    def call(self, inputs):
        '''Forward process of neural network'''
        #calculate I - A^T
        I_adjA = identity_transpose(self.adjA)
        hidden = self.fc1(inputs)
        outputs = self.fc2(hidden)
        logits = tf.matmul(I_adjA, outputs)
        # The variable itself is returned so callers can feed it to the decoder
        # and to the loss.
        return outputs, logits, self.adjA


class Decoder(tf.keras.Model):
    '''
    Decoder class for DAG-GNN method

    Inputs:
    in_dim (int) : dimension of input layer (not used in the body: the Dense
                   layers infer their input size on the first call)
    hid_dim (int) : dimension of hidden layer
    out_dim (int) : dimension of output layer

    Outputs of call:
    z (tensor) : product (I - A^T)^(-1) @ z_inputs
    outputs (tensor) : output of the neural network applied to z
    '''
    def __init__(self, in_dim, hid_dim, out_dim):
        super(Decoder, self).__init__()
        self.fc1 = tf.keras.layers.Dense(hid_dim, activation = 'relu')
        self.fc2 = tf.keras.layers.Dense(out_dim)

    def call(self, z_inputs,  adjA):
        '''Forward process of neural network

        z_inputs (tensor) : latent values, with d rows
        adjA (tensor [d, d]) : current estimated adjacency matrix
        '''
        #calculate (I - A^T)^(-1)
        # The inverse is required here: multiplying by (I - A^T) again, as the
        # encoder does, would not undo the encoder's transformation.
        # (I - A^T) must be invertible, which holds e.g. for A = 0.
        I_adjA_inv = identity_transpose_inverse(adjA)
        z = tf.matmul(I_adjA_inv, z_inputs)

        hidden = self.fc1(z)
        outputs = self.fc2(hidden)
        return z, outputs

In [87]:
class DAG_GNN_VAE(tf.keras.Model):
    '''
    Model class for DAG-GNN method training: an Encoder followed by a Decoder
    that receives the encoder's adjacency matrix.

    Inputs:
    adjA (tensor [d, d]) : initial estimate of the adjacency matrix
    in_dim (int) : dimension of input layer
    hid_dim (int) : dimension of hidden layer
    out_dim (int) : dimension of output layer

    Outputs of call:
    en_outputs (tensor) : output of the encoder network
    logits (tensor) : second value returned by the encoder
    new_adjA (tensor [d, d]) : current estimated adjacency matrix
    z, de_outputs (tensors) : the two values returned by the decoder

    '''
    def __init__(self, adjA, in_dim, hid_dim, out_dim):
        super(DAG_GNN_VAE, self).__init__()
        self.encoder = Encoder(adjA, in_dim, hid_dim, out_dim)
        self.decoder = Decoder(in_dim, hid_dim, out_dim)

    def call(self, inputs):
        '''Encode the inputs, then decode the encoder logits with the same adjacency matrix'''
        en_outputs, logits, new_adjA = self.encoder(inputs)
        z, de_outputs = self.decoder(logits, new_adjA)
        return en_outputs, logits, new_adjA, z, de_outputs

    @staticmethod
    def _h(A):
        '''Calculate the constraint of A ensure that it's a DAG

        A (tensor [d, d]) : adjacency matrix
        Returns the scalar h(A) = tr[(I + A*A / d)^d] - d, where A*A is the
        element-wise square (Yu et al. 2019 DAG-GNN).
        '''
        # The number of variables is read from A instead of from a global name.
        n_variables = A.shape[0]
        # The element-wise square keeps every entry non-negative, so cycles
        # cannot cancel each other out in the trace.
        M = tf.eye(n_variables, dtype = A.dtype) + (A * A) / n_variables
        # E = M^d, built by d - 1 multiplications (none when d == 1).
        E = M
        for _ in range(n_variables - 1):
            E = tf.linalg.matmul(E, M)
        h = tf.linalg.trace(E) - n_variables
        return h

    def _loss(self, A, logits, X, Y, rho = 1.0, alpha = 1.0, lambda1 = 0.1):
        '''
        Function that evaluate the model loss
        loss = dag constraint + kl loss + nll loss + l1 reg + diagonal penalty

        A (tensor [d, d]) : current estimated adjacency matrix
        logits (tensor) : encoder logits
        X, Y (tensors) : the two tensors whose squared difference is penalised
                         (reconstruction and data)
        rho (float) : weight of the quadratic penalty on h(A)
        alpha (float) : Lagrange multiplier of h(A)
        lambda1 (float) : weight of the L1 penalty on A
        '''
        n_variables = A.shape[0]

        # h constraint loss
        h = self._h(A)
        h_loss = 0.5 * rho * h * h + alpha * h

        #KL divergence
        kl_loss = tf.math.reduce_sum(tf.pow(logits, 2) / ( 2 * logits.shape[0]))

        #negative likelihood loss
        nll_loss = tf.math.reduce_sum(tf.pow(X - Y, 2) / (2 * n_variables))

        #L1 penalization
        l1_loss = lambda1 * tf.math.reduce_sum(tf.abs(A))

        #diagonal penalization (A * A is element-wise, so this sums the squared diagonal)
        diag_loss = 100 * tf.linalg.trace(A * A)

        loss = h_loss + kl_loss + nll_loss + l1_loss + diag_loss
        return loss
        

In [88]:
# Instantiate the VAE with in_dim = 5, hid_dim = 20, out_dim = 5 and create its
# layer weights for inputs shaped like `data`.
# NOTE(review): `A` and `data` are not defined in any visible cell, and the
# execution counts are out of order -- confirm that the notebook runs
# top-to-bottom on a fresh kernel.
model = DAG_GNN_VAE(A, 5, 20, 5)
model.build(data.shape)

In [89]:
# Inspect the variables the built model reports as trainable.
model.trainable_variables

[<tf.Variable 'encoder_22/dense_70/kernel:0' shape=(200, 20) dtype=float32, numpy=
 array([[ 0.09150031,  0.05518448,  0.15420642, ..., -0.16292752,
          0.01129237,  0.10451511],
        [-0.0789061 ,  0.02459358, -0.16367133, ...,  0.10761529,
         -0.0877905 , -0.13330042],
        [ 0.13491488, -0.07995654,  0.11635461, ..., -0.14562812,
          0.00401524,  0.09812558],
        ...,
        [ 0.12280121,  0.15659955, -0.1035981 , ...,  0.11804399,
          0.07510434,  0.13293636],
        [ 0.13769978, -0.14725462, -0.0804465 , ...,  0.02154158,
         -0.03118096,  0.05237089],
        [-0.04945815, -0.03308129, -0.01368013, ..., -0.04680222,
         -0.13116941, -0.03467706]], dtype=float32)>,
 <tf.Variable 'encoder_22/dense_70/bias:0' shape=(20,) dtype=float32, numpy=
 array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0.], dtype=float32)>,
 <tf.Variable 'encoder_22/dense_71/kernel:0' shape=(20, 5) dtype=float32, numpy=
 a

In [84]:
# Run one forward pass; the displayed tuple is what DAG_GNN_VAE.call returns:
# (en_outputs, logits, new_adjA, z, de_outputs).
model(data)

(<tf.Tensor: shape=(5, 5), dtype=float32, numpy=
 array([[ 0.89254594, -0.45507213, -0.7312053 , -0.05200586, -0.5441576 ],
        [ 0.7041065 ,  0.08252943, -0.48136923,  0.20833716, -0.1983127 ],
        [ 0.00293961, -0.10828166, -0.6360076 , -0.00479338, -0.43089932],
        [ 0.5052403 , -0.33157814, -0.9160401 , -0.08124065, -0.4729087 ],
        [ 0.34194863, -0.24013457, -0.6098543 , -0.18414064, -0.1215876 ]],
       dtype=float32)>,
 <tf.Tensor: shape=(5, 5), dtype=float32, numpy=
 array([[ 0.89254594, -0.45507213, -0.7312053 , -0.05200586, -0.5441576 ],
        [ 0.7041065 ,  0.08252943, -0.48136923,  0.20833716, -0.1983127 ],
        [ 0.00293961, -0.10828166, -0.6360076 , -0.00479338, -0.43089932],
        [ 0.5052403 , -0.33157814, -0.9160401 , -0.08124065, -0.4729087 ],
        [ 0.34194863, -0.24013457, -0.6098543 , -0.18414064, -0.1215876 ]],
       dtype=float32)>,
 <tf.Variable 'Variable:0' shape=(5, 5) dtype=float32, numpy=
 array([[0., 0., 0., 0., 0.],
        [0

In [5]:
def dag_gnn(data, hid_dim = 20, max_iter = 10e8, rho_max = 10e20, n_epochs = 20, lambda1 = 0.1,
            h_tol = 1e-8, A_threshold = 0.3, learning_rate = 1e-3):
    '''
    Function for inference of DAG with method DAG-GNN

    The VAE is trained inside an augmented Lagrangian loop: the penalty weight
    rho is multiplied by 10 until the acyclicity constraint h(A) has dropped to
    a quarter of its previous value, then the multiplier alpha is updated.

    Inputs:
    data (array) : observations; the number of variables is data.shape[1]
    hid_dim (int) : dimension of the hidden layers
    max_iter (number) : maximum number of augmented Lagrangian iterations
    rho_max (number) : upper bound on the penalty weight rho
    n_epochs (int) : passes over the training batches per call to train()
    lambda1 (float) : weight of the L1 penalty on the adjacency matrix
    h_tol (float) : the loop stops once h(A) <= h_tol
    A_threshold (float) : entries with a smaller magnitude are set to 0
    learning_rate (float) : learning rate of the Adam optimiser

    Outputs:
    A_est (numpy array [d, d]) : thresholded estimate of the adjacency matrix
    '''

    def dag_constraint(A):
        '''h(A) = tr[(I + A*A / d)^d] - d with A*A element-wise (Yu et al. 2019 DAG-GNN)'''
        M = tf.eye(n_variables, dtype = A.dtype) + (A * A) / n_variables
        E = M
        for _ in range(n_variables - 1):
            E = tf.linalg.matmul(E, M)
        return tf.linalg.trace(E) - n_variables

    def batch_loss(A, logits, X, Y):
        '''
        loss = dag constraint + kl loss + nll loss + l1 reg + diagonal penalty
        Kept local so the current rho and alpha of the enclosing loop are read directly.
        '''
        h_value = dag_constraint(A)
        h_loss = 0.5 * rho * h_value * h_value + alpha * h_value
        kl_loss = tf.math.reduce_sum(tf.pow(logits, 2) / (2 * logits.shape[0]))
        nll_loss = tf.math.reduce_sum(tf.pow(X - Y, 2) / (2 * n_variables))
        l1_loss = lambda1 * tf.math.reduce_sum(tf.abs(A))
        diag_loss = 100 * tf.linalg.trace(A * A)
        return h_loss + kl_loss + nll_loss + l1_loss + diag_loss

    def train():
        '''Model training'''
        for _epoch in range(n_epochs):
            for batch_data in train_loader:
                # The forward pass has to run inside the tape, otherwise no
                # gradient can be computed for the loss.
                with tf.GradientTape() as tape:
                    _en_outputs, logits, adjA, _z, de_outputs = vae(batch_data)
                    loss = batch_loss(adjA, logits, de_outputs, batch_data)
                variables = vae.trainable_variables
                gradients = tape.gradient(loss, variables)
                optimizer.apply_gradients(zip(gradients, variables))
        # Read the variable from the model so a value is returned even when
        # train_loader yields no batch.
        return vae.encoder.adjA


    #####################
    #Augmented lagrangian
    #####################

    n_variables = data.shape[1]
    rho, alpha, h = 1, 1, np.inf

    # NOTE(review): setup_data_loader is not defined in this block; presumably it
    # returns (train, test) iterables of batches shaped for the model -- confirm.
    train_loader, _test_loader = setup_data_loader(data)

    #setup of neural networks
    # float32 matches the dtype of the Dense layers the matrix is multiplied with.
    new_adj = np.zeros((n_variables, n_variables), dtype = np.float32)
    vae = DAG_GNN_VAE(new_adj, n_variables, hid_dim, n_variables)
    optimizer = tf.keras.optimizers.Adam(learning_rate = learning_rate)

    # Defined up front so they exist even if the inner loop never runs.
    A_est = vae.encoder.adjA
    h_new = h

    for _ in range(int(max_iter)):
        while rho < rho_max:
            A_est = train()
            h_new = float(dag_constraint(A_est))

            if h_new > 0.25 * h:
                rho = rho*10
            else:
                break

        h = h_new
        alpha += rho * h

        # Also stop when rho is exhausted: no further training would happen,
        # so continuing could only inflate alpha.
        if h <= h_tol or rho >= rho_max:
            break

    # Threshold a numpy copy (a tf.Variable does not support item assignment)
    # by magnitude, so that strong negative edges are kept.
    A_est = np.array(A_est.numpy())
    A_est[np.abs(A_est) < A_threshold] = 0
    return A_est
        