## First, we generate a mask; then we use this mask to create the parameters/kernels  
Assumption: `c_in == c_out`

In [10]:
import numpy as np
import tensorflow as tf

In [23]:
# Problem size: number of feature channels and the base (un-extended) kernel width.
n_channels = 2
ksize = 3
# The mask is built on an extended kernel of width 2*k - 1; kcent is the index of its centre tap.
ksize = ksize * 2 - 1
kcent = (ksize - 1) // 2
# All-ones input laid out as (batch size, height, width, channels).
x = np.ones((1, 3, 3, n_channels), dtype=np.float32)

In [12]:
# Mask the kernel taps so that the resulting convolution matrix is block triangular.
def get_conv_square_ar_mask(h, w, n_in, n_out, zerodiagonal=False):
    """
    Return an (h, w, n_in, n_out) float32 mask for a square autoregressive convolution.

    Taps above the centre row or left of the centre column are zero, the centre
    tap holds the channel mask produced by ``get_linear_ar_mask(n_in, n_out,
    zerodiagonal)``, and every remaining tap is one.
    """
    centre_row, centre_col = (h - 1) // 2, (w - 1) // 2
    mask = np.zeros([h, w, n_in, n_out], dtype=np.float32)
    # Only the quadrant at/after the centre in both spatial axes stays active.
    mask[centre_row:, centre_col:, :, :] = 1
    # The centre tap itself is restricted across channels.
    mask[centre_row, centre_col, :, :] = get_linear_ar_mask(n_in, n_out, zerodiagonal)
    return mask

In [13]:
# Build the initial (unmasked) convolution weights as a NumPy array.
def get_conv_weight_np(filter_shape, stable_init=True, unit_testing=False):
    """
    Draw initial convolution weights, optionally shifted towards an identity map.

    Args:
        filter_shape: sequence ``(height, width, n_in, n_out)``.
        stable_init: when true, an identity matrix is added to the centre tap
            of the kernel on top of the small random weights.
        unit_testing: treated exactly like ``stable_init``.

    Returns:
        A float32 ndarray of shape ``filter_shape``: Gaussian noise scaled by
        0.02, plus the identity at the centre tap when requested.
    """
    weight_np = np.random.randn(*filter_shape) * 0.02
    if stable_init or unit_testing:
        # The centre is computed per axis so that non-square kernels get the
        # identity on their true centre tap instead of reusing the height index.
        centre_h = (filter_shape[0] - 1) // 2
        centre_w = (filter_shape[1] - 1) // 2
        # np.eye(n_in, n_out) equals np.eye(n_out) when n_in == n_out and also
        # matches the (n_in, n_out) slice when the channel counts differ.
        weight_np[centre_h, centre_w, :, :] += np.eye(filter_shape[2], filter_shape[3])
    return weight_np.astype('float32')
# Helper that builds the channel-to-channel mask used at the kernel's centre tap.
def get_linear_ar_mask(n_in, n_out, zerodiagonal=False):
    """
    Return an (n_in, n_out) float32 block-triangular mask of zeros and ones.

    One of ``n_in`` / ``n_out`` must divide the other. The larger side is cut
    into equal groups, one per index of the smaller side; entries below the
    block diagonal are zeroed, and with ``zerodiagonal`` the diagonal blocks
    are zeroed as well.
    """
    assert n_in % n_out == 0 or n_out % n_in == 0, "%d - %d" % (n_in, n_out)

    mask = np.ones([n_in, n_out], dtype=np.float32)

    if n_out < n_in:
        # More inputs than outputs: each output column owns a run of input rows.
        rows_per_col = n_in // n_out
        for col in range(n_out):
            first_row = col * rows_per_col
            next_row = first_row + rows_per_col
            mask[next_row:, col] = 0
            if zerodiagonal:
                mask[first_row:next_row, col] = 0
        return mask

    # At least as many outputs as inputs: each input row owns a run of output columns.
    cols_per_row = n_out // n_in
    for row in range(n_in):
        cols = slice(row * cols_per_row, (row + 1) * cols_per_row)
        mask[row + 1:, cols] = 0
        if zerodiagonal:
            mask[row, cols] = 0
    return mask

In [None]:
# Build the autoregressive mask on the extended kernel, reverse it along every
# axis (height, width, input channels, output channels), and copy the reversed
# view into its own array before turning it into a TensorFlow constant.
mask_np = get_conv_square_ar_mask(h=ksize, w=ksize, n_in=n_channels, n_out=n_channels)
mask__np = np.flip(mask_np).copy()
mask_tf = tf.constant(mask__np)
print(mask_tf)

In [19]:
# Zero-pad the input on the TOP and LEFT of the spatial axes by the centre offset.
# NOTE: `x` is rebound to its padded version, so re-running this cell pads it again.
pad = kcent
x = tf.pad(x, paddings=[[0, 0], [pad, 0], [pad, 0], [0, 0]], mode='CONSTANT')

# Kernel shape on the extended kernel: (height, width, in-channels, out-channels).
kernel_shape = [ksize, ksize, n_channels, n_channels]

# Kernel variable, initialised from the NumPy weights.
w_np = get_conv_weight_np(kernel_shape)
w = tf.compat.v1.get_variable(name='W', dtype=tf.float32, initializer=w_np)

# Zero-initialised bias with one entry per channel, reshaped to rank 4
# with the channel entries on the last axis.
b = tf.compat.v1.get_variable(
    name='b', shape=[n_channels], initializer=tf.zeros_initializer())
b = tf.reshape(b, shape=[1, 1, 1, -1])


In [21]:
# Apply the mask to the kernel: taps where the mask is zero are zeroed out.
w = w * mask_tf
# Crop to the masked kernel actually used for the convolution by dropping the
# last `kcent` rows and columns. An explicit stop index is used instead of
# `:-kcent` because `:-0` is an empty slice, which would yield an empty kernel
# when kcent == 0 (base kernel size 1). For kcent > 0 the result is identical.
w_s = w[:ksize - kcent, :ksize - kcent, :, :]

In [22]:
# Convolve the padded input with the cropped, masked kernel. 'VALID' padding is
# used because the input was already padded explicitly on the top and left.
dilation = 1
z = tf.nn.conv2d(
    x,
    w_s,
    strides=[1, 1, 1, 1],
    padding='VALID',
    data_format='NHWC',
    dilations=[1, dilation, dilation, 1],
)
print(z)

tf.Tensor(
[[[[0.9964293  1.0109481 ]
   [0.98832613 1.0002538 ]
   [0.9634917  0.95491064]]

  [[1.0288899  0.9684367 ]
   [0.98932797 0.96075577]
   [0.9933346  0.9121895 ]]

  [[1.043368   0.9836762 ]
   [0.94651145 1.0183473 ]
   [0.9376938  0.997674  ]]]], shape=(1, 3, 3, 2), dtype=float32)
