# Translating deep learning code 
### from PyTorch to TensorFlow


This notebook contains both TensorFlow and PyTorch versions of functions from the paper ***Variational Fair Information Bottleneck***.

- The original PyTorch functions were retrieved from the author's GitHub repo, which can be found at: https://github.com/sajadn/Variational-Fair-Information-Bottleneck


Notebook Author : Niloy Purkait

In [1]:
import tensorflow as tf
from tensorflow.keras import backend as K
import numpy as np
import torch



In [2]:
# Report the library versions this notebook was executed with.
for label, module in (
    ("numpy", np),
    ("Pytorch", torch),
    ("TensorFlow", tf),
    ("keras", tf.keras),
):
    print(f"{label}: {module.__version__}")

numpy: 1.19.2
Pytorch: 1.7.1
TensorFlow: 2.4.1
keras: 2.4.0


## Make some random arrays

In [3]:
# Problem dimensions shared by every test cell below.
feature_dim, batch_size, latent_size = 110, 128, 50

# Binary features and labels: independent fair coin flips.
X = np.random.binomial(n=1, p=0.5, size=(batch_size, feature_dim))
y = np.random.binomial(n=1, p=0.5, size=(batch_size, 1))

# Three standard-normal latent arrays drawn in a single call, then split
# along the leading axis.
latent_draws = np.random.normal(size=(3, batch_size, latent_size))
z_mean, z_log_sigma, z = latent_draws


In [4]:
# Mirror the NumPy inputs as float32 tensors in both frameworks.
X_tf, y_tf = (tf.constant(arr, dtype=tf.float32) for arr in (X, y))
X_pt, y_pt = (torch.FloatTensor(arr) for arr in (X, y))

# Sanity check: both frameworks should report the same shape.
X_pt.shape, X_tf.shape

(torch.Size([128, 110]), TensorShape([128, 110]))

In [5]:
# Same latent arrays, once as TensorFlow constants and once as PyTorch tensors.
latent_arrays = (z_mean, z_log_sigma, z)
z_m_tf, z_l_s_tf, z_tf = (tf.constant(arr, dtype=tf.float32) for arr in latent_arrays)
z_m_pt, z_l_s_pt, z_pt = (torch.FloatTensor(arr) for arr in latent_arrays)

## Gaussian Entropy

In [6]:
# Pytorch
def entropy_gaussian(mu, sigma, mean=True):
    """Return the mean of ``0.5 * sigma`` over every element.

    NOTE(review): this matches a diagonal-Gaussian entropy only up to an
    additive constant, and only if ``sigma`` holds log-variances -- confirm
    against the caller.

    Args:
        mu: Unused; kept so the signature matches the other loss helpers.
        sigma: Tensor with a leading batch axis; trailing axes are flattened.
        mean: Unused; the result is always averaged over all elements.

    Returns:
        0-dim tensor holding ``mean(0.5 * sigma)``.
    """
    # reshape (not view) so non-contiguous inputs such as permuted tensors
    # are accepted instead of raising a RuntimeError.
    flat_sigma = sigma.reshape(sigma.shape[0], -1)
    return torch.mean(0.5 * flat_sigma)

In [7]:
#TensorFlow
def tf_entropy_gaussian(mu, sigma, mean=True):
    """TensorFlow counterpart of ``entropy_gaussian``.

    Flattens ``sigma`` to (batch, -1) and returns the mean of ``0.5 * sigma``
    over all elements. ``mu`` and ``mean`` are accepted but not used.
    """
    batch = K.shape(sigma)[0]
    flat_sigma = tf.reshape(sigma, (batch, -1))
    halved = 0.5 * flat_sigma
    return tf.reduce_mean(halved)

In [8]:
# PyTorch test: the random log-sigma sample is passed as `sigma`;
# the value should match the TensorFlow call in the next cell.
entropy_gaussian(z_m_pt, z_l_s_pt)

tensor(-0.0062)

In [9]:
# TensorFlow test: same inputs as the PyTorch call above, so the two
# results should agree up to float32 rounding.
tf_entropy_gaussian(z_m_tf, z_l_s_tf)

<tf.Tensor: shape=(), dtype=float32, numpy=-0.0061686565>

## Negative Log Gaussian

In [10]:

def negative_log_gaussian(data, mu, sigma, mean=True):
    """Gaussian negative log-likelihood (constants dropped), averaged over all elements.

    Computes ``0.5 * mean((data - mu)**2 / (exp(sigma) + eps) + sigma)``.
    ``sigma`` is therefore used as a log-variance: it is exponentiated in the
    denominator and added as the log-normaliser term.

    Args:
        data: Observations with a leading batch axis.
        mu: Predicted means; flattened to ``(data.shape[0], -1)``.
        sigma: Predicted log-variances; flattened to ``(data.shape[0], -1)``.
        mean: Unused; the result is always averaged over all elements.

    Returns:
        0-dim tensor with the averaged loss.
    """
    # Guards against division by zero when exp(sigma) underflows.
    # A Python scalar adopts the operand dtype, like the 0-dim tensor it replaces.
    epsilon = 1e-24

    batch = data.shape[0]
    # reshape (not view) so non-contiguous inputs such as permuted tensors
    # are accepted instead of raising a RuntimeError.
    mdata = data.reshape(batch, -1)
    mmu = mu.reshape(batch, -1)
    msigma = sigma.reshape(batch, -1)

    return 0.5 * torch.mean((mdata - mmu) ** 2 / (torch.exp(msigma) + epsilon) + msigma)

In [11]:


def tf_negative_log_gaussian(data, mu, sigma, mean=True):
    """TensorFlow counterpart of ``negative_log_gaussian``.

    Computes ``0.5 * mean((data - mu)**2 / (exp(sigma) + eps) + sigma)`` after
    flattening every input to ``(batch, -1)``, where ``batch`` is taken from
    ``data``. ``sigma`` is used as a log-variance (it is exponentiated).

    Args:
        data: Observations with a leading batch axis.
        mu: Predicted means, reshaped with ``data``'s batch size.
        sigma: Predicted log-variances, reshaped with ``data``'s batch size.
        mean: Unused; the result is always averaged over all elements.

    Returns:
        Scalar tensor with the averaged loss.
    """
    batch = K.shape(data)[0]
    mdata = tf.reshape(data, (batch, -1))
    mmu = tf.reshape(mu, (batch, -1))
    msigma = tf.reshape(sigma, (batch, -1))

    # A Python scalar is converted to msigma's dtype, so float64 inputs work
    # too; a float32 tf.constant would raise a dtype-mismatch error there.
    variance = K.exp(msigma) + 1e-24

    return 0.5 * tf.reduce_mean((mdata - mmu) ** 2 / variance + msigma)


In [12]:

# PyTorch test: data == mu, so the squared-error term is zero and the
# result reduces to 0.5 * mean(X_pt).
negative_log_gaussian(X_pt,X_pt,X_pt)

tensor(0.2476)

In [13]:
# TensorFlow test: same degenerate inputs (data == mu == sigma) as the
# PyTorch call above; the two results should agree.
tf_negative_log_gaussian(X_tf,X_tf,X_tf,)

<tf.Tensor: shape=(), dtype=float32, numpy=0.24758522>

## Negative log bernoulli

In [14]:
# Pytorch
log_sigmoid = torch.nn.LogSigmoid()
def negative_log_bernoulli(data, mu, mean=True, clamp=True):
    """Bernoulli negative log-likelihood with logits ``mu``, averaged over all elements.

    Computes ``-mean(data * log_sigmoid(mu) + (1 - data) * log_sigmoid(-mu))``.

    Args:
        data: Targets with a leading batch axis.
        mu: Logits; flattened to ``(data.shape[0], -1)``.
        mean: Unused; the result is always averaged over all elements.
        clamp: When true, logits are clipped to [-9.5, 9.5] first.

    Returns:
        0-dim tensor with the averaged loss.
    """
    if clamp:
        mu = torch.clamp(mu, min=-9.5, max=9.5)

    batch = data.shape[0]
    # reshape (not view) so non-contiguous inputs such as permuted tensors
    # are accepted instead of raising a RuntimeError.
    mdata = data.reshape(batch, -1)
    mmu = mu.reshape(batch, -1)

    log_prob_1 = log_sigmoid(mmu)    # log p(target = 1)
    log_prob_2 = log_sigmoid(-mmu)   # log p(target = 0)
    log_likelihood = -torch.mean((mdata * log_prob_1) + (1 - mdata) * log_prob_2)
    return log_likelihood

In [15]:
def tf_negative_log_bernoulli(data, mu, mean=True, clamp=True):
    """TensorFlow counterpart of ``negative_log_bernoulli``.

    Treats ``mu`` as logits (optionally clipped to [-9.5, 9.5]), flattens both
    inputs to ``(batch, -1)`` using ``data``'s batch size, and returns
    ``-mean(data * log_sigmoid(mu) + (1 - data) * log_sigmoid(-mu))``.
    ``mean`` is accepted but not used.
    """
    logits = K.clip(mu, -9.5, 9.5) if clamp else mu

    def _flatten(tensor):
        # Both tensors are reshaped with the batch size of `data`.
        return tf.reshape(tensor, (K.shape(data)[0], -1))

    targets = _flatten(data)
    logits = _flatten(logits)

    log_p_one = tf.math.log_sigmoid(logits)
    log_p_zero = tf.math.log_sigmoid(-logits)
    return -tf.reduce_mean((targets * log_p_one) + (1 - targets) * log_p_zero)

In [16]:
# PyTorch test: the 0/1 labels are reused as logits, purely to get a
# deterministic value to compare with the TensorFlow version.
negative_log_bernoulli(y_pt, y_pt) 

tensor(0.5032)

In [17]:
# TensorFlow test: same inputs as the PyTorch call above (labels reused
# as logits); the two results should agree.
tf_negative_log_bernoulli(y_tf, y_tf)

<tf.Tensor: shape=(), dtype=float32, numpy=0.50320446>

## KL Divergence loss

In [18]:
#Pytorch
def KL(mu, log_sigma):
    """Return ``0.5 * mean(-log_sigma + mu**2 + exp(log_sigma))``.

    NOTE(review): this looks like the KL divergence to a standard normal with
    ``log_sigma`` as log-variance and the constant ``-1`` term omitted --
    confirm against the paper.
    """
    neg_log_term = -log_sigma
    squared_mean = mu ** 2
    variance = log_sigma.exp()
    per_element = neg_log_term + squared_mean + variance
    return 0.5 * per_element.mean()

#TensorFlow
def tf_KL(mu, log_sigma):
    """TensorFlow counterpart of ``KL``.

    Returns ``0.5 * mean(-log_sigma + mu**2 + exp(log_sigma))`` over all
    elements.
    """
    per_element = -log_sigma + K.square(mu) + K.exp(log_sigma)
    return 0.5 * tf.reduce_mean(per_element)

In [19]:
# PyTorch test: random latent mean and log-sigma samples; compare with the
# TensorFlow result in the next cell.
KL(z_m_pt, z_l_s_pt)

tensor(1.3114)

In [20]:
# TensorFlow test: same latent samples as the PyTorch call above; the two
# results should agree up to float32 rounding.
tf_KL(z_m_tf,z_l_s_tf)

<tf.Tensor: shape=(), dtype=float32, numpy=1.3113883>

## Kernel function


In [21]:
# Pytorch
def kernel(a, b): #N x M, K x M
    """Mean of ``exp(-||a_i - b_j||^2)`` over all row pairs of ``a`` and ``b``.

    The squared distance is expanded as ``||a_i||^2 + ||b_j||^2 - 2 a_i.b_j``
    so that a single matrix product covers every pair.

    Args:
        a: 2-D tensor, one sample per row.
        b: 2-D tensor with the same number of columns as ``a``.

    Returns:
        0-dim tensor with the averaged kernel value.
    """
    rows_a, rows_b = a.shape[0], b.shape[0]
    sq_norm_a = (a ** 2).sum(dim=1)
    sq_norm_b = (b ** 2).sum(dim=1)

    a_term = sq_norm_a.unsqueeze(1).expand(-1, rows_b)   # rows_a x rows_b
    b_term = sq_norm_b.unsqueeze(0).expand(rows_a, -1)   # rows_a x rows_b
    cross = torch.mm(a, b.transpose(0, 1))

    sq_dist = (a_term + b_term) - (2 * cross)
    return torch.exp(-sq_dist).mean()

In [22]:
# TensorFlow
def tf_kernel(a,b):
    """TensorFlow counterpart of ``kernel``.

    Returns the mean of ``exp(-||a_i - b_j||^2)`` over all row pairs, using the
    expansion ``||a_i||^2 + ||b_j||^2 - 2 a_i.b_j``.

    Args:
        a: 2-D tensor, one sample per row (N x M).
        b: 2-D tensor with the same number of columns as ``a`` (K x M).

    Returns:
        Scalar tensor with the averaged kernel value.
    """
    # Broadcasting (N, 1) + (1, K) -> (N, K) replaces multiplying by tf.ones
    # built from static shapes. Those shapes are None for dynamically sized
    # inputs in graph mode, and tf.ones is float32-only.
    sq_norm_a = tf.expand_dims(tf.math.reduce_sum((a**2), axis=1), axis=1)  # (N, 1)
    sq_norm_b = tf.expand_dims(tf.math.reduce_sum((b**2), axis=1), axis=0)  # (1, K)
    cross = tf.matmul(a, tf.transpose(b, perm=[1, 0]))                      # (N, K)
    dist = (sq_norm_a + sq_norm_b) - (2 * cross)
    return tf.reduce_mean(tf.math.exp(-dist))

In [23]:
# Two small random column vectors with different row counts (4 and 2),
# so the kernel is exercised on non-square pairwise matrices.
array_pt_1 = torch.normal(mean=0, std=1, size=(4, 1))
array_pt_2 = torch.normal(mean=0, std=1, size=(2, 1))

# Mirror the PyTorch tensors as TensorFlow constants via NumPy.
array_tf_1 = tf.constant(array_pt_1.numpy())
array_tf_2 = tf.constant(array_pt_2.numpy())

In [24]:
# TensorFlow test: 4x1 against 2x1 inputs; compare with the PyTorch
# result in the next cell.
tf_kernel(array_tf_1,array_tf_2)

<tf.Tensor: shape=(), dtype=float32, numpy=0.31417778>

In [25]:
# PyTorch test: same values as the TensorFlow call above; the two results
# should agree up to float32 rounding.
kernel(array_pt_1,array_pt_2)

tensor(0.3142)

## Maximum Mean Discrepancy

In [26]:
def mmd(X, z):
    """Maximum mean discrepancy between the two latent groups selected by ``X[:, 0]``.

    Rows of ``z`` are split by whether the first column of ``X`` is nonzero,
    and the result is ``k(g0, g0) + k(g1, g1) - 2 * k(g0, g1)`` where ``k`` is
    ``kernel``.

    Args:
        X: 2-D feature tensor; column 0 is used as the group indicator.
        z: 2-D latent tensor with one row per row of ``X``.

    Returns:
        0-dim tensor with the MMD estimate.
    """
    # Boolean row mask: True where the first feature is nonzero.
    in_group = X[:, 0].unsqueeze(1).bool().squeeze()

    z_s_0 = z[in_group, :]
    z_s_1 = z[~in_group, :]

    within = kernel(z_s_0, z_s_0) + kernel(z_s_1, z_s_1)
    across = kernel(z_s_0, z_s_1)
    return within - 2 * across

In [27]:
def tf_mmd(X, z):
    """TensorFlow counterpart of ``mmd``.

    Splits the rows of ``z`` by whether ``X[:, 0]`` equals zero and returns
    ``k(g0, g0) + k(g1, g1) - 2 * k(g0, g1)`` where ``k`` is ``tf_kernel``.

    Args:
        X: 2-D feature tensor; column 0 is used as the group indicator.
        z: 2-D latent tensor with one row per row of ``X``.

    Returns:
        Scalar tensor with the MMD estimate.
    """
    # Partition on zero / nonzero, as the PyTorch version does with .bool().
    # Matching `== 0` and `== 1` separately would silently drop any row whose
    # indicator is neither value.
    is_zero = tf.equal(X[:, 0], 0)

    # boolean_mask returns (rows, latent) directly, so no gather/reshape needed.
    z_s_0 = tf.boolean_mask(z, is_zero)
    z_s_1 = tf.boolean_mask(z, tf.logical_not(is_zero))

    loss = tf_kernel(z_s_0, z_s_0) + tf_kernel(z_s_1, z_s_1) - 2 * tf_kernel(z_s_0, z_s_1)
    return loss

In [28]:
# PyTorch test: groups are defined by the first (binary) column of X_pt.
mmd(X_pt, z_pt)

tensor(0.0313)

In [29]:
# TensorFlow test: same inputs as the PyTorch call above; the two results
# should agree up to float32 rounding.
tf_mmd(X_tf, z_tf)

<tf.Tensor: shape=(), dtype=float32, numpy=0.031257637>