##c.2) use the pure tensorflow tensor data structure to build the same but with auto differentiation primitives

---



---






In [9]:
import numpy as np
import random
from numpy import array
from numpy.random import uniform
from numpy import hstack
import tensorflow as tf
from tensorflow.keras.layers import Layer

In [10]:

n, d = 400, 3
num_hidden1=5 # number of neurons for first hidden layer
num_hidden2=4 # numbe rof neurons for second hidden layer
x = tf.random.uniform(minval=-1, maxval=1, shape = (n, d))
x2 = tf.cast(x**2, tf.float32)
weights_true = tf.cast(tf.constant([[5,1,1],[4,1,1]]),tf.float32)
bias_true = tf.cast(tf.constant([1,2]), tf.float32)
y_true = tf.matmul(x2, weights_true, transpose_b=True) + tf.matmul(x,weights_true,transpose_b=True) + bias_true

print(f'x: {x.shape}, weights: {weights_true.shape}, bias: {bias_true.shape}, y: {y_true.shape}')


x: (400, 3), weights: (2, 3), bias: (2,), y: (400, 2)


In [11]:

class Linear(Layer):
  """y = w.x + b"""

  def __init__(self, units=32):
      super(Linear, self).__init__()
      self.units = units

  def build(self, input_shape):
      self.w = self.add_weight(shape=(input_shape[-1], self.units),
                               initializer='random_normal',
                               trainable=True)
      self.b = self.add_weight(shape=(self.units,),
                               initializer='random_normal',
                               trainable=True)

  def call(self, inputs):
      return tf.matmul(inputs, self.w) + self.b

In [12]:
class MLP(Layer):
    """Simple stack of Linear layers."""

    def __init__(self):
        super(MLP, self).__init__()
        self.linear_1 = Linear(22)
        self.linear_2 = Linear(12)
        self.linear_3 = Linear(2)

    def call(self, inputs):
        x = self.linear_1(inputs)
        x = tf.nn.relu(x)
        x = self.linear_2(x)
        x = tf.nn.relu(x)
        return self.linear_3(x)

In [13]:
optimizer = tf.keras.optimizers.SGD(learning_rate=0.001)
mse_loss_fn = tf.keras.losses.MeanSquaredError()
loss_metric = tf.keras.metrics.Mean()

dataset = tf.data.Dataset.from_tensor_slices((x, y_true))
dataset = dataset.shuffle(buffer_size=1).batch(30)

epochs = 1000
mlp = MLP()
# Iterate over epochs.
for epoch in range(epochs):

    # Iterate over the batches of the dataset.
    for step, (x_batch, y_batch) in enumerate(dataset):
        with tf.GradientTape() as tape:
            # Forward Pass
            y_pred_batch = mlp(x_batch)
            # Compute loss
            loss = mse_loss_fn(y_batch, y_pred_batch)
            loss += sum(mlp.losses)  # Add regularization loss

        grads = tape.gradient(loss, mlp.trainable_weights) # Use autograd
        optimizer.apply_gradients(zip(grads, mlp.trainable_weights)) # Update learnable parameter

        loss_metric(loss)
    if epoch % 100 == 0:
      print(f'Epoch {epoch}, loss {loss_metric.result()}')

Epoch 0, loss 20.22142219543457
Epoch 100, loss 10.22322940826416
Epoch 200, loss 5.639126777648926
Epoch 300, loss 3.9238080978393555
Epoch 400, loss 3.03381609916687
Epoch 500, loss 2.4840896129608154
Epoch 600, loss 2.109863758087158
Epoch 700, loss 1.8381578922271729
Epoch 800, loss 1.631091594696045
Epoch 900, loss 1.4671825170516968


In [14]:
def plot_intereactive_3d(x, y, y_pred=None):
  import plotly.graph_objects as go

  fig = go.Figure()
  fig.add_trace(go.Scatter3d(x = x[:,0],
                    y = x[:,1],
                    z = y.reshape([-1]),
                    opacity=0.5, mode='markers', name='Underlying Function'
                    ))
 
  if y_pred is not None:
    fig.add_trace(go.Scatter3d(x = x[:,0],
                   y = x[:,1],
                   z = y_pred.reshape([-1]),
                   opacity=0.5, mode='markers', name='Predicted Function'
                  ))
    
  fig.update_layout(scene = dict(
                    xaxis_title='X1',
                    yaxis_title='X2',
                    zaxis_title='Y'),
                    width=700,
                    margin=dict(r=20, b=10, l=10, t=10))
  fig.show()
  

In [15]:
from sklearn.manifold import TSNE
X_red = TSNE(n_components=2).fit_transform(x)
y_true_red = TSNE(n_components=1).fit_transform(y_true)
y_pred_red = TSNE(n_components=1).fit_transform(mlp(x))
print(f'X_red: {X_red.shape}, y_true_red: {y_true_red.shape}, y_pred_red: {y_pred_red.shape}')
plot_intereactive_3d(X_red,y_true_red,y_pred_red)

X_red: (400, 2), y_true_red: (400, 1), y_pred_red: (400, 1)


In [16]:
X_red

array([[ 19.77658   ,  -6.925228  ],
       [ -6.547952  ,  15.624225  ],
       [  9.609563  ,  14.7231245 ],
       [ -6.839604  ,  17.177654  ],
       [-13.452702  ,   4.9885554 ],
       [-10.812914  ,  14.31919   ],
       [  0.6373818 ,   3.787793  ],
       [  2.3624618 ,  15.367981  ],
       [-15.399062  , -17.66101   ],
       [  5.7396097 ,   3.9930198 ],
       [  5.6510396 ,  12.846828  ],
       [ -2.539715  ,  13.457198  ],
       [  8.238734  , -11.03813   ],
       [ -7.431726  ,  10.197801  ],
       [  1.9483135 ,  13.263455  ],
       [  4.489432  ,  -8.554598  ],
       [  7.680925  , -11.831121  ],
       [-17.844534  , -17.294296  ],
       [-13.54578   ,  -6.567796  ],
       [ 14.465738  ,  -6.3270736 ],
       [  2.8764226 ,  13.337353  ],
       [ 17.927467  ,  -1.2556547 ],
       [-15.53695   ,  10.428744  ],
       [ -9.406345  ,  15.569482  ],
       [ 17.674187  , -11.593698  ],
       [ -5.3498187 ,  10.751678  ],
       [ -8.09183   ,  17.185707  ],
 