### Generate some dummy data and fix neural net architecture

In [1]:
import numpy as np
# Seed NumPy's global RNG so the dummy data drawn below is reproducible.
np.random.seed(0)

# m   -> number of training examples (columns of X and Y)
# n_x -> number of input features (rows of X)
# n_y -> rows of the target matrix Y; also the size of the last layer
m, n_x, n_y = 5, 2, 1

# Layer sizes, ending with n_y. activation_functions has the same length,
# presumably one activation name per layer.
layer_dims = [5, 2, n_y]
activation_functions = ['relu', 'relu', 'sigmoid']

learning_rate = 0.1

# Uniform samples in [0, 1). X is drawn before Y; swapping the two calls
# would change the values both arrays receive from the seeded stream.
X = np.random.rand(n_x, m)
Y = np.random.rand(n_y, m)


### Learn the parameters

In [2]:
from simple import *
# Epoch count passed as the last argument to learn_params below.
num_epochs = 2000
# Fit the model on the dummy data and keep the learned parameters and the
# final cost. learn_params is not defined in this notebook; presumably it is
# provided by the preceding `from simple import *` (verify in simple.py).
params, final_cost = learn_params(X, Y, layer_dims, activation_functions, learning_rate, num_epochs)
# Bare expression: the notebook echoes this value as the cell's output.
final_cost

0.5594831590697362

#### Next we compute (1) the loss for one forward pass through the model and (2) the gradients of that loss with respect to the model parameters.

#### We then confirm that the equivalent calculations in TensorFlow and PyTorch yield the same results.

In [9]:
 
from simple_external import *

# Reference values: a single forward and backward pass through the
# hand-written implementation, starting from freshly initialised parameters.
params = initialize_params(n_x, layer_dims)
AL, cache = forward_prop(X, params, activation_functions)
loss = compute_cost(Y, AL)
grads = compute_grads(AL, params, cache, X, Y)

# TensorFlow: build the loss/gradient tensors and placeholders from the same
# parameters, then evaluate them on (X, Y).
tf_loss_tensor, tf_grads_tensors, x_ph, y_ph = loss_and_grads_from_tf(
    X, Y, params, layer_dims, activation_functions
)
tf_loss, tf_grads = calculate_loss_and_grads_tf(
    X, Y, x_ph, y_ph, tf_loss_tensor, tf_grads_tensors
)

# PyTorch: the trailing arguments request 1 epoch with a learning rate of 0,
# so the call yields the loss and populates .grad on each parameter tensor.
pytorch_loss, pytorch_tensor_params = calculate_loss_and_grads_pytorch(
    X, Y, params, activation_functions, 1, 0
)
pytorch_grads = {
    tensor_name: tensor.grad
    for tensor_name, tensor in pytorch_tensor_params.items()
}

# Both frameworks must reproduce the hand-computed loss ...
for framework_loss in (tf_loss, pytorch_loss):
    np.testing.assert_array_almost_equal(loss, framework_loss)

# ... and every hand-computed gradient, parameter by parameter.
for key, own_grad in grads.items():
    np.testing.assert_array_almost_equal(own_grad, tf_grads[key])
    np.testing.assert_array_almost_equal(own_grad, pytorch_grads[key])


In [None]:

# Train the TensorFlow graph with plain gradient descent for num_epochs steps,
# printing the loss every 100 epochs.
# NOTE(review): `tf` is not imported in the visible cells; presumably it is
# re-exported by `from simple_external import *` -- confirm.
# Alternative optimizer, kept for experimentation:
#optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(tf_loss_tensor)
optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(tf_loss_tensor)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())

    for i in range(num_epochs):
        # Run one optimisation step and fetch the loss in the same call.
        _, tf_loss = sess.run([optimizer, tf_loss_tensor], feed_dict={x_ph: X, y_ph: Y})
        # Compare by value: `is 0` tests object identity, which only works
        # through CPython's small-int caching and raises a SyntaxWarning on
        # Python 3.8+.
        if i % 100 == 0:
            print(tf_loss)
