In [1]:
import numpy as np
from tensorflow.keras.datasets import mnist

In [2]:
# Load MNIST through the Keras dataset helper and unpack the two
# (inputs, labels) pairs it returns: one for training, one for testing.
(train_X, train_y), (test_X, test_y) = mnist.load_data()

In [3]:
# Unpack the training array's shape: example count, image height, image width
# (requires train_X to be exactly 3-dimensional).
m, h, w = train_X.shape
# Number of target classes -- presumably the ten MNIST digit labels.
num_classes = 10

In [5]:
# Report the dataset dimensions, one "label value" pair per line.
for label, value in (
    ("num examples:", m),
    ("height of the image:", h),
    ("width of the image:", w),
):
    print(label, value)

num examples: 60000
height of the image: 28
width of the image: 28


In [7]:
'''
This notebook implements a plain NN from scratch, using numpy as the only dependency.
Algo: batch gradient descent.
Each layer uses sigmoid activation function. The last layer - softmax activation
'''
def train_nn(train_X, train_y, layers, num_epochs, learning_rate):
    """Skeleton of the training loop (work in progress, not runnable as written).

    Args:
        train_X: training inputs. Not referenced in the body yet.
        train_y: training labels. Not referenced in the body yet.
        layers: array with one entry per layer; passed to
            initialize_parameters to size the weights and biases.
        num_epochs: number of iterations of the training loop.
        learning_rate: step size. Not referenced in the body yet.

    Returns:
        None -- the body has no return statement.
    """
    # layers -> an array where len(layers) == num_layers,
    # and layers[i] -> number of hidden units in the (i+1)-th layer
    parameters = initialize_parameters(layers)#use xavier initialization
    for i in range(num_epochs):
        # NOTE(review): for_prop in this notebook is declared as
        # for_prop(X, parameters, layers) and returns only the final
        # activation, so this zero-argument call raises TypeError and there
        # is no cache of intermediate values yet.
        cache = for_prop()
        # NOTE(review): back_prop, update_params and compute_loss are not
        # defined in the visible part of the notebook -- presumably still to
        # be written. back_prop is expected to return the gradients.
        d_params = back_prop(cache)
        # NOTE(review): neither `parameters` nor `learning_rate` is passed
        # here -- TODO confirm the intended signature.
        update_params(d_params)
        # The return value (if any) is discarded.
        compute_loss()
        

'\nThis notebook implements a plain NN from scratch, using numpy as the only dependency\n'

In [None]:
# TODO checklist of helpers for this notebook.
# Kept as comments rather than bare zero-argument calls: executing the calls
# raises (missing required arguments / names not defined yet) and would stop
# a "Restart Kernel -> Run All".
#
# Listed as active in the original cell:
#   initialize_parameters()
#   sigmoid()
#   one_hot_encoding()
#   for_prop()
#
# Listed as commented out in the original cell:
#   sigmoid_derivative()
#   back_prop()
#   softmax_activation?
#   compute_loss()
#   update_params(d_params)
#   gradient_checking()

In [17]:
#given an np array, compute the elementwise sigmoid
def sigmoid(z):
    """Compute the element-wise logistic sigmoid, 1 / (1 + exp(-z)).

    The value is evaluated in a numerically stable way: only exp(-|z|) is
    ever computed, which lies in [0, 1], so large-magnitude negative inputs
    do not overflow (the direct form exp(-z) overflows to inf and emits a
    RuntimeWarning for them).

    Args:
        z: scalar or array-like of real numbers. Non-floating inputs are
            converted to float; floating inputs keep their dtype.

    Returns:
        Values in [0, 1] with the same shape as ``z`` (a NumPy scalar when
        ``z`` is a scalar).
    """
    z = np.asarray(z)
    if not np.issubdtype(z.dtype, np.floating):
        z = z.astype(float)
    # exp(-|z|) is always in [0, 1]; pick the algebraically equivalent branch
    # per element: 1/(1+e^-z) for z >= 0, e^z/(1+e^z) for z < 0.
    e = np.exp(-np.abs(z))
    out = np.where(z >= 0, 1.0 / (1.0 + e), e / (1.0 + e))
    # Indexing with () turns a 0-d result back into a scalar and is a no-op
    # view for real arrays, matching what the plain formula returns.
    return out[()]

# NOTE(review): this import sits mid-notebook; consider moving it to the
# imports cell at the top.
import math
# Saturation check at +inf / -inf. This result is computed but not shown:
# only the last expression of the cell is echoed as output.
sigmoid(np.array([math.inf, -math.inf]))
# Midpoint check: sigmoid(0) is 0.5 for each element (this is the echoed value).
sigmoid(np.array([0,0]))

array([0.5, 0.5])

In [24]:
def initialize_parameters(layers, rng=None):
    """Create weights and biases for a fully connected network.

    Weights use Xavier/Glorot normal initialization: samples from a standard
    normal scaled by sqrt(2 / (n_in + n_out)). Biases start at zero.

    Args:
        layers: sequence of layer sizes. ``layers[0]`` is the input size and
            ``layers[l]`` is the number of units in layer ``l``.
        rng: optional ``numpy.random.Generator`` (or ``RandomState``) used to
            draw the weights, for reproducible runs. When omitted, the global
            ``np.random`` state is used, so ``np.random.seed`` still applies.

    Returns:
        dict mapping ``"W1", "b1", ..., "W{L-1}", "b{L-1}"`` (``L = len(layers)``)
        to arrays: ``W{l}`` has shape ``(layers[l], layers[l-1])`` and
        ``b{l}`` has shape ``(layers[l], 1)``. Empty when fewer than two
        layer sizes are given.
    """
    if rng is None:
        draw = np.random.randn
    else:
        def draw(*shape):
            return rng.standard_normal(shape)

    parameters = {}
    # Walk consecutive (input size, output size) pairs; layer indices are 1-based.
    for l, (n_in, n_out) in enumerate(zip(layers[:-1], layers[1:]), start=1):
        # Glorot normal: Var(W) = 2 / (fan_in + fan_out)
        scale = np.sqrt(2.0 / (n_in + n_out))
        parameters["W" + str(l)] = draw(n_out, n_in) * scale
        parameters["b" + str(l)] = np.zeros((n_out, 1))
    return parameters

# Smoke test on a small 5-4-4-3 network: print every weight/bias array.
params = initialize_parameters([5, 4, 4, 3])
for key, matrix in params.items():
    print(key, "associated mat:\n", matrix)

W1 associated mat:
 [[-0.30975372 -0.02570985  0.03452575 -0.0338122  -0.15374349]
 [ 0.14178843  0.39882064  0.28817877  0.2410015   0.21077359]
 [-0.17032731 -0.34827163  0.32654291  0.33598577  0.32584743]
 [ 0.06765052 -0.20619857 -0.53953438 -0.2516183  -0.26522662]]
b1 associated mat:
 [[0.]
 [0.]
 [0.]
 [0.]]
W2 associated mat:
 [[-0.17150038  0.12210365 -0.01334078 -0.21464894]
 [-0.26890502  0.4802588  -0.23172553 -0.25110254]
 [-0.04777167  0.02828064  0.32422867 -0.50301718]
 [ 0.30523133  0.20739956  0.10669511  0.05081905]]
b2 associated mat:
 [[0.]
 [0.]
 [0.]
 [0.]]
W3 associated mat:
 [[-0.25802778 -0.41020653 -0.09972408 -0.13473209]
 [ 0.26012368  0.01810268  0.4960611   0.07065452]
 [-0.43824347 -0.39730782 -0.15219173  0.19756516]]
b3 associated mat:
 [[0.]
 [0.]
 [0.]]


In [37]:
def for_prop_l(A_prev, W, b):
    """Forward step for a single layer.

    Computes the affine map np.dot(W, A_prev) + b and passes it through
    sigmoid.

    Args:
        A_prev: activations feeding this layer.
        W: weight matrix of this layer.
        b: bias of this layer (broadcast against the affine result).

    Returns:
        The layer's activations, sigmoid(W . A_prev + b).
    """
    pre_activation = np.dot(W, A_prev) + b
    return sigmoid(pre_activation)
    
def for_prop(X, parameters, layers):
    """Run a full forward pass through the network.

    Applies for_prop_l once per layer, feeding each layer's output into the
    next. Every layer, including the last, goes through for_prop_l and
    therefore uses the sigmoid activation.

    Args:
        X: network input, used as the activation of layer 0.
        parameters: dict holding 'W1', 'b1', ..., 'W{n}', 'b{n}' with
            n = len(layers) - 1.
        layers: sequence of layer sizes; only its length is used here.

    Returns:
        The activations of the final layer.
    """
    activation = X
    for idx in range(1, len(layers)):
        weights = parameters['W' + str(idx)]
        bias = parameters['b' + str(idx)]
        activation = for_prop_l(activation, weights, bias)
    return activation

In [38]:
# Two example inputs (all ones, all twos) laid out as columns -> shape (5, 2).
X = np.array([[1, 1, 1, 1, 1], [2, 2, 2, 2, 2]]).T
print(X.shape)
# Forward pass through the 5-4-4-3 network created above.
A = for_prop(X, params, [5, 4, 4, 3])
print(A)

(5, 2)
[[0.38437521 0.37996516]
 [0.60776942 0.61237131]
 [0.40488275 0.39941156]]


In [46]:
def softmax(x):
    """Column-wise softmax: normalise exp(x) so each column sums to 1.

    The column maximum is subtracted before exponentiating. This leaves the
    result mathematically unchanged but keeps exp() from overflowing to inf
    (which would produce inf/inf = nan) for large inputs.

    Args:
        x: array-like; softmax is taken along axis 0.

    Returns:
        Float array with the same shape as ``x``.
    """
    x = np.asarray(x)
    if not np.issubdtype(x.dtype, np.floating):
        # Avoid wrap-around when subtracting the max from unsigned integers.
        x = x.astype(float)
    if x.size == 0:
        # Nothing to normalise; max() is undefined on an empty axis.
        return np.exp(x)
    shifted = x - x.max(axis=0, keepdims=True)
    t = np.exp(shifted)
    return t / t.sum(axis=0, keepdims=True)
# Two example score vectors laid out as columns -> shape (4, 2).
x = np.array([[5, 2, -1, 3], [1, 1, 1, 1]]).T
print(softmax(x))

[[0.84203357 0.25      ]
 [0.04192238 0.25      ]
 [0.00208719 0.25      ]
 [0.11395685 0.25      ]]


In [43]:
def cross_entropy_cost():
    """Placeholder for the cross-entropy cost (not implemented yet).

    The original line was a bare ``def`` with no colon or body, which is a
    SyntaxError. This stub keeps the name and makes the cell valid.

    TODO: decide on the signature (presumably the network output and the
    one-hot labels) and implement the cost.

    Raises:
        NotImplementedError: always, until the cost is implemented.
    """
    raise NotImplementedError("cross_entropy_cost is not implemented yet")

[148.4131591    7.3890561    0.36787944  20.08553692]
