In [None]:
# https://victorzhou.com/blog/intro-to-neural-networks/?fbclid=IwAR23zBcCqAXvMsc7KiB_Uyveik7mSDyrFVsBMbSzr5uQKDYvXmqHgtczQRI

In [3]:
import numpy as np

In [4]:
# coding a Neuron

# sigmoid func

def sigmoid(x):
    """Logistic sigmoid activation, f(x) = 1 / (1 + e^(-x)).

    The value is computed from exp(-|x|), which always lies in (0, 1], so
    large-magnitude negative inputs do not overflow ``np.exp`` (the naive
    form ``1 / (1 + np.exp(-x))`` emits a RuntimeWarning for e.g. x = -1000).

    Parameters
    ----------
    x : scalar or array-like
        Pre-activation value(s).

    Returns
    -------
    NumPy float scalar for scalar input, otherwise an ndarray shaped like ``x``.
    """
    x = np.asarray(x)
    decay = np.exp(-np.abs(x))
    # x >= 0: 1 / (1 + e^-x);  x < 0: e^x / (1 + e^x) -- same function, no overflow.
    numerator = np.where(x >= 0, 1.0, decay)
    # [()] unwraps a 0-d result to a scalar and leaves real arrays untouched.
    return (numerator / (1.0 + decay))[()]

class Neuro:
    """A single neuron: weighted sum of the inputs plus a bias, passed through sigmoid."""

    def __init__(self, weights, bias):
        # Parameters are stored exactly as given; nothing is copied or validated.
        self.bias = bias
        self.weights = weights

    def feedforward(self, x):
        """Return the neuron's activation for the input vector ``x``."""
        weighted_sum = np.dot(self.weights, x)
        pre_activation = weighted_sum + self.bias
        return sigmoid(pre_activation)

# Demo: one neuron with w = [0, 1] and b = 4, evaluated at x = [2, 3].
my_weights, my_bias = np.array([0, 1]), 4  # w1 = 0, w2 = 1
my_neuro = Neuro(weights=my_weights, bias=my_bias)
x = np.array([2, 3])
# Activation is sigmoid(0*2 + 1*3 + 4) = sigmoid(7).
my_res = my_neuro.feedforward(x)
print(my_res)

0.9990889488055994


In [5]:
# connect Neurons into Neural Network

# feedforward: outputs of X are inputs for the hidden layer, outputs of the hidden layer are inputs for the classification (output) layer
# X --> H --> O

# hidden layer: any layer between the first layer (input) and the last layer (output)

# coding a Neural Network

class NeuralNetwork:
    """Fixed-parameter network: 2 inputs -> 2 hidden neurons (h1, h2) -> 1 output neuron (o1).

    Every neuron is built with the same weights [0, 1] and the same bias 0.
    """

    def __init__(self):
        # Local values handed to all three neurons (they share one weights array object).
        weights = np.array([0, 1])
        bias = 0

        self.h1, self.h2, self.o1 = (Neuro(weights, bias) for _ in range(3))

    def feedforward(self, x):
        """Propagate ``x`` through the hidden layer, then through the output neuron."""
        hidden_out = np.array([unit.feedforward(x) for unit in (self.h1, self.h2)])
        return self.o1.feedforward(hidden_out)
    
    
# Demo: run the fixed-parameter network on the sample input [2, 3].
x = np.array([2, 3])
my_neural_network = NeuralNetwork()
res = my_neural_network.feedforward(x)
print(res)
        

0.7216325609518421


In [7]:
# Training a NN
# train a Neural Network to predict a person's gender based on height and weight


# Step 1: define the loss
def mse_loss(y_true, y_pred):
    """Mean squared error between targets and predictions.

    Both inputs are converted to float arrays before subtracting, so plain
    lists and Python scalars are accepted, boolean label arrays no longer
    raise on ``-``, and unsigned-integer labels cannot wrap around.

    Parameters
    ----------
    y_true : scalar or array-like
        Ground-truth values.
    y_pred : scalar or array-like
        Predicted values; must broadcast against ``y_true``.

    Returns
    -------
    The mean of the squared element-wise differences, as a NumPy float.
    """
    diff = np.asarray(y_true, dtype=float) - np.asarray(y_pred, dtype=float)
    return np.mean(np.square(diff))

# Sanity check: an all-zeros prediction misses two of the four labels, so MSE = 2/4.
y_true, y_pred = np.array([1, 0, 0, 1]), np.array([0, 0, 0, 0])
print(mse_loss(y_true, y_pred))

0.5


In [None]:
# Step 2: minimize the loss
# update model's weights and bias to minimize loss
# take (multivariate) derivatives 

# 2 input units, 2 units in one hidden layer, 1 output unit
# L(w1, w2, w3, w4, w5, w6, b1, b2, b3)

# How would L change by w1? Take partial derivative!
# dL / dw_1
# use chain rule:
# dL / dw_1 = dL / dy_pred * dy_pred / dw_1


# take single case as example

# L = (1-y_pred)**2
# dL / dy_pred = -2(1 - y_pred)

# y_pred = o1 = f(w5*h1 + w6*h2 + b3)
# h1 = f(w1*x1 + w2*x2 + b1)

# back-propagation
# to calculate partial derivative by working backwards
# dL / dw_1 = dL / dy_pred * dy_pred / dh_1 * dh_1 / dw_1

# Example
# calculate partial derivative

# Alice weight: -2, height: -1, gender: 1
# all weights set as 1, bias as 0
# h1 = f(w1*x1 + w2*x2 + b1) = f(-2 - 1 + 0) = f(-3) = 0.047
# h2 = f(w3*x1 + w4*x2 + b2) = 0.047
# o1 = f(w5*h1 + w6*h2 + b3) = f(0.047*2) = 0.524

# 0.524 favors neither Male (0) nor Female (1), so we need to update the weights and biases
# how? compute loss !

# dL / dw_1 = dL/dy_pred * dy_pred/dh_1 * dh_1/dw_1

# dL/dy_pred = d(1-y_pred)**2/dy_pred = -2*(1-y_pred) = -0.952

# dy_pred/dh_1 = df(w5*h1 + w6*h2 + b3)/dh_1
# = w5 * f'(w5*h1 + w6*h2 + b3) = 1 * f'(1*0.047 + 1*0.047 + 0) = f'(0.094)
# = f(0.094) * (1 - f(0.094))
# = 0.524 * 0.476 = 0.249

# dh_1/dw_1 = df(w1*x1 + w2*x2 + b1)/dw_1
# = x1 * f'(w1*x1 + w2*x2 + b1) = -2 * f'(-2-1)
# = -2*(f(-3) * (1 - f(-3))) = -2 * (0.04743 * 0.95257) = -0.0904

# dL/dw_1 = dL/dy_pred * dy_pred/dh_1 * dh_1/dw_1
# = (-0.952) * 0.249 * (-0.0904) = 0.0214

# so dL/dw_1 ≈ 0.021 > 0, which means L will increase slightly if w_1 is increased

In [None]:
# Step 3: Stochastic Gradient Descent SGD

# strategy to update w
# w = w - alpha * (dL/dw)    (equivalently: w -= alpha * (dL/dw))
# alpha: learning rate, controls how fast we train

# update each w and b so that the loss will slowly decrease and the NN is improved

# how it works
# 1. choose one sample randomly - stochastic
# 2. calculate the partial derivatives of loss with respect to weight and bias
# e.g. dL/dw_1, dL/dw_2, ... and dL/db
# 3. use the update equation to update each weight and bias
# 4. go back to step 1

In [1]:
# Code a Complete Neural Network
import numpy as np

def sigmoid(z):
    """Logistic sigmoid activation, f(z) = 1 / (1 + e^(-z)).

    The value is computed from exp(-|z|), which always lies in (0, 1], so
    large-magnitude negative inputs do not overflow ``np.exp`` (the naive
    form ``1 / (1 + np.exp(-z))`` emits a RuntimeWarning for e.g. z = -1000).

    Parameters
    ----------
    z : scalar or array-like
        Pre-activation value(s).

    Returns
    -------
    NumPy float scalar for scalar input, otherwise an ndarray shaped like ``z``.
    """
    z = np.asarray(z)
    decay = np.exp(-np.abs(z))
    # z >= 0: 1 / (1 + e^-z);  z < 0: e^z / (1 + e^z) -- same function, no overflow.
    numerator = np.where(z >= 0, 1.0, decay)
    # [()] unwraps a 0-d result to a scalar and leaves real arrays untouched.
    return (numerator / (1.0 + decay))[()]

def deriv_sigmoid(z):
    """Derivative of the sigmoid at ``z``: f'(z) = f(z) * (1 - f(z))."""
    activation = sigmoid(z)
    complement = 1 - activation
    return activation * complement

def mse_loss(y_true, y_pred):
    """Mean squared error between targets and predictions.

    Both inputs are converted to float arrays before subtracting, so plain
    lists and Python scalars are accepted, boolean label arrays no longer
    raise on ``-``, and unsigned-integer labels cannot wrap around.

    Parameters
    ----------
    y_true : scalar or array-like
        Ground-truth values.
    y_pred : scalar or array-like
        Predicted values; must broadcast against ``y_true``.

    Returns
    -------
    The mean of the squared element-wise differences, as a NumPy float.
    """
    diff = np.asarray(y_true, dtype=float) - np.asarray(y_pred, dtype=float)
    return np.mean(np.square(diff))

class MyNeuralNetwork:
    """Parameter container for the 2-input, 2-hidden-neuron, 1-output network.

    Naming follows the notes earlier in this notebook:
      h1 = f(w1*x1 + w2*x2 + b1)
      h2 = f(w3*x1 + w4*x2 + b2)
      o1 = f(w5*h1 + w6*h2 + b3)

    All nine parameters are drawn independently from a standard normal
    distribution via NumPy's global random state.
    """

    def __init__(self):
        # init weights
        self.w1 = np.random.normal()
        self.w2 = np.random.normal()
        self.w3 = np.random.normal()
        self.w4 = np.random.normal()
        self.w5 = np.random.normal()
        self.w6 = np.random.normal()

        # init biases (drawn after the weights, so under a fixed seed the
        # weight values are the same as before the biases were added)
        self.b1 = np.random.normal()
        self.b2 = np.random.normal()
        self.b3 = np.random.normal()
    

In [19]:
# Draw one sample from the standard normal distribution (mean 0, std 1) and show it.
print(np.random.normal(loc=0.0, scale=1.0))

0.9825736874211026
