# neural network

In [1]:
import numpy as np
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

- forward:
    - 
    - 
- gradient descent:
    - gradient:
        - $G_{W}:$
        - $G_{b}:$
      
    - loss function
        -  $Loss =- \sum_{i=1}^{m} ylog(y_{hat}) + (1-y)log(1-y_{hat}) * (1/m) $
    - update
        - $W \leftarrow  W- \alpha G_{W}$
        - $b \leftarrow b- \alpha G_{b}$


In [2]:
np.random.seed(0)

In [41]:
def linear(A_prev,W,b):
     return np.dot(A_prev,W)+b
    
def der_linear(A_prev):
#     np.dot(A_prev,W)+b
#  derivative respect to w and b 
     return A_prev,1
    
def relu(Z):
    return np.maximum(Z,0)
    
def der_relu(Z):
    return np.where(Z>=0,1,0)

def sigmoid(Z):
    return 1/(1+np.exp(-Z))
    
def der_sigmoid(Z):
    A = sigmoid(Z)
    return A*(1-A)
    
def loss(A,Y):
    m = Y.shape[0]
    loss = -(1/m)*np.sum(Y*np.log(A) + (1-Y)*np.log(1-A))
    return loss

def loss_der(A,Y):
    m = Y.shape[0]
    loss_der = - (1/m) * np.sum((Y/A)  + (1-Y)/(1-A)) 
    return loss_der

In [None]:
def initialize_parameter(n_x, n_h, n_y):
    W1 = np.random.rand(n_x, n_h)
    b1 =  np.random.rand(1，n_h)
    W2 = np.random.rand(n_h, n_y)
    b2 =  np.random.rand(1，n_y)
    
    parameters = {
        "W1": W1,
         "b1": b1,
         "W2": W2,
         "b2": b2
                 }
    return parameters


In [None]:
def forward(X,params):
    # define params
    W1 = params['W1']
    b1 = params['b1']
    W2 = params['W2']
    b2 = params['b2']
    
    ## layer 1 linear+relu
    Z1 = linear(X,W1,b1)
    A1 = relu(Z1)

    ## layer 2:linear+sigmoid
    Z2 = linear(A1,W2,b2)
    A2 = sigmoid(Z2)
    
    cache= {
        'X':X
        'Z1':Z1
        'A1':A1
        'Z2':Z2
    }
    return A2,cache

def predict(X,params):
    y_test_hat,_ = forward(X,params)
    y_test_hat = np.where(y_test_hat > 0.5, 1,0)
    return y_test_hat

- Z= WX+b → A = σ(Z) → L=loss(A,Y)
- dZ/dW ← dA/dZ ←  dL/dA


- update parameters W and b use gradient descent
- gradient descent：
    - Wn = Wn - α dL/dw
    - bn = bn - α dL/db
- want：dL/dw , dL/db
- use chain rule：dL/db = dL/dA * dA/dZ * dZ/dw
    - dL/dA :
        - (1/m) * sum((Y/A)  + (1-Y)/(1-A)) 
    - dA/dZ :
        - relu : 1 if a >0 else 0 
        - sigmoid:A(1-A)
    - dZ/dw :
        - X

In [45]:
np.array([2,1])*np.array([2,1])

array([4, 1])

In [49]:
A = np.array([10,1,-1])
der_relu(A)

array([1, 1, 0])

In [50]:
def backward(Y_hat , Y, cache):
    
    m = Y.shape[0]
#     layer 2
    dL_dA2 = der_loss(Y_hat,Y)
    dA2_dZ2 = der_sigmoid(cache['Z2'])
    dZ2_dW2,dZ2_db2 = der_linear(cache['A1'])
    
    dL_dW2 = dL_dA2*dA2_dZ2*dZ2_dW2
    dL_db2 =  dL_dA2*dA2_dZ2*dZ2_db2
    
#     layer 1
    dZ2_dA1 = W2
    dA1_dZ1 = der_relu(cache['Z1'])
    dZ1_dW1,dZ1_db1= der_linear(cache['X'])
    
    dL_dW1 = dL_dA2 *dA2_dZ2 * dZ2_dA1*dA1_dZ1*dZ1_dW1
    dL_db1 =  dL_dA2 *dA2_dZ2 * dZ2_dA1*dA1_dZ1*dZ1_db1
    
    grads = {
        'dW1':dL_dW1
        ,'db1':dL_db1
        ,'dW2':dL_dW2
        ,'db2':dL_db2
    }
    return grads
    

def update(params,grads):
    
    params['W1'] -= LR * grads['dW1']  
    params['b1'] -= LR * grads['db1']  
    params['W2'] -= LR * grads['dW2']  
    params['b2'] -= LR * grads['db2']  
    return params

In [51]:
### initialize parameter
ITER = 5
LR = 0.01
TEST_SIZE = 0.3
grads = {
    'dw':0
    ,'db':0
}

### load data
data_iris = load_iris()
X,y = data_iris['data'],data_iris['target']
# idx = y!=2
# X,y =X[idx],y[idx]
# idx_shuffle = np.random.permutation(range(X.shape[0]))
# X,y =X[idx_shuffle],y[idx_shuffle]
X,X_test ,y, y_test = train_test_split(X,y ,test_size = TEST_SIZE ,random_state = 0)

print(X.shape)
print(y.shape)
print(X_test.shape)
print(y_test.shape)

(105, 4)
(105,)
(45, 4)
(45,)


In [52]:
#initialize
w,b = initialize_parameter(X.shape[1])

for i in range(100):
#     forward
    y_hat = forward(X,w,b)
#     calculate grad,cost
    grads = backward(y_hat , y)
#     update
    w = w - LR * grads['dw']  
    b = b - LR * grads['db']
    #calculate loss,matrics
    if i%10 == 0 :
        cost = loss(y_hat , y)
        y_test_hat = predict(X_test,w,b)
        accuracy =  accuracy_score(y_test_hat,y_test)
        print('iter:{} , cost:{} ,accuracy:{} '.format(i,cost,accuracy))


TypeError: initialize_parameter() missing 2 required positional arguments: 'n_h' and 'n_y'

### test array 

In [6]:
a1 = np.array([[1,2,3],[4,5,6]])
a2 = np.array([[1,2,3,4],[5,6,7,8],[9,10,11,12]])
print(a1.shape,a2.shape)

(2, 3) (3, 4)


In [7]:
np.dot(a1,a2)

array([[ 38,  44,  50,  56],
       [ 83,  98, 113, 128]])