# Neural Network

In [2]:
import numpy as np
import pandas as pd
import scipy.io as scio

In [3]:
# Read the MATLAB .mat file; the result is a dict keyed by variable name
# (the recorded output shows the keys 'X' and 'y' plus MAT-file metadata).
data = scio.loadmat('ex3data1.mat')
# Echo the loaded dict as the cell output.
data

{'__header__': b'MATLAB 5.0 MAT-file, Platform: GLNXA64, Created on: Sun Oct 16 13:09:09 2011',
 '__version__': '1.0',
 '__globals__': [],
 'X': array([[0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        ...,
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.]]),
 'y': array([[10],
        [10],
        [10],
        ...,
        [ 9],
        [ 9],
        [ 9]], dtype=uint8)}

In [84]:
# Pull the two arrays out of the loaded dict: X is a 2-D float array and
# y is a uint8 column of labels (the printed sample shows values such as 10 and 9).
X = data['X']
y = data['y']

In [19]:
def sigmoidFunc(x):
    """Logistic sigmoid, applied element-wise.

    Parameters
    ----------
    x : scalar or numpy array/matrix
        Input value(s).

    Returns
    -------
    Same shape as ``x``
        ``1 / (1 + exp(-x))`` for every element.
    """
    denominator = 1 + np.exp(-x)
    return 1 / denominator

In [27]:
def costFunc(theta,X,y):
    """Unregularized logistic-regression cost (mean cross-entropy).

    Parameters
    ----------
    theta : array-like, length n
        Parameter vector; converted to a (1, n) matrix.
    X : array-like, (m, n)
        Design matrix, one sample per row.
    y : array-like, m binary labels
        May be given as a 1-D vector or as an (m, 1) column.

    Returns
    -------
    float
        Sum of the per-sample cross-entropy terms divided by the number of
        rows of ``X``.
    """
    X = np.matrix(X)
    # Force the labels into a float column. A 1-D y would otherwise become a
    # (1, m) row and silently broadcast against the (m, 1) hypothesis into an
    # (m, m) matrix, and an unsigned-integer y would wrap around under `-y`.
    y = np.matrix(y, dtype=float).reshape(-1, 1)
    theta = np.matrix(theta)

    # Evaluate the hypothesis once and reuse it in both terms.
    h = sigmoidFunc(X * theta.T)
    J = np.multiply(-y, np.log(h)) - np.multiply(1 - y, np.log(1 - h))

    cost = np.sum(J) / len(X)
    return cost

In [65]:
def costFuncReg(theta,X,y,lamb):
    """L2-regularized logistic-regression cost.

    Parameters
    ----------
    theta : array-like, length n
        Parameter vector; converted to a (1, n) matrix.
    X : array-like, (m, n)
        Design matrix, one sample per row.
    y : array-like, m binary labels
        May be given as a 1-D vector or as an (m, 1) column.
    lamb : float
        Regularization strength.

    Returns
    -------
    float
        Mean cross-entropy plus ``lamb / (2 * m)`` times the sum of squared
        parameters, where the first parameter (column 0 of ``theta``) is
        excluded from the penalty.
    """
    theta = np.matrix(theta)
    X = np.matrix(X)
    # Force the labels into a float column. A 1-D y would otherwise become a
    # (1, m) row and silently broadcast into an (m, m) matrix, and an
    # unsigned-integer y would wrap around under `-y`.
    y = np.matrix(y, dtype=float).reshape(-1, 1)
    m = len(X)

    # Evaluate the hypothesis once and reuse it in both terms.
    h = sigmoidFunc(X * theta.T)
    J = np.multiply(-y, np.log(h)) - np.multiply(1 - y, np.log(1 - h))
    # Penalize every parameter except the first one.
    R = lamb / (m * 2) * np.sum(np.power(theta[:, 1:], 2))

    cost = np.sum(J) / m + R
    return cost

In [29]:
def loopGradientDescent(theta,X,y,lamb):
    """Gradient of the regularized logistic cost, computed one parameter at a time.

    Despite its name this function performs no descent step; it only returns
    the gradient vector.

    Parameters
    ----------
    theta : array-like, length n
        Parameter vector; converted to a (1, n) matrix.
    X : array-like, (m, n)
        Design matrix, one sample per row.
    y : array-like, m binary labels
        May be given as a 1-D vector or as an (m, 1) column.
    lamb : float
        Regularization strength.

    Returns
    -------
    numpy.ndarray, shape (n,)
        Gradient; the entry for parameter 0 carries no regularization term.
    """
    X = np.matrix(X)
    # Force the labels into a column so that `error` stays (m, 1) even when
    # y is passed as a 1-D vector.
    y = np.matrix(y, dtype=float).reshape(-1, 1)
    theta = np.matrix(theta)

    m = len(X)
    parameters = theta.size
    grad = np.zeros(parameters)

    error = sigmoidFunc(X * theta.T) - y

    for j in range(parameters):
        term = np.multiply(error, X[:, j])
        grad[j] = np.sum(term) / m
        if j > 0:
            # theta[0, j] is a true scalar. The previous theta[:, j] was a
            # 1x1 matrix, and storing that into grad[j] relies on an
            # array-to-scalar conversion that NumPy has deprecated.
            grad[j] += (lamb / m) * theta[0, j]
    return grad

In [50]:
def gradientDescent(theta,X,y,lamb):
    """Vectorized gradient of the regularized logistic cost.

    Despite its name this function performs no descent step; it returns the
    gradient vector (it is passed as ``jac`` to the optimizer in oneVsAll).

    Parameters
    ----------
    theta : array-like, length n
        Parameter vector; converted to a (1, n) matrix.
    X : array-like, (m, n)
        Design matrix, one sample per row.
    y : array-like, m binary labels
        May be given as a 1-D vector or as an (m, 1) column.
    lamb : float
        Regularization strength.

    Returns
    -------
    numpy.ndarray, shape (n,)
        Gradient; the entry for parameter 0 carries no regularization term.
    """
    X = np.matrix(X)
    # Force the labels into a column. With a 1-D y the subtraction below
    # would broadcast to (m, m) and the returned gradient would have the
    # wrong length.
    y = np.matrix(y, dtype=float).reshape(-1, 1)
    theta = np.matrix(theta)
    m = len(X)

    error = sigmoidFunc(X * theta.T) - y

    # Regularized gradient for every parameter ...
    grad = ((X.T * error) / m).T + (lamb / m) * theta
    # ... then overwrite entry 0 with its unregularized value.
    grad[0, 0] = np.sum(np.multiply(error, X[:, 0])) / m

    return np.array(grad).ravel()

In [71]:
from scipy.optimize import minimize

def oneVsAll(X,y,k_nums,lamb):
    """Train one regularized logistic classifier per class label 1..k_nums.

    Parameters
    ----------
    X : numpy.ndarray, (rows, features)
        Samples without an intercept column; a column of ones is prepended
        here.
    y : array-like with ``rows`` labels
        Class labels; class ``i`` is matched with ``y == i`` for
        ``i = 1 .. k_nums``.
    k_nums : int
        Number of classes.
    lamb : float
        Regularization strength forwarded to the cost and gradient functions.

    Returns
    -------
    numpy.ndarray, (k_nums, features + 1)
        Row ``i - 1`` holds the fitted parameters for label ``i``.
    """
    rows = X.shape[0]
    features = X.shape[1]

    parameters = np.zeros((k_nums, features+1))

    # Prepend the intercept column of ones.
    X = np.insert(X, 0, values = np.ones(rows), axis = 1)

    # Flatten the labels once so each class needs only a vectorized comparison
    # instead of a Python-level loop over every sample.
    labels = np.asarray(y).ravel()

    for i in range(1, k_nums+1):
        theta = np.zeros(features+1)
        # Binary target column: 1 where the sample belongs to class i, else 0.
        y_i = (labels == i).astype(int).reshape(rows, 1)

        theta_i = minimize(fun=costFuncReg, x0=theta, args=(X, y_i, lamb), method='TNC', jac=gradientDescent)

        parameters[i-1,:] = theta_i.x
    return parameters

In [78]:
def predictAll(X,parameters):
    """Pick, for every sample, the classifier with the highest sigmoid output.

    Parameters
    ----------
    X : numpy.ndarray, (rows, features)
        Samples without an intercept column; a column of ones is prepended
        here.
    parameters : array-like, (k, features + 1)
        One row of fitted parameters per classifier.

    Returns
    -------
    numpy.matrix, (rows, 1)
        Zero-based index of the winning row of ``parameters`` for each sample.
        Callers whose labels start at 1 must add 1 themselves.
    """
    rows = X.shape[0]

    # Prepend the intercept column and make the matrix product explicit
    # instead of relying on ndarray * matrix dispatch.
    X = np.matrix(np.insert(X, 0, values = np.ones(rows), axis = 1))
    theta = np.matrix(parameters)

    a = sigmoidFunc(X * theta.T)
    out = np.argmax(a, axis = 1)
    return out

In [76]:
# Number of classes and regularization strength handed to oneVsAll.
k_nums = 10
lamb = 0.01
# Fit one classifier per class; each row of para holds one classifier's parameters.
para = oneVsAll(X, y, k_nums,lamb)
# Computing para is expensive, so this cell runs slowly.
para

array([[-4.29071517e+00, -2.24312238e-02,  0.00000000e+00, ...,
         1.63869050e-02, -1.56160749e-12,  0.00000000e+00],
       [-4.70758625e+00, -4.60171322e-02,  0.00000000e+00, ...,
         9.91064290e-02, -5.19164526e-03,  0.00000000e+00],
       [-7.10248566e+00, -2.86020206e-02,  0.00000000e+00, ...,
        -1.76555125e-05, -1.68290820e-08,  0.00000000e+00],
       ...,
       [-1.01939780e+01, -1.30686656e-01,  0.00000000e+00, ...,
        -5.71690820e-03,  6.60325036e-04,  0.00000000e+00],
       [-6.75038923e+00, -9.86252408e-02,  0.00000000e+00, ...,
        -2.22479316e-02,  1.25931747e-03,  0.00000000e+00],
       [-1.32933641e+01, -5.77002255e-02,  0.00000000e+00, ...,
        -4.26986342e-05,  3.00124699e-09,  0.00000000e+00]])

In [83]:
# Predict on the same X that was used to fit para, so the figure below is a
# training-set accuracy.
y_pre = predictAll(X,para)

# The predicted indices run 0~9 while the labels run 1~10, hence the a+1 == b comparison.
# NOTE(review): `precision` is a list of per-sample hit flags (1 = correct), not a precision metric.
precision = [1 if a+1 == b else 0 for (a,b) in zip(y_pre,y)]
# Fraction of correct predictions. NOTE(review): `accuary` is a misspelling of
# "accuracy"; the name is left unchanged in case other cells reference it.
accuary = sum((map(int,precision))) / float(len(precision))
accuary

0.9734