# 神经网络

## 一、概念

神经网络由多层神经元组成。最初的神经网络相当于多个逻辑回归的组合，如今已经有了长足的发展。

In [6]:
import pandas as pd
import numpy as np

# Load the wine-quality table; the file separates fields with ';'.
data = pd.read_csv('winequality-white.csv', delimiter=';')
# Preview the first five rows (head() defaults to five).
data.head()

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
0,7.0,0.27,0.36,20.7,0.045,45.0,170.0,1.001,3.0,0.45,8.8,6
1,6.3,0.3,0.34,1.6,0.049,14.0,132.0,0.994,3.3,0.49,9.5,6
2,8.1,0.28,0.4,6.9,0.05,30.0,97.0,0.9951,3.26,0.44,10.1,6
3,7.2,0.23,0.32,8.5,0.058,47.0,186.0,0.9956,3.19,0.4,9.9,6
4,7.2,0.23,0.32,8.5,0.058,47.0,186.0,0.9956,3.19,0.4,9.9,6


In [10]:
# n: number of rows; p: number of feature columns (all columns but 'quality').
n = len(data)
p = len(data.columns) - 1

# Fixed seed so the shuffled row order, and thus the split, is reproducible.
np.random.seed(2099)
index = np.random.permutation(n)

# The first 70% of the shuffled positions form the training set,
# the remainder the test set.
n_train = int(0.7*n)
train_index = index[:n_train]
test_index = index[n_train:]

train = data.iloc[train_index]
test = data.iloc[test_index]

# Feature matrices, one row per sample and one column per feature.
train_x = np.array(train.drop(columns='quality')).reshape([n_train, p])
test_x = np.array(test.drop(columns='quality')).reshape([n - n_train, p])

# Targets as column vectors holding the raw 'quality' values.
train_y = np.array(train['quality']).reshape([n_train, 1])
test_y = np.array(test['quality']).reshape([n - n_train, 1])

In [14]:
def soft_max(z):
    """
    Column-wise softmax.

    Every column of ``z`` is exponentiated and divided by the sum of its
    exponentials, so each output column is non-negative and sums to 1.

    :param z: input, an p*n matrix
    :return: p*n matrix whose columns each sum to 1
    """
    # Softmax does not change when a constant is added to a whole column, so
    # shift each column by its maximum: the largest exponent becomes 0 and
    # np.exp can no longer overflow to inf (which would give inf/inf = nan).
    shifted = z - np.max(z, axis=0, keepdims=True)
    e = np.exp(shifted)
    total = np.sum(e, axis=0, keepdims=True)
    weight = e / total

    return weight

In [15]:
def accuracy(y, y_hat):
    """
    Fraction of samples whose predicted class equals the true class.

    The class of a sample is taken to be the row index of the largest entry
    in its column, so both arguments hold one sample per column.

    :param y: true labels, one column per sample (e.g. one-hot encoded)
    :param y_hat: predicted scores, same layout as y
    :return: share of columns whose arg-max rows agree (nan for an empty batch)
    """
    true_class = np.argmax(y, axis=0)
    predicted_class = np.argmax(y_hat, axis=0)

    # The mean of the boolean match mask is the hit rate; np.mean runs in
    # native code instead of walking the array with the built-in sum().
    return np.mean(true_class == predicted_class)

In [16]:
def likelihood(y, y_hat):
    """
    Average, over the columns of y, of the element-wise product y * y_hat.

    :param y: the true value, one column per sample
    :param y_hat: the predicted value, same shape as y
    :return: sum(y * y_hat) divided by the number of columns of y; no
        logarithm is applied, since the value is only used for monitoring
        and maximizing it is equivalent to minimizing the log-loss
    """
    sample_count = y.shape[1]
    overlap = y * y_hat

    return np.sum(overlap) / sample_count

In [17]:
def leaky_relu(x, k=0.3):
    """
    Leaky ReLU activation, applied element-wise.

    :param x: input value(s), scalar or array
    :param k: slope applied to non-positive inputs
    :return: x where x > 0 and k*x elsewhere, with the shape of x
    """
    # Select the branch instead of multiplying by boolean masks: the mask
    # form evaluates 0 * x for the inactive branch, and 0 * inf is nan, so
    # infinite activations would silently turn into nan.
    out = np.where(x > 0, x, k * x)

    # np.where wraps a scalar input in a 0-d array; indexing with () unwraps
    # it to a scalar and leaves real arrays unchanged.
    return out[()]

In [18]:
def d_leaky_relu(x, k=0.3):
    """
    Element-wise derivative of leaky_relu with respect to its input.

    :param x: input value(s) at which the derivative is evaluated
    :param k: slope of the negative side
    :return: 1 where x > 0, k where x < 0 and 0 where x == 0
    """
    is_positive = x > 0
    is_negative = x < 0

    # Boolean masks behave as 0/1 in arithmetic; an input that is exactly
    # zero matches neither mask and therefore yields 0.
    return is_positive + k*is_negative

In [19]:
def init_w(b, a):
    """
    Create a weight matrix filled with standard-normal samples.

    The values are drawn from NumPy's global random state, so a preceding
    np.random.seed call makes the result reproducible.

    :param b: number of rows of the matrix
    :param a: number of columns of the matrix
    :return: b*a matrix of N(0, 1) samples
    """
    return np.random.randn(b, a)

In [20]:
def init_b(b):
    """
    Create a bias column vector initialised to zero.

    :param b: number of entries of the bias vector
    :return: b*1 matrix of zeros
    """
    return np.zeros((b, 1))

In [21]:
def forward(x, parameter, cache):
    """
    Run one forward pass through the three-layer network.

    Layers 1 and 2 apply an affine map followed by leaky_relu; layer 3
    applies an affine map followed by soft_max.

    :param x: input data p*n matrix
    :param parameter: a dict storing parameters under 'W1', 'b1', 'W2',
        'b2', 'W3', 'b3'
    :param cache: a dict that receives the result of each layer: 'C1'..'C3'
        (pre-activations) and 'A1'..'A3' (activations); it is modified in place
    :return: the same cache dict; the prediction is cache['A3']
    """
    # Hidden layers: affine map, then leaky ReLU.
    activation = x
    for layer in ('1', '2'):
        pre_activation = np.dot(parameter['W' + layer], activation) + parameter['b' + layer]
        cache['C' + layer] = pre_activation
        activation = leaky_relu(pre_activation)
        cache['A' + layer] = activation

    # Output layer: affine map, then column-wise soft_max.
    scores = np.dot(parameter['W3'], activation) + parameter['b3']
    cache['C3'] = scores
    cache['A3'] = soft_max(scores)

    return cache

In [22]:
def back_propagation(x, y, parameter, cache, step):
    """
    Perform one gradient-descent step for the three-layer network.

    Shapes, with p inputs, h1 and h2 hidden units, c outputs, n samples:
    x p*n / y c*n
    dW1 W1 h1*p, db1 b1 h1*1, A1 C1 h1*n
    dW2 W2 h2*h1, db2 b2 h2*1, A2 C2 h2*n
    dW3 W3 c*h2, db3 b3 c*1, A3 C3 c*n

    :param x: input batch that produced the values stored in cache
    :param y: true value
    :param parameter: dictionary storing all parameters; its 'W1'..'W3' and
        'b1'..'b3' entries are replaced by the updated values
    :param cache: dictionary storing all the computation in process; must
        hold 'C1', 'A1', 'C2', 'A2', 'A3' from the forward pass and receives
        the gradients 'dC*', 'dW*', 'db*'
    :param step: learning rate
    :return: (cache, parameter) after the update
    """
    number = y.shape[1]
    if number == 0:
        # An empty batch carries no gradient; averaging over zero samples
        # would fill every parameter with nan.
        return cache, parameter

    # All gradients are computed before any parameter is touched, so the
    # error is propagated through the same weights that produced the cached
    # forward pass rather than through already-updated ones.

    # Output layer: A3 - y is the usual gradient of soft_max combined with
    # cross-entropy (assuming y is one-hot).
    cache['dC3'] = cache['A3'] - y
    cache['dW3'] = np.dot(cache['dC3'], cache['A2'].T)/number
    cache['db3'] = np.sum(cache['dC3'], axis=1, keepdims=True)/number

    # Second hidden layer.
    cache['dC2'] = np.dot(parameter['W3'].T, cache['dC3'])*d_leaky_relu(cache['C2'])
    cache['dW2'] = np.dot(cache['dC2'], cache['A1'].T)/number
    cache['db2'] = np.sum(cache['dC2'], axis=1, keepdims=True)/number

    # First hidden layer; db1 is averaged over the batch like db2 and db3.
    cache['dC1'] = np.dot(parameter['W2'].T, cache['dC2'])*d_leaky_relu(cache['C1'])
    cache['dW1'] = np.dot(cache['dC1'], x.T)/number
    cache['db1'] = np.sum(cache['dC1'], axis=1, keepdims=True)/number

    # Gradient-descent update of every layer.
    for layer in ('1', '2', '3'):
        parameter['W' + layer] = parameter['W' + layer] - step*cache['dW' + layer]
        parameter['b' + layer] = parameter['b' + layer] - step*cache['db' + layer]

    return cache, parameter

In [23]:
def train(x, y, learning_rate=0.001, batch_size=128, epoch=5):
    """
    Train the three-layer network with mini-batch gradient descent.

    The network has two hidden layers of 800 and 400 units; the input and
    output sizes are taken from the number of rows of x and y. Batches are
    consecutive column slices taken in the given order (no shuffling), and
    the likelihood and accuracy of every batch are printed before its update.

    :param x: training data, one column per sample
    :param y: training label, one column per sample
    :param learning_rate: the length of a step
    :param batch_size: numbers of samples we train in a round
    :param epoch: rounds we train through training data
    :return: (cache, parameter): the cache of the last processed batch and
        the trained set of parameters
    :raises ValueError: if batch_size is smaller than 1
    """
    if batch_size < 1:
        raise ValueError('batch_size must be at least 1')

    hidden_1, hidden_2 = 800, 400
    n_input = x.shape[0]
    n_output = y.shape[0]
    nx = x.shape[1]

    # Weights start as small normal values (scaled by 1/100), biases as zero.
    parameter = dict()
    parameter['W1'] = init_w(hidden_1, n_input)/100
    parameter['b1'] = init_b(hidden_1)
    parameter['W2'] = init_w(hidden_2, hidden_1)/100
    parameter['b2'] = init_b(hidden_2)
    parameter['W3'] = init_w(n_output, hidden_2)/100
    parameter['b3'] = init_b(n_output)

    cache = dict()

    for i in range(epoch):
        # Stepping through the start offsets directly yields exactly the
        # non-empty batches, including when nx is a multiple of batch_size.
        for j, start in enumerate(range(0, nx, batch_size)):
            stop = min(start + batch_size, nx)
            one_batch_x = x[:, start:stop]
            one_batch_y = y[:, start:stop]

            cache = forward(one_batch_x, parameter, cache)
            prob = likelihood(one_batch_y, cache['A3'])

            acc = accuracy(one_batch_y, cache['A3'])

            print(str(i)+'--'+str(j)+'--'+str(stop))
            print('likelihood'+str(prob))
            print('accuracy'+str(acc))

            [cache, parameter] = back_propagation(one_batch_x, one_batch_y, parameter, cache, step=learning_rate)

    return cache, parameter

In [None]:
# Train for five passes over the data; samples are passed as columns.
# NOTE(review): train_data and labels are not assigned in any cell shown in
# this file (the split cell produces train_x / train_y) - confirm where they
# come from before running this cell.
cache, parameter = train(x=train_data.T, y=labels.T, epoch=5)