<strong style="font-size: 125%">第八单元：神经网络模型</strong>
<br><br>
神经网络模型是构成深度学习的核心。这一单元我们介绍大名鼎鼎的前馈神经网络。和以往一样，我们先导入一些工具函数和Lending Club的数据。

In [None]:
import numpy as np
import math

def gini(actual, pred, cmpcol = 0, sortcol = 1):
    """Return the un-normalized Gini coefficient of `pred` ranked against `actual`.

    Rows are ordered by descending prediction (ties keep their original
    order). The result is the sum of the cumulative share of `actual`
    captured along that ordering, minus (n + 1) / 2, divided by n.

    Args:
        actual: sequence of observed values (e.g. 0/1 outcomes).
        pred: sequence of predicted scores, same length as `actual`.
        cmpcol, sortcol: accepted for backward compatibility; not used.

    Returns:
        A NumPy float. It is NaN when `actual` sums to zero.

    Raises:
        AssertionError: if `actual` and `pred` differ in length.
    """
    assert (len(actual) == len(pred))
    n = len(actual)
    # np.float was removed in NumPy 1.24; the builtin float selects the same
    # float64 dtype on every NumPy version.
    table = np.asarray(np.c_[actual, pred, np.arange(n)], dtype=float)
    # lexsort treats its LAST key as primary: sort by -pred, then by row index.
    table = table[np.lexsort((table[:, 2], -1 * table[:, 1]))]
    total_losses = table[:, 0].sum()
    gini_sum = table[:, 0].cumsum().sum() / total_losses

    gini_sum -= (n + 1) / 2.
    return gini_sum / n


def gini_normalized(actual, pred):
    """Return the Gini of `pred` divided by the Gini of a perfect ranking.

    The perfect ranking is obtained by scoring `actual` with itself.
    """
    model_gini = gini(actual, pred)
    perfect_gini = gini(actual, actual)
    return model_gini / perfect_gini

def accuracy(prediction, actual):
    """Compare average prediction and average outcome over ten score bands.

    The (prediction, actual) pairs are sorted by prediction. Band edges are
    0, the predictions found at the 10%, 20%, ..., 90% positions of that
    ordering, and 1. A pair belongs to a band when its prediction is strictly
    above the lower edge and at most the upper edge, so predictions that are
    <= 0 or > 1 fall in no band.
    NOTE(review): presumably predictions are probabilities in (0, 1] - confirm.

    Returns:
        (acc, band_averages) where band_averages is a list of ten
        (mean prediction, mean actual) tuples and
        acc = 1 - sum(|mean prediction - mean actual|) / (10 * mean of the
        band mean actuals).

    Raises:
        ZeroDivisionError: if any band is empty (raised by `mean`).
    """
    ranked = sorted(zip(prediction, actual), key=lambda pair: pair[0])
    n = len(ranked)

    # Build the eleven band edges.
    edges = [0]
    for fraction in [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]:
        edges.append(ranked[int(n * fraction)][0])
    edges.append(1)

    # Average prediction and outcome inside each (low, high] band.
    band_averages = []
    for low, high in zip(edges[:-1], edges[1:]):
        band = [(p, a) for p, a in ranked if low < p <= high]
        band_averages.append((mean([p for p, a in band]),
                              mean([a for p, a in band])))

    total_gap = sum([abs(avg_pred - avg_actual)
                     for avg_pred, avg_actual in band_averages])
    overall_actual = mean([avg_actual for avg_pred, avg_actual in band_averages])
    acc = 1 - total_gap / (10 * overall_actual)
    return acc, band_averages

def mean(v):
    """Return the arithmetic mean of the values in `v`.

    Raises ZeroDivisionError when `v` is empty.
    """
    total = sum(v)
    count = len(v)
    return total / count

def variance(v):
    """Return the sample variance of `v` (sum of squared deviations / (n - 1)).

    Returns 0 when `v` has fewer than two values: the empty case was the
    original intent (its guard was unreachable because the mean was computed
    first), and a single value would otherwise divide by n - 1 == 0.
    """
    n = len(v)
    # Guard before touching the data so short input never divides by zero.
    if n < 2:
        return 0
    ave = sum(v) / n
    return sum((x - ave) ** 2 for x in v) / (n - 1)

def std_dev(v):
    """Return the standard deviation of `v`: the square root of `variance(v)`."""
    spread = variance(v)
    return math.sqrt(spread)

def scale(v):
    """Return `v` standardized: each value minus the mean, divided by the std dev.

    Raises ZeroDivisionError when the standard deviation of `v` is zero.
    """
    center = mean(v)
    spread = std_dev(v)
    standardized = []
    for x in v:
        standardized.append((x - center) / spread)
    return standardized

def dot(v1, v2):
    """Return the dot product of `v1` and `v2`.

    Pairs are formed with zip, so extra trailing elements of the longer
    sequence are ignored.
    """
    total = 0
    for a, b in zip(v1, v2):
        total += a * b
    return total

In [None]:
import csv
# Load the training CSV fully into memory; each row becomes a list of strings.
# newline='' lets the csv module do its own line-ending handling, which its
# documentation requires for file objects passed to csv.reader.
with open('/home/nbuser/library/lending_club_train.csv', 'r', newline='') as f:
    reader = csv.reader(f)
    lending_club_train = list(reader)

# Pull each column out by position. Every row is parsed, including the first.
# NOTE(review): presumably the file has no header line - int()/float() would
# fail on one.
loan_amnt = [int(x[0]) for x in lending_club_train]
term = [x[1] for x in lending_club_train]
int_rate = [float(x[2]) for x in lending_club_train]
installment = [float(x[3]) for x in lending_club_train]
grade = [x[4] for x in lending_club_train]
sub_grade = [x[5] for x in lending_club_train]
emp_length = [x[6] for x in lending_club_train]
home_ownership = [x[7] for x in lending_club_train]
annual_income = [float(x[8]) for x in lending_club_train]
verification_status = [x[9] for x in lending_club_train]
purpose = [x[10] for x in lending_club_train]
# An empty dti field is encoded with the sentinel value -99999.
dti = [-99999 if x[11]=='' else float(x[11]) for x in lending_club_train]
delinq_2yrs = [x[12] for x in lending_club_train]
loan_status = [x[13] for x in lending_club_train]
issue_d = [x[14] for x in lending_club_train]

# Binary target: 1 when the loan status is "Charged Off" or "Default", else 0.
dep_var = [1 if x in ("Charged Off", "Default") else 0 for x in loan_status]

In [None]:
def category_to_rate_mapper(list_x, list_y):
    """Map each distinct value of `list_x` to the mean of its paired `list_y` values.

    Args:
        list_x: sequence of hashable category values.
        list_y: sequence of numbers, paired positionally with `list_x`.

    Returns:
        A plain dict {category: mean of the y values seen with it}, with keys
        in order of first appearance. Empty input gives an empty dict.
    """
    from collections import defaultdict

    y_by_x = defaultdict(list)
    for x, y in zip(list_x, list_y):
        y_by_x[x].append(y)
    return {x: sum(ys) / len(ys) for x, ys in y_by_x.items()}

# Encode each grade as the average target value observed for that grade.
grade_mapper = category_to_rate_mapper(grade, dep_var)
grade_rate = [grade_mapper[g] for g in grade]

# Replace non-positive dti values (including the -99999 sentinel) with 0.
dti_2 = [d if d>0 else 0 for d in dti]

# Two indicator features for employment length:
#   emp_length_A is 1 when the field is empty, emp_length_B is 1 for '10+ years'.
emp_length_A = [1 if e == '' else 0 for e in emp_length]
emp_length_B = [1 if e == '10+ years' else 0 for e in emp_length]

In [None]:
import csv
# Load the test CSV fully into memory; each row becomes a list of strings.
# newline='' lets the csv module do its own line-ending handling, which its
# documentation requires for file objects passed to csv.reader.
with open('/home/nbuser/library/lending_club_test.csv', 'r', newline='') as f:
    reader = csv.reader(f)
    lending_club_test = list(reader)

# Pull each column out by position, using the same layout as the training file.
t_loan_amnt = [int(x[0]) for x in lending_club_test]
t_term = [x[1] for x in lending_club_test]
t_int_rate = [float(x[2]) for x in lending_club_test]
t_installment = [float(x[3]) for x in lending_club_test]
t_grade = [x[4] for x in lending_club_test]
t_sub_grade = [x[5] for x in lending_club_test]
t_emp_length = [x[6] for x in lending_club_test]
t_home_ownership = [x[7] for x in lending_club_test]
t_annual_income = [float(x[8]) for x in lending_club_test]
t_verification_status = [x[9] for x in lending_club_test]
t_purpose = [x[10] for x in lending_club_test]
# An empty dti field is encoded with the sentinel value -99999.
t_dti = [-99999 if x[11]=='' else float(x[11]) for x in lending_club_test]
t_delinq_2yrs = [x[12] for x in lending_club_test]
t_loan_status = [x[13] for x in lending_club_test]
t_issue_d = [x[14] for x in lending_club_test]

# Binary target: 1 when the loan status is "Charged Off" or "Default", else 0.
t_dep_var = [1 if x in ("Charged Off", "Default") else 0 for x in t_loan_status]

In [None]:
# Encode test grades with the mapping learned on the training data.
# A grade absent from grade_mapper raises KeyError.
t_grade_rate = [grade_mapper[g] for g in t_grade]

# Replace non-positive dti values (including the -99999 sentinel) with 0.
t_dti_2 = [d if d>0 else 0 for d in t_dti]

# Two indicator features for employment length:
#   t_emp_length_A is 1 when the field is empty, t_emp_length_B is 1 for '10+ years'.
t_emp_length_A = [1 if e == '' else 0 for e in t_emp_length]
t_emp_length_B = [1 if e == '10+ years' else 0 for e in t_emp_length]

前馈神经网络就是由神经单元组成一个前向传播的网络结构。下面是神经网络的一个图示。
<br>

<div>
    <img src="attachment:image.png" width="50%">
</div>

数据或者信息在神经网络中是如何传播的呢？这涉及到每个神经单元的具体结构。下图展示了神经单元如何将输入信息转换成输出。
<br><br><br>

<div>
    <img src="attachment:image.png" width="35%">
</div>
<br>

神经单元中的函数$f(x)$称为激励函数，一种比较流行的做法是将激励函数取成逻辑(logistic)函数，即$f(x)=\frac{1}{1+e^{-x}}$。

下面看一下一个标准的前馈神经网络是如何实现的。

In [None]:
import math
def sigmoid(t):
    """Return the logistic function 1 / (1 + e**-t) without overflowing.

    For non-negative `t` the textbook form is used. For negative `t` the
    algebraically equal form e**t / (1 + e**t) is used, because exp(-t)
    raises OverflowError once -t exceeds roughly 709.
    """
    if t >= 0:
        return 1 / (1 + math.exp(-t))
    # Here t < 0, so exp(t) lies in [0, 1) and cannot overflow.
    e = math.exp(t)
    return e / (1 + e)

In [None]:
def neuron_output(weights, inputs):
    """Return a neuron's output: the sigmoid of the weighted sum of `inputs`."""
    activation = dot(weights, inputs)
    return sigmoid(activation)

In [None]:
def feed_forward(neural_network, input_vector):
    """Propagate `input_vector` through the network and return every layer's outputs.

    Args:
        neural_network: list of layers; each layer is a list of neurons and
            each neuron is a list of weights whose last entry is the bias weight.
        input_vector: list of input values (must be a list, since a constant
            1 is appended to it as the bias input).

    Returns:
        A list with one list of neuron outputs per layer; the last entry is
        the network's output.
    """
    layer_outputs = []
    signal = input_vector
    for layer in neural_network:
        # Append the constant bias input before feeding the layer.
        biased = signal + [1]
        signal = [neuron_output(neuron, biased) for neuron in layer]
        layer_outputs.append(signal)
    return layer_outputs

我们用刚才写的函数来模拟著名的异或门逻辑。下图是异或门的神经网络图示。

<div>
    <img src="attachment:image.png" width="50%">
</div>
<br>

下面是对于此神经网络的模拟。

In [None]:
# Hand-picked weights for a network with 2 inputs, 2 hidden neurons and 1 output.
# Each neuron is [weight_x, weight_y, bias_weight] (feed_forward appends the bias input last).
#   hidden neuron 1 [20, 20, -30]: weighted sum is positive only when both inputs are 1 (AND)
#   hidden neuron 2 [20, 20, -10]: weighted sum is positive when at least one input is 1 (OR)
#   output neuron [-60, 60, -30]: positive only when neuron 2 is on and neuron 1 is off (XOR)
xor_network = [[[20, 20, -30],[20, 20, -10]], [[-60, 60, -30]]]


In [None]:
# Print the output-layer activations for each of the four XOR input pairs.
for x in (0, 1):
    for y in (0, 1):
        layer_outputs = feed_forward(xor_network, [x, y])
        print(x, y, layer_outputs[-1])

那么给定一个神经网络的结构以及输入和输出的数据，如何对神经网络中具体的参数进行优化，使得输出数据和具体的目标值相吻合呢？和前几个单元一样，我们用梯度下降法来对误差函数（或者说损失函数）最小化。这儿的关键是如何求整个神经网络误差函数的梯度，也就是神经网络的误差函数对于任意一个参数（即任意两个神经单元之间的连接权重）的导数。

假设误差函数是平方误差，即$E=\frac{1}{2}\sum_i (y_i - t_i)^2$（其中$y_i$是输出，$t_i$是目标值；系数$\frac{1}{2}$是为了求导后消去因子2），那么用链式法则可以得到以下结论。

如果$w_{ji}$是隐层单元$j$和输出层单元$i$之间的连接权重，那我们有$\frac{\partial E}{\partial w_{ji}} = (y_i-t_i)y_i(1-y_i)y_j$，其中$y_i$是输出单元$i$的输出，$t_i$是对应的目标值，$y_j$是隐层单元$j$的输出。我们也可以把这个式子写成下面的形式：
<br><br>
$\delta_i=(y_i-t_i)y_i(1-y_i)$, 
<br><br>
$\frac{\partial E}{\partial w_{ji}} = \delta_i y_j$
<br><br>

如果$w_{ji}$是隐层和隐层(或输入层和隐层)之间的连接权重，那我们可把$\frac{\partial E}{\partial w_{ji}}$表示成下式：
<br><br>
$\delta_i=y_i(1-y_i)\sum_{i^\prime}\delta_{i^\prime}w_{ii^\prime}$, 
<br><br>
$\frac{\partial E}{\partial w_{ji}} = \delta_i y_j$
<br><br>

这儿对前一层（隐层）求$\delta_i$，就是将后一层的$\delta_{i^\prime}$通过神经元之间的连接反向传播回来，所以此算法又称反向传播算法。有了上面的知识后，我们可以来实现前馈神经网络的训练算法了。

In [None]:
lr = 0.1  # learning rate: scales every weight update in backpropagate
def backpropagate(network, input_vector, targets):
    """Apply one gradient-descent step for a single example, updating `network` in place.

    Args:
        network: exactly two layers, [hidden_layer, output_layer]; each layer
            is a list of neurons and each neuron a list of weights whose last
            entry is the bias weight.
        input_vector: list of input values for this example.
        targets: list of target values, one per output neuron.

    The step size is the module-level `lr`. Returns None.
    """
    hidden_outputs, outputs = feed_forward(network, input_vector)
    output_neurons = network[-1]

    # delta for each output neuron: sigmoid derivative times the output error.
    output_deltas = [output * (1 - output) * (output - target)
                     for output, target in zip(outputs, targets)]

    # Propagate the deltas back through the output weights of THIS network
    # (not a global), and do it before those weights are modified so the
    # gradient matches the forward pass that produced the outputs.
    hidden_deltas = [hidden_output * (1 - hidden_output) *
                     dot(output_deltas, [neuron[i] for neuron in output_neurons])
                     for i, hidden_output in enumerate(hidden_outputs)]

    # Update the output-layer weights; the appended 1 is the bias input.
    for i, output_neuron in enumerate(output_neurons):
        for j, hidden_output in enumerate(hidden_outputs + [1]):
            output_neuron[j] -= output_deltas[i] * hidden_output * lr

    # Update the hidden-layer weights; the appended 1 is the bias input.
    for i, hidden_neuron in enumerate(network[0]):
        for j, input_value in enumerate(input_vector + [1]):
            hidden_neuron[j] -= hidden_deltas[i] * input_value * lr

下面我们用前馈神经网络对Lending Club的数据进行拟合。

In [None]:
import random
# Fix the seed so the random initial weights are the same on every run.
random.seed(50)
input_size = 4   # number of input features per example
num_hidden = 2   # number of neurons in the hidden layer
output_size = 1  # number of neurons in the output layer

In [None]:
# Build the two layers with random initial weights from random.random().
# Every neuron gets one weight per incoming value plus one extra bias weight.
hidden_layer = []
for __ in range(num_hidden):
    hidden_layer.append([random.random() for __ in range(input_size + 1)])
output_layer = []
for __ in range(output_size):
    output_layer.append([random.random() for __ in range(num_hidden + 1)])
network = [hidden_layer, output_layer]

In [None]:
# One feature row per training example: the two employment-length indicators,
# the grade rate, and the standardized dti.
inputs = [[a, b, rate, d]
          for a, b, rate, d in zip(emp_length_A, emp_length_B, grade_rate, scale(dti_2))]
# Each target is wrapped in a one-element list to match the single output neuron.
targets = [[label] for label in dep_var]

In [None]:
# Standardize the test dti with the TRAINING mean and standard deviation.
# The network was fitted on features scaled by the training statistics, so
# rescaling the test set with its own statistics would feed it values on a
# different scale than it was trained on.
dti_2_mean = mean(dti_2)
dti_2_std = std_dev(dti_2)
t_inputs = [[a, b, rate, (d - dti_2_mean) / dti_2_std]
            for a, b, rate, d in zip(t_emp_length_A, t_emp_length_B, t_grade_rate, t_dti_2)]

In [None]:
# Train for 20 passes over the training set, updating the weights after every example.
for __ in range(20):
    for features, target in zip(inputs, targets):
        backpropagate(network, features, target)

In [None]:
def predict(input):
    """Return the output-layer activations of the global `network` for one feature row."""
    layer_outputs = feed_forward(network, input)
    return layer_outputs[-1]

In [None]:
# Score every training row; [0] takes the single output neuron's value.
prediction = [predict(row)[0] for row in inputs]

In [None]:
# Normalized Gini of the predictions on the training data.
gini_normalized(dep_var, prediction)

In [None]:
# Score every test row; [0] takes the single output neuron's value.
t_prediction = [predict(row)[0] for row in t_inputs]

In [None]:
# Normalized Gini of the predictions on the test data.
gini_normalized(t_dep_var, t_prediction)

In [None]:
# Banded accuracy score and per-band (mean prediction, mean actual) pairs on the training data.
accuracy(prediction, dep_var)

In [None]:
# Banded accuracy score and per-band (mean prediction, mean actual) pairs on the test data.
accuracy(t_prediction, t_dep_var)