In [13]:
import numpy as np
import math

In [15]:
# 数据处理
def get_data():
    """Load the iris training and test sets from CSV files.

    Reads 'iris_training.csv' and 'iris_test.csv' from the current working
    directory as comma-separated floats, skipping the first (header) line of
    each file. In every file the last column is used as the label and cast to
    int64; all preceding columns are the features.

    Returns:
        (train_x, train_y, test_x, test_y) as NumPy arrays.
    """
    def _load_split(path):
        # Parse one CSV file and split it into (features, integer labels).
        table = np.genfromtxt(path, delimiter=',', skip_header=1, dtype=float)
        features = table[:, :-1]
        labels = table[:, -1].astype(np.int64)
        return features, labels

    train_x, train_y = _load_split('iris_training.csv')
    test_x, test_y = _load_split('iris_test.csv')
    return train_x, train_y, test_x, test_y
    

In [17]:
# 损失计算
def compute_neural_net_loss(params, X, y, reg=0.0):
    """Compute the softmax cross-entropy loss and gradients of a two-layer net.

    The network is ``X -> affine(W1, b1) -> ReLU -> affine(W2, b2) -> softmax``.

    Args:
        params: dict with keys 'W1', 'b1', 'W2', 'b2' holding the weights and
            biases of the two affine layers.
        X: 2-D array of inputs, one sample per row.
        y: integer class labels, one per row of X; each label is used as a
            column index into the class scores.
        reg: L2 regularization strength applied to W1 and W2 (biases are not
            regularized).

    Returns:
        (loss, grads): ``loss`` is the mean cross-entropy over the rows of X
        plus ``0.5 * reg * (sum(W1**2) + sum(W2**2))``; ``grads`` is a dict
        with the gradient of ``loss`` with respect to 'W1', 'b1', 'W2', 'b2'.
    """
    W1, b1 = params['W1'], params['b1']
    W2, b2 = params['W2'], params['b2']

    num = X.shape[0]
    rows = np.arange(num)

    # ---- Forward pass ----
    z1 = np.dot(X, W1) + b1
    active = z1 > 0          # ReLU mask, reused in the backward pass
    h = z1 * active

    z2 = np.dot(h, W2) + b2

    # Numerically stable softmax. The row maximum has to be subtracted from
    # the scores *before* exponentiating: softmax is invariant to that shift,
    # and it keeps exp() from overflowing. Subtracting the maximum from
    # exp(z2) instead does not yield a probability distribution.
    shifted = z2 - np.max(z2, axis=1, keepdims=True)
    exp_scores = np.exp(shifted)
    sum_exp = np.sum(exp_scores, axis=1, keepdims=True)
    y_pred = exp_scores / sum_exp

    # ---- Loss ----
    # Take the cross-entropy from the log-softmax directly, so a probability
    # that underflows to 0 cannot turn into log(0).
    log_probs = shifted - np.log(sum_exp)
    loss = -np.sum(log_probs[rows, y]) / num
    loss += 0.5 * reg * (np.sum(W1 * W1) + np.sum(W2 * W2))

    # ---- Backward pass ----
    dz2 = y_pred.copy()
    dz2[rows, y] -= 1        # d(loss)/d(z2) = softmax - one_hot(y)
    dz2 /= num               # the loss is averaged over the batch

    dW2 = np.dot(h.T, dz2) + reg * W2
    db2 = np.sum(dz2, axis=0)
    dh = np.dot(dz2, W2.T)
    dz1 = dh * active        # gradient flows only through active ReLU units
    dW1 = np.dot(X.T, dz1) + reg * W1
    db1 = np.sum(dz1, axis=0)

    grads = {'W2': dW2, 'b2': db2, 'W1': dW1, 'b1': db1}
    return loss, grads

In [9]:
# 预测
def predict(params, X):
    """Return the predicted class index for every row of X.

    Runs the forward pass of the two-layer network (affine -> ReLU -> affine)
    using the 'W1', 'b1', 'W2', 'b2' entries of ``params`` and picks, per row,
    the column with the largest score.
    """
    W1, b1, W2, b2 = (params[name] for name in ('W1', 'b1', 'W2', 'b2'))

    pre_activation = np.dot(X, W1) + b1
    # ReLU: multiply by the boolean mask of strictly positive entries.
    hidden = pre_activation * (pre_activation > 0)
    scores = np.dot(hidden, W2) + b2

    return np.argmax(scores, axis=1)

In [10]:
# 准确率
def acc(y, y_pred):
    """Return the mean of ``y == y_pred``, i.e. the fraction of matching labels for arrays."""
    matches = (y == y_pred)
    return np.mean(matches)

In [19]:
# 随机梯度下降
def sgd_update(params, grads, learning_rate):
    """Apply one gradient-descent step to every entry of ``params``.

    For each key in ``params`` the matching entry of ``grads``, scaled by
    ``learning_rate``, is subtracted from the parameter. ``params`` is updated
    in place and nothing is returned.
    """
    for name in params:
        step = learning_rate * grads[name]
        params[name] -= step


In [20]:
# 训练
def train(X, y, Xtest, ytest, learning_rate=1e-3, reg=1e-5, epochs=100, batch_size=20):
    """Train a two-layer network (10 ReLU units + softmax) with mini-batch SGD.

    Args:
        X: 2-D array of training inputs, one sample per row.
        y: integer training labels; assumed to take values 0..K-1, so the
            number of classes is ``max(y) + 1``.
        Xtest, ytest: held-out inputs and labels, used only for the accuracy
            report.
        learning_rate: step size passed to ``sgd_update``.
        reg: L2 regularization strength passed to ``compute_neural_net_loss``.
        epochs: number of passes over the training data.
        batch_size: number of samples per mini-batch.

    Returns:
        dict with the trained parameters 'W1', 'b1', 'W2', 'b2'.

    Every 10th epoch a line is printed with the loss of that epoch's last
    mini-batch and the accuracy on the full training and test sets.
    """
    num_train, dim = X.shape
    num_classes = np.max(y) + 1  # assume y takes values 0...K-1 where K is number of classes
    # Only whole batches are used; leftover samples are skipped for that epoch
    # (they are reshuffled into the next one). At least one batch is always
    # run, so a dataset smaller than batch_size still trains, on a single
    # batch containing every sample.
    num_iters_per_epoch = max(1, num_train // batch_size)

    # Two-layer network: a ReLU layer with 10 units followed by a softmax
    # layer. Weights start as small Gaussian noise, biases as zeros.
    hidden_units = 10
    std = 0.001
    params = {}
    params['W1'] = std * np.random.randn(dim, hidden_units)
    params['b1'] = np.zeros(hidden_units)
    params['W2'] = std * np.random.randn(hidden_units, num_classes)
    params['b2'] = np.zeros(num_classes)

    # Iterate over the `epochs` argument rather than a notebook-level global,
    # so the caller's value is honoured and the function is self-contained.
    for epoch in range(epochs):
        # Shuffle the sample indices at the start of every epoch.
        perm_idx = np.random.permutation(num_train)

        # perform mini-batch SGD update
        for it in range(num_iters_per_epoch):
            idx = perm_idx[it * batch_size:(it + 1) * batch_size]
            batch_x = X[idx]
            batch_y = y[idx]

            # evaluate loss and gradient
            loss, grads = compute_neural_net_loss(params, batch_x, batch_y, reg)

            # update parameters
            sgd_update(params, grads, learning_rate)

        # evaluate and print every 10 epochs
        if epoch % 10 == 0:
            train_acc = acc(y, predict(params, X))
            test_acc = acc(ytest, predict(params, Xtest))
            print('Epoch %4d: loss = %.2f, train_acc = %.4f, test_acc = %.4f'
                  % (epoch, loss, train_acc, test_acc))

    return params


In [21]:
# 测试
# Hyperparameters for this run.
max_epochs = 200
batch_size = 20
learning_rate = 0.1
reg = 0.001

# Load the iris data and fit the network.
train_x, train_y, test_x, test_y = get_data()
params = train(
    train_x, train_y, test_x, test_y,
    learning_rate=learning_rate,
    reg=reg,
    epochs=max_epochs,
    batch_size=batch_size,
)


def new_samples():
    """Return two hand-written flower samples (4 features each) as float32."""
    rows = [
        [6.4, 3.2, 4.5, 1.5],
        [5.8, 3.1, 5.0, 1.7],
    ]
    return np.array(rows, dtype=np.float32)


# Classify the two new flower samples with the trained network.
new_x = new_samples()
predictions = predict(params, new_x)

print("New Samples, Class Predictions:    {}\n".format(predictions))

Epoch    0: loss = 1.14, train_acc = 0.3500, test_acc = 0.2667
Epoch   10: loss = 1.12, train_acc = 0.3500, test_acc = 0.2667
Epoch   20: loss = 0.55, train_acc = 0.7583, test_acc = 0.6000
Epoch   30: loss = 0.34, train_acc = 0.7917, test_acc = 0.8667
Epoch   40: loss = 0.45, train_acc = 0.9333, test_acc = 0.9333
Epoch   50: loss = 0.15, train_acc = 0.9667, test_acc = 0.9667
Epoch   60: loss = 0.19, train_acc = 0.9583, test_acc = 0.9667
Epoch   70: loss = 0.08, train_acc = 0.9500, test_acc = 0.9333
Epoch   80: loss = 0.12, train_acc = 0.9333, test_acc = 0.9333
Epoch   90: loss = 0.10, train_acc = 0.9917, test_acc = 0.9667
Epoch  100: loss = 0.19, train_acc = 0.9667, test_acc = 0.9667
Epoch  110: loss = 0.60, train_acc = 0.6917, test_acc = 0.7333
Epoch  120: loss = 0.37, train_acc = 0.9333, test_acc = 0.9333
Epoch  130: loss = 0.19, train_acc = 0.9750, test_acc = 0.9667
Epoch  140: loss = 0.07, train_acc = 0.9417, test_acc = 0.9667
Epoch  150: loss = 0.18, train_acc = 0.9083, test_acc =