##  手写一个浅层神经网络
1. 定义网络结构
2. 初始化参数
3. 循环
    1. 前向传播
    2. 计算损失
    3. 反向传播
    4. 更新参数
4. 结束

In [86]:
import numpy as np
import h5py
import scipy
from PIL import Image
from scipy import ndimage
import data
load_dataset = data.load_dataset

train_set_x_orig, train_set_y, test_set_x_orig, test_set_y, classes = load_dataset()
print("训练集的样本数", train_set_x_orig.shape[0])
print("测试集的样本数", test_set_x_orig.shape[0])
print("train_set_y的维度", train_set_y.shape)
print("test_set_y的维度", test_set_y.shape)
print("train_set_x_orig的维度", train_set_x_orig.shape)
print("test_set_x_orig的维度", test_set_x_orig.shape)
#转换成一维向量
train_x = train_set_x_orig.reshape(train_set_x_orig.shape[0], -1).T
test_x = test_set_x_orig.reshape(test_set_x_orig.shape[0], -1).T
print(train_x.shape, test_x.shape)

训练集的样本数 209
测试集的样本数 50
train_set_y的维度 (1, 209)
test_set_y的维度 (1, 50)
train_set_x_orig的维度 (209, 64, 64, 3)
test_set_x_orig的维度 (50, 64, 64, 3)
(12288, 209) (12288, 50)


In [87]:
import numpy as np
#sigmoid function 
def sigmoid(x):
    # 对大的正值和负值进行特殊处理，避免溢出
     # 限制输入范围，防止exp计算溢出
    return 1 / (1 + np.exp(-x))

#sigmoid derivative
def sigmoid_derivative(x):
    return sigmoid(x) * (1 - sigmoid(x))  # Derivative of sigmoid function: f'(x) = f(x) * (1 - f(x))

### 定义网络结构

In [88]:
def layer_sizes(X,Y):
    n_x = X.shape[0]
    n_h = 4  # 假设隐藏层有4个神经元
    n_y = Y.shape[0]
    return (n_x,n_h,n_y)

### 初始化模型参数

In [89]:
def init_param(n_x,n_h,n_y):
    np.random.seed(2)
    #创建 隐藏层参数
    W1 = np.random.randn(n_h,n_x)*0.01
    b1 = np.zeros((n_h,1))
    #创建 输出层参数
    W2 = np.random.randn(n_y,n_h)*0.01
    b2 = np.zeros((n_y,1))

    #检查
    assert(W1.shape == (n_h,n_x))
    assert(b1.shape == (n_h,1))
    assert(W2.shape == (n_y,n_h))
    assert(b2.shape == (n_y,1))

    parameters = {"W1":W1,
    "b1":b1,
    "W2":W2,
    "b2":b2}

    print(W1.shape,b1.shape,W2.shape,b2.shape)

    return parameters


### 前向传播

In [90]:
def propagate(X, parameters):
    W1 = parameters["W1"]
    b1 = parameters["b1"]
    W2 = parameters["W2"]
    b2 = parameters["b2"]

    Z1 = np.dot(W1, X) + b1
    A1 = np.tanh(Z1)
    Z2 = np.dot(W2, A1) + b2
    A2 = sigmoid(Z2)

    cache = {"Z1": Z1,
            "A1": A1,
            "Z2": Z2,
            "A2": A2}

    return A2, cache

### 计算损失
多样本计算交叉熵损失 求和求平均

In [91]:
def count_loss(A2,Y):
    m = Y.shape[1]
    J = -1/m*np.sum(np.multiply(np.log(A2),Y) + np.multiply((1 - Y),np.log(1 - A2))) #最大化对数似然 -> 最小化损失函数
    cost = np.squeeze(J) 

    assert(isinstance(cost, float))

    return cost

### 反向传播

In [92]:
def backward_propagation(X, Y, cache,parameters):
    m = X.shape[1]
    W1 = parameters["W1"]
    W2 = parameters["W2"]
    A1 = cache["A1"]
    A2 = cache["A2"]
    dZ2 = A2 - Y #公式推到得到
    dW2 = 1/m * np.dot(dZ2,A1.T) 
    db2 = 1/m * np.sum(dZ2,axis = 1,keepdims = True)
    dZ1 = W2.T * dZ2 * (1- np.power(A1,2))
    dW1 = 1/m * np.dot(dZ1,X.T)
    db1 = 1/m * np.sum(dZ1,axis = 1,keepdims = True)

    grads = {"dW1": dW1,
             "db1": db1,
             "dW2": dW2,
             "db2": db2}    
    return grads

### 更新参数

In [93]:
def update_parameters(parameters, grads, learning_rate):
    dW1 = grads['dW1']
    db1 = grads['db1']
    dW2 = grads['dW2']
    db2 = grads['db2']

    W1 = parameters['W1']
    b1 = parameters['b1']
    W2 = parameters['W2']
    b2 = parameters['b2']

    W1 = W1 - learning_rate * dW1
    b1 = b1 - learning_rate * db1
    W2 = W2 - learning_rate * dW2
    b2 = b2 - learning_rate * db2

    parameters = {"W1": W1,
                  "b1": b1,
                  "W2": W2,
                  "b2": b2}

    return parameters
    

### 建立网络模型训练逻辑

In [94]:
def model(X,Y,num_iterations = 10000,learning_rate=0.05,print_cost = False):
    n_x,n_h,n_y = layer_sizes(X,Y) #定义网格结构
    parameters = init_param(n_x,n_h,n_y) #初始化参数
    for i in range(0,num_iterations): #迭代训练
        A2,cache = propagate(X,parameters) #前向传播
        cost = count_loss(A2,Y) #计算损失
        grads = backward_propagation(X,Y,cache,parameters) #反向传播
        parameters  = update_parameters(parameters,grads,learning_rate) #更新参数
        if i % 100 == 0 and print_cost: #每1000次打印损失
            print("第%i代: %f" %(i, cost))
    return parameters


### 预测结果

In [95]:
def predict(parameters,X):
    A2,cache = propogate(parameters,X) #前向传播预测概率
    predictions = np.array(
        [1 if A2[0,i] > 0.5 else 0 for i in range(A2.shape[1])]
    ).reshape(A2.shape) #概率大于0.5为1，小于0.5为0

    return predictions

In [None]:
num_iterations = 1000
learning_rate = 0.005
parameters  = model(train_x, train_set_y, num_iterations, learning_rate,True)
predictions = predict(test_x,parameters)
print('准确率: %d' % float((np.dot(test_set_y,predictions.T) + np.dot(1-test_set_y,1-predictions.T))/float(test_set_y.size)*100) + '%')

(4, 12288) (4, 1) (1, 4) (1, 1)
第0代: 0.696842


第100代: 0.662961
第200代: 0.649107
第300代: 0.644402
第400代: 0.642669
第500代: 0.641921
第600代: 0.641510
第700代: 0.641218
第800代: 0.640972
第900代: 0.640746
准确率: 34%


  print('准确率: %d' % float((np.dot(test_set_y,predictions.T) + np.dot(1-test_set_y,1-predictions.T))/float(test_set_y.size)*100) + '%')


: 