# 导包

In [None]:
import numpy as np
# 基本矩阵运算
import math
# 基本数学运算
import matplotlib.pyplot as plt
# 图片绘制
import h5py
# 数据集读取
import copy
# 深浅拷贝

# 函数定义

In [None]:
# 图片绘制

def paint(X):
    """Show ``X`` as a grayscale image in a newly created matplotlib figure.

    Args:
        X: Image data accepted by ``plt.imshow``.
    """
    # Open a fresh figure (canvas) before drawing.
    plt.figure()
    plt.imshow(X, cmap='gray')
    plt.show()

In [None]:
# 图片格式转化

def dataset_rgb_to_gray(X):
    """Convert a batch of RGB images to grayscale.

    Every pixel becomes ``0.299 * R + 0.587 * G + 0.114 * B``. The whole
    batch is converted with a single vectorized expression instead of a
    Python-level loop over every pixel, and an empty batch (first axis of
    length 0) is handled instead of raising ``IndexError``.

    Args:
        X: Array-like indexed as ``X[image][row][col][channel]`` whose last
            axis holds at least three channels in R, G, B order. Channels
            beyond the third are ignored.

    Returns:
        A float64 ``numpy.ndarray`` with the shape of ``X`` minus its last
        axis.
    """
    # Convert once to float64 so the arithmetic matches the float64 output
    # buffer the per-pixel version wrote into.
    pixels = np.asarray(X, dtype=np.float64)
    return pixels[..., 0] * 0.299 + pixels[..., 1] * 0.587 + pixels[..., 2] * 0.114

In [None]:
# 激活函数定义
def null(x):
    """Identity activation: return the argument itself, unchanged."""
    passthrough = x
    return passthrough

def relu(x):
    """Rectified linear unit: element-wise maximum of 0 and ``x``."""
    floor = 0
    return np.maximum(floor, x)

def relu_back(x):
    """Return the derivative of ReLU evaluated element-wise at ``x``.

    The result is 1 where ``x > 0`` and 0 elsewhere, in the dtype of ``x``.

    The previous implementation bound ``dx = x`` and then assigned into
    ``dx``, which overwrote the caller's array (for example a cached
    pre-activation) with the 0/1 mask. This version builds a new array and
    leaves ``x`` untouched.

    Args:
        x: Array-like of pre-activation values.

    Returns:
        A new ``numpy.ndarray`` of zeros and ones with the shape and dtype
        of ``x``.
    """
    values = np.asarray(x)
    return (values > 0).astype(values.dtype)

def sigmoid(x):
    """Logistic function ``1 / (1 + exp(-x))``, applied element-wise."""
    denominator = 1 + np.exp(-x)
    return 1.0 / denominator


def sigmoid_back(x):
    """Derivative of the logistic function at ``x``: ``s * (1 - s)``."""
    activated = sigmoid(x)
    complement = 1 - activated
    return activated * complement

In [None]:
# 读取数据集
# NOTE(review): both HDF5 handles stay open for the rest of the session;
# close them once nothing else needs to read from the files.
train_dataset = h5py.File('datasets/train_catvnoncat.h5', "r")
test_dataset = h5py.File('datasets/test_catvnoncat.h5', "r")

# Labels are copied out of the files as plain arrays.
Y_train = np.array(train_dataset["train_set_y"][:])
Y_test = np.array(test_dataset["test_set_y"][:])

# Images: copy out of the file, then collapse the colour channels to gray.
X_train = dataset_rgb_to_gray(np.array(train_dataset["train_set_x"][:]))
X_test = dataset_rgb_to_gray(np.array(test_dataset["test_set_x"][:]))

# Flatten every image into a single row vector.
X_train = X_train.reshape(len(X_train), -1)
X_test = X_test.reshape(len(X_test), -1)

# Sample counts of the two splits.
train_num = X_train.shape[0]
test_num = X_test.shape[0]

In [None]:
# 参数初始化

def init_parameters(dimensions):
    """Create the initial biases and weights for every layer.

    Args:
        dimensions: Sequence of layer widths; index 0 is the input layer.

    Returns:
        A dict with a zero bias vector ``'b<i>'`` for every layer ``i`` and,
        for every layer after the first, a weight matrix ``'w<i>'`` of shape
        ``(dimensions[i], dimensions[i-1])`` drawn from
        ``np.random.randn`` and scaled by 0.01.
    """
    parameters = {}
    for index, width in enumerate(dimensions):
        parameters[f'b{index}'] = np.zeros(width)
        if index > 0:
            # The input layer has a bias only; weights start at layer 1.
            fan_in = dimensions[index - 1]
            parameters[f'w{index}'] = np.random.randn(width, fan_in) * 0.01
    return parameters

In [None]:
# 正向传播

def forward(dimensions, activation, X_img_array, parameters):
    """Run one sample through the network and record every layer's values.

    Args:
        dimensions: Sequence of layer widths; only its length is used here.
        activation: Sequence of callables, one per layer.
        X_img_array: Input vector for layer 0.
        parameters: Dict holding ``'b<i>'`` for every layer and ``'w<i>'``
            for every layer after the first.

    Returns:
        A dict with the pre-activation ``'z<i>'`` and the activation output
        ``'a<i>'`` of every layer ``i``.
    """
    cache = {}
    for layer in range(len(dimensions)):
        bias = parameters[f'b{layer}']
        if layer:
            # Hidden/output layers: weighted sum of the previous activations.
            pre_activation = np.dot(parameters[f'w{layer}'], cache[f'a{layer - 1}']) + bias
        else:
            # Layer 0 only shifts the raw input by its bias.
            pre_activation = X_img_array + bias
        cache[f'z{layer}'] = pre_activation
        cache[f'a{layer}'] = activation[layer](pre_activation)
    return cache

In [None]:
# 梯度下降

def grad_parameters(dimensions, activation, differentiator, cache, Y_lab, parameters):
    """Back-propagate the cross-entropy loss of a single sample.

    The loss is ``J = -(y*log(y_pre) + (1-y)*log(1-y_pre))`` where ``y_pre``
    is the activation of the last layer. Its derivative ``dJ/dy_pre`` enters
    the chain rule exactly once, at the output layer; every earlier layer
    receives it through ``db`` of the layer after it. The previous version
    multiplied by ``dJ/dy_pre`` again at every hidden layer and in every
    weight gradient, so the returned values were not the gradient of ``J``.

    Args:
        dimensions: Sequence of layer widths; only its length is used here.
        activation: Sequence of activation callables, one per layer.
        differentiator: Dict mapping each activation callable to the
            callable computing its derivative.
        cache: Dict of ``'z<i>'`` / ``'a<i>'`` values as produced by the
            forward pass.
        Y_lab: Ground-truth label of the sample.
        parameters: Dict holding the weight matrices ``'w<i>'``.

    Returns:
        A dict with ``'db<i>'`` for every layer and ``'dw<i>'`` for every
        layer after the first.
    """
    last = len(dimensions) - 1
    y_pre = cache['a' + str(last)]
    y = Y_lab
    # dJ/dy_pre for the binary cross-entropy loss.
    j_y = ((1 - y) / (1 - y_pre)) - (y / y_pre)
    grads = {}
    for i in range(last, -1, -1):
        local_slope = differentiator[activation[i]](cache['z' + str(i)])
        if i == last:
            grads['db' + str(i)] = j_y * local_slope
        else:
            # The upstream gradient already contains dJ/dy_pre.
            upstream = np.dot(parameters['w' + str(i + 1)].T, grads['db' + str(i + 1)])
            grads['db' + str(i)] = local_slope * upstream
        if i != 0:
            # dJ/dw[r, c] = dJ/dz[r] * a_prev[c]
            grads['dw' + str(i)] = np.outer(grads['db' + str(i)], cache['a' + str(i - 1)])
    return grads

In [None]:
# 损失计算

def J(y, y_pre):
    """Binary cross-entropy between label ``y`` and prediction ``y_pre``.

    Computes ``-(y*log(p) + (1-y)*log(1-p))`` where ``p`` is ``y_pre``
    clipped to ``[1e-15, 1 - 1e-15]``. The clipping keeps the loss finite
    when a prediction is exactly 0 or 1; without it the expression yields
    ``inf`` or ``nan`` (``0 * log(0)``).

    Args:
        y: Ground-truth label(s), 0 or 1.
        y_pre: Predicted probability or probabilities.

    Returns:
        The element-wise loss as float64.
    """
    eps = 1e-15
    # Cast to float64 first so the upper bound does not round to 1.0.
    p = np.clip(np.asarray(y_pre, dtype=np.float64), eps, 1 - eps)
    return -(y * np.log(p) + (1 - y) * np.log(1 - p))


def one_loss(dimensions, cache, Y_lab):
    """Loss of one sample, read from the output activation in ``cache``.

    Args:
        dimensions: Sequence of layer widths; its length selects the
            output-layer entry ``'a<last>'`` of ``cache``.
        cache: Dict of layer values as produced by the forward pass.
        Y_lab: Ground-truth label of the sample.

    Returns:
        ``J(Y_lab, y_pre)`` for the output activation ``y_pre``.
    """
    y_pre = cache['a' + str(len(dimensions) - 1)]
    # Reuse J so the loss formula lives in one place.
    return J(Y_lab, y_pre)

def avg_loss(dimensions, activation, X_train, Y_train, parameters):
    """Average the per-sample loss over a whole data set.

    Args:
        dimensions: Sequence of layer widths.
        activation: Sequence of activation callables, one per layer.
        X_train: Samples; each ``X_train[i]`` is fed to ``forward``.
        Y_train: Labels aligned with ``X_train``.
        parameters: Current network parameters.

    Returns:
        The sum of ``one_loss`` over all samples divided by their count.
    """
    sample_count = len(X_train)
    total = 0
    for index in range(sample_count):
        outputs = forward(dimensions, activation, X_train[index], parameters)
        total += one_loss(dimensions, outputs, Y_train[index])
    total /= sample_count
    return total

In [None]:
# # 梯度下降验证

# def db_check (b,db,dimensions,activation,X_img_array,Y_lab,parameters,theta = 0.00001):
#     y = Y_lab
#     parameters_min = parameters
#     parameters_max = parameters
#     parameters_min[str(b)] -= theta
#     parameters_max[str(b)] += theta
#     cache_min = forward(dimensions,activation,X_img_array,parameters_min)
#     cache_max = forward(dimensions,activation,X_img_array,parameters_max)
#     y_min = cache_min['a'+str(len(dimensions)-1)]
#     y_max = cache_max['a'+str(len(dimensions)-1)]
#     db_pre = (J(y,y_max) - J(y,y_min)) / (theta * 2)
#     db = db.reshape(db[0],1)
#     return db - db_pre

In [None]:
# dimensions = [64*64,20,5,1]
# activation = [relu,relu,relu,sigmoid]
# differentiator = {sigmoid:sigmoid_back,relu:relu_back}

# index = 4
# parameters = init_parameters(dimensions)
# grad_parameters = grad_parameters(dimensions,activation,differentiator,forward(dimensions,activation,X_train[index],parameters),Y_train[index],parameters)
# print(parameters['b1'],grad_parameters['db1'],dimensions,activation,X_train[index],Y_train[index],parameters)

In [None]:
# 参数更新

def update_parameters(grad_parameters, parameters, learn_rate):
    """Apply one gradient-descent step and return the new parameters.

    Each entry whose key starts with ``'w'`` or ``'b'`` becomes
    ``value - learn_rate * grad_parameters['d' + key]``. The subtraction
    already allocates a fresh array, so those entries are no longer
    deep-copied first; only entries that are not updated are copied. The
    input dict and its arrays are never modified.

    Args:
        grad_parameters: Dict of gradients keyed ``'d' + <parameter key>``.
        parameters: Dict of current parameters.
        learn_rate: Step size.

    Returns:
        A new dict of updated parameters sharing no arrays with
        ``parameters``.
    """
    up_parameters = {}
    for key, value in parameters.items():
        if key[0] in ('w', 'b'):
            up_parameters[key] = value - learn_rate * grad_parameters['d' + key]
        else:
            up_parameters[key] = copy.deepcopy(value)
    return up_parameters

In [None]:
# 分批量训练

def train_batch(dimensions,activation,differentiator,X_train,Y_train,batch_size,current_batch,parameters):
    """Average the per-sample gradients over one mini-batch.

    The batch covers samples ``current_batch * batch_size`` up to, but not
    including, ``min((current_batch + 1) * batch_size, len(X_train))``, so a
    trailing partial batch is averaged over the samples it really contains.

    Back-propagation now runs once per sample. The previous loop called
    ``grad_parameters`` again for every gradient key of every sample and
    kept a single entry of each result.

    Args:
        dimensions: Sequence of layer widths.
        activation: Sequence of activation callables, one per layer.
        differentiator: Dict mapping each activation to its derivative.
        X_train: Training samples.
        Y_train: Labels aligned with ``X_train``.
        batch_size: Number of samples in a full batch.
        current_batch: Zero-based index of the batch to process.
        parameters: Current network parameters.

    Returns:
        A dict with the same keys as a single ``grad_parameters`` result,
        holding the mean gradient over the batch.

    Raises:
        ValueError: If ``batch_size`` is not positive.
        IndexError: If the batch starts at or past the end of ``X_train``.
    """
    if batch_size <= 0:
        raise ValueError('batch_size must be a positive integer')
    train_num = len(X_train)
    start = current_batch * batch_size
    if start >= train_num:
        raise IndexError('batch %d starts past the last training sample' % current_batch)
    stop = min(start + batch_size, train_num)

    grad_accu = None
    for index in range(start, stop):
        cache = forward(dimensions,activation,X_train[index],parameters)
        sample_grads = grad_parameters(dimensions,activation,differentiator,cache,Y_train[index],parameters)
        if grad_accu is None:
            # The first sample's gradients double as the accumulators.
            grad_accu = sample_grads
        else:
            for key, value in sample_grads.items():
                grad_accu[key] += value

    batch_max = stop - start
    for key in grad_accu:
        grad_accu[key] /= batch_max
    return grad_accu

In [None]:
# 预测函数

def is_cat(dimensions, cache):
    """Tell whether the output-layer activation in ``cache`` exceeds 0.5.

    Args:
        dimensions: Sequence of layer widths; its length selects the
            output-layer entry ``'a<last>'`` of ``cache``.
        cache: Dict of layer values as produced by the forward pass.

    Returns:
        The result of comparing the output activation with 0.5.
    """
    output_key = 'a' + str(len(dimensions) - 1)
    return cache[output_key] > 0.5

def Test_accuracy(dimensions,activation,X_test,Y_test,parameters):
    """Report the share of samples whose prediction equals its label.

    Args:
        dimensions: Sequence of layer widths.
        activation: Sequence of activation callables, one per layer.
        X_test: Samples to classify.
        Y_test: Labels aligned with ``X_test``.
        parameters: Current network parameters.

    Returns:
        The accuracy in percent as a string followed by ``' %'``.
    """
    all_num = len(X_test)
    correct_num = 0
    for i in range(all_num):
        outputs = forward(dimensions,activation,X_test[i],parameters)
        if is_cat(dimensions,outputs) == Y_test[i]:
            correct_num += 1
    ac_rate = correct_num / all_num * 100
    return str(ac_rate) + ' %'

In [None]:
# 完整模型

def nn_module(dimensions,activation,differentiator,X_train,Y_train,batch_num,iter,learn_rate):
    """Train the network with mini-batch gradient descent.

    The batch size is ``len(X_train) / batch_num`` rounded UP, and every
    pass walks over all batches including a shorter trailing one. With the
    previous floor division the last ``len(X_train) % batch_num`` samples
    were never used for training, and ``batch_num > len(X_train)`` produced
    a batch size of 0. When ``len(X_train)`` is a multiple of ``batch_num``
    the schedule is unchanged.

    After every batch the iteration number, the accuracy and the average
    loss on the training data are printed.

    Args:
        dimensions: Sequence of layer widths.
        activation: Sequence of activation callables, one per layer.
        differentiator: Dict mapping each activation to its derivative.
        X_train: Training samples.
        Y_train: Labels aligned with ``X_train``.
        batch_num: Upper bound on the number of batches per pass.
        iter: Number of passes over the training data.
        learn_rate: Gradient-descent step size.

    Returns:
        The trained parameter dict.

    Raises:
        ValueError: If ``X_train`` is empty.
    """
    train_num = len(X_train)
    if train_num == 0:
        raise ValueError('X_train must contain at least one sample')
    parameters = init_parameters(dimensions)
    # Ceiling divisions: every sample lands in exactly one batch.
    batch_size = -(-train_num // batch_num)
    batches_per_pass = -(-train_num // batch_size)
    i_num = 0
    while i_num < iter:
        for current_batch in range(batches_per_pass):
            grads = train_batch(dimensions,activation,differentiator,X_train,Y_train,batch_size,current_batch,parameters)
            parameters = update_parameters(grads,parameters,learn_rate)
            print('第'+str(i_num)+'次迭代：')
            print('模型准确率：',Test_accuracy(dimensions,activation,X_train,Y_train,parameters))
            print('模型损失：',avg_loss(dimensions,activation,X_train,Y_train,parameters))
        i_num += 1
    return parameters

In [None]:
# Layer widths: squares of the listed side lengths, from the 64*64 input
# down to a single output unit.
dimensions = [side * side for side in (64, 55, 44, 33, 28, 16, 9, 5, 3, 1)]
# ReLU on every layer except the last, which uses the sigmoid.
activation = [relu] * (len(dimensions) - 1) + [sigmoid]
# Derivative lookup keyed by the activation function object.
differentiator = {null: null, sigmoid: sigmoid_back, relu: relu_back}
parameters = nn_module(
    dimensions,
    activation,
    differentiator,
    X_train,
    Y_train,
    batch_num=10,
    iter=20000,
    learn_rate=0.0075,
)