In [49]:
import math
import h5py
import torch
import numpy as np
import copy

# 读取数据集
train_dataset = h5py.File('datasets/train_catvnoncat.h5', "r")
test_dataset = h5py.File('datasets/test_catvnoncat.h5', "r")

X_train = torch.tensor(train_dataset["train_set_x"][:]).cuda(0)
Y_train = torch.tensor(train_dataset["train_set_y"][:]).cuda(0)

X_train = X_train.reshape(209,64*64*3).cuda(0)

RuntimeError: CUDA error: an illegal memory access was encountered
CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1.
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.


![img](img/%E7%A6%BB%E5%B7%AE%E5%BD%92%E4%B8%80%E5%8C%96.png)

In [None]:
# 离差归一化

def min_max_data(x):
    return (x - x.min()) / (x.max() - x.min())

X_train = min_max_data(X_train)

In [None]:
# 激活函数定义

def relu(x):
    return torch.relu(x)

def sigmoid(x):
    return torch.sigmoid(x)

In [None]:
# 参数初始化

def init_parameters(dimensions):
    parameters = {}
    layer_num = len(dimensions)
    Xavier_rand = math.sqrt(6 / (dimensions[0]+dimensions[layer_num-1]))
    for i in range(layer_num):
        if i != 0:
            parameters['w'+str(i)] = torch.Tensor(dimensions[i],dimensions[i-1]).uniform_(-Xavier_rand, Xavier_rand).cuda(0)
            parameters['w'+str(i)].requires_grad = False
        else:
            pass
        parameters['b'+str(i)] = torch.Tensor(dimensions[i]).zero_().cuda(0)
        parameters['b'+str(i)].requires_grad = False
    return parameters

In [None]:
# 损失计算

def loss_func(y,y_pre):
    j = -(y * torch.log(y_pre) + (1 - y) * torch.log(1 - y_pre))
    return j

In [None]:
# 前向传播

def forward_propagation(X_img,dimensions,activation,parameters):
    for key in parameters.keys():
        parameters[key] = parameters[key].detach()
        parameters[key].requires_grad = True
        parameters[key].retain_grad = True
    cache = {}
    layer_num = len(dimensions)
    for i in range(layer_num):
        if i == 0:
            cache['z'+str(i)] = X_img + parameters['b'+str(i)]
        else:
            cache['z'+str(i)] = (parameters['w'+str(i)] @ cache['a'+str(i-1)]) + parameters['b'+str(i)]
        cache['a'+str(i)] = activation[i](cache['z'+str(i)])
    return cache

In [None]:
# 反向传播

def backward_propagation(cache,Y_lab,parameters):
    grad_parameters = {}
    layer_num = len(cache)//2
    y = Y_lab
    y_pre = cache['a'+str(layer_num-1)]
    j = loss_func(y,y_pre)
    j.retain_graph = True
    j.backward()
    for i in range(layer_num):
        grad_parameters['db'+str(i)] = parameters['b'+str(i)].grad
        if i != 0:
            grad_parameters['dw'+str(i)] = parameters['w'+str(i)].grad
        else:
            pass
    j.retain_graph = False
    for key in parameters.keys():
        parameters[key].requires_grad = False
        parameters[key].retain_grad = False
    return grad_parameters

In [None]:
# 更新参数

def update_parameters(parameters,grad_parameters,learn_rate = 0.01):
    for key in parameters.keys():
        parameters[key] = parameters[key] - grad_parameters['d'+key] * learn_rate
    return parameters

In [None]:
# 预测函数

def predict(dimensions,activation,X_img,parameters,cat=False):
    cache = {}
    layer_num = len(dimensions)
    for i in range(layer_num):
        if i == 0:
            cache['z'+str(i)] = X_img + parameters['b'+str(i)]
        else:
            cache['z'+str(i)] = (parameters['w'+str(i)] @ cache['a'+str(i-1)]) + parameters['b'+str(i)]
        cache['a'+str(i)] = activation[i](cache['z'+str(i)])
    if cat:
        if cache['a'+str(layer_num-1)] > 0.5:
            return 1
        else:
            return 0
    else:
        return cache['a'+str(layer_num-1)]

In [None]:
# 准确度评估

def model_accuracy(X,Y,dimensions,activation,parameters):
    img_num = torch.numel(Y)
    ac_num = 0
    for i in range(img_num):
        y_pre = predict(dimensions,activation,X[i],parameters,cat=True)
        y = Y[i]
        if y_pre == y:
            ac_num += 1
        else:
            continue
    return (ac_num / img_num)

In [None]:
# 损失评估

def loss_accuracy(X,Y,dimensions,activation,parameters):
    img_num = torch.numel(Y)
    loss_sum = 0
    for i in range(img_num):
        y_pre = predict(dimensions,activation,X[i],parameters,cat=False)
        y = Y[i]
        loss_sum += loss_func(y,y_pre)
    return (loss_sum / img_num)

In [None]:
# dimensions = [64*64*3,20,7,1]
# activation = [relu,relu,relu,sigmoid]

# parameters = init_parameters(dimensions)

# index = 4
# y = Y_train[index]
# print(y)
# y_pre = predict(dimensions,activation,X_train[index],parameters,cat=False)
# print(y_pre)
# loss_func(y,y_pre)

In [None]:
# dimensions = [64*64*3,20,7,1]
# activation = [relu,relu,relu,sigmoid]

# index = 4

# parameters = init_parameters(dimensions)
# predict(dimensions,activation,X_train[index],parameters,cat=False)
# print(loss_accuracy(X_train,Y_train,dimensions,activation,parameters))

In [None]:
# 小批量训练

def batch_train(current_batch,X_train,Y_train,batch_size,dimensions,activation,parameters):
    num = len(Y_train)
    if current_batch == num // batch_size:
        batch_max = batch_size
    else:
        batch_max = num - current_batch * batch_size
    index = current_batch * batch_size
    cache = forward_propagation(X_train[index],dimensions,activation,parameters)
    grad_parameters = backward_propagation(cache,Y_train[index],parameters)
    grad_sum = copy.deepcopy(grad_parameters)
    for i in range(batch_max):
        index = current_batch * batch_size + i
        cache = forward_propagation(X_train[index],dimensions,activation,parameters)
        grad_parameters = backward_propagation(cache,Y_train[index],parameters)
        for key in grad_sum.keys():
            grad_sum[key] += grad_parameters[key]
    for keys in grad_sum.keys():
        grad_sum[keys] /= batch_max
    return grad_sum

In [None]:
# 完整模型

def nn_model(X_train,Y_train,batch_size,dimensions,activation,iter,iter_print,learn_rate):
    num = len(Y_train)
    parameters = init_parameters(dimensions)
    iter_num = 0
    while(iter_num <= iter):
        for i in range(num//batch_size):
            grad_parameters = batch_train(i,X_train,Y_train,batch_size,dimensions,activation,parameters)
            parameters = update_parameters(parameters,grad_parameters,learn_rate)
        if iter_num % iter_print == 0:
            print('第',iter_num,'次迭代')
            print('模型准确率：',model_accuracy(X_train,Y_train,dimensions,activation,parameters)*100,' %')
            print('模型损失：',loss_accuracy(X_train,Y_train,dimensions,activation,parameters))
        iter_num += 1
    return parameters

In [None]:
# dimensions = [64*64*3,20,7,1]
# activation = [relu,relu,relu,sigmoid]

# index = 4

# parameters = init_parameters(dimensions)
# cache = forward_propagation(X_train[index],dimensions,activation,parameters)
# grad = backward_propagation(cache,Y_train[index],parameters)
# print(grad['dw1'])
# parameters = update_parameters(parameters,grad,0.01)
# print('-----------------------------------------------------------')
# print(loss_accuracy(X_train,Y_train,dimensions,activation,parameters))
# print('-----------------------------------------------------------')

# index = 3
# cache = forward_propagation(X_train[index],dimensions,activation,parameters)
# grad = backward_propagation(cache,Y_train[index],parameters)
# print(grad['dw1'])
# parameters = update_parameters(parameters,grad,0.01)

# print('-----------------------------------------------------------')
# print(loss_accuracy(X_train,Y_train,dimensions,activation,parameters))
# print('-----------------------------------------------------------')

In [None]:
# cache = forward_propagation(X_train[index],dimensions,activation,parameters)
# grad = backward_propagation(cache,Y_train[index],parameters)
# print(grad['dw1'])
# # parameters = update_parameters(parameters,grad,0.01)
# print('-----------------------------------------------------------------------------------')
# print(parameters['w1'])
# parameters = update_parameters(parameters,grad,0.01)
# print('-----------------------------------------------------------------------------------')
# print(parameters['w1'])
# cache = forward_propagation(X_train[index],dimensions,activation,parameters)
# grad = backward_propagation(cache,Y_train[index],parameters)
# # print(grad)

In [None]:
# print(grad['dw1'])

# iter = 2000
# while(iter):
#     cache = forward_propagation(X_train[index],dimensions,activation,parameters)
#     grad = backward_propagation(cache,Y_train[index],parameters)
#     parameters = update_parameters(parameters,grad,0.001)
#     iter = iter - 1

# print(grad['dw1'])

In [None]:
dimensions = [64*64*3,45*45,5*5,7,1]
activation = [relu,relu,relu,relu,sigmoid]

parameters = nn_model(X_train,Y_train,5,dimensions,activation,2000,5,0.01)

第 0 次迭代
模型准确率： 65.55023923444976  %
模型损失： tensor([0.6552], device='cuda:0')


RuntimeError: CUDA error: CUBLAS_STATUS_EXECUTION_FAILED when calling `cublasSgemv(handle, op, m, n, &alpha, a, lda, x, incx, &beta, y, incy)`