In [1]:
import numpy as np
import copy
import torch
from numba import jit, prange
import os

# 是否有 GPU，需要配置 PyTorch GPU 环境
has_gpu = torch.cuda.is_available()

@jit
def im2col(data, kernel_size, stride):
    batch_size, input_channel, height, width = data.shape
    # 根据公式计算 feature map 的大小
    feature_height = int((height-kernel_size)/stride)+1
    feature_width = int((width-kernel_size)/stride)+1 
    
    # 初始化展平矩阵的大小, B*(H*W)*(C*K*K)
    col_data = np.zeros((batch_size, feature_height*feature_width, kernel_size*kernel_size*input_channel), dtype=np.float32)
    
    # 卷积的滑窗
    for n in prange(batch_size):
        for i in prange(feature_height):
            for j in prange(feature_width):
                # 将该窗口的数据展平保存
                col_data[n, i*feature_width+j, :] = np.ravel(data[n, :, i*stride: i*stride+kernel_size, j*stride: j*stride+kernel_size])
            
    # 返回展平后的结果，和 feature map 的高、宽
    return col_data, feature_height, feature_width

def matmul(input1, input2):
    assert input1.shape[0]==input2.shape[0], '必须相等'
    if has_gpu:
        grad = torch.sum(torch.einsum('ijk,ikl->ijl', (torch.from_numpy(input1).cuda(), torch.from_numpy(input2).cuda())), dim=0).cpu().numpy()
    else:
        grad = np.sum(np.einsum('ijk,ikl->ijl', input1, input2), axis=0)
    return grad

# 计算输入的梯度，将展开的梯度还原
def col2im(col_data, top_grad, weight, shape):
    # 参数
    batch_size, input_channel, width, height, feature_height, feature_width, kernel_size, stride= shape
    # 初始化原始梯度，和输入数据一样
    grad = np.zeros((batch_size, input_channel, width, height), dtype=np.float32)
    # 对每个样本的计算梯度
    if has_gpu:
        grad_one_ = torch.matmul(torch.from_numpy(top_grad).cuda(), torch.from_numpy(weight).cuda()).cpu().numpy()
    else:
        grad_one_ = np.matmul(top_grad, weight)
    for n in prange(batch_size):
        for i in prange(feature_height):
            for j in prange(feature_width):
                # 每个样本的梯度累加
                # 被展开的梯度，还原成原始数据的梯度
                grad[n, :, i*stride:i*stride+kernel_size, j*stride:j*stride+kernel_size] += np.reshape(grad_one_[n, i*feature_width+j, :], (input_channel, kernel_size, kernel_size))
    return grad

class conv2d(object):
    '''
    output_channel: 该层卷积核的数量
    input_channel: 输入数据的通道数，例如灰度图为 1，彩色图为 3，又或者上一层的卷积数量为 12，那下一层卷积的输入通道为 12
    kernel_size: 卷积核大小
    stride: 卷积移动的步长
    padding: 补齐
    '''
    def __init__(self, input_channel, output_channel, kernel_size, stride=1, padding=0):
        # 初始化网络权重，卷积核数量*输入通道数*卷积核大小*卷积核大小，一个卷积核应为输入通道数*卷积核大小*卷积核大小，每一层有多个卷积核
        # self.weight = (np.random.randn(output_channel, input_channel, kernel_size, kernel_size)*0.01).astype(np.float32)
        self.weight = copy.deepcopy(torch.nn.Conv2d(input_channel, output_channel, kernel_size, stride).weight.data.numpy())
        # 偏置项，每一个卷积核都对应一个偏置项，y=xW+b
        self.bias = np.zeros((output_channel), dtype=np.float32)
        # 保存各项参数
        self.output_channel = output_channel
        self.input_channel = input_channel
        self.kernel_size = kernel_size
        self.stride = stride
        self.padding = padding
        
    def forward(self, input):
        # 输入数据各个维度代表的含义: batch_size, input_channel, height, width
        batch_size, input_channel, height, width = input.shape
        # 防止一些错误的调用
        assert len(input.shape)==4, '输入必须是四维的，batch_size*channel*width*height'
        # 上一层的卷积核数量必须等于这一层的输入通道数
        assert input_channel==self.input_channel, '网络输入通道数必须与网络定义的一致'
        
        # 对输入数据进行补齐
        input = np.pad(input, [(0, ), (0, ), (self.padding, ), (self.padding, )], 'constant', constant_values=0)
        
        # 保存输入数据，后续需要计算梯度
        self.data = input
        
        # 展开数据
        self.col_data, self.feature_height, self.feature_width = im2col(input, self.kernel_size, self.stride)
        # 计算输出，是否有 GPU，如果有则使用 GPU 计算
        if has_gpu:
            feature_maps = torch.matmul(torch.from_numpy(self.col_data).cuda(), torch.from_numpy(self.weight.reshape(self.output_channel, -1).T).cuda()) + torch.from_numpy(self.bias).cuda().unsqueeze(0)
            feature_maps = torch.reshape(feature_maps, (batch_size, self.feature_height, self.feature_width, self.output_channel)).permute(0, 3, 1, 2).cpu().numpy()
        else:
            # 计算输出之后同时 reshape, transpose 还原成本来应该的输出大小
            # feature_maps = np.dot(self.col_data, self.weight.reshape(self.output_channel, -1).T) + self.bias[np.newaxis, :]
            feature_maps = np.matmul(self.col_data, self.weight.reshape(self.output_channel, -1).T) + self.bias[np.newaxis, :]
            feature_maps = np.reshape(feature_maps, (batch_size, self.feature_height, self.feature_width, self.output_channel)).transpose(0, 3, 1, 2)
        return feature_maps
    
    '''
    top_grad: 上一层的梯度，维度为 batch_size, output_channel, feature_height, feature_width，与该层输出一致
    '''
    def backward(self, top_grad, lr):
        batch_size, output_channel, feature_height, feature_width = top_grad.shape
        # 将梯度展开
        top_grad = top_grad.transpose((0, 2, 3, 1)).reshape(batch_size, feature_height*feature_width, output_channel)
        # 计算权重、偏置的梯度
        self.grad_w = matmul(self.col_data.transpose(0, 2, 1), top_grad).T.reshape(output_channel, self.input_channel, self.kernel_size, self.kernel_size)
        self.grad_b = np.sum(top_grad, axis=(0, 1))
        # 各项参数
        shape = list(self.data.shape) + [self.feature_height, self.feature_width, self.kernel_size, self.stride]
        # 计算输入的梯度
        self.grad = col2im(self.col_data, top_grad, self.weight.reshape(self.output_channel, -1), shape)    
        # 更新参数
        self.weight -= lr*self.grad_w
        self.bias -= lr*self.grad_b
        
        
class MaxPool2d(object):
    
    # 各项参数
    def __init__(self, kernel_size, stride, padding=0):
        self.kernel_size = kernel_size
        self.stride = stride
        self.padding = padding
        
    # 前向传播
    def forward(self, input):
        # input: batch_size, input channel, height, width
        assert len(input.shape)==4, '输入必须是四维的，batch_size*channel*width*height'
        batch_size, input_channel, height, width = input.shape
        
        # 计算输出的大小，和卷积层一样
        feature_height = int((height-self.kernel_size+2*self.padding)/self.stride)+1
        feature_width = int((width-self.kernel_size+2*self.padding)/self.stride)+1 
        # 初始化输出矩阵
        pooled_feature_map = np.zeros((batch_size, input_channel, feature_height, feature_width), dtype=np.float32)
        # 按设置的 padding 补齐
        input = np.pad(input, [(0, ), (0, ), (self.padding, ), (self.padding, )], 'constant', constant_values=0)
        self.data = input
        # 窗口滑动
        for i in range(feature_height):
            for j in range(feature_width):
                # 最大池化
                pooled_feature_map[:, :, i, j] = np.max(input[:, :, i*self.stride: i*self.stride+self.kernel_size, j*self.stride: j*self.stride+self.kernel_size], axis=(2, 3))
        return pooled_feature_map
    
    # 后向传播
    def backward(self, top_grad):
        # 输入数据梯度
        self.grad = np.zeros_like(self.data, dtype=np.float32)
        # 上一层梯度的维度
        batch_size, output_channel, feature_height, feature_width = top_grad.shape
        
        # 遍历输出 feature map 的每一个点
        for n in range(batch_size):
            for i in range(feature_height):
                for j in range(feature_width):        
                    # 取出这一块的数据
                    one_kernel_data = self.data[n, :, i*self.stride: i*self.stride+self.kernel_size, j*self.stride: j*self.stride+self.kernel_size]
                    # 下面三行代码都是获得最大值点的坐标
                    # 将一个矩形展开
                    channels = one_kernel_data.shape[0]
                    one_kernel_data = np.reshape(one_kernel_data, (one_kernel_data.shape[0], -1))
                    # 获取该矩形最大值所在的坐标
                    one_kernel_featuremap_argmax = np.argmax(one_kernel_data, axis=1)
                    # np.unravel_index 将被展开的坐标换算成对应的矩形内的坐标
                    argmax1, argmax2 = np.unravel_index(one_kernel_featuremap_argmax, (self.kernel_size, self.kernel_size))
                    # 误差将会被分散到最大值所在的点上
                    self.grad[n, :, i*self.stride: i*self.stride+self.kernel_size, j*self.stride: j*self.stride+self.kernel_size][np.arange(channels), argmax1, argmax2] += top_grad[n, :, i, j]
        # 去除 padding 的影响
        self.grad = self.grad[:, :, self.padding: self.grad.shape[2]-self.padding, self.padding: self.grad.shape[3]-self.padding]
        
        
import struct

# 读取 mnist 数据集
def read_mnist(filename):
    with open(filename, 'rb') as f:
        zero, data_type, dims = struct.unpack('>HBB', f.read(4))
        shape = tuple(struct.unpack('>I', f.read(4))[0] for d in range(dims))
        return np.frombuffer(f.read(), dtype=np.uint8).reshape(shape)


# softmax 函数
def softmax(input):
    exp_value = np.exp(input) #首先计算指数
    output = exp_value/np.sum(exp_value, axis=1)[:, np.newaxis] # 然后按行标准化
    return output

# 交叉熵损失函数
class CrossEntropyLossLayer():
    def __init__(self):
        pass
    
    def forward(self, input, labels):
        # 做一些防止误用的措施，输入数据必须是二维的，且标签和数据必须维度一致
        assert len(input.shape)==2, '输入的数据必须是一个二维矩阵'
        assert len(labels.shape)==2, '输入的标签必须是独热编码'
        assert labels.shape==input.shape, '数据和标签数量必须一致'
        self.data = input
        self.labels = labels
        self.prob = np.clip(softmax(input), 1e-9, 1.0) #在取对数时不能为 0，所以用极小数代替 0
        loss = -np.sum(np.multiply(self.labels, np.log(self.prob)))/self.labels.shape[0]
        return loss
    
    def backward(self):
        self.grad = (self.prob - self.labels)/self.labels.shape[0] # 根据公式计算梯度
        
# 学习率衰减
class lr_scheduler(object):
    def __init__(self, base_lr, step_size, deacy_factor=0.1):
        self.base_lr = base_lr # 最初的学习率
        self.deacy_factor = deacy_factor # 学习率衰减因子
        self.step_count = 0 # 当前的迭代次数
        self.lr = base_lr # 当前学习率
        self.step_size = step_size # 步长
        
    def step(self, step_count=1): # 默认 1 次
        self.step_count += step_count
    
    def get_lr(self):
        self.lr = self.base_lr*(self.deacy_factor**(self.step_count//self.step_size))
        return self.lr

class ReLU(object):
    def forward(self, input):
        self.data = input
        # 按照公式实现 
        return np.maximum(0, input)
    def backward(self, top_grad):
        if has_gpu:
            self.grad = (((torch.from_numpy(self.data).cuda()>0).float())*torch.from_numpy(top_grad).cuda()).cpu().numpy()
        else:
            self.grad = (self.data>0)*top_grad
        # relu 没有需要更新的参数
        
class Linear(object):
    def __init__(self, D_in, D_out):
        # 初始化权重和偏置的维度，高斯初始化权重，零初始化偏置
        # self.weight = np.random.randn(D_in, D_out).astype(np.float32)*0.01
        self.weight = copy.deepcopy(torch.nn.Linear(D_in, D_out).weight.data.numpy().transpose((1, 0)))
#         self.weight = np.random.randn(D_in, D_out)/np.sqrt(D_in).astype(np.float32)
        self.bias = np.zeros((1, D_out), dtype=np.float32)

    def forward(self, input):
        # 前行传播保存输入数据，并做线性分类
        self.data = input
        if has_gpu:
            output = (torch.matmul(torch.from_numpy(self.data).cuda(), torch.from_numpy(self.weight).cuda())+torch.from_numpy(self.bias).cuda()).cpu().numpy()
        else:
            output = np.dot(self.data, self.weight)+self.bias
        return output

    def backward(self, top_grad, lr):
        # 后向传播计算梯度，前面已经介绍了如何关于输入计算梯度
        if has_gpu:
            self.grad = (torch.matmul(torch.from_numpy(top_grad).cuda(), torch.from_numpy(self.weight.T).cuda())).cpu().numpy()
            grad_w = torch.matmul(torch.from_numpy(self.data.T).cuda(), torch.from_numpy(top_grad).cuda())
        else:
            self.grad = np.dot(top_grad, self.weight.T)
            grad_w = np.dot(self.data.T, top_grad)
        grad_b = np.mean(top_grad, axis=0)
        # 更新参数，最后一项为损失关于当前权重的梯度
        self.weight -= lr*grad_w
        # y=xW+b 关于 b 的偏导为 1，已经介绍过了
        self.bias -= lr*grad_b

class Dataloader(object):
    def __init__(self, data, labels, batch_size, shuffle=True):
        # 初始数据和标签
        self.data = data
        self.labels = labels
        # 批量大小
        self.batch_size = batch_size
        # 是否打乱，默认打乱数据集，只针对训练集
        self.shuffle = shuffle

    def __getitem__(self, index):
        # 根据下标返回数据
        return self.data[index], self.labels[index]

    def __iter__(self):
        datasize = self.data.shape[0]
        # 生成迭代序列
        data_seq = np.arange(datasize)
        if self.shuffle:
            # 打乱迭代序列
            np.random.shuffle(data_seq)
        # 生成的是 Batch 序列
        interval_list = np.append(np.arange(0, datasize, self.batch_size), datasize)
        for index in range(interval_list.shape[0]-1):
            s = data_seq[interval_list[index]:interval_list[index+1]]
            # 返回 batch 的数据
            yield self.data[s], self.labels[s]

    def __len__(self):
        # 返回数据集长度
        return self.data.shape[0]

from tqdm import tqdm_notebook
import copy

# Net 为网络结构，需要定义 backward 和 forward 操作
def train_and_test(loss_layer, net, scheduler, max_iter, train_dataloader, test_dataloader, batch_size):
    test_loss_list, train_loss_list, train_acc_list, test_acc_list = [], [], [], []
    best_net = None
    # 最高准确度，和对应权重
    best_acc = -float('inf')
    for epoch in range(max_iter):
        # 训练
        correct = 0
        total_loss = 0
        with tqdm_notebook(total=len(train_dataloader)//batch_size+1) as pbar:
            for data, labels in train_dataloader:
                # 前向输出概率
                train_pred = net.forward(data)

                # 计算准确度
                pred_labels = np.argmax(train_pred, axis=1)
                real_labels = np.argmax(labels, axis=1)
                correct += np.sum(pred_labels==real_labels)

                # 前向输出损失
                loss = loss_layer.forward(train_pred, labels)
                total_loss += loss*data.shape[0]

                # 反向更新参数
                loss_layer.backward()
                # print(epoch, loss, correct)
                net.backward(loss_layer.grad, scheduler.get_lr())
                pbar.update(1)
            
        acc = correct/len(train_dataloader)
        print('Epoch {}/{}: train accuracy, {},  train loss: {}'.format(epoch+1, max_iter, acc, total_loss/len(train_dataloader)))
        train_acc_list.append(acc)
        train_loss_list.append(total_loss/len(train_dataloader))
        scheduler.step()
        
        # 测试
        correct = 0
        total_loss = 0
        for data, labels in test_dataloader:
            # 前向输出概率
            test_pred = net.forward(data)

            # 前向输出损失
            loss = loss_layer.forward(test_pred, labels)
            total_loss += loss*data.shape[0]

            # 计算准确度
            pred_labels = np.argmax(test_pred, axis=1)
            real_labels = np.argmax(labels, axis=1)
            correct += np.sum(pred_labels==real_labels)
        acc = correct/len(test_dataloader)
        test_acc_list.append(acc)
        test_loss_list.append(total_loss/len(test_dataloader))
        print('Epoch {}/{}: test accuracy, {},  test loss: {}'.format(epoch+1, max_iter, acc, total_loss/len(test_dataloader)))

        if acc > best_acc: 
            best_acc = acc
            best_net = copy.deepcopy(net)
    return test_loss_list, train_loss_list, train_acc_list, test_acc_list, best_net


class LeNet(object):
    def __init__(self):
        self.conv1 = conv2d(1, 20, 5, 1)
        self.relu1 = ReLU()
        self.pool1 = MaxPool2d(2, 2)
        self.conv2 = conv2d(20, 50, 5, 1)
        self.relu2 = ReLU()
        self.pool2 = MaxPool2d(2, 2)
        self.fc1 = Linear(800, 500)
        self.relu3 = ReLU()
        self.fc2 = Linear(500, 10)
        
    def forward(self, input):
        input = self.relu1.forward(self.conv1.forward(input))
        input = self.pool1.forward(input)
        input = self.relu2.forward(self.conv2.forward(input))
        input = self.pool2.forward(input)
        # 展开
        self.flatten_shape = input.shape
        input = np.reshape(input, (input.shape[0], -1))
        input = self.relu3.forward(self.fc1.forward(input))
        output = self.fc2.forward(input)
        return output
    
    def backward(self, top_grad, lr):
        self.fc2.backward(top_grad, lr)
        self.relu3.backward(self.fc2.grad)
        self.fc1.backward(self.relu3.grad, lr)
        unflattened_grad = np.reshape(self.fc1.grad, self.flatten_shape)
        self.pool2.backward(unflattened_grad)
        self.relu2.backward(self.pool2.grad)
        self.conv2.backward(self.relu2.grad, lr)
        self.pool1.backward(self.conv2.grad)
        self.relu1.backward(self.pool1.grad)
        self.conv1.backward(self.relu1.grad, lr)

# 读取 cifar10 数据集，传入 cifar10 所在目录
def read_cifar10(filepath):
    import pickle
    # 读取压缩文件
    def unpickle(file):
        with open(file, 'rb') as fo:
            file_dict = pickle.load(fo, encoding='bytes')
        return file_dict
    train_data = []
    train_labels = []
    # 读取训练数据
    for i in range(1, 6):
        file_dict = unpickle(os.path.join(filepath, 'data_batch_%d'%i))
        train_data.append(file_dict[b'data'])
        train_labels += file_dict[b'labels']
    train_data = np.concatenate(train_data).astype(np.float32)
    # 读取测试数据
    file_dict = unpickle(os.path.join(filepath, 'test_batch'))
    test_data, test_labels = file_dict[b'data'].astype(np.float32), file_dict[b'labels']

    return train_data.reshape(train_data.shape[0], 3, 32, 32), np.array(train_labels), test_data.reshape(test_data.shape[0], 3, 32, 32), np.array(test_labels)


class Cifar10Net(object):
    def __init__(self):
        self.conv1 = conv2d(3, 20, 5, 1)
        self.relu1 = ReLU()
        self.pool1 = MaxPool2d(2, 2)
        self.conv2 = conv2d(20, 50, 5, 1)
        self.relu2 = ReLU()
        self.pool2 = MaxPool2d(2, 2)
        self.fc1 = Linear(1250, 500)
        self.relu3 = ReLU()
        self.fc2 = Linear(500, 10)
        
    def forward(self, input):
        input = self.relu1.forward(self.conv1.forward(input))
        input = self.pool1.forward(input)
        input = self.relu2.forward(self.conv2.forward(input))
        input = self.pool2.forward(input)
        # 展开
        self.flatten_shape = input.shape
        input = np.reshape(input, (input.shape[0], -1))
        input = self.relu3.forward(self.fc1.forward(input))
        output = self.fc2.forward(input)
        return output
    
    def backward(self, top_grad, lr):
        self.fc2.backward(top_grad, lr)
        self.relu3.backward(self.fc2.grad)
        self.fc1.backward(self.relu3.grad, lr)
        unflattened_grad = np.reshape(self.fc1.grad, self.flatten_shape)
        self.pool2.backward(unflattened_grad)
        self.relu2.backward(self.pool2.grad)
        self.conv2.backward(self.relu2.grad, lr)
        self.pool1.backward(self.conv2.grad)
        self.relu1.backward(self.pool1.grad)
        self.conv1.backward(self.relu1.grad, lr)

In [2]:
"""SimpleNet 实现
各项参数如下:

输入数据为 $B110*10，，B$ 为批量大小，即输入通道数为 1。
第一层卷积层个数为 20，卷积核大小为 5，步长为 1。
ReLU 激活层。
步长为 2，核大小为 2 的最大池化层。
Flatten 之后接入一层线性层、ReLU 激活的模块，线性层输入为 180，输出为 100。
最后一层进行分类，无 ReLU，输出为 10.
**提示**：请注意完成 forward 和 backward 两个函数，并注意池化层到线性层数据需要被展开。
"""

import numpy as np
import nn
from torchvision import datasets, transforms
import torch.nn.functional as F
from tqdm import tqdm

class SimpleNet(object):
    def __init__(self):
        # 代码开始 ### (≈ 6 行代码)
        self.conv1 = conv2d(1,20,5,1)
        self.relu1 = ReLU()
        self.pool1 = MaxPool2d(2,2)
        self.fc1 = Linear(180,100)
        self.relu2 = ReLU()
        self.fc2 = Linear(100,10)
        ### 代码结束 ###

    def forward(self, input):
        """
        参数:
        input -- 输入数据
        返回:
        output -- 神经网络输出数据
        """
        # 代码开始 ### (≈ 10 行代码)
        input = self.relu1.forward(self.conv1.forward(input))
        input = self.pool1.forward(input)
        self.flatten_shape = input.shape
        input = np.reshape(input,(input.shape[0],-1))
        input = self.relu2.forward(self.fc1.forward(input))
        output = self.fc2.forward(input)
        return output
        ### 代码结束 ###

    def backward(self, top_grad, lr):
        """
        参数:
        top_grad -- 上一层梯度
        lr -- 初始学习率
        返回:
        无
        """
        # 代码开始 ### (≈ 7 行代码)
        self.fc2.backward(top_grad, lr)
        self.relu2.backward(self.fc2.grad)
        self.fc1.backward(self.relu2.grad, lr)
        unflattened_grad = np.reshape(self.fc1.grad, self.flatten_shape)
        self.pool1.backward(unflattened_grad)
        self.relu1.backward(self.pool1.grad)
        self.conv1.backward(self.relu1.grad, lr)
        ### 代码结束 ###
        
class TorchNet(torch.nn.Module):
    def __init__(self):
        super(TorchNet, self).__init__()
        self.conv = torch.nn.Sequential(
            torch.nn.Conv2d(1, 20, 5, 1),
            torch.nn.ReLU(),
            torch.nn.MaxPool2d(2, 2),
        )
    
        self.classifier = torch.nn.Sequential(
            torch.nn.Linear(180, 100),
            torch.nn.ReLU(),
            torch.nn.Linear(100, 10),
        )
        
    def forward(self, x):
        x = self.conv(x)
        #self.flatten_shape = input.shape
        #input = np.reshape(input,(input.shape[0],-1))
        x = x.view(x.size(0), -1)
        #x = np.reshape(x,(x.size[0],-1))
        x = self.classifier(x)
        return x
            

In [3]:
import torch

np.random.seed(10)
torch.manual_seed(10)
data1 = np.random.randn(2, 1, 10, 10)
data2= torch.tensor(data1,dtype=torch.float32)
print(data1[:,:,:2,:2])
print()
print(data2[:,:,:2,:2])
print()

net = SimpleNet()
torchnet = TorchNet()
output1 = net.forward(data1)  # 前向传播
print(output1)
print()

torchnet.train()
output2 = torchnet.forward(data2)
print(output2)

[[[[ 1.3315865   0.71527897]
   [ 0.43302619  1.20303737]]]


 [[[ 0.11747566 -1.90745689]
   [ 0.29294072 -0.47080725]]]]

tensor([[[[ 1.3316,  0.7153],
          [ 0.4330,  1.2030]]],


        [[[ 0.1175, -1.9075],
          [ 0.2929, -0.4708]]]])

[[-0.1626764  -0.21762127 -0.07795562 -0.0424239  -0.12086755  0.06888997
   0.05691363 -0.0281157  -0.4101945   0.046524  ]
 [-0.12926707 -0.08638534 -0.04449985 -0.10439779 -0.0916423   0.09180234
  -0.03483336 -0.14310843 -0.4089186  -0.0713471 ]]

tensor([[ 0.3089,  0.1918,  0.2641,  0.3564,  0.1590, -0.0052, -0.0805, -0.2107,
          0.0450,  0.0576],
        [ 0.3219,  0.2387,  0.1183,  0.4128,  0.1718,  0.0545, -0.0974, -0.1542,
          0.1521, -0.1120]], grad_fn=<AddmmBackward>)


In [4]:
top_grad = np.ones_like(output1)
net.backward(top_grad, 0.)  # 反向传播
net.conv1.grad[:, :, :2, :2]

array([[[[-0.00066155, -0.01311418],
         [ 0.02485082, -0.00623123]]],


       [[[-0.00253486,  0.00286818],
         [-0.00192729,  0.03077973]]]], dtype=float32)

In [5]:
from numba import jit
import random

def monte_carlo_pi(nsamples):
    acc = 0
    for i in range(nsamples):
        x = random.random()
        y = random.random()
        if (x**2 + y**2) < 1.0:
            acc += 1
    return 4.0 * acc / nsamples

@jit(nopython=True, parallel=True)
def monte_carlo_pi_numba(nsamples):
    acc = 0
    for i in range(nsamples):
        x = random.random()
        y = random.random()
        if (x**2 + y**2) < 1.0:
            acc += 1
    return 4.0 * acc / nsamples

In [6]:
%timeit monte_carlo_pi(100)
%timeit monte_carlo_pi_numba(100)
a=monte_carlo_pi(100)
b=monte_carlo_pi_numba(100)
print(a)
print(b)

47.2 µs ± 133 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)


The keyword argument 'parallel=True' was specified but no transformation for parallel execution was possible.

To find out why, try turning on parallel diagnostics, see https://numba.pydata.org/numba-doc/latest/user/parallel.html#diagnostics for help.
[1m
File "<ipython-input-5-f47e102bc9c0>", line 14:[0m
[1m@jit(nopython=True, parallel=True)
[1mdef monte_carlo_pi_numba(nsamples):
[0m[1m^[0m[0m
[0m
  state.func_ir.loc))


1.16 µs ± 2.01 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)
3.44
3.12


In [7]:
'利用torch搭建CNN网络'
from torchvision import datasets, transforms
import torch.nn.functional as F
from tqdm import tqdm

class Net(torch.nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv = torch.nn.Sequential(
            torch.nn.Conv2d(3, 64, 5, 1),
            torch.nn.ReLU(inplace=True),
            torch.nn.MaxPool2d(2, 2),
            torch.nn.Conv2d(64, 128, 3, 1),
            torch.nn.ReLU(inplace=True),
            # BatchNorm 正则化方法
            torch.nn.BatchNorm2d(128),
            torch.nn.MaxPool2d(2, 2),
            torch.nn.Conv2d(128, 256, 2, 1),
            torch.nn.ReLU(inplace=True),
            torch.nn.Conv2d(256, 256, 2, 1),
            torch.nn.ReLU(inplace=True),
            # BatchNorm 正则化方法
            torch.nn.BatchNorm2d(256),
            torch.nn.MaxPool2d(2, 2),
        )
    
        self.classifier = torch.nn.Sequential(
            torch.nn.Linear(1024, 1024),
            torch.nn.ReLU(inplace=True),
            torch.nn.Linear(1024, 10),
        )

    def forward(self, x):
        x = self.conv(x)
        x = x.view(x.size(0), -1)
        x = self.classifier(x)
        return x

In [8]:
# 迭代次数、学习率等
batch_size = 120
base_lr = 0.1
EPOCHS = 20
step_size = 8
download = True
best_acc = -float('inf')
    
# 加载数据
train_loader = torch.utils.data.DataLoader(
    datasets.CIFAR10('./data/', train=True,
                   transform=transforms.Compose([
                       transforms.ToTensor(),
                   ])),
    batch_size=batch_size, shuffle=True, num_workers=1)
test_loader = torch.utils.data.DataLoader(
    datasets.CIFAR10('./data/', train=False, transform=transforms.Compose([
                       transforms.ToTensor(),
                   ])),
    batch_size=batch_size, shuffle=False, num_workers=1)

# 定义网络结构和优化器
model = Net()
if torch.cuda.is_available():
    model = model.cuda()

# weight_decay 表明使用权重衰减的系数，不宜过大，一般取小数点四位再慢慢调整
optimizer = torch.optim.SGD(model.parameters(), lr=base_lr, momentum=0.9, weight_decay=0.001)
exp_lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=step_size, gamma=0.1)

In [9]:
# 开始迭代
for epoch in range(1, EPOCHS + 1):
    model.train()
    train_loss = 0
    correct = 0
    # 训练
    for data, target in tqdm(train_loader):
        if torch.cuda.is_available():
            data = data.cuda()
            target = target.cuda()
        optimizer.zero_grad()
        output = model(data)
        loss = F.cross_entropy(output, target)
        train_loss += loss.item()
        pred = output.max(1, keepdim=True)[1]
        correct += pred.eq(target.view_as(pred)).sum().item()
        loss.backward()
        optimizer.step()
    exp_lr_scheduler.step()
    train_loss /= len(train_loader.dataset)
    print('Epoch {}/{}:'.format(epoch, EPOCHS))
    print('Train set: Average loss: {:.4f}, Accuracy: {}/{} ({:.2f}%)'.format(
        train_loss, correct, len(train_loader.dataset),
        100. * correct / len(train_loader.dataset)))

    model.eval()
    test_loss = 0
    correct = 0
    # 测试
    with torch.no_grad():
        for data, target in test_loader:
            if torch.cuda.is_available():
                data = data.cuda()
                target = target.cuda()
            output = model(data)
            test_loss += F.cross_entropy(output, target, reduction='sum').item()
            pred = output.max(1, keepdim=True)[1]
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)
    acc = correct/len(test_loader.dataset)
    print('Test set: Average loss: {:.4f}, Accuracy: {}/{} ({:.2f}%)'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * acc))
    if best_acc<acc: best_acc=acc
    print()

print('best accuracy: ', best_acc)

100%|████████████████████████████████████████████████████████████████████████████████| 417/417 [01:14<00:00,  5.57it/s]


Epoch 1/20:
Train set: Average loss: 0.0139, Accuracy: 19952/50000 (39.90%)


  0%|                                                                                          | 0/417 [00:00<?, ?it/s]

Test set: Average loss: 1.9027, Accuracy: 3697/10000 (36.97%)



100%|████████████████████████████████████████████████████████████████████████████████| 417/417 [01:15<00:00,  5.55it/s]


Epoch 2/20:
Train set: Average loss: 0.0105, Accuracy: 27579/50000 (55.16%)


  0%|                                                                                          | 0/417 [00:00<?, ?it/s]

Test set: Average loss: 1.2679, Accuracy: 5440/10000 (54.40%)



100%|████████████████████████████████████████████████████████████████████████████████| 417/417 [01:15<00:00,  5.55it/s]


Epoch 3/20:
Train set: Average loss: 0.0092, Accuracy: 30447/50000 (60.89%)


  0%|                                                                                          | 0/417 [00:00<?, ?it/s]

Test set: Average loss: 1.5004, Accuracy: 4952/10000 (49.52%)



100%|████████████████████████████████████████████████████████████████████████████████| 417/417 [01:15<00:00,  5.55it/s]


Epoch 4/20:
Train set: Average loss: 0.0083, Accuracy: 32552/50000 (65.10%)


  0%|                                                                                          | 0/417 [00:00<?, ?it/s]

Test set: Average loss: 1.2324, Accuracy: 5817/10000 (58.17%)



100%|████████████████████████████████████████████████████████████████████████████████| 417/417 [01:15<00:00,  5.52it/s]


Epoch 5/20:
Train set: Average loss: 0.0077, Accuracy: 33953/50000 (67.91%)


  0%|                                                                                          | 0/417 [00:00<?, ?it/s]

Test set: Average loss: 1.2219, Accuracy: 5696/10000 (56.96%)



100%|████████████████████████████████████████████████████████████████████████████████| 417/417 [01:15<00:00,  5.53it/s]


Epoch 6/20:
Train set: Average loss: 0.0072, Accuracy: 34858/50000 (69.72%)


  0%|                                                                                          | 0/417 [00:00<?, ?it/s]

Test set: Average loss: 1.0335, Accuracy: 6353/10000 (63.53%)



100%|████████████████████████████████████████████████████████████████████████████████| 417/417 [01:14<00:00,  5.56it/s]


Epoch 7/20:
Train set: Average loss: 0.0068, Accuracy: 35760/50000 (71.52%)


  0%|                                                                                          | 0/417 [00:00<?, ?it/s]

Test set: Average loss: 1.2724, Accuracy: 5907/10000 (59.07%)



100%|████████████████████████████████████████████████████████████████████████████████| 417/417 [01:15<00:00,  5.54it/s]


Epoch 8/20:
Train set: Average loss: 0.0068, Accuracy: 35924/50000 (71.85%)


  0%|                                                                                          | 0/417 [00:00<?, ?it/s]

Test set: Average loss: 3.1689, Accuracy: 2497/10000 (24.97%)



100%|████████████████████████████████████████████████████████████████████████████████| 417/417 [01:15<00:00,  5.53it/s]


Epoch 9/20:
Train set: Average loss: 0.0044, Accuracy: 40790/50000 (81.58%)


  0%|                                                                                          | 0/417 [00:00<?, ?it/s]

Test set: Average loss: 0.6769, Accuracy: 7682/10000 (76.82%)



100%|████████████████████████████████████████████████████████████████████████████████| 417/417 [01:14<00:00,  5.57it/s]


Epoch 10/20:
Train set: Average loss: 0.0035, Accuracy: 42801/50000 (85.60%)


  0%|                                                                                          | 0/417 [00:00<?, ?it/s]

Test set: Average loss: 0.6823, Accuracy: 7712/10000 (77.12%)



100%|████████████████████████████████████████████████████████████████████████████████| 417/417 [01:14<00:00,  5.56it/s]


Epoch 11/20:
Train set: Average loss: 0.0030, Accuracy: 43895/50000 (87.79%)


  0%|                                                                                          | 0/417 [00:00<?, ?it/s]

Test set: Average loss: 0.7669, Accuracy: 7503/10000 (75.03%)



100%|████████████████████████████████████████████████████████████████████████████████| 417/417 [01:14<00:00,  5.57it/s]


Epoch 12/20:
Train set: Average loss: 0.0025, Accuracy: 44940/50000 (89.88%)


  0%|                                                                                          | 0/417 [00:00<?, ?it/s]

Test set: Average loss: 0.6651, Accuracy: 7841/10000 (78.41%)



100%|████████████████████████████████████████████████████████████████████████████████| 417/417 [01:15<00:00,  5.55it/s]


Epoch 13/20:
Train set: Average loss: 0.0021, Accuracy: 45832/50000 (91.66%)


  0%|                                                                                          | 0/417 [00:00<?, ?it/s]

Test set: Average loss: 0.6893, Accuracy: 7790/10000 (77.90%)



100%|████████████████████████████████████████████████████████████████████████████████| 417/417 [01:17<00:00,  5.36it/s]


Epoch 14/20:
Train set: Average loss: 0.0018, Accuracy: 46650/50000 (93.30%)


  0%|                                                                                          | 0/417 [00:00<?, ?it/s]

Test set: Average loss: 0.7218, Accuracy: 7728/10000 (77.28%)



100%|████████████████████████████████████████████████████████████████████████████████| 417/417 [01:16<00:00,  5.48it/s]


Epoch 15/20:
Train set: Average loss: 0.0015, Accuracy: 47368/50000 (94.74%)


  0%|                                                                                          | 0/417 [00:00<?, ?it/s]

Test set: Average loss: 0.9061, Accuracy: 7405/10000 (74.05%)



100%|████████████████████████████████████████████████████████████████████████████████| 417/417 [01:14<00:00,  5.57it/s]


Epoch 16/20:
Train set: Average loss: 0.0012, Accuracy: 47999/50000 (96.00%)


  0%|                                                                                          | 0/417 [00:00<?, ?it/s]

Test set: Average loss: 0.7729, Accuracy: 7738/10000 (77.38%)



100%|████████████████████████████████████████████████████████████████████████████████| 417/417 [01:17<00:00,  5.37it/s]


Epoch 17/20:
Train set: Average loss: 0.0007, Accuracy: 49258/50000 (98.52%)


  0%|                                                                                          | 0/417 [00:00<?, ?it/s]

Test set: Average loss: 0.7378, Accuracy: 7885/10000 (78.85%)



100%|████████████████████████████████████████████████████████████████████████████████| 417/417 [01:19<00:00,  5.23it/s]


Epoch 18/20:
Train set: Average loss: 0.0006, Accuracy: 49561/50000 (99.12%)


  0%|                                                                                          | 0/417 [00:00<?, ?it/s]

Test set: Average loss: 0.7471, Accuracy: 7874/10000 (78.74%)



100%|████████████████████████████████████████████████████████████████████████████████| 417/417 [01:20<00:00,  5.18it/s]


Epoch 19/20:
Train set: Average loss: 0.0005, Accuracy: 49659/50000 (99.32%)


  0%|                                                                                          | 0/417 [00:00<?, ?it/s]

Test set: Average loss: 0.7538, Accuracy: 7880/10000 (78.80%)



100%|████████████████████████████████████████████████████████████████████████████████| 417/417 [01:18<00:00,  5.29it/s]


Epoch 20/20:
Train set: Average loss: 0.0005, Accuracy: 49703/50000 (99.41%)
Test set: Average loss: 0.7593, Accuracy: 7878/10000 (78.78%)

best accuracy:  0.7885
