# ResNet18 实现

![pic](resnet18.jpg)

In [1]:
%matplotlib inline
import torch
from torch import nn
from torch.nn import functional as F
import torchvision.datasets as datasets
import torchvision.transforms as transforms
import torch.optim as optim
print(torch.__version__)

1.10.1


In [2]:
# Dataset preparation and preprocessing.
# Training pipeline: random crop (with 4-pixel padding) and random horizontal
# flip as augmentation, followed by tensor conversion and normalisation.
transform_train = transforms.Compose(
    [
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        # ToTensor is the one step that must always be present.
        transforms.ToTensor(),
        # Per-channel mean / standard deviation of CIFAR-10.
        transforms.Normalize(
            mean=(0.4914, 0.4822, 0.4465),
            std=(0.2023, 0.1994, 0.2010),
        ),
    ]
)

# Evaluation pipeline: no augmentation, only tensor conversion and the same
# normalisation constants as the training pipeline.
transform_test = transforms.Compose(
    [
        # ToTensor is the one step that must always be present.
        transforms.ToTensor(),
        transforms.Normalize(
            mean=(0.4914, 0.4822, 0.4465),
            std=(0.2023, 0.1994, 0.2010),
        ),
    ]
)

## 加载数据集

In [3]:
# CIFAR-10 dataset (training and validation splits).
# Normally the dataset class would have to be written by hand, but the
# official datasets already ship with a ready-made one.
data_dir = "~/data"

# Training split, using the augmenting transform pipeline.
train_set = datasets.CIFAR10(
    data_dir,
    train=True,
    transform=transform_train,
    download=True,
)

# Held-out split, using the plain evaluation transform pipeline.
val_set = datasets.CIFAR10(
    data_dir,
    train=False,
    transform=transform_test,
    download=True,
)

Files already downloaded and verified
Files already downloaded and verified


In [4]:
# Training hyper-parameters.
epoch_total = 10  # number of passes over the training set
batch = 8         # mini-batch size used by both data loaders
lr = 0.01         # learning rate handed to the optimizer

# Run on the GPU when torch reports one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# Human-readable names of the ten CIFAR-10 classes.
classes = (
    'plane', 'car', 'bird', 'cat', 'deer',
    'dog', 'frog', 'horse', 'ship', 'truck',
)
print('use device:', device)

use device: cpu


In [5]:
# Batched loaders over the two dataset splits.
# Training batches are reshuffled on every pass.
train_loader = torch.utils.data.DataLoader(train_set, batch_size=batch,
                                         shuffle=True, num_workers=8)
# Validation must read the held-out split (val_set), not the training data;
# its order is kept fixed.
val_loader = torch.utils.data.DataLoader(val_set, batch_size=batch,
                                         shuffle=False, num_workers=8)

In [6]:
# Residual block
class Residual(nn.Module):
    """Residual block: two conv + batch-norm layers plus a shortcut connection.

    Args:
        input_channel: number of channels of the input feature map.
        out_channel: number of channels produced by both convolutions.
        kersize: kernel size of the two main convolutions. Padding is
            ``kersize // 2`` so that, for odd kernel sizes, the main path and
            the shortcut produce matching spatial sizes.
        use_1x1: when True, the shortcut is a 1x1 convolution that maps the
            input to ``out_channel`` channels with the same stride as the
            first convolution; when False the input is added unchanged.
        strides: stride of the first convolution (and of the 1x1 shortcut).
    """

    def __init__(self,input_channel,out_channel,kersize=3,use_1x1=False,strides=1):
        super().__init__()
        # "Same"-style padding; equals the previous hard-coded 1 for kersize=3.
        padding = kersize // 2
        # First layer maps input -> output; channel count and stride may change.
        self.conv1 = nn.Conv2d(input_channel, out_channel, kersize,
                               padding=padding, stride=strides)
        # Second layer maps output -> output; no channel or stride change.
        self.conv2 = nn.Conv2d(out_channel, out_channel, kersize,
                               padding=padding, stride=1)
        self.bn1 = nn.BatchNorm2d(out_channel)
        self.bn2 = nn.BatchNorm2d(out_channel)
        if use_1x1:
            self.short = nn.Conv2d(input_channel, out_channel,
                                   kernel_size=(1, 1), stride=strides)
        else:
            self.short = None

    def forward(self,x):
        """Return ``relu(main_path(x) + shortcut(x))``."""
        y = F.relu(self.bn1(self.conv1(x)))
        y = self.bn2(self.conv2(y))
        if self.short is not None:
            x = self.short(x)
        # F.relu is not in-place: its result has to be returned, otherwise the
        # block would emit un-activated values.
        return F.relu(y + x)
        
        

标准resnet使用的是224x224的图片

CIFAR10图片为3x32x32,也能跑,但是图片太小:第一层卷积(stride=2)和最大池化(stride=2)先把32x32缩小到8x8,之后三个降采样阶段再依次缩小到4x4、2x2、1x1,最后一个阶段的特征图只剩1x1,因此最后几层卷积的效果可能不好

可以尝试稍微改动下模型,比如修改第一层的输入跨距

In [7]:
# Standard ResNet-18
class ResetNet18(nn.Module):
    """ResNet-18 style classifier assembled from ``Residual`` blocks.

    Args:
        input_channel: number of channels of the input images.
        out_label: number of output units of the final linear layer.
    """

    def __init__(self, input_channel, out_label):
        super().__init__()
        # Stem: 7x7 stride-2 convolution, batch norm, ReLU, 3x3 stride-2 max pool.
        stem_layers = [
            nn.Conv2d(input_channel, 64, kernel_size=7, stride=2, padding=3),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
        ]
        self.bn1 = nn.Sequential(*stem_layers)
        # Four stages of two residual blocks each; only the first stage keeps
        # the resolution and channel count of its input.
        self.bn2 = nn.Sequential(*self.res_block(64, 64, 2, bfirst=True))
        self.bn3 = nn.Sequential(*self.res_block(64, 128, 2))
        self.bn4 = nn.Sequential(*self.res_block(128, 256, 2))
        self.bn5 = nn.Sequential(*self.res_block(256, 512, 2))
        # Head: global average pooling, flatten, linear classifier.
        head_layers = [
            nn.AdaptiveAvgPool2d((1, 1)),
            nn.Flatten(),
            nn.Linear(512, out_label),
        ]
        self.full = nn.Sequential(*head_layers)

    def res_block(self, input_channel, out_channel, num_block, bfirst=False):
        """Build the list of ``num_block`` residual blocks forming one stage.

        Unless ``bfirst`` is True, the first block of the stage downsamples:
        it uses stride 2 and a 1x1 shortcut going from ``input_channel`` to
        ``out_channel``. Every other block maps ``out_channel`` to
        ``out_channel`` with the default stride.
        """
        def build(position):
            downsample = position == 0 and not bfirst
            if downsample:
                # Usually the first block of a stage halves width/height and
                # doubles the depth.
                return Residual(input_channel, out_channel, use_1x1=True, strides=2)
            # The very first stage is special: input and output widths are the
            # same, so out_channel can be used for both.
            return Residual(out_channel, out_channel)

        return [build(position) for position in range(num_block)]

    def forward(self, x):
        """Apply the stem, the four residual stages and the head in order."""
        stages = (self.bn1, self.bn2, self.bn3, self.bn4, self.bn5, self.full)
        y = x
        for stage in stages:
            y = stage(y)
        return y

In [8]:
# Smoke test: feed one random batch through the network and print the
# shape of the result.
X = torch.rand(4, 3, 32, 32)
net = ResetNet18(input_channel=3, out_label=10)
out = net(X)
print(out.shape)

torch.Size([4, 10])


In [9]:
# Move the model onto the selected device. Module.to returns the module
# itself, so rebinding the name keeps the same object and prints nothing.
net = net.to(device)

In [10]:
# Classification loss.
criterion = nn.CrossEntropyLoss()
# SGD with momentum 0.9: per the PyTorch SGD update rule, the step direction
# is 0.9 x the previous velocity plus the current gradient.
optimizer = optim.SGD(
    params=net.parameters(),
    lr=lr,
    momentum=0.9,
)

In [11]:
# Sanity-check the DataLoader: pull a single batch and print the sizes of
# its two tensors.
iterator=iter(train_loader)
# Each item yielded by the loader unpacks into a (data, label) pair.
data,label=next(iterator)
print(data.size(),label.size())

torch.Size([8, 3, 32, 32]) torch.Size([8])


In [12]:
def train():
    """Run one optimisation pass over ``train_loader``.

    Uses the notebook-level ``net``, ``criterion``, ``optimizer`` and
    ``device``; returns nothing.
    """
    net.train()
    for inputs, targets in train_loader:
        inputs = inputs.to(device)
        targets = targets.to(device)
        # Clear the gradients of the previous step before the new backward pass.
        optimizer.zero_grad()
        # Forward pass and loss.
        loss = criterion(net(inputs), targets)
        loss.backward()
        optimizer.step()


In [18]:
def test():
    """Evaluate ``net`` on ``val_loader`` and print mean loss and accuracy.

    Uses the notebook-level ``net``, ``criterion``, ``val_loader`` and
    ``device``. Both printed numbers are per-sample averages over everything
    the loader yields. Returns nothing.
    """
    # In eval mode dropout is disabled and batch-norm layers use the
    # statistics gathered during training instead of updating them.
    net.eval()
    val_loss = 0.0
    correct = 0
    total_num = 0
    # Gradients are not needed for evaluation; skipping them is faster.
    with torch.no_grad():
        for x, y in val_loader:
            x, y = x.to(device), y.to(device)
            y_pre = net(x)
            batch_size = y.size(0)
            # criterion is built with the default mean reduction, so weight the
            # batch loss by the batch size to get a true per-sample average.
            val_loss += criterion(y_pre, y).item() * batch_size
            # torch.max over dim=1 returns a (values, indices) pair; the
            # indices are the predicted class of each sample in the batch.
            pred = torch.max(y_pre, dim=1)[1]
            correct += pred.eq(y).sum().item()
            # Count samples, not batches: len(val_loader) is the number of
            # batches and would inflate the accuracy by the batch size.
            total_num += batch_size
    if total_num == 0:
        print("test set is empty, nothing to evaluate")
        return
    print("test loss {},accuracy {:.4f}".format(val_loss/total_num,correct/total_num))
            
        

In [22]:
# 跑一个epoch大概十分钟
# 实在太慢,就不在mac上运行了
%%time
for epoch in range(epoch_total):
    print('epoch:',epoch)
    train()
    test()

epoch: 0


Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x131702f70>
Traceback (most recent call last):
  File "/Users/ljjia/anaconda3/envs/pytorch/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 1328, in __del__
    self._shutdown_workers()
  File "/Users/ljjia/anaconda3/envs/pytorch/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 1301, in _shutdown_workers
    w.join(timeout=_utils.MP_STATUS_CHECK_INTERVAL)
  File "/Users/ljjia/anaconda3/envs/pytorch/lib/python3.8/multiprocessing/process.py", line 149, in join
    res = self._popen.wait(timeout)
  File "/Users/ljjia/anaconda3/envs/pytorch/lib/python3.8/multiprocessing/popen_fork.py", line 44, in wait
    if not wait([self.sentinel], timeout):
  File "/Users/ljjia/anaconda3/envs/pytorch/lib/python3.8/multiprocessing/connection.py", line 931, in wait
    ready = selector.select(timeout)
  File "/Users/ljjia/anaconda3/envs/pytorch/lib/python3.8/selectors.py", line 415, in select
    

KeyboardInterrupt: 

NameError: name 'pred' is not defined