In [1]:
# import
from cifardef import *
from PIL import Image  
import torch,math,os
from torch import nn, optim
import torch.nn.functional as F
from torch.autograd import Variable
from torch.utils.data import DataLoader
from torchvision import transforms
from torchvision import datasets
from matplotlib import pyplot as plt
import numpy as np
import pandas as pd

In [30]:
class ResidualBlock(nn.Module):
    def __init__(self, inchannel, outchannel, stride=1):
        super(ResidualBlock, self).__init__()
        self.left = nn.Sequential(
            nn.Conv2d(inchannel, outchannel, kernel_size=3, stride=stride, padding=3, bias=False),
            nn.BatchNorm2d(outchannel),
            nn.ReLU(inplace=True),
            nn.Conv2d(outchannel, outchannel, kernel_size=3, stride=1, padding=1, bias=False),
            nn.BatchNorm2d(outchannel)
        )
        self.shortcut = nn.Sequential()
        if stride != 1 or inchannel != outchannel:
            self.shortcut = nn.Sequential(
                nn.Conv2d(inchannel, outchannel, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(outchannel)
            )

    def forward(self, x):
        out = self.left(x)
        out += self.shortcut(x)
        out = F.relu(out)
        return out

class ResNet(nn.Module):
    def __init__(self, ResidualBlock, layers, num_classes=10):
        self.inplanes = 64
        super(ResNet, self).__init__()
        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3,
                               bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.layer1 = self.make_layer(block, 64, layers[0])
        self.layer2 = self.make_layer(block, 128, layers[1], stride=2)
        self.layer3 = self.make_layer(block, 256, layers[2], stride=2)
        self.layer4 = self.make_layer(block, 512, layers[3], stride=2)
        self.avgpool = nn.AvgPool2d(7, stride=1)
        self.fc = nn.Linear(512 * block.expansion, num_classes)


    def make_layer(self, block, channels, num_blocks, stride):
        strides = [stride] + [1] * (num_blocks - 1)   #strides=[1,1]
        layers = []
        for stride in strides:
            layers.append(block(self.inchannel, channels, stride))
            self.inchannel = channels
        return nn.Sequential(*layers)

    def forward(self, x):
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)

        out = self.layer1(x)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        out = F.avg_pool2d(out, 4)
        out = out.view(out.size(0), -1)
        out = self.fc(out)
        return out


def ResNet18():

    return ResNet(ResidualBlock)


In [31]:
PATH='ResNet_origin.pkl'
model = ResNet18()
if os.path.isfile(PATH):
    model.load_state_dict(torch.load(PATH))
    print(model.eval())
else:
    print('No model')

No model


In [32]:
batch_size = 128
learning_rate = 0.1
num_epoches = 1000
transform = transforms.Compose(
    [
        transforms.ToTensor(),
#         transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    ])
transform_train = transforms.Compose([
    transforms.RandomCrop(32, padding=4),  #先四周填充0，在吧图像随机裁剪成32*32
    transforms.RandomHorizontalFlip(),  #图像一半的概率翻转，一半的概率不翻转
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)), #R,G,B每层的归一化用到的均值和方差
])

transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
])
# train_dataset = cifar_10('data/cifar-10-batches-py',
#                          train=True, transform=transform,index=2)
test_dataset = cifar_10('data/cifar-10-batches-py',
                        train=False, transform=transform_test)
meta=unpickle('data/cifar-10-batches-py/batches.meta')
# train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

train_datasetList = [ cifar_10('data/cifar-10-batches-py',
                         train=True, transform=transform_train,index=i+1) for i in range(5)]
train_loaderList=[DataLoader(t, batch_size=batch_size, shuffle=True) for t in train_datasetList]

# 定义loss和optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=learning_rate)
# optimizer = optim.SGD(model.parameters(), lr=learning_rate, momentum=0.9, weight_decay=5e-4) #优化方式为mini-batch momentum-SGD，并采用L2正则化（权重衰减）

cuda_gpu = torch.cuda.is_available()
if cuda_gpu:
    model = model.cuda()
log=open(PATH.split('.')[0]+'.log','a')
best_acc=0


In [33]:
res={}
epoch=1
model

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): ResidualBlock(
      (left): Sequential(
        (0): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(3, 3), bias=False)
        (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): ReLU(inplace=True)
        (3): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (4): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
      (shortcut): Sequential()
    )
    (1): ResidualBlock(
      (left): Sequential(
        (0): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(3, 3), bias=False)
        (1): BatchNorm2d(64, eps=1e-05

In [34]:
epochs1=135
epochs2=185
epochs3=240
epochs=epochs3

optimizer = optim.SGD(model.parameters(), lr=0.1)
while epoch<=epochs1:
    print('Train Epoch: {}/{}: lr = {}'.format(epoch,epochs,0.1))
    print('Train Epoch: {}/{}:'.format(epoch,epochs),file=log)
    for t in train_loaderList:
        train(model,num_epoches,criterion,optimizer,t,file=log)
    if(epoch%(1)==0):
        res[epoch]={}
        acc, loss = test(model, 1, criterion, test_loader,log)
        res[epoch]['epoch']=epoch
        res[epoch]['acc']=float(acc)
        res[epoch]['loss']=float(loss)
        if acc>best_acc:
            best_acc=acc
            print('better model,saved as',PATH)
            torch.save(model.state_dict(),PATH)
    epoch+=1
optimizer = optim.SGD(model.parameters(), lr=0.01)

while epoch<=epochs2:
    print('Train Epoch: {}/{}: lr = {}'.format(epoch,epochs,0.01))
    print('Train Epoch: {}/{}:'.format(epoch,epochs),file=log)
    for t in train_loaderList:
        train(model,num_epoches,criterion,optimizer,t,file=log)
    if(epoch%(1)==0):
        res[epoch]={}
        acc, loss = test(model, 1, criterion, test_loader,log)
        res[epoch]['epoch']=epoch
        res[epoch]['acc']=float(acc)
        res[epoch]['loss']=float(loss)
        if acc>best_acc:
            best_acc=acc
            print('better model,saved as',PATH)
            torch.save(model.state_dict(),PATH)
    epoch+=1

optimizer = optim.SGD(model.parameters(), lr=0.001)

while epoch<=epochs3:
    print('Train Epoch: {}/{}: lr = {}'.format(epoch,epochs,0.001))
    print('Train Epoch: {}/{}:'.format(epoch,epochs),file=log)
    for t in train_loaderList:
        train(model,num_epoches,criterion,optimizer,t,file=log)
    if(epoch%(1)==0):
        res[epoch]={}
        acc, loss = test(model, 1, criterion, test_loader,log)
        res[epoch]['epoch']=epoch
        res[epoch]['acc']=float(acc)
        res[epoch]['loss']=float(loss)
        if acc>best_acc:
            best_acc=acc
            print('better model,saved as',PATH)
            torch.save(model.state_dict(),PATH)  
    epoch+=1
log.close()

Train Epoch: 1/240: lr = 0.1


RuntimeError: The size of tensor a (12) must match the size of tensor b (8) at non-singleton dimension 3

In [None]:
torch.save(model.state_dict(), PATH)

In [None]:
test_visual(model,'data/cifar-10-batches-py',meta,transform_test)

In [None]:
test(model, 1, criterion, test_loader)

In [None]:
pd.DataFrame(res).transpose()

In [None]:
log.close()