<a href="https://colab.research.google.com/github/BATiger/Classic/blob/main/ResNet.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# from google.colab import drive
# drive.mount('/content/drive')

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import math
from torch.utils.data import Dataset, DataLoader
from torchvision import datasets
import torchvision.transforms as T
import matplotlib.pyplot as plt
import torch.nn.functional as F

In [None]:
# Note: ResidualBlock uses nn.ReLU(inplace=True), which overwrites its input tensor instead of allocating a new one.

class ResidualBlock(nn.Module):
    """Residual block made of two 3x3 convolutions plus an additive shortcut.

    The main branch (``self.basic``) is
    conv3x3 -> BatchNorm -> ReLU -> conv3x3 -> BatchNorm.
    Its output is added to ``self.shortcut(x)`` and the sum goes through a
    final ReLU.

    Args:
        inchannel: Number of channels of the incoming feature map.
        outchannel: Number of channels produced by the block.
        stride: Stride of the first 3x3 convolution, and of the 1x1 shortcut
            convolution when one is created.
    """

    def __init__(self, inchannel, outchannel, stride=1):
        super().__init__()

        main_branch = [
            nn.Conv2d(inchannel, outchannel, 3, stride=stride, padding=1, bias=False),
            nn.BatchNorm2d(outchannel),
            nn.ReLU(inplace=True),
            nn.Conv2d(outchannel, outchannel, 3, stride=1, padding=1, bias=False),
            nn.BatchNorm2d(outchannel),
        ]
        self.basic = nn.Sequential(*main_branch)

        # The shortcut only needs a projection when the main branch changes
        # the feature map's size (stride) or its channel count; otherwise an
        # empty Sequential passes the input through unchanged.
        shape_is_preserved = stride == 1 and inchannel == outchannel
        if shape_is_preserved:
            self.shortcut = nn.Sequential()
        else:
            self.shortcut = nn.Sequential(
                nn.Conv2d(inchannel, outchannel, kernel_size=1, stride=stride, padding=0, bias=False),
                nn.BatchNorm2d(outchannel),
            )

    def forward(self, x):
        """Return ``relu(basic(x) + shortcut(x))``."""
        residual = self.basic(x)
        skip = self.shortcut(x)
        return F.relu(residual + skip)

class ResNet18(nn.Module):
    """ResNet-18-style image classifier built from a pluggable residual block.

    Layout: a 3x3 stem convolution (3 -> 64 channels), four stages of two
    blocks each (64, 128, 256, 512 channels; stages 2-4 are built with
    stride 2), a global max pool and a linear classifier.

    Args:
        ResidualBlock: Block class (or factory) invoked as
            ``block(in_channels, out_channels, stride)``.
        num_classes: Number of outputs of the final linear layer.
    """

    def __init__(self, ResidualBlock, num_classes=10):
        super(ResNet18, self).__init__()

        # Channel count fed to the next block; make_layer updates it.
        self.inchannel = 64

        # Stem: keeps the spatial size (stride 1, padding 1), 3 -> 64 channels.
        self.conv1 = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False),
            nn.BatchNorm2d(64),
            nn.ReLU(),
        )

        self.layer1 = self.make_layer(ResidualBlock, 64, 2, stride=1)
        self.layer2 = self.make_layer(ResidualBlock, 128, 2, stride=2)
        self.layer3 = self.make_layer(ResidualBlock, 256, 2, stride=2)
        self.layer4 = self.make_layer(ResidualBlock, 512, 2, stride=2)

        # Global max pool down to 1x1 so the classifier always receives 512
        # features regardless of the input resolution. On a 4x4 feature map
        # this equals the former nn.MaxPool2d(4); for larger maps the fixed
        # window produced more than 512 features and the linear layer failed.
        # The attribute keeps its original name, and pooling has no
        # parameters, so existing checkpoints still load.
        self.maxpool = nn.AdaptiveMaxPool2d(1)
        self.fc = nn.Linear(512, num_classes)

    def make_layer(self, block, channels, num_blocks, stride):
        """Build one stage of ``num_blocks`` blocks as an ``nn.Sequential``.

        Only the first block receives ``stride``; the remaining blocks use
        stride 1. ``self.inchannel`` is set to ``channels`` so the following
        block (and the next stage) sees the right input width.
        """
        strides = [stride] + [1] * (num_blocks - 1)

        layers = []
        for block_stride in strides:
            layers.append(block(self.inchannel, channels, block_stride))
            self.inchannel = channels

        return nn.Sequential(*layers)

    def forward(self, x):
        """Map a batch of 3-channel images to ``(batch, num_classes)`` scores."""
        x = self.conv1(x)

        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)

        x = self.maxpool(x)

        # Collapse (batch, 512, 1, 1) to (batch, 512) for the classifier.
        x = torch.flatten(x, 1)

        return self.fc(x)


In [None]:
def _normalized_tensor_steps():
    """Return fresh ToTensor + Normalize steps used at the start of both pipelines."""
    # NOTE(review): the constants look like per-channel CIFAR-10 mean/std - confirm.
    return [
        T.ToTensor(),
        T.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
    ]


# Training pipeline: the random flip and rotation run after normalization,
# i.e. on the already-normalized tensor.
train_transforms = T.Compose(
    _normalized_tensor_steps()
    + [T.RandomHorizontalFlip(p=0.2), T.RandomRotation(5)]
)

# Evaluation pipeline: conversion and normalization only, no augmentation.
test_transforms = T.Compose(_normalized_tensor_steps())

In [None]:
# Directory the CIFAR-10 archive is downloaded to and read from.
data_dir = './data'

# Training split, shuffled on every pass.
trainset = datasets.CIFAR10(
    root=data_dir,
    train=True,
    download=True,
    transform=train_transforms,
)
loader_train = DataLoader(dataset=trainset, batch_size=64, shuffle=True)

# Test split, iterated in its stored order.
cifar10_test = datasets.CIFAR10(
    root=data_dir,
    train=False,
    download=True,
    transform=test_transforms,
)
loader_test = DataLoader(dataset=cifar10_test, batch_size=64, shuffle=False)


Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz


  0%|          | 0/170498071 [00:00<?, ?it/s]

Extracting ./data/cifar-10-python.tar.gz to ./data
Files already downloaded and verified


In [None]:
# Set to False to force CPU execution even when CUDA is available.
USE_GPU = True
# Floating-point type that input batches are cast to.
dtype = torch.float32

# Use CUDA only when it is both requested and actually present.
device = torch.device('cuda' if (USE_GPU and torch.cuda.is_available()) else 'cpu')

# Number of training iterations between loss printouts.
print_every = 100
def check_accuracy(loader, model):
    """Evaluate ``model`` on every batch of ``loader`` and report top-1 accuracy.

    The split label that is printed is derived from the dataset behind the
    ``loader`` argument itself: a ``Subset`` or a dataset whose ``train``
    attribute is truthy is reported as the validation set, anything else
    (including datasets without a ``train`` attribute) as the test set.

    Inputs:
    - loader: Iterable of ``(x, y)`` batches, typically a DataLoader.
    - model: A PyTorch Module producing per-class scores of shape (N, C).

    Returns: The fraction of correctly classified samples as a Python float
    (0.0 when the loader yields no samples). The model is left in eval mode.
    """
    dataset = getattr(loader, 'dataset', None)
    is_subset = isinstance(dataset, torch.utils.data.dataset.Subset)
    if is_subset or getattr(dataset, 'train', False):
        print('Checking accuracy on validation set')
    else:
        print('Checking accuracy on test set')

    num_correct = 0
    num_samples = 0
    model.eval()  # set model to evaluation mode
    with torch.no_grad():
        for x, y in loader:
            x = x.to(device=device, dtype=dtype)  # move to device
            y = y.to(device=device, dtype=torch.long)
            scores = model(x)
            _, preds = scores.max(1)
            # .item() keeps the counter a plain Python int instead of a tensor.
            num_correct += (preds == y).sum().item()
            num_samples += preds.size(0)

    # Guard against an empty loader rather than dividing by zero.
    acc = float(num_correct) / num_samples if num_samples else 0.0
    print('Got %d / %d correct (%.2f)' % (num_correct, num_samples, 100 * acc))
    return acc

        

def train_part(model, optimizer, epochs=1):
    """
    Fit a model on the batches of the module-level ``loader_train``.

    Each batch is moved to the module-level ``device``, scored by the model,
    and the cross-entropy loss is back-propagated before one optimizer step.

    Inputs:
    - model: A PyTorch Module giving the model to train.
    - optimizer: An Optimizer object that updates the model's parameters.
    - epochs: (Optional) A Python integer giving the number of passes over
      ``loader_train``.

    Returns: Nothing. Prints the number of batches per epoch once, then the
    current loss every ``print_every`` iterations (skipped entirely when
    ``loader_train`` wraps a ``Subset``).
    """
    model = model.to(device=device)  # parameters follow the inputs to CPU/GPU
    print(len(loader_train))

    for epoch in range(epochs):
        for step, (inputs, targets) in enumerate(loader_train):
            model.train()  # make sure training-mode layers are active
            inputs = inputs.to(device=device, dtype=dtype)
            targets = targets.to(device=device, dtype=torch.long)

            loss = F.cross_entropy(model(inputs), targets)

            # Clear stale gradients, back-propagate, then update the weights.
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            wraps_subset = type(loader_train.dataset) is torch.utils.data.dataset.Subset
            if not wraps_subset and step % print_every == 0:
                print('Epoch: %d, Iteration %d, loss = %.4f' % (epoch, step, loss.item()))
                print()

In [None]:
# Size of the training split, printed for reference.
print(len(trainset))

# Learning rate handed to Adam below.
best_lr = 0.001

# Build the network and its optimizer, then train for 10 epochs.
model = ResNet18(ResidualBlock)
optimizer = optim.Adam(params=model.parameters(), lr=best_lr)
train_part(model, optimizer, epochs=10)

# Final test-set accuracy; left as the last expression so the cell displays it.
check_accuracy(loader_test, model)


50000
782
Epoch: 0, Iteration 0, loss = 3.6695

Epoch: 0, Iteration 100, loss = 1.4540

Epoch: 0, Iteration 200, loss = 1.2053

Epoch: 0, Iteration 300, loss = 1.2100

Epoch: 0, Iteration 400, loss = 1.3632

Epoch: 0, Iteration 500, loss = 1.3581

Epoch: 0, Iteration 600, loss = 0.9987

Epoch: 0, Iteration 700, loss = 1.1465

Epoch: 1, Iteration 0, loss = 0.9044

Epoch: 1, Iteration 100, loss = 0.8772

Epoch: 1, Iteration 200, loss = 0.8067

Epoch: 1, Iteration 300, loss = 0.6624

Epoch: 1, Iteration 400, loss = 0.9255

Epoch: 1, Iteration 500, loss = 0.8193

Epoch: 1, Iteration 600, loss = 0.7699

Epoch: 1, Iteration 700, loss = 1.1003

Epoch: 2, Iteration 0, loss = 0.6204

Epoch: 2, Iteration 100, loss = 0.4449

Epoch: 2, Iteration 200, loss = 0.5326

Epoch: 2, Iteration 300, loss = 0.4919

Epoch: 2, Iteration 400, loss = 0.7924

Epoch: 2, Iteration 500, loss = 0.5108

Epoch: 2, Iteration 600, loss = 0.7154

Epoch: 2, Iteration 700, loss = 0.5611

Epoch: 3, Iteration 0, loss = 0.4416

0.84