In [1]:
import torch
from torch import nn
from torchvision import datasets,models,transforms
import numpy as np
from tqdm import tqdm

In [2]:
batch_size=64
learning_rate=1e-3
num_epochs=5

In [3]:
device=torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device

device(type='cpu')

In [4]:
train_dataset=datasets.CIFAR10(root='../data',download=True,train=True,transform=transforms.ToTensor())
test_dataset=datasets.CIFAR10(root='../data',download=True,train=False,transform=transforms.ToTensor())

In [5]:
train_loader=torch.utils.data.DataLoader(train_dataset,shuffle=True,batch_size=batch_size)
test_loader=torch.utils.data.DataLoader(test_dataset,shuffle=True,batch_size=batch_size)

In [6]:
image,label=next(iter(train_loader))
image.shape

torch.Size([64, 3, 32, 32])

In [7]:
# transform在索引和进入到loader之后才会有效
# totensor函数执行两个功能：
#1.将0-255 -> 0-1：防止模型学不动
#2.将维度从(h,w,c)->(c,h,w):提高cuda算子的计算性能
train_dataset.data.shape

(50000, 32, 32, 3)

In [8]:
# transform在索引才会有效
image,label=train_dataset[0]
image.shape

torch.Size([3, 32, 32])

**VGG**

In [9]:
vgg=models.vgg16(pretrained=True)
vgg



VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1

In [10]:
from torchsummary import summary

In [11]:
summary(vgg,input_size=(3,224,224),device='cpu')

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 64, 224, 224]           1,792
              ReLU-2         [-1, 64, 224, 224]               0
            Conv2d-3         [-1, 64, 224, 224]          36,928
              ReLU-4         [-1, 64, 224, 224]               0
         MaxPool2d-5         [-1, 64, 112, 112]               0
            Conv2d-6        [-1, 128, 112, 112]          73,856
              ReLU-7        [-1, 128, 112, 112]               0
            Conv2d-8        [-1, 128, 112, 112]         147,584
              ReLU-9        [-1, 128, 112, 112]               0
        MaxPool2d-10          [-1, 128, 56, 56]               0
           Conv2d-11          [-1, 256, 56, 56]         295,168
             ReLU-12          [-1, 256, 56, 56]               0
           Conv2d-13          [-1, 256, 56, 56]         590,080
             ReLU-14          [-1, 256,

In [12]:
in_features=vgg.classifier[6].in_features
vgg.classifier[6]=nn.Linear(in_features,10)
vgg
vgg.to(device)

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1

In [13]:
summary(vgg,input_size=(3,224,224),device='cpu')

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 64, 224, 224]           1,792
              ReLU-2         [-1, 64, 224, 224]               0
            Conv2d-3         [-1, 64, 224, 224]          36,928
              ReLU-4         [-1, 64, 224, 224]               0
         MaxPool2d-5         [-1, 64, 112, 112]               0
            Conv2d-6        [-1, 128, 112, 112]          73,856
              ReLU-7        [-1, 128, 112, 112]               0
            Conv2d-8        [-1, 128, 112, 112]         147,584
              ReLU-9        [-1, 128, 112, 112]               0
        MaxPool2d-10          [-1, 128, 56, 56]               0
           Conv2d-11          [-1, 256, 56, 56]         295,168
             ReLU-12          [-1, 256, 56, 56]               0
           Conv2d-13          [-1, 256, 56, 56]         590,080
             ReLU-14          [-1, 256,

In [14]:
def get_mean_and_std(dataset):
    dataloader=torch.utils.data.DataLoader(dataset=dataset,batch_size=1,shuffle=True,num_workers=2)
    mean=torch.zeros(3)
    std=torch.zeros(3)
    for image,label in tqdm(dataloader):
        for i in range(3):
            mean[i]+=image[:,i,:,:].mean()
            std[i]+=image[:,i,:,:].std()
    mean.div_(len(dataset))
    std.div_(len(dataset))
    return mean,std

In [15]:
get_mean_and_std(train_dataset)

100%|█████████████████████████████████████████████████████████████████████| 50000/50000 [00:19<00:00, 2550.24it/s]


(tensor([0.4914, 0.4822, 0.4465]), tensor([0.2023, 0.1994, 0.2010]))

In [16]:
transform=transforms.Compose([
        #resize自动检测PIL的长和高的维度
        transforms.Resize(size=(224,224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.4914, 0.4822, 0.4465),std=(0.2023, 0.1994, 0.2010))
    ])

In [17]:
train_dataset=datasets.CIFAR10(root='../data',download=True,train=True,transform=transform)
test_dataset=datasets.CIFAR10(root='../data',download=True,train=False,transform=transform)

In [18]:
train_loader=torch.utils.data.DataLoader(train_dataset,shuffle=True,batch_size=batch_size)
test_loader=torch.utils.data.DataLoader(test_dataset,shuffle=True,batch_size=batch_size)

In [19]:
image,label=next(iter(train_loader))
image.shape

torch.Size([64, 3, 224, 224])

**Finetuning**

In [20]:
in_features=vgg.classifier[6].in_features
vgg.classifier[6]=nn.Linear(in_features,10)
vgg
vgg.to(device)

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1

In [21]:
summary(vgg,input_size=(3,224,224),device='cpu')

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 64, 224, 224]           1,792
              ReLU-2         [-1, 64, 224, 224]               0
            Conv2d-3         [-1, 64, 224, 224]          36,928
              ReLU-4         [-1, 64, 224, 224]               0
         MaxPool2d-5         [-1, 64, 112, 112]               0
            Conv2d-6        [-1, 128, 112, 112]          73,856
              ReLU-7        [-1, 128, 112, 112]               0
            Conv2d-8        [-1, 128, 112, 112]         147,584
              ReLU-9        [-1, 128, 112, 112]               0
        MaxPool2d-10          [-1, 128, 56, 56]               0
           Conv2d-11          [-1, 256, 56, 56]         295,168
             ReLU-12          [-1, 256, 56, 56]               0
           Conv2d-13          [-1, 256, 56, 56]         590,080
             ReLU-14          [-1, 256,

In [22]:
criterion=nn.CrossEntropyLoss()
optimizer=torch.optim.SGD(vgg.parameters(),lr=learning_rate,momentum=0.9,weight_decay=5e-4)
total_batch=len(train_loader)

In [None]:
for epoch in range(num_epochs):
    for batch_idx,(image,label) in enumerate(train_loader):
        image=image.to(device)
        lavel=label.to(device)
        out=vgg(image)
        loss=criterion(out,label)
        acc=(torch.argmax(out,axis=1)==label).sum().item()/label.shape[0]
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        print(f'epochs:{epoch+1}/{num_epochs},batch:{batch_idx+1}/{total_batch},loss:{loss.item():.4f}, acc:{acc}')

epochs:1/5,batch:1/782,loss:1.7035, acc:0.453125
epochs:1/5,batch:2/782,loss:1.6006, acc:0.46875
epochs:1/5,batch:3/782,loss:1.5334, acc:0.5625
epochs:1/5,batch:4/782,loss:1.5381, acc:0.4375
epochs:1/5,batch:5/782,loss:1.4030, acc:0.5625
epochs:1/5,batch:6/782,loss:1.4185, acc:0.546875
epochs:1/5,batch:7/782,loss:1.0546, acc:0.671875
epochs:1/5,batch:8/782,loss:1.0609, acc:0.671875
epochs:1/5,batch:9/782,loss:1.1741, acc:0.5625
