In [1]:
import torch
import torchvision 
import torch.nn.functional as F  
import torchvision.datasets as datasets  # Standard datasets
import torchvision.transforms as transforms
from torch import optim  # For optimizers SGD, Adam, etc.
from torch import nn  # All neural network modules
from torch.utils.data import DataLoader
from tqdm import tqdm # For nice progress bar!

In [2]:
target_device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [3]:
num_classes = 10
learning_rate = 1e-3
batch_size = 1024
num_epochs = 5

In [4]:
# Simple Identity class that let's input pass without changes
class Identity(nn.Module):
    def __init__(self):
        super(Identity, self).__init__()

    def forward(self, x):
        return x

In [19]:
# Load pretrain model & modify it
model = torchvision.models.vgg16(pretrained=True)

# If you want to do finetuning then set requires_grad = False
# Remove these two lines if you want to train entire model

In [20]:
# vanila way
# for param in model.parameters():
#    param.requires_grad = False

# freeze layers according to the number of layer
layer = 0
for param in model.parameters():
    if layer <= 30 :
        param.requires_grad = False
    layer = layer + 1

for k,v in model.named_parameters():
     if k!='XXX':
         v.requires_grad=False#固定参数
for k,v in model.named_parameters():
    if k!='xxx.weight' and k!='xxx.bias' :
            print(v.requires_grad)#理想状态下，所有值都是False

# Sometimes we will change datasets, so changing some parameter of ther network is necessary

In [21]:
model.avgpool = Identity() ## .avgpool is the name of a layer in VGG
model.classifier = nn.Sequential(
    nn.Linear(512, 100), nn.ReLU(), nn.Linear(100, num_classes)
)
model.to(target_device)

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1

# load dataset

In [22]:
train_dataset = datasets.CIFAR10(root="dataset/", train=True, transform=transforms.ToTensor(), download=True)
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)

Files already downloaded and verified


# Loss and optimizer

In [23]:
loss_func = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

In [24]:
for epoch in range(num_epochs):
    for batch_idx , ( data, targets ) in enumerate( tqdm( train_loader ) ):
        # Get data to cuda if possible
        data = data.to( device = target_device )
        targets = targets.to( device = target_device )

        # Get to correct shape
        # data.shape[0] usually is the number of examples in a batch
        # in this case, just set each sample to a long bar
        # data = data.reshape(data.shape[0], -1) this is done before fully connected layer
        # not in CNN

        # forward
        predictions = model(data)
        #targets are true labels
        loss = loss_func(predictions , targets)

        # backward
        optimizer.zero_grad() # set the gradients to zero, so that we are ready for the next loop
        loss.backward()

        # gradient descent or adam step
        optimizer.step()

100%|██████████████████████████████████████████████████████████████████████████████████| 49/49 [00:28<00:00,  1.74it/s]
100%|██████████████████████████████████████████████████████████████████████████████████| 49/49 [00:28<00:00,  1.72it/s]
100%|██████████████████████████████████████████████████████████████████████████████████| 49/49 [00:28<00:00,  1.72it/s]
100%|██████████████████████████████████████████████████████████████████████████████████| 49/49 [00:28<00:00,  1.72it/s]
100%|██████████████████████████████████████████████████████████████████████████████████| 49/49 [00:28<00:00,  1.72it/s]


In [15]:
def check_accuracy(loader, model):
    num_correct = 0
    num_samples = 0
    model.eval()

    with torch.no_grad():
        for x, y in loader:
            x = x.to(device=target_device)
            y = y.to(device=target_device)

            scores = model(x)
            _, predictions = scores.max(1)
            num_correct += (predictions == y).sum()
            num_samples += predictions.size(0)

    model.train()
    return num_correct/num_samples

print(f"Accuracy on training set: {check_accuracy(train_loader, model):.2f}")


Accuracy on training set: 0.63
