Cho 2 pretrained model trên 2 dataset CIFAR-10 và MNIST, finetune trên dataset mới FashionMNIST (có trong thư viện torchvision).\
# **Data augmentation**:
normalize ảnh từ scale [0, 255] về [-1,1], sử dụng ít nhất 2 trong số các transformations dưới đây cho dataset \
Random resized\
Center cropping\
Random vertical flipping\
Random horizontal flipping\
Các loại transformation khác có thể tham khảo ở đây https://pytorch.org/vision/master/transforms.html

In [1]:
import os
import time
import copy

import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
from torchvision import datasets, models, transforms

In [2]:
# Đ/n ransformations cho data augmentation
transform_train = transforms.Compose([
    transforms.RandomResizedCrop(28, scale=(0.8, 1.2)),
    transforms.CenterCrop(24),
    transforms.RandomVerticalFlip(),
    transforms.RandomHorizontalFlip(),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2),
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])

In [3]:
# Tạo DataLoader cho FashionMNIST với data augmentation
data_dir = './data'  # Thay đổi đường dẫn nếu cần thiết
batch_size = 64
train_dataset = datasets.FashionMNIST(root=data_dir, train = True, transform = transform_train, download=True)
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

In [4]:
# Đ/n ransformations cho data augmentation
transform_test = transforms.Compose([
    transforms.RandomResizedCrop(28, scale=(0.8, 1.2)),
    transforms.CenterCrop(24),
    transforms.RandomVerticalFlip(),
    transforms.RandomHorizontalFlip(),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2),
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])
test_dataset = datasets.FashionMNIST(root = data_dir, train = False, transform = transform_test, download = True)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

# **Fine-tuning**:
Cho mô hình MiniVGG như dưới, hãy load 2 pretrained models vào model MiniVGG ở dưới. Freeze self.features và train lớp cuối (self.classifier) của 2 pretrained models trên dataset FashionMNIST. Sau đó, train model MiniVGG from scratch trên tập FashionMNIST.

In [5]:
# Define model
class MiniVGG(nn.Module):
    def __init__(
        self,
        ):
        super(MiniVGG, self).__init__()
        self.features = nn.Sequential(
            nn.Conv2d(1, 64, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(64, 64, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),

            nn.Conv2d(64, 128, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(128, 128, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),

            nn.Conv2d(128, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )
        self.classifier = nn.Linear(256 * 3 * 3, 10)
        nn.init.normal_(self.classifier.weight, 0, 0.01)
        nn.init.constant_(self.classifier.bias, 0)

    def forward(
        self,
        x,
        ):
        x = self.features(x)
        x = torch.flatten(x, 1)
        x = self.classifier(x)
        return x

In [6]:
#checking for device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Create the model and define loss and optimizer
# Initialize the model and optimizer
model = MiniVGG()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [7]:
# Load Cifar-10 pretrained model
loaded_model  = torch.load('cifar10_mini_vgg.pth',map_location=torch.device('cpu'))
# Load pre-trained weights
model.load_state_dict(loaded_model)
for param in model.classifier.parameters():
    param.require_grad = False
for param in model.classifier.parameters():
    param.requires_grad = True

In [8]:
# Put the model to cuda if available
model = model.to(device)

epochs = 5
best_accuracy = 0.0

# Training loop
for epoch in range(epochs):
    model.train()
    train_accuracy=0.0
    train_loss=0.0
    for i, (inputs, labels) in enumerate(train_loader):
        inputs = inputs.to(device)
        labels = labels.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        
        train_loss += loss.item() * inputs.size(0)
        _,prediction=torch.max(outputs.data,1)
        train_accuracy+=int(torch.sum(prediction==labels.data))


    train_loss = train_loss / len(train_dataset)

    # Evaluation on testing dataset
    model.eval()

    test_accuracy=0.0
    for i, (images,labels) in enumerate(test_loader):
        if torch.cuda.is_available():
            images=Variable(images.cuda())
            labels=Variable(labels.cuda())

        outputs=model(images)
        _,prediction=torch.max(outputs.data,1)
        test_accuracy+=int(torch.sum(prediction==labels.data))

    test_accuracy = test_accuracy / len(test_dataset)


    print(f'Epoch [{epoch + 1}/{epochs}]: - Train Loss:{train_loss:.4f}  - Test_Accuracy: {test_accuracy:.4f}')

    #Save the best model
    if test_accuracy > best_accuracy:
        torch.save(model.state_dict(),'best_checkpoint.model')
        best_accuracy = test_accuracy

print() 

print("Best Acc: {:.4f}".format(best_accuracy))

Epoch [1/5]: - Train Loss:0.5435  - Test_Accuracy: 0.8469
Epoch [2/5]: - Train Loss:0.3819  - Test_Accuracy: 0.8674
Epoch [3/5]: - Train Loss:0.3480  - Test_Accuracy: 0.8758
Epoch [4/5]: - Train Loss:0.3248  - Test_Accuracy: 0.8740
Epoch [5/5]: - Train Loss:0.3089  - Test_Accuracy: 0.8812

Best Acc: 0.8812


In [9]:
# Train on MNIST pretrained model
model = MiniVGG()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Load pretrained weights
pretrained_model_weights = torch.load('mnist_mini_vgg.pth',map_location=torch.device('cpu'))
# Load pre-trained weights
model.load_state_dict(pretrained_model_weights)
for param in model.classifier.parameters():
    param.require_grad = False
for param in model.classifier.parameters():
    param.requires_grad = True
    
# Put the model to cuda if available
model = model.to(device)

# Training loop
epochs = 5
for epoch in range(epochs):
    model.train()
    train_accuracy=0.0
    train_loss=0.0
    for i, (inputs, labels) in enumerate(train_loader):
        inputs = inputs.to(device)
        labels = labels.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        
        train_loss += loss.item() * inputs.size(0)
        _,prediction=torch.max(outputs.data,1)
        train_accuracy+=int(torch.sum(prediction==labels.data))


    train_loss = train_loss / len(train_dataset)
    train_accuracy = train_accuracy / len(train_dataset)

    # Evaluation on testing dataset
    model.eval()

    test_accuracy=0.0
    for i, (images,labels) in enumerate(test_loader):
        if torch.cuda.is_available():
            images=Variable(images.cuda())
            labels=Variable(labels.cuda())

        outputs=model(images)
        _,prediction=torch.max(outputs.data,1)
        test_accuracy+=int(torch.sum(prediction==labels.data))

    test_accuracy = test_accuracy / len(test_dataset)


    print(f'Epoch [{epoch + 1}/{epochs}]: - Train Loss:{train_loss:.4f}  - Test_Accuracy: {test_accuracy:.4f}')

    #Save the best model
    if test_accuracy > best_accuracy:
        torch.save(model.state_dict(),'best_checkpoint.model')
        best_accuracy = test_accuracy

print()
print("Best Acc: {:.4f}".format(best_accuracy))

Epoch [1/5]: - Train Loss:0.5939  - Test_Accuracy: 0.8343
Epoch [2/5]: - Train Loss:0.4103  - Test_Accuracy: 0.8552
Epoch [3/5]: - Train Loss:0.3692  - Test_Accuracy: 0.8638
Epoch [4/5]: - Train Loss:0.3443  - Test_Accuracy: 0.8689
Epoch [5/5]: - Train Loss:0.3270  - Test_Accuracy: 0.8753

Best Acc: 0.8812


# Feature extractor:
Với model train from scratch, dùng get_graph_node_names() và create_feature_extractor từ thư viện torchvision.models.feature_extraction để in ra tên layer và weight của layer tương ứng

In [10]:
from torchvision.models.feature_extraction import get_graph_node_names
from torchvision.models.feature_extraction import create_feature_extractor
import matplotlib.pyplot as plt

In [11]:
train_nodes, val_nodes = get_graph_node_names(model)
print(train_nodes)

['x', 'features.0', 'features.1', 'features.2', 'features.3', 'features.4', 'features.5', 'features.6', 'features.7', 'features.8', 'features.9', 'features.10', 'features.11', 'features.12', 'features.13', 'features.14', 'flatten', 'classifier']


  torch.has_cuda,
  torch.has_cudnn,
  torch.has_mps,
  torch.has_mkldnn,


In [13]:
create_feature_extractor(model, train_return_nodes= train_nodes, eval_return_nodes= val_nodes)

MiniVGG(
  (features): Module(
    (0): Conv2d(1, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (classifier): Linear(in_features=2304, out_features=10, bias=True)
)

In [14]:
model.features[0].weight

Parameter containing:
tensor([[[[ 0.1789,  0.2818,  0.1133],
          [ 0.1632, -0.1582, -0.0233],
          [-0.2986, -0.2325,  0.1045]]],


        [[[ 0.0605, -0.1875,  0.2379],
          [ 0.2820, -0.3540, -0.1131],
          [ 0.3466, -0.3103,  0.0138]]],


        [[[ 0.3106, -0.3086, -0.1879],
          [-0.0352, -0.3174,  0.2216],
          [ 0.2336, -0.1717,  0.2935]]],


        [[[ 0.2665, -0.1507, -0.2782],
          [ 0.0646, -0.1715, -0.0378],
          [ 0.3592,  0.1795, -0.1527]]],


        [[[-0.0974, -0.1239,  0.1665],
          [-0.2577, -0.1411,  0.2315],
          [-0.1613,  0.1740,  0.1831]]],


        [[[ 0.1898,  0.1099,  0.0668],
          [-0.3016,  0.2496,  0.2150],
          [-0.3411, -0.3160,  0.2183]]],


        [[[-0.1961,  0.1626, -0.0047],
          [ 0.2176,  0.3377, -0.0160],
          [ 0.0019, -0.2053, -0.2903]]],


        [[[-0.0644,  0.1090, -0.0593],
          [ 0.1305,  0.0027,  0.2403],
          [-0.1884, -0.2643,  0.0780]]],


        [[