In [None]:
import torch
import torch.nn as nn
import torchvision
from torch.utils.data import DataLoader, random_split
import torchvision.transforms as transforms
from torchvision.models import vgg13
import matplotlib.pyplot as plt

In [None]:
# Training-time preprocessing: upscale the FashionMNIST images to 32x32,
# apply random flips as augmentation, convert to tensors and normalize the
# single grayscale channel with mean 0.5 / std 0.5.
# NOTE(review): vertical flips turn clothing items upside down; confirm this
# augmentation is actually wanted for FashionMNIST.
transform = transforms.Compose([
    transforms.Resize((32, 32)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomVerticalFlip(),
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,)),
])

# Evaluation-time preprocessing: identical resizing and normalization but
# WITHOUT random augmentation, so the reported accuracy is deterministic and
# measured on unmodified test images.
test_transform = transforms.Compose([
    transforms.Resize((32, 32)),
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,)),
])

# Download (if needed) and load the training and test splits
trainset = torchvision.datasets.FashionMNIST('~/.pytorch/F_MNIST_data/', download=True, train=True, transform=transform)
testset = torchvision.datasets.FashionMNIST('~/.pytorch/F_MNIST_data/', download=True, train=False, transform=test_transform)

# Shuffle only the training data; evaluation order does not affect accuracy,
# so the test loader is kept in a fixed order for reproducibility.
trainloader = torch.utils.data.DataLoader(trainset, batch_size=64, shuffle=True)
testloader = torch.utils.data.DataLoader(testset, batch_size=64, shuffle=False)

In [None]:
# Run on the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Path to the CIFAR-10 pre-trained checkpoint on Google Drive
model_weights_path = '/content/drive/MyDrive/Colab Notebooks/DeepLearning/cifar10_mini_vgg.pth'

# Build an untrained VGG13 and replace its first convolution so the network
# accepts 1-channel (grayscale) input instead of 3-channel RGB.
model_cifar = vgg13(pretrained=False)
model_cifar.features[0] = torch.nn.Conv2d(1, 64, kernel_size=3, stride=1, padding=1)

# map_location makes a checkpoint that was saved from a GPU loadable on a
# CPU-only runtime (without it torch.load tries to restore CUDA tensors).
cifar_state_dict = torch.load(model_weights_path, map_location=device)

# strict=False only loads the keys that match and reports the rest.
# NOTE(review): the recorded result of this call lists features.15-22 and all
# classifier.* weights as missing, so those VGG13 layers keep their random
# initialization; confirm that VGG13 is the intended architecture for this
# checkpoint.
model_cifar.load_state_dict(cifar_state_dict, strict=False)



_IncompatibleKeys(missing_keys=['features.15.weight', 'features.15.bias', 'features.17.weight', 'features.17.bias', 'features.20.weight', 'features.20.bias', 'features.22.weight', 'features.22.bias', 'classifier.0.weight', 'classifier.0.bias', 'classifier.3.weight', 'classifier.3.bias', 'classifier.6.weight', 'classifier.6.bias'], unexpected_keys=['classifier.weight', 'classifier.bias'])

In [None]:
num_classes = 10  # FashionMNIST has 10 classes
# Swap the final classifier layer for a fresh layer with one output per class.
model_cifar.classifier[-1] = nn.Linear(4096, num_classes)
# Freeze everything except the classifier: a parameter stays trainable only
# when it belongs to the `classifier` sub-module (all of its layers, not just
# the newly added one).
for param_name, param in model_cifar.named_parameters():
    param.requires_grad = param_name.startswith('classifier.')

In [None]:
model_cifar.to(device)
# Only the parameters that still require gradients are handed to the
# optimizer; frozen parameters never receive updates anyway.
trainable_params = [p for p in model_cifar.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(trainable_params, lr=0.001, momentum=0.9)
criterion = nn.CrossEntropyLoss()

# Training loop over `trainloader`, with an evaluation pass over `testloader`
# at the end of every epoch.
n_total_batches = len(trainloader)
num_epochs = 5

for epoch in range(num_epochs):
    model_cifar.train()
    # Sample-weighted running sum so the epoch summary reports the mean
    # training loss instead of the (noisy) loss of the last mini-batch.
    loss_sum = 0.0
    samples_seen = 0
    for i, (inputs, labels) in enumerate(trainloader):
        inputs = inputs.to(device)
        labels = labels.to(device)
        optimizer.zero_grad()
        outputs = model_cifar(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        loss_sum += loss.item() * labels.size(0)
        samples_seen += labels.size(0)

        if (i+1) % 100 == 0:
            print (f'Epoch [{epoch+1}/{num_epochs}], Batch [{i+1}/{n_total_batches}], Loss: {loss.item():.4f}')
    epoch_loss = loss_sum / samples_seen

    # Validation: eval mode and no gradient tracking while scoring the test set
    model_cifar.eval()
    with torch.no_grad():
        total_correct = 0
        total_samples = 0
        for inputs, labels in testloader:
            inputs = inputs.to(device)
            labels = labels.to(device)
            outputs = model_cifar(inputs)
            # The predicted class is the index of the largest logit
            _, predicted = torch.max(outputs, 1)
            total_correct += (predicted == labels).sum().item()
            total_samples += labels.size(0)

        accuracy = total_correct / total_samples
        # "Loss" is the mean training loss of this epoch; "Accuracy" is
        # measured on the test set.
        print(f'Epoch [{epoch + 1}/{num_epochs}], Loss: {epoch_loss:.4f}, Accuracy: {accuracy:.4f}')
        print('-' * 40)


Epoch [1/5], Batch [100/938], Loss: 1.6688
Epoch [1/5], Batch [200/938], Loss: 1.2877
Epoch [1/5], Batch [300/938], Loss: 1.0428
Epoch [1/5], Batch [400/938], Loss: 0.9650
Epoch [1/5], Batch [500/938], Loss: 0.8875
Epoch [1/5], Batch [600/938], Loss: 0.8163
Epoch [1/5], Batch [700/938], Loss: 0.9374
Epoch [1/5], Batch [800/938], Loss: 0.9274
Epoch [1/5], Batch [900/938], Loss: 0.8232
Epoch [1/5], Loss: 0.7891, Accuracy: 0.7554
----------------------------------------
Epoch [2/5], Batch [100/938], Loss: 0.8531
Epoch [2/5], Batch [200/938], Loss: 0.7039
Epoch [2/5], Batch [300/938], Loss: 0.7194
Epoch [2/5], Batch [400/938], Loss: 0.7951
Epoch [2/5], Batch [500/938], Loss: 0.6178
Epoch [2/5], Batch [600/938], Loss: 0.5953
Epoch [2/5], Batch [700/938], Loss: 0.6425
Epoch [2/5], Batch [800/938], Loss: 0.9206
Epoch [2/5], Batch [900/938], Loss: 0.5296
Epoch [2/5], Loss: 0.3505, Accuracy: 0.7778
----------------------------------------
Epoch [3/5], Batch [100/938], Loss: 0.6945
Epoch [3/5], 

In [None]:
# Path to the MNIST pre-trained checkpoint on Google Drive
model_weights_path = '/content/drive/MyDrive/Colab Notebooks/DeepLearning/mnist_mini_vgg.pth'

# Build an untrained VGG13 and replace its first convolution so the network
# accepts 1-channel (grayscale) input.
model_mnist = vgg13(pretrained=False)
model_mnist.features[0] = torch.nn.Conv2d(1, 64, kernel_size=3, stride=1, padding=1)

# map_location makes a checkpoint that was saved from a GPU loadable on a
# CPU-only runtime.
mnist_state_dict = torch.load(model_weights_path, map_location=device)
# strict=False silently skips keys that do not match, so print the result to
# make any missing / unexpected keys visible instead of discarding it.
# NOTE(review): layers listed as missing keep their random initialization;
# confirm that VGG13 is the intended architecture for this checkpoint.
mnist_load_result = model_mnist.load_state_dict(mnist_state_dict, strict=False)
print(mnist_load_result)

num_classes = 10  # FashionMNIST has 10 classes
# Swap the final classifier layer for a fresh layer with one output per class.
model_mnist.classifier[-1] = nn.Linear(4096, num_classes)
# Freeze every parameter, then re-enable gradients for the whole classifier
# (all of its layers, not only the newly added one).
for param in model_mnist.parameters():
    param.requires_grad = False
for param in model_mnist.classifier.parameters():
    param.requires_grad = True

In [None]:
model_mnist.to(device)
# Only the parameters that still require gradients are handed to the
# optimizer; frozen parameters never receive updates anyway.
trainable_params = [p for p in model_mnist.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(trainable_params, lr=0.001, momentum=0.9)
criterion = nn.CrossEntropyLoss()

# Training loop over `trainloader`, with an evaluation pass over `testloader`
# at the end of every epoch.
n_total_batches = len(trainloader)
num_epochs = 5

for epoch in range(num_epochs):
    model_mnist.train()
    # Sample-weighted running sum so the epoch summary reports the mean
    # training loss instead of the (noisy) loss of the last mini-batch.
    loss_sum = 0.0
    samples_seen = 0
    for i, (inputs, labels) in enumerate(trainloader):
        inputs = inputs.to(device)
        labels = labels.to(device)
        optimizer.zero_grad()
        outputs = model_mnist(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        loss_sum += loss.item() * labels.size(0)
        samples_seen += labels.size(0)

        if (i+1) % 100 == 0:
            print (f'Epoch [{epoch+1}/{num_epochs}], Batch [{i+1}/{n_total_batches}], Loss: {loss.item():.4f}')
    epoch_loss = loss_sum / samples_seen

    # Validation: eval mode and no gradient tracking while scoring the test set
    model_mnist.eval()
    with torch.no_grad():
        total_correct = 0
        total_samples = 0
        for inputs, labels in testloader:
            inputs = inputs.to(device)
            labels = labels.to(device)
            outputs = model_mnist(inputs)
            # The predicted class is the index of the largest logit
            _, predicted = torch.max(outputs, 1)
            total_correct += (predicted == labels).sum().item()
            total_samples += labels.size(0)

        accuracy = total_correct / total_samples
        # "Loss" is the mean training loss of this epoch; "Accuracy" is
        # measured on the test set.
        print(f'Epoch [{epoch + 1}/{num_epochs}], Loss: {epoch_loss:.4f}, Accuracy: {accuracy:.4f}')
        print('-' * 40)


Epoch [1/5], Batch [100/938], Loss: 1.8341
Epoch [1/5], Batch [200/938], Loss: 1.3627
Epoch [1/5], Batch [300/938], Loss: 1.1587
Epoch [1/5], Batch [400/938], Loss: 1.2188
Epoch [1/5], Batch [500/938], Loss: 0.7901
Epoch [1/5], Batch [600/938], Loss: 1.1799
Epoch [1/5], Batch [700/938], Loss: 0.7516
Epoch [1/5], Batch [800/938], Loss: 0.8587
Epoch [1/5], Batch [900/938], Loss: 0.9752
Epoch [1/5], Loss: 0.9426, Accuracy: 0.7234
----------------------------------------
Epoch [2/5], Batch [100/938], Loss: 0.8981
Epoch [2/5], Batch [200/938], Loss: 0.7488
Epoch [2/5], Batch [300/938], Loss: 0.7884
Epoch [2/5], Batch [400/938], Loss: 0.7252
Epoch [2/5], Batch [500/938], Loss: 0.6263
Epoch [2/5], Batch [600/938], Loss: 0.7974
Epoch [2/5], Batch [700/938], Loss: 0.9739
Epoch [2/5], Batch [800/938], Loss: 0.6980
Epoch [2/5], Batch [900/938], Loss: 0.9488
Epoch [2/5], Loss: 0.7250, Accuracy: 0.7526
----------------------------------------
Epoch [3/5], Batch [100/938], Loss: 0.7779
Epoch [3/5], 

In [None]:
class MiniVGG(nn.Module):
    """Small VGG-style CNN for single-channel images.

    The feature extractor is three stages of (conv3x3 -> ReLU -> conv3x3 ->
    ReLU -> 2x2 max-pool) with 64, 128 and 256 channels. Each pooling step
    halves the spatial size, and the classifier expects a 256 x 4 x 4 feature
    map, so the network is meant for 32x32 inputs.

    Args:
        num_classes: Number of output logits produced by the classifier.
    """

    def __init__(self, num_classes= 10):
        super().__init__()
        self.features = nn.Sequential(
            # Stage 1: 1 -> 64 channels
            nn.Conv2d(in_channels= 1, out_channels= 64, kernel_size= (3,3), stride= (1,1), padding= 1),
            nn.ReLU(),
            nn.Conv2d(in_channels= 64, out_channels= 64, kernel_size= (3,3), stride=(1,1), padding= 1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size= (2,2), stride= (2,2)),

            # Stage 2: 64 -> 128 channels
            nn.Conv2d(in_channels= 64, out_channels= 128, kernel_size= (3,3), stride= (1,1), padding= 1),
            nn.ReLU(),
            nn.Conv2d(in_channels= 128, out_channels= 128, kernel_size= (3,3), stride=(1,1), padding= 1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size= (2,2), stride= (2,2)),

            # Stage 3: 128 -> 256 channels
            nn.Conv2d(in_channels= 128, out_channels= 256, kernel_size= (3,3), stride= (1,1), padding= 1),
            nn.ReLU(),
            nn.Conv2d(in_channels= 256, out_channels= 256, kernel_size= (3,3), stride=(1,1), padding= 1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size= (2,2), stride= (2,2)))

        # Honour the num_classes argument (it was previously ignored and the
        # output size was hard-coded to 10).
        self.classifier = nn.Linear(256 * 4 * 4, num_classes)
        # Small-variance normal weights and zero bias for the output layer
        nn.init.normal_(self.classifier.weight, 0, 0.01)
        nn.init.constant_(self.classifier.bias, 0)

    def forward(self, x):
        """Return class logits of shape (batch, num_classes) for input batch x."""
        x = self.features(x)
        # Flatten every sample's feature map into a vector for the linear layer
        x = x.reshape(x.shape[0], -1)
        x = self.classifier(x)
        return x

In [None]:
# Fresh MiniVGG trained from scratch (no pre-trained weights, nothing frozen)
model_HW3 = MiniVGG()

model_HW3.to(device)
optimizer = torch.optim.SGD(model_HW3.parameters(), lr=0.001, momentum=0.9)
criterion = nn.CrossEntropyLoss()

# Training loop over `trainloader`, with an evaluation pass over `testloader`
# at the end of every epoch.
n_total_batches = len(trainloader)
num_epochs = 5

for epoch in range(num_epochs):
    model_HW3.train()
    # Sample-weighted running sum so the epoch summary reports the mean
    # training loss instead of the (noisy) loss of the last mini-batch.
    loss_sum = 0.0
    samples_seen = 0
    for i, (inputs, labels) in enumerate(trainloader):
        inputs = inputs.to(device)
        labels = labels.to(device)
        optimizer.zero_grad()
        outputs = model_HW3(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        loss_sum += loss.item() * labels.size(0)
        samples_seen += labels.size(0)

        if (i+1) % 100 == 0:
            print (f'Epoch [{epoch+1}/{num_epochs}], Batch [{i+1}/{n_total_batches}], Loss: {loss.item():.4f}')
    epoch_loss = loss_sum / samples_seen

    # Validation: eval mode and no gradient tracking while scoring the test set
    model_HW3.eval()
    with torch.no_grad():
        total_correct = 0
        total_samples = 0
        for inputs, labels in testloader:
            inputs = inputs.to(device)
            labels = labels.to(device)
            outputs = model_HW3(inputs)
            # The predicted class is the index of the largest logit
            _, predicted = torch.max(outputs, 1)
            total_correct += (predicted == labels).sum().item()
            total_samples += labels.size(0)

        accuracy = total_correct / total_samples
        # "Loss" is the mean training loss of this epoch; "Accuracy" is
        # measured on the test set.
        print(f'Epoch [{epoch + 1}/{num_epochs}], Loss: {epoch_loss:.4f}, Accuracy: {accuracy:.4f}')
        print('-' * 40)


Epoch [1/5], Batch [100/938], Loss: 2.3025
Epoch [1/5], Batch [200/938], Loss: 2.3000
Epoch [1/5], Batch [300/938], Loss: 2.3002
Epoch [1/5], Batch [400/938], Loss: 2.2983
Epoch [1/5], Batch [500/938], Loss: 2.2927
Epoch [1/5], Batch [600/938], Loss: 2.2869
Epoch [1/5], Batch [700/938], Loss: 2.2326
Epoch [1/5], Batch [800/938], Loss: 1.4566
Epoch [1/5], Batch [900/938], Loss: 1.2200
Epoch [1/5], Loss: 1.2950, Accuracy: 0.5998
----------------------------------------
Epoch [2/5], Batch [100/938], Loss: 0.9145
Epoch [2/5], Batch [200/938], Loss: 0.8455
Epoch [2/5], Batch [300/938], Loss: 0.6973
Epoch [2/5], Batch [400/938], Loss: 0.7478
Epoch [2/5], Batch [500/938], Loss: 0.8678
Epoch [2/5], Batch [600/938], Loss: 0.9358
Epoch [2/5], Batch [700/938], Loss: 0.4953
Epoch [2/5], Batch [800/938], Loss: 0.5487
Epoch [2/5], Batch [900/938], Loss: 0.5761
Epoch [2/5], Loss: 0.4867, Accuracy: 0.7534
----------------------------------------
Epoch [3/5], Batch [100/938], Loss: 0.5639
Epoch [3/5], 

Mô hình pretrain trên CIFAR-10 cho accuracy cao nhất sau 5 epochs. Nguyên nhân có thể là do dataset CIFAR-10 có nhiều feature gần giống với dataset FashionMNIST hơn so với dataset MNIST. Tuy nhiên, nếu train thêm nhiều epochs nữa thì model MiniVGG train from scratch sẽ tốt hơn 2 model pretrain, vì tất cả các layer của nó đều được cập nhật nên sẽ fit với dataset FashionMNIST tốt hơn. Với 2 model pretrain, các layer trích xuất đặc trưng (features) đã bị freeze và chỉ phần classifier được train, do vậy dù train thêm nhiều epochs thì các layer bị freeze này cũng sẽ không học được thêm điều gì.

In [None]:
from torchvision.models.feature_extraction import (
    create_feature_extractor,
    get_graph_node_names,
)

# get_graph_node_names returns two lists of traced node names for the model:
# one for train mode and one for eval mode.
node_names = get_graph_node_names(model_HW3)
train_nodes, eval_nodes = node_names

In [None]:
# Display the node names that get_graph_node_names reported for train mode
train_nodes

['x',
 'features.0',
 'features.1',
 'features.2',
 'features.3',
 'features.4',
 'features.5',
 'features.6',
 'features.7',
 'features.8',
 'features.9',
 'features.10',
 'features.11',
 'features.12',
 'features.13',
 'features.14',
 'getattr',
 'getitem',
 'reshape',
 'classifier']

In [None]:
# Build a feature extractor over model_HW3 that returns every traced node,
# using the train/eval node lists computed earlier. Leaving the call as the
# last expression makes the notebook display the resulting module.
create_feature_extractor(
    model_HW3,
    train_return_nodes=train_nodes,
    eval_return_nodes=eval_nodes,
)

MiniVGG(
  (features): Module(
    (0): Conv2d(1, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU()
    (4): MaxPool2d(kernel_size=(2, 2), stride=(2, 2), padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU()
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU()
    (9): MaxPool2d(kernel_size=(2, 2), stride=(2, 2), padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU()
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU()
    (14): MaxPool2d(kernel_size=(2, 2), stride=(2, 2), padding=0, dilation=1, ceil_mode=False)
  )
  (classifier): Linear(in_features=4096, out_features=10, bias=True)
)

In [None]:
# Inspect the weight tensor of the first convolution layer (features[0])
model_HW3.features[0].weight

Parameter containing:
tensor([[[[-0.2087, -0.2866, -0.3324],
          [-0.0860,  0.0231,  0.2356],
          [ 0.3145,  0.0366, -0.0483]]],


        [[[ 0.2092, -0.0196, -0.1657],
          [-0.1756, -0.3085,  0.2433],
          [-0.2368,  0.1833, -0.0705]]],


        [[[ 0.0116,  0.0642,  0.0187],
          [ 0.3022, -0.3126, -0.2176],
          [-0.2947,  0.1380,  0.2321]]],


        [[[-0.0561,  0.2840,  0.2432],
          [ 0.2430, -0.1524, -0.2603],
          [-0.0970, -0.2330, -0.0209]]],


        [[[-0.2607,  0.1875, -0.2858],
          [-0.0177,  0.2444, -0.0772],
          [-0.1152, -0.1637, -0.2856]]],


        [[[-0.1757, -0.2775,  0.3470],
          [-0.2766, -0.0583, -0.0941],
          [ 0.0417,  0.1752, -0.2729]]],


        [[[ 0.2849, -0.2380,  0.0088],
          [-0.2949,  0.0592,  0.1524],
          [-0.1234, -0.2475,  0.0695]]],


        [[[ 0.0557,  0.2448, -0.2138],
          [-0.3183, -0.0678, -0.1060],
          [ 0.2609, -0.1744, -0.2714]]],


        [[