In [None]:
import torch
import torch.nn as nn
import torchvision
from torch.utils.data import DataLoader, random_split
import torchvision.transforms as transforms
from torchvision.models import vgg13
import matplotlib.pyplot as plt

In [None]:
transform = transforms.Compose([
    transforms.Resize((28,28)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomVerticalFlip(),
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])

# Download and load the training data
trainset = torchvision.datasets.FashionMNIST('~/.pytorch/F_MNIST_data/', download=True, train=True, transform=transform)
testset = torchvision.datasets.FashionMNIST('~/.pytorch/F_MNIST_data/', download=True, train=False, transform=transform)

trainloader = torch.utils.data.DataLoader(trainset, batch_size=64, shuffle=True)
testloader = torch.utils.data.DataLoader(testset, batch_size=64, shuffle=True)

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz to /root/.pytorch/F_MNIST_data/FashionMNIST/raw/train-images-idx3-ubyte.gz


100%|██████████| 26421880/26421880 [00:01<00:00, 16185612.10it/s]


Extracting /root/.pytorch/F_MNIST_data/FashionMNIST/raw/train-images-idx3-ubyte.gz to /root/.pytorch/F_MNIST_data/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz to /root/.pytorch/F_MNIST_data/FashionMNIST/raw/train-labels-idx1-ubyte.gz


100%|██████████| 29515/29515 [00:00<00:00, 300116.81it/s]


Extracting /root/.pytorch/F_MNIST_data/FashionMNIST/raw/train-labels-idx1-ubyte.gz to /root/.pytorch/F_MNIST_data/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz to /root/.pytorch/F_MNIST_data/FashionMNIST/raw/t10k-images-idx3-ubyte.gz


100%|██████████| 4422102/4422102 [00:00<00:00, 4933676.86it/s]


Extracting /root/.pytorch/F_MNIST_data/FashionMNIST/raw/t10k-images-idx3-ubyte.gz to /root/.pytorch/F_MNIST_data/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz to /root/.pytorch/F_MNIST_data/FashionMNIST/raw/t10k-labels-idx1-ubyte.gz


100%|██████████| 5148/5148 [00:00<00:00, 16686458.26it/s]

Extracting /root/.pytorch/F_MNIST_data/FashionMNIST/raw/t10k-labels-idx1-ubyte.gz to /root/.pytorch/F_MNIST_data/FashionMNIST/raw






In [None]:
class MiniVGG(nn.Module):
    def __init__(self, num_classes= 10):
        super(MiniVGG, self).__init__()
        self.features = nn.Sequential(
            nn.Conv2d(in_channels= 1, out_channels= 64, kernel_size= (3,3), stride= (1,1), padding= 1),
            nn.ReLU(),
            nn.Conv2d(in_channels= 64, out_channels= 64, kernel_size= (3,3), stride=(1,1), padding= 1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size= (2,2), stride= (2,2)),

            nn.Conv2d(in_channels= 64, out_channels= 128, kernel_size= (3,3), stride= (1,1), padding= 1),
            nn.ReLU(),
            nn.Conv2d(in_channels= 128, out_channels= 128, kernel_size= (3,3), stride=(1,1), padding= 1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size= (2,2), stride= (2,2)),

            nn.Conv2d(in_channels= 128, out_channels= 256, kernel_size= (3,3), stride= (1,1), padding= 1),
            nn.ReLU(),
            nn.Conv2d(in_channels= 256, out_channels= 256, kernel_size= (3,3), stride=(1,1), padding= 1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size= (2,2), stride= (2,2)))

        self.classifier = nn.Linear(256 * 3 * 3, 10)
        nn.init.normal_(self.classifier.weight, 0, 0.01)
        nn.init.constant_(self.classifier.bias, 0)

    def forward(self, x):
        x = self.features(x)
        x = torch.flatten(x, 1)
        x = self.classifier(x)
        return x

In [None]:
from torchvision.models import vgg13
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Specify the path to the pre-trained model on Google Drive
model_weights_path = '/content/drive/MyDrive/Colab Notebooks/DeepLearning/cifar10_mini_vgg.pth'

# Load the VGG13 model
model_cifar = MiniVGG() #vgg13(pretrained=False)
# model_cifar.features[0] = torch.nn.Conv2d(1, 64, kernel_size=3, stride=1, padding=1)
# Load pre-trained weights
model_cifar.load_state_dict(torch.load(model_weights_path), strict=False)

<All keys matched successfully>

In [None]:
# num_classes = 10  # FashionMNIST has 10 classes
# model_cifar.classifier[-1] = nn.Linear(4096, num_classes)
# Freeze the Pre-Trained Model Layers and unfreeze the last layer
for param in model_cifar.parameters():
    param.requires_grad = False
for param in model_cifar.classifier.parameters():
    param.requires_grad = True

In [None]:
model_cifar.to(device)
# Example optimizer and criterion
optimizer = torch.optim.SGD(model_cifar.parameters(), lr=0.001, momentum=0.9)
criterion = nn.CrossEntropyLoss()

# Training loop
# Assuming you have your train_loader and test_loader ready
n_total_batches = len(trainloader)
num_epochs = 5

for epoch in range(num_epochs):
    model_cifar.train()
    for i, (inputs, labels) in enumerate(trainloader):
        inputs = inputs.to(device)
        labels = labels.to(device)
        optimizer.zero_grad()
        outputs = model_cifar(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        if (i+1) % 100 == 0:
            print (f'Epoch [{epoch+1}/{num_epochs}], Batch [{i+1}/{n_total_batches}], Loss: {loss.item():.4f}')
    # Validation
    model_cifar.eval()
    with torch.no_grad():
        total_correct = 0
        total_samples = 0
        for inputs, labels in testloader:
            inputs = inputs.to(device)
            labels = labels.to(device)
            outputs = model_cifar(inputs)
            _, predicted = torch.max(outputs, 1)
            total_correct += (predicted == labels).sum().item()
            total_samples += labels.size(0)

        accuracy = total_correct / total_samples
        print(f'Epoch [{epoch + 1}/{num_epochs}], Loss: {loss.item():.4f}, Accuracy: {accuracy:.4f}')
        print('-' * 40)


Epoch [1/5], Batch [100/938], Loss: 1.8142
Epoch [1/5], Batch [200/938], Loss: 1.1092
Epoch [1/5], Batch [300/938], Loss: 0.8716
Epoch [1/5], Batch [400/938], Loss: 0.4554
Epoch [1/5], Batch [500/938], Loss: 0.6518
Epoch [1/5], Batch [600/938], Loss: 0.7709
Epoch [1/5], Batch [700/938], Loss: 0.8202
Epoch [1/5], Batch [800/938], Loss: 0.5456
Epoch [1/5], Batch [900/938], Loss: 0.6313
Epoch [1/5], Loss: 0.8541, Accuracy: 0.8307
----------------------------------------
Epoch [2/5], Batch [100/938], Loss: 0.6013
Epoch [2/5], Batch [200/938], Loss: 0.2657
Epoch [2/5], Batch [300/938], Loss: 0.3865
Epoch [2/5], Batch [400/938], Loss: 0.4239
Epoch [2/5], Batch [500/938], Loss: 0.4479
Epoch [2/5], Batch [600/938], Loss: 0.6449
Epoch [2/5], Batch [700/938], Loss: 0.4650
Epoch [2/5], Batch [800/938], Loss: 0.4998
Epoch [2/5], Batch [900/938], Loss: 0.2791
Epoch [2/5], Loss: 0.3730, Accuracy: 0.8495
----------------------------------------
Epoch [3/5], Batch [100/938], Loss: 0.5291
Epoch [3/5], 

In [None]:
model_weights_path = '/content/drive/MyDrive/Colab Notebooks/DeepLearning/mnist_mini_vgg.pth'

# Load the VGG13 model
model_mnist = MiniVGG() # vgg13(pretrained=False)
# model_mnist.features[0] = torch.nn.Conv2d(1, 64, kernel_size=3, stride=1, padding=1)
# Load pre-trained weights
model_mnist.load_state_dict(torch.load(model_weights_path), strict=False)

# num_classes = 10  # FashionMNIST has 10 classes
# model_mnist.classifier[-1] = nn.Linear(4096, num_classes)
# Freeze the Pre-Trained Model Layers and unfreeze the last layer
for param in model_mnist.parameters():
    param.requires_grad = False
for param in model_mnist.classifier.parameters():
    param.requires_grad = True

In [None]:
model_mnist.to(device)
# Example optimizer and criterion
optimizer = torch.optim.SGD(model_mnist.parameters(), lr=0.001, momentum=0.9)
criterion = nn.CrossEntropyLoss()

# Training loop
# Assuming you have your train_loader and test_loader ready
n_total_batches = len(trainloader)
num_epochs = 5

for epoch in range(num_epochs):
    model_mnist.train()
    for i, (inputs, labels) in enumerate(trainloader):
        inputs = inputs.to(device)
        labels = labels.to(device)
        optimizer.zero_grad()
        outputs = model_mnist(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        if (i+1) % 100 == 0:
            print (f'Epoch [{epoch+1}/{num_epochs}], Batch [{i+1}/{n_total_batches}], Loss: {loss.item():.4f}')
    # Validation
    model_mnist.eval()
    with torch.no_grad():
        total_correct = 0
        total_samples = 0
        for inputs, labels in testloader:
            inputs = inputs.to(device)
            labels = labels.to(device)
            outputs = model_mnist(inputs)
            _, predicted = torch.max(outputs, 1)
            total_correct += (predicted == labels).sum().item()
            total_samples += labels.size(0)

        accuracy = total_correct / total_samples
        print(f'Epoch [{epoch + 1}/{num_epochs}], Loss: {loss.item():.4f}, Accuracy: {accuracy:.4f}')
        print('-' * 40)


Epoch [1/5], Batch [100/938], Loss: 1.3604
Epoch [1/5], Batch [200/938], Loss: 0.6518
Epoch [1/5], Batch [300/938], Loss: 0.7750
Epoch [1/5], Batch [400/938], Loss: 0.6119
Epoch [1/5], Batch [500/938], Loss: 0.5567
Epoch [1/5], Batch [600/938], Loss: 1.0333
Epoch [1/5], Batch [700/938], Loss: 0.6334
Epoch [1/5], Batch [800/938], Loss: 0.7004
Epoch [1/5], Batch [900/938], Loss: 0.5256
Epoch [1/5], Loss: 0.9660, Accuracy: 0.7869
----------------------------------------
Epoch [2/5], Batch [100/938], Loss: 0.8204
Epoch [2/5], Batch [200/938], Loss: 0.6716
Epoch [2/5], Batch [300/938], Loss: 0.5600
Epoch [2/5], Batch [400/938], Loss: 0.6921
Epoch [2/5], Batch [500/938], Loss: 0.5475
Epoch [2/5], Batch [600/938], Loss: 0.5556
Epoch [2/5], Batch [700/938], Loss: 0.5907
Epoch [2/5], Batch [800/938], Loss: 0.6066
Epoch [2/5], Batch [900/938], Loss: 0.5051
Epoch [2/5], Loss: 0.5854, Accuracy: 0.8120
----------------------------------------
Epoch [3/5], Batch [100/938], Loss: 0.5291
Epoch [3/5], 

In [None]:
model_HW3 = MiniVGG()

model_HW3.to(device)
# Example optimizer and criterion
optimizer = torch.optim.SGD(model_HW3.parameters(), lr=0.001, momentum=0.9)
criterion = nn.CrossEntropyLoss()

# Training loop
# Assuming you have your train_loader and test_loader ready
n_total_batches = len(trainloader)
num_epochs = 5

for epoch in range(num_epochs):
    model_HW3.train()
    for i, (inputs, labels) in enumerate(trainloader):
        inputs = inputs.to(device)
        labels = labels.to(device)
        optimizer.zero_grad()
        outputs = model_HW3(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        if (i+1) % 100 == 0:
            print (f'Epoch [{epoch+1}/{num_epochs}], Batch [{i+1}/{n_total_batches}], Loss: {loss.item():.4f}')
    # Validation
    model_HW3.eval()
    with torch.no_grad():
        total_correct = 0
        total_samples = 0
        for inputs, labels in testloader:
            inputs = inputs.to(device)
            labels = labels.to(device)
            outputs = model_HW3(inputs)
            _, predicted = torch.max(outputs, 1)
            total_correct += (predicted == labels).sum().item()
            total_samples += labels.size(0)

        accuracy = total_correct / total_samples
        print(f'Epoch [{epoch + 1}/{num_epochs}], Loss: {loss.item():.4f}, Accuracy: {accuracy:.4f}')
        print('-' * 40)


Epoch [1/5], Batch [100/938], Loss: 2.3021
Epoch [1/5], Batch [200/938], Loss: 2.3012
Epoch [1/5], Batch [300/938], Loss: 2.3002
Epoch [1/5], Batch [400/938], Loss: 2.2969
Epoch [1/5], Batch [500/938], Loss: 2.2951
Epoch [1/5], Batch [600/938], Loss: 2.2888
Epoch [1/5], Batch [700/938], Loss: 2.2649
Epoch [1/5], Batch [800/938], Loss: 2.0005
Epoch [1/5], Batch [900/938], Loss: 1.2626
Epoch [1/5], Loss: 1.1689, Accuracy: 0.4848
----------------------------------------
Epoch [2/5], Batch [100/938], Loss: 0.8911
Epoch [2/5], Batch [200/938], Loss: 0.9897
Epoch [2/5], Batch [300/938], Loss: 0.7467
Epoch [2/5], Batch [400/938], Loss: 0.9631
Epoch [2/5], Batch [500/938], Loss: 0.7620
Epoch [2/5], Batch [600/938], Loss: 0.8269
Epoch [2/5], Batch [700/938], Loss: 0.6882
Epoch [2/5], Batch [800/938], Loss: 0.6701
Epoch [2/5], Batch [900/938], Loss: 0.6189
Epoch [2/5], Loss: 0.6655, Accuracy: 0.7484
----------------------------------------
Epoch [3/5], Batch [100/938], Loss: 0.7168
Epoch [3/5], 

Mô hình được train với CIFAR-10 cho accuracy cao nhất sau 5 epochs. Mô hình này có accuracy cao nhất là vì có thể dataset CIFAR có nhiều feature gần giống với dataset FashionMNIST so với dataset MNIST. Tuy nhiên nếu train thêm nhiều epochs nữa thì model MiniVGG train from scratch sẽ tốt hơn 2 model pretrain vì các layer sẽ fit được với FashionMNIST dataset tốt hơn 2 model pretrain kia. 2 model pretrain kia thì các layer đã bị freeze do vậy nếu train thêm nhiều epoch nữa thì layer của model cũng sẽ không học được điều gìtốt

In [None]:
from torchvision.models.feature_extraction import get_graph_node_names
from torchvision.models.feature_extraction import create_feature_extractor

train_nodes, eval_nodes = get_graph_node_names(model_HW3)

In [None]:
train_nodes

['x',
 'features.0',
 'features.1',
 'features.2',
 'features.3',
 'features.4',
 'features.5',
 'features.6',
 'features.7',
 'features.8',
 'features.9',
 'features.10',
 'features.11',
 'features.12',
 'features.13',
 'features.14',
 'flatten',
 'classifier']

In [None]:
create_feature_extractor(model_HW3, train_return_nodes= train_nodes, eval_return_nodes= eval_nodes)

MiniVGG(
  (features): Module(
    (0): Conv2d(1, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU()
    (4): MaxPool2d(kernel_size=(2, 2), stride=(2, 2), padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU()
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU()
    (9): MaxPool2d(kernel_size=(2, 2), stride=(2, 2), padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU()
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU()
    (14): MaxPool2d(kernel_size=(2, 2), stride=(2, 2), padding=0, dilation=1, ceil_mode=False)
  )
  (classifier): Linear(in_features=2304, out_features=10, bias=True)
)

In [None]:
model_HW3.features[0].weight

Parameter containing:
tensor([[[[-4.9261e-02, -3.3996e-02,  1.3932e-01],
          [-2.9171e-01, -1.2294e-01, -2.0583e-01],
          [-1.8300e-01, -1.1332e-01, -4.3617e-02]]],


        [[[-2.8938e-01,  2.2051e-01, -6.5313e-02],
          [ 2.4501e-01,  3.1906e-01,  1.0882e-01],
          [-1.8060e-02, -2.8399e-01, -3.1272e-01]]],


        [[[-1.2573e-02, -3.5681e-02, -2.2704e-02],
          [ 1.9991e-01, -1.2565e-01,  2.1561e-01],
          [ 1.6551e-01, -4.7146e-02, -2.1403e-01]]],


        [[[ 1.7682e-01,  2.4552e-01, -5.4870e-02],
          [-1.1699e-01, -2.9124e-01, -2.7061e-01],
          [ 2.8950e-01,  2.9127e-01,  1.0661e-01]]],


        [[[ 1.4823e-02, -1.4314e-01,  1.5886e-01],
          [-1.0912e-01, -2.7046e-02, -1.2821e-01],
          [ 1.1047e-01, -1.5646e-01,  2.6694e-01]]],


        [[[ 2.6849e-01,  3.1240e-01, -3.0154e-01],
          [-2.0837e-01, -2.0277e-01, -1.7017e-01],
          [-2.0373e-01,  1.4033e-01,  1.3967e-01]]],


        [[[-1.1505e-01, -1.1403e-01,