# BÀI THỰC HÀNH 2: MẠNG NEURAL TÍCH CHẬP

In [1]:
# Mount Google Drive into the Colab runtime at /content/drive; the dataset paths used
# later in this notebook all live under that mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


<b>Hướng dẫn nộp bài:</b> Các bạn commit và push code lên GitHub, sau đó tạo một file txt đặt tên theo cú pháp `<MSSV>.txt` chứa đường link dẫn đến GitHub của bài thực hành và nộp file txt này trên courses.

Bộ dữ liệu sử dụng: [MNIST dataset](https://git-disl.github.io/GTDLBench/datasets/mnist_datasets/) (bài 1) và [VinaFood21 dataset](https://arxiv.org/abs/2108.02929) (các bài còn lại).

Link download: https://drive.google.com/file/d/1UpZOf0XlwvB4rKpyZ35iwTA8oWHqDBbR/view?usp=share_link.

### Bài 1: Xây dựng mô hình LeNet. Huấn luyện và đánh giá mô hình LeNet trên 4 độ đo accuracy, precision, recall và F1-macro (sử dụng Adam làm optimizer).

In [5]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score
import numpy as np
from tqdm import tqdm
import os

In [None]:
def load_mnist(images_path, labels_path):
    """Read a pair of MNIST IDX (``.ubyte``) files into NumPy arrays.

    The IDX headers are parsed and validated instead of being skipped, so that
    passing the wrong file (e.g. swapped image/label paths or a still-compressed
    download) fails loudly rather than silently producing garbage data.

    Args:
        images_path: Path to an ``idx3-ubyte`` image file.
        labels_path: Path to an ``idx1-ubyte`` label file.

    Returns:
        A tuple ``(images, labels)`` where ``images`` is a ``uint8`` array of shape
        ``(num_items, rows * cols)`` (``(N, 784)`` for standard MNIST) and ``labels``
        is a ``uint8`` array of shape ``(num_items,)``.

    Raises:
        ValueError: If a header is truncated, a magic number is wrong, or the
            payload size does not match the counts declared in the headers.
    """
    import struct  # local import: only needed to decode the big-endian IDX headers

    label_magic = 2049   # 0x00000801 -> unsigned bytes, 1 dimension
    image_magic = 2051   # 0x00000803 -> unsigned bytes, 3 dimensions

    with open(labels_path, 'rb') as lbpath:
        header = lbpath.read(8)  # magic number + number of items
        if len(header) != 8:
            raise ValueError(f"Truncated IDX label header in {labels_path}")
        magic, num_labels = struct.unpack('>II', header)
        if magic != label_magic:
            raise ValueError(
                f"{labels_path} is not an IDX label file (magic number {magic}, expected {label_magic})"
            )
        labels = np.fromfile(lbpath, dtype=np.uint8)

    if len(labels) != num_labels:
        raise ValueError(
            f"{labels_path} declares {num_labels} labels but contains {len(labels)}"
        )

    with open(images_path, 'rb') as imgpath:
        header = imgpath.read(16)  # magic number, number of images, rows, columns
        if len(header) != 16:
            raise ValueError(f"Truncated IDX image header in {images_path}")
        magic, num_images, rows, cols = struct.unpack('>IIII', header)
        if magic != image_magic:
            raise ValueError(
                f"{images_path} is not an IDX image file (magic number {magic}, expected {image_magic})"
            )
        if num_images != num_labels:
            raise ValueError(
                f"Image/label count mismatch: {num_images} images vs {num_labels} labels"
            )
        pixels = np.fromfile(imgpath, dtype=np.uint8)

    if pixels.size != num_images * rows * cols:
        raise ValueError(
            f"{images_path} declares {num_images}x{rows}x{cols} pixels but contains {pixels.size} bytes"
        )

    # One flattened image per row, as expected by MNISTDataset.
    images = pixels.reshape(num_images, rows * cols)

    return images, labels

class MNISTDataset(Dataset):
    """In-memory MNIST dataset backed by a pair of IDX files.

    All images are loaded once in ``__init__``, converted to ``float32`` tensors and
    scaled by ``1 / 255``; labels are stored as ``long`` tensors. An optional
    ``transform`` callable is applied to each image when it is fetched.
    """

    def __init__(self, images_path, labels_path, transform=None):
        raw_images, raw_labels = load_mnist(images_path, labels_path)
        self.transform = transform

        # Keep everything as tensors up front so __getitem__ only has to slice.
        self.images = torch.tensor(raw_images, dtype=torch.float32) / 255.0
        self.labels = torch.tensor(raw_labels, dtype=torch.long)

    def __len__(self):
        return self.labels.size(0)

    def __getitem__(self, idx):
        # Each stored row is a flattened image; expose it as (channels, height, width)
        # = (1, 28, 28) for the convolutional network.
        sample = self.images[idx].view(1, 28, 28)
        target = self.labels[idx]

        if self.transform:
            sample = self.transform(sample)

        return sample, target

def get_mnist_loaders(batch_size=64,
                      base_path='/content/drive/MyDrive/Colab_Notebooks/DS201/LAB_2',
                      test_batch_size=1000):
    """Build the MNIST train/test DataLoaders from local IDX files.

    Args:
        batch_size: Mini-batch size of the (shuffled) training loader.
        base_path: Directory holding the four MNIST ``*.idx?-ubyte`` files.
            Defaults to the Google Drive folder used by this lab, so existing
            calls keep working unchanged.
        test_batch_size: Batch size of the (unshuffled) test loader.

    Returns:
        A tuple ``(train_loader, test_loader)``.

    Raises:
        FileNotFoundError: If any of the four expected files is missing.
    """
    file_names = {
        'train_images': 'train-images.idx3-ubyte',
        'train_labels': 'train-labels.idx1-ubyte',
        'test_images': 't10k-images.idx3-ubyte',
        'test_labels': 't10k-labels.idx1-ubyte',
    }
    paths = {key: os.path.join(base_path, name) for key, name in file_names.items()}

    # Fail early with a clear message instead of a low-level error while parsing.
    for path in paths.values():
        if not os.path.exists(path):
            raise FileNotFoundError(f"Không tìm thấy file: {path}. Vui lòng kiểm tra lại cấu trúc thư mục.")

    train_dataset = MNISTDataset(images_path=paths['train_images'], labels_path=paths['train_labels'])
    test_dataset = MNISTDataset(images_path=paths['test_images'], labels_path=paths['test_labels'])

    # Only the training data is shuffled; evaluation order does not matter.
    train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(dataset=test_dataset, batch_size=test_batch_size, shuffle=False)

    return train_loader, test_loader

# Build the MNIST train/test loaders (training batches of 64 samples)
train_loader, test_loader = get_mnist_loaders(batch_size=64)
print("Tạo DataLoader từ file local thành công!")


# Select the compute device (use the GPU when one is available, otherwise the CPU)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Sử dụng thiết bị: {device}")

Tạo DataLoader từ file local thành công!
Sử dụng thiết bị: cpu


In [None]:
class LeNet(nn.Module):
    """LeNet-style CNN with sigmoid activations and average pooling.

    Layout: conv(1->6, 5x5, pad 2) -> avgpool -> conv(6->16, 5x5) -> avgpool ->
    fc(400->120) -> fc(120->84) -> fc(84->10). The flatten step assumes the second
    pooling layer yields 16 feature maps of 5x5, which is what a 1x28x28 input gives.
    ``forward`` returns raw logits (no softmax).
    """

    def __init__(self):
        super().__init__()
        # A single stateless activation module shared by every layer.
        self.sigmoid = nn.Sigmoid()

        # Convolutional feature extractor.
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5, stride=1, padding=2)
        self.pool1 = nn.AvgPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5, stride=1, padding=0)
        self.pool2 = nn.AvgPool2d(kernel_size=2, stride=2)

        # Fully connected classifier.
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        activate = self.sigmoid

        features = self.pool1(activate(self.conv1(x)))
        features = self.pool2(activate(self.conv2(features)))

        # Flatten each sample's 16x5x5 feature maps into one 400-d vector.
        hidden = features.view(-1, 16 * 5 * 5)
        for dense in (self.fc1, self.fc2):
            hidden = activate(dense(hidden))

        return self.fc3(hidden)

# Instantiate LeNet on the selected device and print its layer summary.
model = LeNet().to(device)
print(model)

LeNet(
  (sigmoid): Sigmoid()
  (conv1): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
  (pool1): AvgPool2d(kernel_size=2, stride=2, padding=0)
  (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (pool2): AvgPool2d(kernel_size=2, stride=2, padding=0)
  (fc1): Linear(in_features=400, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
)


In [None]:
# Training configuration for LeNet: cross-entropy loss on the raw logits,
# Adam optimizer (as required by the exercise) and the number of epochs.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
num_epochs = 10

In [None]:
# Train LeNet for `num_epochs` passes over the training loader.
model.train()
for epoch in range(num_epochs):
    # The epoch label is constant for the whole pass, so it is set once when the
    # progress bar is created instead of being re-set on every mini-batch.
    loop = tqdm(train_loader, total=len(train_loader), leave=True,
                desc=f"Epoch [{epoch+1}/{num_epochs}]")
    for images, labels in loop:
        images = images.to(device)
        labels = labels.to(device)

        # Forward pass and loss on the current mini-batch.
        outputs = model(images)
        loss = criterion(outputs, labels)

        # Backward pass: clear stale gradients, backpropagate, update the weights.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Show the latest mini-batch loss next to the progress bar.
        loop.set_postfix(loss=loss.item())

print("Hoàn thành quá trình huấn luyện!")

Epoch [1/10]: 100%|██████████| 938/938 [00:16<00:00, 55.18it/s, loss=0.193]
Epoch [2/10]: 100%|██████████| 938/938 [00:21<00:00, 44.06it/s, loss=0.0912]
Epoch [3/10]: 100%|██████████| 938/938 [00:17<00:00, 53.25it/s, loss=0.243]
Epoch [4/10]: 100%|██████████| 938/938 [00:17<00:00, 53.32it/s, loss=0.0631]
Epoch [5/10]: 100%|██████████| 938/938 [00:17<00:00, 54.61it/s, loss=0.0957]
Epoch [6/10]: 100%|██████████| 938/938 [00:17<00:00, 53.94it/s, loss=0.00847]
Epoch [7/10]: 100%|██████████| 938/938 [00:16<00:00, 55.19it/s, loss=0.0294]
Epoch [8/10]: 100%|██████████| 938/938 [00:16<00:00, 57.30it/s, loss=0.121]
Epoch [9/10]: 100%|██████████| 938/938 [00:17<00:00, 54.38it/s, loss=0.0608]
Epoch [10/10]: 100%|██████████| 938/938 [00:18<00:00, 51.73it/s, loss=0.0422]

Hoàn thành quá trình huấn luyện!





In [None]:
# Evaluate the trained model on the test set and report accuracy plus
# macro-averaged precision / recall / F1.
model.eval()
all_labels = []
all_preds = []

with torch.no_grad():  # inference only: no autograd bookkeeping needed
    for images, labels in test_loader:
        images = images.to(device)

        outputs = model(images)
        # Predicted class = index of the largest logit. `argmax` replaces the legacy
        # `torch.max(outputs.data, 1)` idiom; `.data` is unnecessary under no_grad.
        predicted = outputs.argmax(dim=1)

        # Labels are only needed on the host for scikit-learn, so they are not
        # moved to the device first.
        all_labels.extend(labels.cpu().numpy())
        all_preds.extend(predicted.cpu().numpy())

all_labels = np.array(all_labels)
all_preds = np.array(all_preds)

# zero_division=0: a class that is never predicted contributes 0 instead of a warning.
accuracy = accuracy_score(all_labels, all_preds)
precision = precision_score(all_labels, all_preds, average='macro', zero_division=0)
recall = recall_score(all_labels, all_preds, average='macro', zero_division=0)
f1 = f1_score(all_labels, all_preds, average='macro', zero_division=0)

print("\n--- Kết quả đánh giá trên tập test ---")
print(f"Accuracy: {accuracy:.4f}")
print(f"Precision (macro): {precision:.4f}")
print(f"Recall (macro): {recall:.4f}")
print(f"F1-score (macro): {f1:.4f}")


--- Kết quả đánh giá trên tập test ---
Accuracy: 0.9831
Precision (macro): 0.9832
Recall (macro): 0.9830
F1-score (macro): 0.9830


### Bài 2: Xây dựng mô hình GoogLeNet. Huấn luyện và đánh giá mô hình GoogLeNet trên 4 độ đo accuracy, precision, recall và F1 (sử dụng Adam làm optimizer). Lưu ý lớp Convolution đầu tiên có padding là 3, các lớp Max Pooling đều bật chế độ ceil_mode (`ceil_mode=True`).

**Inception Blocks**

**GoogLeNet (Multi-branch Network)**


**GoogLeNet Parameters**


In [14]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import Dataset, DataLoader
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score
import numpy as np
from tqdm import tqdm
import zipfile
import os
from PIL import Image

# Select the compute device: CUDA when available, otherwise the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Sử dụng thiết bị: {device}")

Sử dụng thiết bị: cuda


In [7]:
class VinaFoodDataset(Dataset):
    """Image-folder dataset: one sub-directory of ``root_dir`` per class.

    Class indices are assigned to the *sub-directories only*, in sorted name order,
    so stray files next to the class folders (e.g. ``.DS_Store`` or a README) can
    no longer leave gaps in the label range or make ``len(class_to_idx)`` disagree
    with the largest label. Image files inside each class folder are also listed
    in sorted order so that sample indices are reproducible across runs.

    Args:
        root_dir: Directory containing one folder per class.
        transform: Optional callable applied to each loaded PIL image.

    Attributes:
        image_paths: Paths of all ``.png`` / ``.jpg`` / ``.jpeg`` files found.
        labels: Integer class index for each entry of ``image_paths``.
        class_to_idx: Mapping from class folder name to class index.
    """

    # File extensions (compared case-insensitively) that are treated as images.
    IMAGE_EXTENSIONS = ('.png', '.jpg', '.jpeg')

    def __init__(self, root_dir, transform=None):
        self.root_dir = root_dir
        self.transform = transform
        self.image_paths = []
        self.labels = []

        # Only directories define classes; enumerate them after filtering so the
        # indices are contiguous: 0 .. num_classes - 1.
        class_names = sorted(
            name for name in os.listdir(root_dir)
            if os.path.isdir(os.path.join(root_dir, name))
        )
        self.class_to_idx = {name: idx for idx, name in enumerate(class_names)}

        for class_name, class_idx in self.class_to_idx.items():
            class_path = os.path.join(root_dir, class_name)
            for img_name in sorted(os.listdir(class_path)):
                if img_name.lower().endswith(self.IMAGE_EXTENSIONS):
                    self.image_paths.append(os.path.join(class_path, img_name))
                    self.labels.append(class_idx)

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        img_path = self.image_paths[idx]
        label = self.labels[idx]
        # Image.open is lazy and keeps the file handle open; convert() loads the
        # pixel data into a new image, so the handle can be closed right away.
        with Image.open(img_path) as img:
            image = img.convert('RGB')
        if self.transform:
            image = self.transform(image)
        return image, label

In [9]:
# Preprocessing applied to training images: resize to 224x224, convert to a tensor,
# then normalise each RGB channel with the mean/std below (these values match the
# commonly used ImageNet statistics).
train_transforms = transforms.Compose([
    transforms.Resize((224, 224)),
    # Data augmentation is currently disabled:
    # transforms.RandomHorizontalFlip(),
    # transforms.RandomRotation(10),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

In [10]:
# Preprocessing applied to test images: deterministic resize to 224x224, tensor
# conversion and per-channel normalisation with the mean/std below.
test_transforms = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

In [None]:
# Locations of the VinaFood21 archive and of the folder it is expected to unpack into.
zip_path = '/content/drive/MyDrive/Colab_Notebooks/DS201/LAB_2/VinaFood21.zip'
extract_path = '/content/drive/MyDrive/Colab_Notebooks/DS201/LAB_2/'
base_data_dir = '/content/drive/MyDrive/Colab_Notebooks/DS201/LAB_2/VinaFood21'

# Unpacking the archive onto Google Drive is slow, so only do it when the target
# folder does not exist yet; re-running this cell then becomes cheap.
# NOTE: if an earlier extraction was interrupted, delete `base_data_dir` and re-run.
if not os.path.isdir(base_data_dir):
    with zipfile.ZipFile(zip_path, 'r') as zip_ref:
        zip_ref.extractall(extract_path)

train_dir = os.path.join(base_data_dir, 'train')
test_dir = os.path.join(base_data_dir, 'test')

# One dataset per split, each with its own preprocessing pipeline.
train_dataset = VinaFoodDataset(root_dir=train_dir, transform=train_transforms)
test_dataset = VinaFoodDataset(root_dir=test_dir, transform=test_transforms)

In [None]:
# The number of classes is derived from the class folders found in the training split;
# it is used later to size the classifier layers of the models.
num_classes = len(train_dataset.class_to_idx)
print(f"Đã tìm thấy {num_classes} lớp.")
print(f"Số lượng ảnh train: {len(train_dataset)}")
print(f"Số lượng ảnh test: {len(test_dataset)}")

Đã tìm thấy 21 lớp.
Số lượng ảnh train: 10044
Số lượng ảnh test: 6682


In [None]:
# DataLoaders for VinaFood21: only the training split is shuffled; both splits use
# 2 worker processes to load and preprocess images.
batch_size = 32
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=2)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=2)
print("\nTạo DataLoader từ cấu trúc train/test thành công!")


Tạo DataLoader từ cấu trúc train/test thành công!


In [None]:
class ConvBlock(nn.Module):
    """Convolution followed by batch normalisation and ReLU.

    Args:
        in_channels: Number of input feature maps.
        out_channels: Number of output feature maps.
        **kwargs: Forwarded unchanged to ``nn.Conv2d`` (kernel_size, stride, padding, ...).
    """

    def __init__(self, in_channels, out_channels, **kwargs):
        super().__init__()
        self.relu = nn.ReLU()
        self.conv = nn.Conv2d(in_channels, out_channels, **kwargs)
        self.batchnorm = nn.BatchNorm2d(out_channels)

    def forward(self, x):
        convolved = self.conv(x)
        normalised = self.batchnorm(convolved)
        return self.relu(normalised)

class Inception(nn.Module):
    """Inception block: four parallel branches concatenated along the channel axis.

    Args:
        in_channels: Channels of the incoming feature map.
        ch1x1: Output channels of the 1x1 branch.
        ch3x3_red: Channels of the 1x1 reduction in front of the 3x3 convolution.
        ch3x3: Output channels of the 3x3 branch.
        ch5x5_red: Channels of the 1x1 reduction in front of the 5x5 convolution.
        ch5x5: Output channels of the 5x5 branch.
        pool_proj: Output channels of the 1x1 projection after the max-pool branch.

    The output has ``ch1x1 + ch3x3 + ch5x5 + pool_proj`` channels; every branch uses
    stride 1 with matching padding, so the spatial size is preserved.
    """

    def __init__(self, in_channels, ch1x1, ch3x3_red, ch3x3, ch5x5_red, ch5x5, pool_proj):
        super().__init__()

        # Branch 1: plain 1x1 convolution.
        self.branch1 = ConvBlock(in_channels, ch1x1, kernel_size=1)

        # Branch 2: 1x1 channel reduction, then 3x3 convolution.
        reduce_3x3 = ConvBlock(in_channels, ch3x3_red, kernel_size=1)
        conv_3x3 = ConvBlock(ch3x3_red, ch3x3, kernel_size=3, padding=1)
        self.branch2 = nn.Sequential(reduce_3x3, conv_3x3)

        # Branch 3: 1x1 channel reduction, then 5x5 convolution.
        reduce_5x5 = ConvBlock(in_channels, ch5x5_red, kernel_size=1)
        conv_5x5 = ConvBlock(ch5x5_red, ch5x5, kernel_size=5, padding=2)
        self.branch3 = nn.Sequential(reduce_5x5, conv_5x5)

        # Branch 4: 3x3 max pooling (stride 1), then 1x1 projection.
        pool = nn.MaxPool2d(kernel_size=3, stride=1, padding=1)
        projection = ConvBlock(in_channels, pool_proj, kernel_size=1)
        self.branch4 = nn.Sequential(pool, projection)

    def forward(self, x):
        branches = (self.branch1, self.branch2, self.branch3, self.branch4)
        return torch.cat([branch(x) for branch in branches], 1)

In [None]:
class GoogLeNet(nn.Module):
    """GoogLeNet (Inception v1 layout) without auxiliary classifiers.

    The first convolution uses padding 3 and every stage-level max pooling uses
    ``ceil_mode=True``. The head is adaptive average pooling, dropout (p=0.4) and a
    single linear layer; ``forward`` returns raw logits.

    Args:
        in_channels: Channels of the input image (3 for RGB).
        num_classes: Size of the output logit vector.
    """

    def __init__(self, in_channels=3, num_classes=21):
        super().__init__()

        # Stem
        self.conv1 = ConvBlock(in_channels, 64, kernel_size=7, stride=2, padding=3)
        self.maxpool1 = nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True)
        self.conv2 = ConvBlock(64, 64, kernel_size=1)
        self.conv3 = ConvBlock(64, 192, kernel_size=3, padding=1)
        self.maxpool2 = nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True)

        # Stage 3 (192 -> 256 -> 480 channels)
        self.inception3a = Inception(192, 64, 96, 128, 16, 32, 32)
        self.inception3b = Inception(256, 128, 128, 192, 32, 96, 64)
        self.maxpool3 = nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True)

        # Stage 4 (480 -> 512 -> 512 -> 512 -> 528 -> 832 channels)
        self.inception4a = Inception(480, 192, 96, 208, 16, 48, 64)
        self.inception4b = Inception(512, 160, 112, 224, 24, 64, 64)
        self.inception4c = Inception(512, 128, 128, 256, 24, 64, 64)
        self.inception4d = Inception(512, 112, 144, 288, 32, 64, 64)
        self.inception4e = Inception(528, 256, 160, 320, 32, 128, 128)
        self.maxpool4 = nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True)

        # Stage 5 (832 -> 832 -> 1024 channels)
        self.inception5a = Inception(832, 256, 160, 320, 32, 128, 128)
        self.inception5b = Inception(832, 384, 192, 384, 48, 128, 128)

        # Classification head
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.dropout = nn.Dropout(p=0.4)
        self.fc1 = nn.Linear(1024, num_classes)

    def forward(self, x):
        # The feature extractor is a plain chain, applied in this exact order.
        feature_stages = (
            self.conv1, self.maxpool1, self.conv2, self.conv3, self.maxpool2,
            self.inception3a, self.inception3b, self.maxpool3,
            self.inception4a, self.inception4b, self.inception4c,
            self.inception4d, self.inception4e, self.maxpool4,
            self.inception5a, self.inception5b,
        )
        for stage in feature_stages:
            x = stage(x)

        x = self.avgpool(x)
        x = x.reshape(x.shape[0], -1)  # (batch, 1024, 1, 1) -> (batch, 1024)
        x = self.dropout(x)
        return self.fc1(x)

# Instantiate GoogLeNet with one output per dataset class and move it to the device.
model = GoogLeNet(num_classes=num_classes).to(device)
print("Kiến trúc GoogLeNet đã được tạo!")

Kiến trúc GoogLeNet đã được tạo!


In [None]:
# Training configuration: cross-entropy loss, Adam optimizer, number of epochs.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
num_epochs = 10

for epoch in range(num_epochs):
    # --- Training ---
    model.train()  # re-enable dropout / batch-norm updates after the previous evaluation
    running_loss = 0.0
    # The epoch label is constant for the whole pass, so set it once on creation
    # instead of on every mini-batch.
    train_loop = tqdm(train_loader, total=len(train_loader), leave=True,
                      desc=f"Epoch [{epoch+1}/{num_epochs}]")
    for images, labels in train_loop:
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        loss = criterion(outputs, labels)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        train_loop.set_postfix(loss=loss.item())

    # --- Evaluation on the test set ---
    model.eval()
    all_labels = []
    all_preds = []
    with torch.no_grad():
        for images, labels in test_loader:
            images = images.to(device)
            outputs = model(images)
            # Predicted class = index of the largest logit. `argmax` replaces the
            # legacy `torch.max(outputs.data, 1)` idiom; `.data` is not needed here.
            predicted = outputs.argmax(dim=1)
            # Labels are only consumed by scikit-learn on the host.
            all_labels.extend(labels.cpu().numpy())
            all_preds.extend(predicted.cpu().numpy())

    # Macro averaging weights every class equally; zero_division=0 scores a class
    # that is never predicted as 0 instead of emitting a warning.
    accuracy = accuracy_score(all_labels, all_preds)
    precision = precision_score(all_labels, all_preds, average='macro', zero_division=0)
    recall = recall_score(all_labels, all_preds, average='macro', zero_division=0)
    f1 = f1_score(all_labels, all_preds, average='macro', zero_division=0)

    print(f"\n--- Epoch {epoch+1} Test Results ---")
    # Mean of the per-batch training losses for this epoch.
    print(f"Train Loss: {running_loss/len(train_loader):.4f}")
    print(f"Accuracy: {accuracy:.4f} | Precision: {precision:.4f} | Recall: {recall:.4f} | F1-score: {f1:.4f}\n")

print("Hoàn thành quá trình huấn luyện!")

Epoch [1/10]: 100%|██████████| 314/314 [02:59<00:00,  1.75it/s, loss=3]



--- Epoch 1 Test Results ---
Train Loss: 2.8033
Accuracy: 0.1809 | Precision: 0.1500 | Recall: 0.1679 | F1-score: 0.1029



Epoch [2/10]: 100%|██████████| 314/314 [02:56<00:00,  1.78it/s, loss=2.55]



--- Epoch 2 Test Results ---
Train Loss: 2.5487
Accuracy: 0.2472 | Precision: 0.1941 | Recall: 0.2263 | F1-score: 0.1777



Epoch [3/10]: 100%|██████████| 314/314 [02:52<00:00,  1.82it/s, loss=2.13]



--- Epoch 3 Test Results ---
Train Loss: 2.3773
Accuracy: 0.2463 | Precision: 0.2089 | Recall: 0.2387 | F1-score: 0.1815



Epoch [4/10]: 100%|██████████| 314/314 [02:54<00:00,  1.80it/s, loss=2.55]



--- Epoch 4 Test Results ---
Train Loss: 2.1969
Accuracy: 0.3155 | Precision: 0.3042 | Recall: 0.2795 | F1-score: 0.2499



Epoch [5/10]: 100%|██████████| 314/314 [02:54<00:00,  1.80it/s, loss=2.4]



--- Epoch 5 Test Results ---
Train Loss: 2.0744
Accuracy: 0.3505 | Precision: 0.3474 | Recall: 0.3115 | F1-score: 0.2973



Epoch [6/10]: 100%|██████████| 314/314 [02:52<00:00,  1.82it/s, loss=1.99]



--- Epoch 6 Test Results ---
Train Loss: 1.9483
Accuracy: 0.3463 | Precision: 0.3272 | Recall: 0.3177 | F1-score: 0.3070



Epoch [7/10]: 100%|██████████| 314/314 [02:51<00:00,  1.83it/s, loss=1.84]



--- Epoch 7 Test Results ---
Train Loss: 1.8270
Accuracy: 0.3466 | Precision: 0.3793 | Recall: 0.3310 | F1-score: 0.3068



Epoch [8/10]: 100%|██████████| 314/314 [02:50<00:00,  1.84it/s, loss=2.05]



--- Epoch 8 Test Results ---
Train Loss: 1.7250
Accuracy: 0.3894 | Precision: 0.4154 | Recall: 0.3664 | F1-score: 0.3482



Epoch [9/10]: 100%|██████████| 314/314 [02:49<00:00,  1.85it/s, loss=1.55]



--- Epoch 9 Test Results ---
Train Loss: 1.6194
Accuracy: 0.4056 | Precision: 0.4071 | Recall: 0.3960 | F1-score: 0.3782



Epoch [10/10]: 100%|██████████| 314/314 [02:49<00:00,  1.85it/s, loss=1.18]



--- Epoch 10 Test Results ---
Train Loss: 1.4979
Accuracy: 0.4045 | Precision: 0.4365 | Recall: 0.3861 | F1-score: 0.3609

Hoàn thành quá trình huấn luyện!


### Bài 3: Xây dựng mô hình ResNet-18, đánh giá mô hình ResNet-18 trên bộ dữ liệu VinaFood21 sử dụng các độ đo precision, recall, và F1 (sử dụng Adam làm optimizer). Lưu ý, giữa các block Residual-Connection có một lớp Max Pooling (kernel = 3, stride = 2, padding = 0).

**ResNet Block**

![image.png](attachment:image.png)

**ResNet**

![image-2.png](attachment:image-2.png)

**ResNet-18 Parameter**
![image-4.png](attachment:image-4.png)

In [None]:
class ResidualBlock(nn.Module):
    """Basic residual block: two 3x3 conv + batch-norm layers plus a shortcut.

    Both convolutions use stride 1 and padding 1, so the spatial size is preserved.
    When ``in_channels != out_channels`` the shortcut is a 1x1 convolution followed
    by batch norm so the two paths can be added; otherwise it is an empty
    ``nn.Sequential`` (identity).

    Args:
        in_channels: Channels of the input feature map.
        out_channels: Channels produced by the block.
    """

    def __init__(self, in_channels, out_channels):
        super().__init__()

        # Main path
        conv3x3_args = dict(kernel_size=3, stride=1, padding=1, bias=False)
        self.conv1 = nn.Conv2d(in_channels, out_channels, **conv3x3_args)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = nn.Conv2d(out_channels, out_channels, **conv3x3_args)
        self.bn2 = nn.BatchNorm2d(out_channels)

        # Shortcut path: identity when the channel counts already match, otherwise a
        # 1x1 projection to the new channel count.
        if in_channels == out_channels:
            self.shortcut = nn.Sequential()
        else:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=1, bias=False),
                nn.BatchNorm2d(out_channels),
            )

    def forward(self, x):
        # Main path: conv -> bn -> relu -> conv -> bn
        residual = self.conv1(x)
        residual = self.bn1(residual)
        residual = self.relu(residual)
        residual = self.conv2(residual)
        residual = self.bn2(residual)

        # Add the (possibly projected) input, then apply the final ReLU.
        identity = self.shortcut(x)
        return self.relu(residual + identity)

In [None]:
class ResNet18(nn.Module):
    """ResNet-style classifier with a max pooling layer between residual stages.

    Structure: 7x7 stride-2 stem convolution with batch norm, ReLU and a 3x3
    stride-2 max pool (padding 1), followed by four residual stages of 64, 128, 256
    and 512 channels. A max pool with kernel 3, stride 2 and padding 0 is applied
    after stages 1-3 (not after stage 4). The head is adaptive average pooling plus
    one linear layer; ``forward`` returns raw logits.

    Args:
        block: Callable ``block(in_channels, out_channels)`` that builds one
            residual block.
        num_blocks: Sequence of four ints, the number of blocks in each stage
            (``[2, 2, 2, 2]`` for ResNet-18).
        num_classes: Size of the output logit vector.
    """

    def __init__(self, block, num_blocks, num_classes=21):
        super().__init__()
        # Running channel count consumed and updated by _make_layer.
        self.in_channels = 64

        # 1. Stem
        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool_stem = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        # 2. Residual stages
        self.layer1 = self._make_layer(block, 64, num_blocks[0])
        self.layer2 = self._make_layer(block, 128, num_blocks[1])
        self.layer3 = self._make_layer(block, 256, num_blocks[2])
        self.layer4 = self._make_layer(block, 512, num_blocks[3])

        # Parameter-free pooling shared between the stages (k=3, s=2, p=0).
        self.custom_maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=0)

        # 3. Classifier head
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(512, num_classes)

    def _make_layer(self, block, out_channels, num_blocks):
        """Build one stage: a block that may change the channel count, then
        ``num_blocks - 1`` blocks that keep it."""
        stage = [block(self.in_channels, out_channels)]
        self.in_channels = out_channels
        stage.extend(block(self.in_channels, out_channels) for _ in range(num_blocks - 1))
        return nn.Sequential(*stage)

    def forward(self, x):
        # Stem
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool_stem(x)

        # Stages 1-3 are each followed by the extra max pooling; stage 4 is not.
        for stage in (self.layer1, self.layer2, self.layer3):
            x = self.custom_maxpool(stage(x))
        x = self.layer4(x)

        # Classifier head
        pooled = self.avgpool(x)
        flat = torch.flatten(pooled, 1)
        return self.fc(flat)

# Instantiate ResNet-18 (two residual blocks per stage) with one output per dataset
# class, move it to the selected device and print its layer summary.
model = ResNet18(ResidualBlock, [2, 2, 2, 2], num_classes=num_classes).to(device)
print("Kiến trúc ResNet-18 đã được tạo!")
print(model)

Kiến trúc ResNet-18 đã được tạo!
ResNet18(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool_stem): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): ResidualBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (shortcut): Sequential()
    )
    (1): ResidualBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.

In [None]:
# --- Training setup: cross-entropy loss, Adam optimizer, number of epochs ---
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
num_epochs = 10

# --- Training and evaluation loop ---
for epoch in range(num_epochs):
    # --- Training ---
    model.train()  # re-enable batch-norm updates after the previous evaluation
    running_loss = 0.0
    # The epoch label is constant for the whole pass, so set it once on creation
    # instead of on every mini-batch.
    train_loop = tqdm(train_loader, total=len(train_loader), leave=True,
                      desc=f"Epoch [{epoch+1}/{num_epochs}]")
    for images, labels in train_loop:
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        loss = criterion(outputs, labels)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        train_loop.set_postfix(loss=loss.item())

    # --- Evaluation on the test set ---
    model.eval()
    all_labels = []
    all_preds = []
    with torch.no_grad():
        for images, labels in test_loader:
            images = images.to(device)
            outputs = model(images)
            # Predicted class = index of the largest logit. `argmax` replaces the
            # legacy `torch.max(outputs.data, 1)` idiom; `.data` is not needed here.
            predicted = outputs.argmax(dim=1)
            # Labels are only consumed by scikit-learn on the host.
            all_labels.extend(labels.cpu().numpy())
            all_preds.extend(predicted.cpu().numpy())

    # Macro averaging weights every class equally; zero_division=0 scores a class
    # that is never predicted as 0 instead of emitting a warning.
    accuracy = accuracy_score(all_labels, all_preds)
    precision = precision_score(all_labels, all_preds, average='macro', zero_division=0)
    recall = recall_score(all_labels, all_preds, average='macro', zero_division=0)
    f1 = f1_score(all_labels, all_preds, average='macro', zero_division=0)

    print(f"\n--- Epoch {epoch+1} Test Results ---")
    # Mean of the per-batch training losses for this epoch.
    print(f"Train Loss: {running_loss/len(train_loader):.4f}")
    print(f"Accuracy: {accuracy:.4f} | Precision: {precision:.4f} | Recall: {recall:.4f} | F1-score: {f1:.4f}\n")

print("Hoàn thành quá trình huấn luyện!")

Epoch [1/10]: 100%|██████████| 314/314 [02:45<00:00,  1.89it/s, loss=2.36]



--- Epoch 1 Test Results ---
Train Loss: 2.5134
Accuracy: 0.2749 | Precision: 0.2657 | Recall: 0.2659 | F1-score: 0.2272



Epoch [2/10]: 100%|██████████| 314/314 [02:43<00:00,  1.91it/s, loss=2.23]



--- Epoch 2 Test Results ---
Train Loss: 2.1158
Accuracy: 0.2622 | Precision: 0.3572 | Recall: 0.2687 | F1-score: 0.2318



Epoch [3/10]: 100%|██████████| 314/314 [02:44<00:00,  1.91it/s, loss=1.81]



--- Epoch 3 Test Results ---
Train Loss: 1.8570
Accuracy: 0.3797 | Precision: 0.4191 | Recall: 0.3565 | F1-score: 0.3393



Epoch [4/10]: 100%|██████████| 314/314 [03:35<00:00,  1.46it/s, loss=1.65]



--- Epoch 4 Test Results ---
Train Loss: 1.6723
Accuracy: 0.4033 | Precision: 0.4655 | Recall: 0.4007 | F1-score: 0.3909



Epoch [5/10]: 100%|██████████| 314/314 [02:54<00:00,  1.80it/s, loss=1.49]



--- Epoch 5 Test Results ---
Train Loss: 1.5086
Accuracy: 0.4024 | Precision: 0.4770 | Recall: 0.4076 | F1-score: 0.4032



Epoch [6/10]: 100%|██████████| 314/314 [02:51<00:00,  1.83it/s, loss=0.974]



--- Epoch 6 Test Results ---
Train Loss: 1.3560
Accuracy: 0.4886 | Precision: 0.4924 | Recall: 0.4871 | F1-score: 0.4715



Epoch [7/10]: 100%|██████████| 314/314 [02:46<00:00,  1.88it/s, loss=0.886]



--- Epoch 7 Test Results ---
Train Loss: 1.2389
Accuracy: 0.4954 | Precision: 0.5309 | Recall: 0.4911 | F1-score: 0.4758



Epoch [8/10]: 100%|██████████| 314/314 [02:46<00:00,  1.88it/s, loss=1.25]



--- Epoch 8 Test Results ---
Train Loss: 1.0961
Accuracy: 0.4620 | Precision: 0.5280 | Recall: 0.4465 | F1-score: 0.4427



Epoch [9/10]: 100%|██████████| 314/314 [02:47<00:00,  1.87it/s, loss=1.1]



--- Epoch 9 Test Results ---
Train Loss: 0.9871
Accuracy: 0.5262 | Precision: 0.5756 | Recall: 0.4961 | F1-score: 0.4957



Epoch [10/10]: 100%|██████████| 314/314 [02:47<00:00,  1.87it/s, loss=0.739]



--- Epoch 10 Test Results ---
Train Loss: 0.8576
Accuracy: 0.5109 | Precision: 0.5849 | Recall: 0.5286 | F1-score: 0.5104

Hoàn thành quá trình huấn luyện!


### Bài 4: Sử dụng pretrained ResNet50 từ HuggingFace để fine-tune trên bộ dữ liệu VinaFood21.

In [2]:
import torch
from torch import nn, optim
from torch.utils.data import DataLoader
from torchvision import transforms, datasets
from tqdm import tqdm
from transformers import ResNetForImageClassification

# Select the compute device as a plain string: "cuda" when available, otherwise "cpu".
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Đang sử dụng thiết bị: {device}")

Đang sử dụng thiết bị: cuda


In [3]:
# Preprocessing pipeline for the fine-tuning exercise: resize to 224x224, convert to
# a tensor and normalise each RGB channel with the mean/std below (these values match
# the commonly used ImageNet statistics).
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225]),
])


In [None]:
import os  # used for the path joins below; this section's import cell does not import it

# Location of the extracted VinaFood21 dataset used for fine-tuning.
base_data_dir = '/content/drive/MyDrive/VinaFood21'
train_dir = os.path.join(base_data_dir, 'train')
test_dir = os.path.join(base_data_dir, 'test')

# Both splits use `transform`, the preprocessing pipeline defined for this exercise,
# rather than the `train_transforms` / `test_transforms` objects of an earlier section.
# NOTE: VinaFoodDataset is defined in the Bài 2 section; that cell must be run first.
train_dataset = VinaFoodDataset(root_dir=train_dir, transform=transform)
test_dataset = VinaFoodDataset(root_dir=test_dir, transform=transform)

# Only the training split is shuffled.
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True, num_workers=2)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False, num_workers=2)

print(f"Số ảnh train: {len(train_dataset)} | test: {len(test_dataset)}")

Số ảnh train: 10044 | test: 6683


In [12]:
class PretrainedResnet(nn.Module):
    def __init__(self, num_classes=21):
        super().__init__()
        basemodel = ResNetForImageClassification.from_pretrained("microsoft/resnet-50")
        self.resnet = basemodel.resnet
        self.classifier = nn.Linear(in_features=2048, out_features=num_classes, bias=True)

    def forward(self, images: torch.Tensor):
        features = self.resnet(images).pooler_output
        features = features.squeeze(-1).squeeze(-1)
        logits = self.classifier(features)
        return logits


In [15]:
# Fine-tune the pretrained ResNet-50 on VinaFood21: every epoch runs one training
# pass followed by one evaluation pass over the test loader.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = PretrainedResnet(num_classes=21).to(device)

# All model parameters (backbone and new head) are handed to Adam.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-4)

EPOCHS = 10

for epoch in range(EPOCHS):
    # --- Training pass ---
    model.train()
    train_loss = 0.0
    correct, total = 0, 0

    for images, labels in tqdm(train_loader, desc=f"Epoch {epoch+1}/{EPOCHS} - Train"):
        images, labels = images.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # Accumulate the batch loss and the number of correct predictions.
        train_loss += loss.item()
        _, preds = torch.max(outputs, 1)
        correct += (preds == labels).sum().item()
        total += labels.size(0)

    # Loss is averaged over batches, accuracy over individual samples.
    acc = correct / total
    print(f"Train Loss: {train_loss/len(train_loader):.4f} | Train Acc: {acc:.4f}")

    # --- Evaluation pass on the test set ---
    model.eval()
    test_loss = 0.0
    correct, total = 0, 0

    with torch.no_grad():
        for images, labels in tqdm(test_loader, desc=f"Epoch {epoch+1}/{EPOCHS} - Test"):
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            loss = criterion(outputs, labels)

            test_loss += loss.item()
            _, preds = torch.max(outputs, 1)
            correct += (preds == labels).sum().item()
            total += labels.size(0)

    test_acc = correct / total
    print(f"Test Loss: {test_loss/len(test_loader):.4f} | Test Acc: {test_acc:.4f}")

print("Fine-tuning hoàn tất!")

Epoch 1/10 - Train: 100%|██████████| 314/314 [37:07<00:00,  7.09s/it]


Train Loss: 2.6690 | Train Acc: 0.2209


Epoch 1/10 - Test: 100%|██████████| 209/209 [26:10<00:00,  7.51s/it]


Test Loss: 2.0756 | Test Acc: 0.4666


Epoch 2/10 - Train: 100%|██████████| 314/314 [03:14<00:00,  1.61it/s]


Train Loss: 1.3343 | Train Acc: 0.6557


Epoch 2/10 - Test: 100%|██████████| 209/209 [01:57<00:00,  1.78it/s]


Test Loss: 0.9644 | Test Acc: 0.7196


Epoch 3/10 - Train: 100%|██████████| 314/314 [03:15<00:00,  1.60it/s]


Train Loss: 0.6759 | Train Acc: 0.8053


Epoch 3/10 - Test: 100%|██████████| 209/209 [01:54<00:00,  1.82it/s]


Test Loss: 0.7375 | Test Acc: 0.7717


Epoch 4/10 - Train: 100%|██████████| 314/314 [03:12<00:00,  1.63it/s]


Train Loss: 0.4194 | Train Acc: 0.8779


Epoch 4/10 - Test: 100%|██████████| 209/209 [01:54<00:00,  1.82it/s]


Test Loss: 0.6424 | Test Acc: 0.8041


Epoch 5/10 - Train: 100%|██████████| 314/314 [03:10<00:00,  1.65it/s]


Train Loss: 0.2703 | Train Acc: 0.9241


Epoch 5/10 - Test: 100%|██████████| 209/209 [01:54<00:00,  1.83it/s]


Test Loss: 0.5930 | Test Acc: 0.8157


Epoch 6/10 - Train: 100%|██████████| 314/314 [03:12<00:00,  1.63it/s]


Train Loss: 0.1711 | Train Acc: 0.9549


Epoch 6/10 - Test: 100%|██████████| 209/209 [01:55<00:00,  1.82it/s]


Test Loss: 0.5550 | Test Acc: 0.8314


Epoch 7/10 - Train: 100%|██████████| 314/314 [03:11<00:00,  1.64it/s]


Train Loss: 0.1060 | Train Acc: 0.9745


Epoch 7/10 - Test: 100%|██████████| 209/209 [01:54<00:00,  1.82it/s]


Test Loss: 0.5662 | Test Acc: 0.8330


Epoch 8/10 - Train: 100%|██████████| 314/314 [03:11<00:00,  1.64it/s]


Train Loss: 0.0758 | Train Acc: 0.9846


Epoch 8/10 - Test: 100%|██████████| 209/209 [01:51<00:00,  1.87it/s]


Test Loss: 0.5651 | Test Acc: 0.8332


Epoch 9/10 - Train: 100%|██████████| 314/314 [03:06<00:00,  1.68it/s]


Train Loss: 0.0514 | Train Acc: 0.9898


Epoch 9/10 - Test: 100%|██████████| 209/209 [01:50<00:00,  1.90it/s]


Test Loss: 0.5834 | Test Acc: 0.8367


Epoch 10/10 - Train: 100%|██████████| 314/314 [03:07<00:00,  1.67it/s]


Train Loss: 0.0425 | Train Acc: 0.9913


Epoch 10/10 - Test: 100%|██████████| 209/209 [01:50<00:00,  1.90it/s]

Test Loss: 0.5929 | Test Acc: 0.8348
Fine-tuning hoàn tất!



