In [1]:
import os
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

from torch.utils.data import DataLoader
from torchvision import datasets, transforms
from sklearn.metrics import accuracy_score, recall_score, precision_score, f1_score
from tqdm import tqdm

In [None]:
class VinaFoodDataLoader:
    """Factory for the train/test DataLoaders of an image-folder dataset.

    The dataset root is expected to contain a ``train`` and a ``test``
    directory. Each sub-directory inside them corresponds to one class;
    labels are assigned to images following the alphabetical order of
    those sub-directories.
    """

    def __init__(self, batch_size=64, num_workers=2, data_dir='/kaggle/input/vinafood21/VinaFood21', input_image_size=224):
        """
        Args:
            batch_size (int): Number of samples per batch.
            num_workers (int): Number of worker processes used by the loaders.
            data_dir (str): Root directory holding the ``train``/``test`` folders.
            input_image_size (int): Side length every image is resized to.
        """
        self.batch_size = batch_size
        self.num_workers = num_workers
        self.data_dir = data_dir
        self.input_image_size = input_image_size

        side = input_image_size
        preprocessing_steps = [
            # Resize to a square of input_image_size x input_image_size.
            transforms.Resize((side, side)),
            # Convert the image into a PyTorch tensor.
            transforms.ToTensor(),
            # Normalize each channel with a fixed mean / std.
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ]
        self.transform = transforms.Compose(preprocessing_steps)

    def _open_split(self, split_name, shuffle):
        """Build (path, dataset, loader) for one split directory under data_dir."""
        split_path = os.path.join(self.data_dir, split_name)
        split_dataset = datasets.ImageFolder(root=split_path, transform=self.transform)
        split_loader = DataLoader(
            dataset=split_dataset,
            batch_size=self.batch_size,
            shuffle=shuffle,
            num_workers=self.num_workers,
        )
        return split_path, split_dataset, split_loader

    def get_train_loader(self):
        """Return a shuffled DataLoader over the ``train`` split and report its size."""
        path, dataset, loader = self._open_split('train', shuffle=True)
        print(f"Loaded {len(dataset)} training samples from {path}. Found {len(dataset.classes)} classes")
        return loader

    def get_test_loader(self):
        """Return a non-shuffled DataLoader over the ``test`` split and report its size."""
        path, dataset, loader = self._open_split('test', shuffle=False)
        print(f"Loaded {len(dataset)} test samples from {path}")
        return loader


In [None]:
class Inception(nn.Module):
    """Inception block: four parallel branches whose outputs are concatenated
    along the channel dimension.

    Every branch keeps the spatial size of its input (1x1 convs, 3x3 conv with
    padding 1, 5x5 conv with padding 2, 3x3 max-pool with stride 1 / padding 1),
    so the output has ``ch1x1 + ch3x3 + ch5x5 + ch1x1_after_pooling`` channels
    and the same height/width as the input.
    """

    def __init__(self, in_channels, ch1x1, ch3x3red, ch3x3, ch5x5red, ch5x5, ch1x1_after_pooling):
        """
        Args:
            in_channels (int): Number of input channels.

            ch1x1 (int): Number of output channels for the 1x1 convolution branch.

            ch3x3red (int): Number of output channels for the 1x1 convolution (reduction) before the 3x3 convolution.

            ch3x3 (int): Number of output channels for the 3x3 convolution branch.

            ch5x5red (int): Number of output channels for the 1x1 convolution (reduction) before the 5x5 convolution.

            ch5x5 (int): Number of output channels for the 5x5 convolution branch.

            ch1x1_after_pooling (int): Number of output channels for the 1x1 convolution after the pooling operation.
        """
        super(Inception, self).__init__()
        # Branch 1: 1x1 Conv
        self.branch1 = nn.Sequential(
            nn.Conv2d(in_channels=in_channels, out_channels=ch1x1, kernel_size=1),
            nn.ReLU(True)  # in-place ReLU: negative values are set to 0
        )

        # Branch 2: 1x1 Conv (reduction) -> 3x3 Conv, pad 1
        self.branch2 = nn.Sequential(
            nn.Conv2d(in_channels=in_channels, out_channels=ch3x3red, kernel_size=1),
            nn.ReLU(True),
            nn.Conv2d(in_channels=ch3x3red, out_channels=ch3x3, kernel_size=3, padding=1),
            nn.ReLU(True)
        )

        # Branch 3: 1x1 Conv (reduction) -> 5x5 Conv, pad 2
        self.branch3 = nn.Sequential(
            nn.Conv2d(in_channels=in_channels, out_channels=ch5x5red, kernel_size=1),
            nn.ReLU(True),
            # The 5x5 conv consumes the output of the reduction above, so its
            # input channel count is ch5x5red (not the block's in_channels).
            nn.Conv2d(in_channels=ch5x5red, out_channels=ch5x5, kernel_size=5, padding=2),
            nn.ReLU(True)
        )

        # Branch 4: 3x3 MaxPool, pad 1 -> 1x1 Conv
        self.branch4 = nn.Sequential(
            # stride=1 with padding=1 keeps the spatial size unchanged
            nn.MaxPool2d(kernel_size=3, stride=1, padding=1, ceil_mode=True),
            nn.Conv2d(in_channels=in_channels, out_channels=ch1x1_after_pooling, kernel_size=1),
            nn.ReLU(True)
        )

    def forward(self, x):
        """
        Run the four branches on the same input and concatenate their outputs.

        Args:
            x (torch.Tensor): Input of shape (N, in_channels, H, W).

        Return:
            torch.Tensor: The concatenated output tensor from all branches,
            joined along dimension 1 (channels).
        """
        branch_1 = self.branch1(x)
        branch_2 = self.branch2(x)
        branch_3 = self.branch3(x)
        branch_4 = self.branch4(x)

        return torch.cat([branch_1, branch_2, branch_3, branch_4], 1)

class GoogleNet(nn.Module):
    """GoogLeNet-style classifier built from a convolutional stem, nine
    Inception blocks, global average pooling, dropout and a linear head.

    The forward pass returns both the raw logits and their softmax
    probabilities.
    """

    def __init__(self, num_clases):
        """
        Args:
            num_clases (int): Number of output classes (size of the final
                linear layer). The parameter name's spelling is kept as-is so
                existing keyword callers keep working.
        """
        super(GoogleNet, self).__init__()

        # in_channels = 3 because the input is an RGB colour image
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=64, kernel_size=7, stride=2, padding=3)

        self.pool1 = nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True)

        self.conv2 = nn.Sequential(
            nn.Conv2d(in_channels=64, out_channels=64, kernel_size=1, stride=1),
            nn.ReLU(True),
            # padding=1 keeps the spatial size unchanged through the 3x3 conv
            nn.Conv2d(in_channels=64, out_channels=192, kernel_size=3, padding=1),
            nn.ReLU(True)
        )

        self.pool2 = nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True)

        # Each Inception block outputs the sum of its four branch widths,
        # which must equal the next block's in_channels.
        self.inception3a = Inception(192, 64, 96, 128, 16, 32, 32)      # -> 256
        self.inception3b = Inception(256, 128, 128, 192, 32, 96, 64)    # -> 480

        self.pool3 = nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True)

        self.inception4a = Inception(480, 192, 96, 208, 16, 48, 64)     # -> 512
        self.inception4b = Inception(512, 160, 112, 224, 24, 64, 64)    # -> 512
        self.inception4c = Inception(512, 128, 128, 256, 24, 64, 64)    # -> 512
        self.inception4d = Inception(512, 112, 144, 288, 32, 64, 64)    # -> 528
        self.inception4e = Inception(528, 256, 160, 320, 32, 128, 128)  # -> 832

        self.pool4 = nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True)

        self.inception5a = Inception(832, 256, 160, 320, 32, 128, 128)  # -> 832
        self.inception5b = Inception(832, 384, 192, 384, 48, 128, 128)  # -> 1024

        # Global Average Pooling
        # Input (N, 1024, H, W) -> Output (N, 1024, 1, 1); H = W = 7 for a 224x224 input
        self.avgpool = nn.AdaptiveAvgPool2d(output_size=(1, 1))

        self.dropout = nn.Dropout(0.4)

        self.fc = nn.Linear(in_features=1024, out_features=num_clases)

    def forward(self, x):
        """
        Args:
            x (torch.Tensor): Batch of images of shape (N, 3, H, W).

        Returns:
            tuple[torch.Tensor, torch.Tensor]: ``(logits, probabilities)``,
            both of shape (N, num_clases); probabilities are the softmax of
            the logits over dimension 1.
        """
        # Stem. The 7x7 convolution is followed by a ReLU like every other
        # convolution in the network; it is applied functionally so the
        # module's parameter names (state_dict keys) are unchanged.
        x = F.relu(self.conv1(x), inplace=True)
        x = self.pool1(x)
        x = self.conv2(x)
        x = self.pool2(x)

        # Inception 3a, 3b
        x = self.inception3a(x)
        x = self.inception3b(x)

        x = self.pool3(x)

        # Inception 4a - 4e
        x = self.inception4a(x)
        x = self.inception4b(x)
        x = self.inception4c(x)
        x = self.inception4d(x)
        x = self.inception4e(x)

        x = self.pool4(x)

        # Inception 5a, 5b
        x = self.inception5a(x)
        x = self.inception5b(x)

        x = self.avgpool(x)
        # Flatten the output from (N, 1024, 1, 1) to (N, 1024)
        x = torch.flatten(x, 1)
        x = self.dropout(x)
        logits = self.fc(x)

        probabilities = F.softmax(logits, dim=1)
        return logits, probabilities


In [None]:
def evaluate_model(model, data_loader, device):
    model.eval()
    all_preds = []
    all_targets = []
    with torch.no_grad():
        for data, target in tqdm(data_loader, desc="Evaluating"):
            