In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import torchvision
from torchvision import datasets, models, transforms
import os
from PIL import Image
from torch.utils.data import Dataset, DataLoader, random_split
import torchvision.models as models

In [2]:
DATA_DIR = "/kaggle/input/face-recognition-mini-project"
CATEGORIES = [  "Brad Pitt",
                "Charles Leclerc",
                "Conor McGregor",
                "David Beckham",
                "Erling Haaland",
                "Faker",
                "General Vo Nguyen Giap",
                "Huy",
                "J97",
                "Jeff Bezo",
                "Jeffray",
                "Joji",
                "Khabib",
                "Leonardo DiCaprio",
                "Levi",
                "Messi",
                "Mixigaming",
                "Park Hang-seo",
                "Robert Downey Junior",
                "Ronaldo",
                "SonTungMTP",
                "Taylor Swift",
                "Tobey Maguire",
                "Tom Hanks",
                "Tom_Cruise",
                "Will Smith",
                "Zhao Lusi",
                "antony",
                "thayongnoi"]
IMG_SIZE = 300
BATCH_SIZE = 32
SEED = 42

device = torch.accelerator.current_accelerator().type if torch.accelerator.is_available() else "cpu"
print(f"Using {device} device")

Using cuda device


In [3]:
transform = transforms.Compose([
    transforms.Resize((IMG_SIZE, IMG_SIZE)),
    transforms.ToTensor(),  # đưa ảnh về [0, 1]
])

In [4]:
class FaceRecognitionMiniProject(Dataset):
    def __init__(self, data_dir, categories, transform=None):
        self.image_paths = []
        self.labels = []
        self.transform = transform

        for label, category in enumerate(categories):
            class_path = os.path.join(data_dir, category)
            for img_name in os.listdir(class_path):
                img_path = os.path.join(class_path, img_name)
                self.image_paths.append(img_path)
                self.labels.append(label)

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        image = Image.open(self.image_paths[idx]).convert("RGB")
        label = self.labels[idx]

        if self.transform:
            image = self.transform(image)
            
        return image, label

full_dataset = FaceRecognitionMiniProject(
    data_dir=DATA_DIR,
    categories=CATEGORIES,
    transform=transform
)

print(f"Loaded {len(full_dataset)} images.")


total_size = len(full_dataset)
train_size = int(0.7 * total_size)
val_size   = int(0.3 * total_size)
test_size  = total_size - train_size - val_size

train_dataset, val_dataset, test_dataset = random_split(
    full_dataset,
    [train_size, val_size, test_size],
    generator=torch.Generator().manual_seed(SEED)
)

print(f"Train: {len(train_dataset)}")
print(f"Validation: {len(val_dataset)}")
print(f"Test: {len(test_dataset)}")

train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_loader   = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False)
test_loader  = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)


images, labels = next(iter(train_loader))
print("Image batch shape:", images.shape)   # [B, 3, 224, 224]
print("Label batch shape:", labels.shape)   # [B]
print("Sample labels:", labels[:5])

num_classes = len(CATEGORIES)
print("Number of classes:", num_classes)


Loaded 600 images.
Train: 420
Validation: 180
Test: 0
Image batch shape: torch.Size([32, 3, 300, 300])
Label batch shape: torch.Size([32])
Sample labels: tensor([ 6, 26,  2, 13,  9])
Number of classes: 29


In [5]:
train_transform = transforms.Compose([
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomRotation(degrees=15),
    transforms.RandomResizedCrop(227, scale=(0.9, 1.0)),
    transforms.ColorJitter(brightness=0.1, contrast=0.1),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225])
])

val_test_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225])
])

In [6]:
def get_model():
    weights = models.MobileNet_V2_Weights.IMAGENET1K_V2
    model = models.mobilenet_v2(weights=weights)
    for param in model.parameters():
        param.requires_grad = False
    model.classifier = nn.Sequential(
    nn.Flatten(),
    nn.Linear(1280, 256),
    nn.ReLU(),
    nn.Dropout(0.2),
    nn.Linear(256, num_classes),
    nn.Sigmoid())
    loss_fn = loss_fn = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr= 1e-3)
    return model.to(device), loss_fn, optimizer

In [7]:
!pip install torch_summary
from torchsummary import summary
model, criterion, optimizer = get_model()
summary(model, torch.zeros(1,3,224,224))

Collecting torch_summary
  Downloading torch_summary-1.4.5-py3-none-any.whl.metadata (18 kB)
Downloading torch_summary-1.4.5-py3-none-any.whl (16 kB)
Installing collected packages: torch_summary
Successfully installed torch_summary-1.4.5
Downloading: "https://download.pytorch.org/models/mobilenet_v2-7ebf99e0.pth" to /root/.cache/torch/hub/checkpoints/mobilenet_v2-7ebf99e0.pth


100%|██████████| 13.6M/13.6M [00:00<00:00, 192MB/s]


Layer (type:depth-idx)                             Output Shape              Param #
├─Sequential: 1-1                                  [-1, 1280, 7, 7]          --
|    └─Conv2dNormActivation: 2-1                   [-1, 32, 112, 112]        --
|    |    └─Conv2d: 3-1                            [-1, 32, 112, 112]        (864)
|    |    └─BatchNorm2d: 3-2                       [-1, 32, 112, 112]        (64)
|    |    └─ReLU6: 3-3                             [-1, 32, 112, 112]        --
|    └─InvertedResidual: 2-2                       [-1, 16, 112, 112]        --
|    |    └─Sequential: 3-4                        [-1, 16, 112, 112]        (896)
|    └─InvertedResidual: 2-3                       [-1, 24, 56, 56]          --
|    |    └─Sequential: 3-5                        [-1, 24, 56, 56]          (5,136)
|    └─InvertedResidual: 2-4                       [-1, 24, 56, 56]          --
|    |    └─Sequential: 3-6                        [-1, 24, 56, 56]          (8,832)
|    └─InvertedRe

Layer (type:depth-idx)                             Output Shape              Param #
├─Sequential: 1-1                                  [-1, 1280, 7, 7]          --
|    └─Conv2dNormActivation: 2-1                   [-1, 32, 112, 112]        --
|    |    └─Conv2d: 3-1                            [-1, 32, 112, 112]        (864)
|    |    └─BatchNorm2d: 3-2                       [-1, 32, 112, 112]        (64)
|    |    └─ReLU6: 3-3                             [-1, 32, 112, 112]        --
|    └─InvertedResidual: 2-2                       [-1, 16, 112, 112]        --
|    |    └─Sequential: 3-4                        [-1, 16, 112, 112]        (896)
|    └─InvertedResidual: 2-3                       [-1, 24, 56, 56]          --
|    |    └─Sequential: 3-5                        [-1, 24, 56, 56]          (5,136)
|    └─InvertedResidual: 2-4                       [-1, 24, 56, 56]          --
|    |    └─Sequential: 3-6                        [-1, 24, 56, 56]          (8,832)
|    └─InvertedRe

In [None]:
num_epochs = 30

train_acc_list = []
val_acc_list = []
best_val_acc = 0.0
best_model_path = 'best_inception_model.pth'


for epoch in range(num_epochs):
    # ===== TRAIN =====
    model.train()
    correct, total = 0, 0

    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    train_acc = 100 * correct / total
    train_acc_list.append(train_acc)

    # ===== VALIDATION =====
    model.eval()
    correct, total = 0, 0
    with torch.no_grad():
        for images, labels in val_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    val_acc = 100 * correct / total
    val_acc_list.append(val_acc)

    print(f"Epoch [{epoch+1}/{num_epochs}] "
          f"Train Acc: {train_acc:.2f}% | Val Acc: {val_acc:.2f}%")
    if val_acc > best_val_acc:
        best_val_acc = val_acc
        torch.save(model.state_dict(), 'best_model_weights.pth')
        print(f"✓ Đã lưu model tốt nhất với Val Acc: {val_acc:.2f}%")

Epoch [1/30] Train Acc: 9.76% | Val Acc: 6.11%
✓ Đã lưu model tốt nhất với Val Acc: 6.11%
Epoch [2/30] Train Acc: 11.67% | Val Acc: 12.22%
✓ Đã lưu model tốt nhất với Val Acc: 12.22%
Epoch [3/30] Train Acc: 34.05% | Val Acc: 38.89%
✓ Đã lưu model tốt nhất với Val Acc: 38.89%
Epoch [4/30] Train Acc: 52.62% | Val Acc: 42.78%
✓ Đã lưu model tốt nhất với Val Acc: 42.78%
Epoch [5/30] Train Acc: 56.90% | Val Acc: 47.78%
✓ Đã lưu model tốt nhất với Val Acc: 47.78%
Epoch [6/30] Train Acc: 70.48% | Val Acc: 47.22%
Epoch [7/30] Train Acc: 66.90% | Val Acc: 45.56%
Epoch [8/30] Train Acc: 78.10% | Val Acc: 55.56%
✓ Đã lưu model tốt nhất với Val Acc: 55.56%
Epoch [9/30] Train Acc: 76.67% | Val Acc: 51.11%
Epoch [10/30] Train Acc: 81.67% | Val Acc: 56.11%
✓ Đã lưu model tốt nhất với Val Acc: 56.11%
Epoch [11/30] Train Acc: 87.38% | Val Acc: 59.44%
✓ Đã lưu model tốt nhất với Val Acc: 59.44%
Epoch [12/30] Train Acc: 88.57% | Val Acc: 56.67%
Epoch [13/30] Train Acc: 84.76% | Val Acc: 57.78%
Epoch [14/

In [None]:
plt.figure(figsize=(8, 5))
plt.plot(train_acc_list, label="Train Accuracy")
plt.plot(val_acc_list, label="Validation Accuracy")
plt.xlabel("Epoch")
plt.ylabel("Accuracy (%)")
plt.title("Training Accuracy")
plt.legend()
plt.grid(True)
plt.show()
