# Sys check

In [1]:
import os 
root = os.getcwd()
root 

'd:\\IT\\GITHUB\\TakeHomeTest'

In [2]:
!nvidia-smi

Fri Apr 11 20:21:10 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 560.94                 Driver Version: 560.94         CUDA Version: 12.6     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                  Driver-Model | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  NVIDIA GeForce GTX 1650 Ti   WDDM  |   00000000:01:00.0  On |                  N/A |
| N/A   51C    P8              5W /   50W |     613MiB /   4096MiB |     23%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
                                                

In [3]:
import torch 
print(torch.__version__)
print(torch.cuda.is_available())

2.6.0+cu118
True


In [5]:
def training(model, optimizer, criterion, train_loader, val_loader, num_epochs=10, learning_rate=0.001, device='cuda'):
    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0

        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device)

            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()

        print(f"[Epoch {epoch+1}/{num_epochs}] Loss: {running_loss / len(train_loader):.4f}")
        
        # Evaluate on validation set
        print("Validation performance:")
        evaluate(model, val_loader)

# Single-Image

In [6]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import models, transforms, datasets
from torch.utils.data import DataLoader
from sklearn.metrics import classification_report

import os
import matplotlib.pyplot as plt

In [7]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
num_classes = 2
batch_size = 32
num_epochs = 10
learning_rate = 1e-4

In [8]:
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],  # ImageNet mean/std
                         std=[0.229, 0.224, 0.225])
])

In [9]:
# Tải dữ liệu
data_dir = "dataset"
train_set = datasets.ImageFolder(os.path.join(data_dir, "train"), transform=transform)
train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True)

val_set = datasets.ImageFolder(os.path.join(data_dir, "dev"), transform=transform)
val_loader = DataLoader(val_set, batch_size=batch_size, shuffle=False)

In [10]:
model = models.resnet50(pretrained=True)
for param in model.parameters():
    param.requires_grad = False

# Thay thế lớp fully-connected cuối cùng
model.fc = nn.Sequential(
    nn.Linear(model.fc.in_features, 256),
    nn.ReLU(),
    nn.Dropout(0.4),
    nn.Linear(256, num_classes)
)

model = model.to(device)



In [11]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.fc.parameters(), lr=learning_rate)

In [12]:
# Hàm đánh giá
def evaluate(model, dataloader):
    model.eval()
    y_true, y_pred = [], []

    with torch.no_grad():
        for inputs, labels in dataloader:
            inputs = inputs.to(device)
            outputs = model(inputs)
            _, preds = torch.max(outputs, 1)
            y_true.extend(labels.cpu().numpy())
            y_pred.extend(preds.cpu().numpy())

    report = classification_report(y_true, y_pred, target_names=["normal", "spoof"])
    print(report)

In [14]:
# Huấn luyện
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0

    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    print(f"[Epoch {epoch+1}/{num_epochs}] Loss: {running_loss / len(train_loader):.4f}")
    
    # Evaluate on validation set
    print("Validation performance:")
    evaluate(model, val_loader)

[Epoch 1/10] Loss: 0.4960
Validation performance:
              precision    recall  f1-score   support

      normal       0.83      0.80      0.81       602
       spoof       0.81      0.83      0.82       602

    accuracy                           0.82      1204
   macro avg       0.82      0.82      0.82      1204
weighted avg       0.82      0.82      0.82      1204

[Epoch 2/10] Loss: 0.4300
Validation performance:
              precision    recall  f1-score   support

      normal       0.90      0.66      0.76       602
       spoof       0.73      0.93      0.82       602

    accuracy                           0.79      1204
   macro avg       0.82      0.79      0.79      1204
weighted avg       0.82      0.79      0.79      1204

[Epoch 3/10] Loss: 0.3972
Validation performance:
              precision    recall  f1-score   support

      normal       0.78      0.89      0.83       602
       spoof       0.87      0.75      0.81       602

    accuracy                    

KeyboardInterrupt: 

In [None]:
# Lưu mô hình
# torch.save(model.state_dict(), "resnet50_liveness.pth")

In [15]:
evaluate(model, val_loader)

KeyboardInterrupt: 

# Multi-image

In [None]:
from torch.utils.data import Dataset
from PIL import Image
import glob
import os
import torch

class MultiImageLivenessDataset(Dataset):
    def __init__(self, root_dir, transform=None):
        self.samples = []
        self.transform = transform

        for label_dir in ["normal", "spoof"]:
            full_path = os.path.join(root_dir, label_dir)
            persons = {}

            # Gom ảnh theo từng person_id
            for img_path in glob.glob(os.path.join(full_path, "*.jpg")):
                filename = os.path.basename(img_path)
                person_id = filename.split("_")[0]
                if person_id not in persons:
                    persons[person_id] = []
                persons[person_id].append(img_path)

            for person_id, images in persons.items():
                images = sorted(images)
                if len(images) >= 4:
                    selected = images[:4]
                else:
                    # Nếu ít hơn 4 ảnh → nhân bản ảnh đầu tiên
                    selected = (images + [images[0]] * 4)[:4]
                self.samples.append((selected, 0 if label_dir == "normal" else 1))

    def __len__(self):
        return len(self.samples)

    def __getitem__(self, idx):
        img_paths, label = self.samples[idx]
        imgs = []

        for path in img_paths:
            image = Image.open(path).convert("RGB")
            if self.transform:
                image = self.transform(image)
            imgs.append(image)

        return torch.stack(imgs), torch.tensor(label)


In [67]:
train_set = MultiImageLivenessDataset("dataset/train", transform=transform)
val_set = MultiImageLivenessDataset("dataset/dev", transform=transform)
train_loader = DataLoader(train_set, batch_size=8, shuffle=True)
val_loader = DataLoader(val_set, batch_size=8, shuffle=False)

In [68]:
# Hàm đánh giá
def evaluate(model, dataloader):
    model.eval()
    y_true, y_pred = [], []

    with torch.no_grad():
        for inputs, labels in dataloader:
            inputs = inputs.to(device)
            outputs = model(inputs)
            _, preds = torch.max(outputs, 1)
            y_true.extend(labels.cpu().numpy())
            y_pred.extend(preds.cpu().numpy())

    report = classification_report(y_true, y_pred, target_names=["normal", "spoof"])
    print(report)

## Resnet


In [19]:
import torch.nn as nn
import torchvision.models as models

class MultiImageResNetLSTM(nn.Module):
    def __init__(self, base_model, lstm_hidden=512, num_classes=2):
        super(MultiImageResNetLSTM, self).__init__()
        self.feature_extractor = nn.Sequential(*list(base_model.children())[:-1])  # ResNet50 bỏ FC
        self.lstm = nn.LSTM(input_size=2048, hidden_size=lstm_hidden, batch_first=True)
        self.classifier = nn.Sequential(
            nn.Linear(lstm_hidden, 256),
            nn.ReLU(),
            nn.Dropout(0.4),
            nn.Linear(256, num_classes)
        )

    def forward(self, x):
        # x: [B, 4, C, H, W]
        B, N, C, H, W = x.shape
        x = x.view(B * N, C, H, W)

        features = self.feature_extractor(x)  # [B * 4, 2048, 1, 1]
        features = features.view(B, N, -1)    # [B, 4, 2048]

        _, (hn, _) = self.lstm(features)      # hn: [1, B, hidden]
        hn = hn.squeeze(0)                    # [B, hidden]

        out = self.classifier(hn)
        return out


In [20]:
resnet = models.resnet50(pretrained=True)
model = MultiImageResNetLSTM(resnet).to(device)



In [21]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

In [22]:
training(model, optimizer, criterion, train_loader, val_loader, num_epochs=3, learning_rate=0.001, device='cuda')

[Epoch 1/3] Loss: 0.4829
Validation performance:
              precision    recall  f1-score   support

      normal       0.83      0.92      0.88       130
       spoof       0.91      0.82      0.86       130

    accuracy                           0.87       260
   macro avg       0.87      0.87      0.87       260
weighted avg       0.87      0.87      0.87       260

[Epoch 2/3] Loss: 0.3081
Validation performance:
              precision    recall  f1-score   support

      normal       0.76      0.95      0.85       130
       spoof       0.93      0.71      0.80       130

    accuracy                           0.83       260
   macro avg       0.85      0.83      0.82       260
weighted avg       0.85      0.83      0.82       260

[Epoch 3/3] Loss: 0.2774
Validation performance:
              precision    recall  f1-score   support

      normal       0.87      0.95      0.91       130
       spoof       0.94      0.86      0.90       130

    accuracy                       

## VIT

In [23]:
import timm
import torch
import torch.nn as nn

class ViT_LivenessClassifier(nn.Module):
    def __init__(self, vit_model_name='vit_base_patch16_224', num_classes=2):
        super(ViT_LivenessClassifier, self).__init__()
        self.vit = timm.create_model(vit_model_name, pretrained=True)
        self.vit.head = nn.Identity()  # Bỏ classification head của ViT

        self.embedding_dim = self.vit.num_features  # Thường là 768

        self.classifier = nn.Sequential(
            nn.Linear(self.embedding_dim, 256),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(256, num_classes)
        )

    def forward(self, x):
        # x: [B, 4, C, H, W]
        B, N, C, H, W = x.shape
        x = x.view(B * N, C, H, W)

        embeddings = self.vit(x)  # [B*4, D]
        embeddings = embeddings.view(B, N, -1)  # [B, 4, D]

        # Mean pooling over 4 embeddings
        pooled = embeddings.mean(dim=1)  # [B, D]
        out = self.classifier(pooled)
        return out


  from .autonotebook import tqdm as notebook_tqdm


In [24]:
vit_model = ViT_LivenessClassifier(vit_model_name='vit_base_patch16_224').to(device)

In [25]:
# loss và optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.AdamW(vit_model.parameters(), lr=2e-5)

In [26]:
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5]*3, std=[0.5]*3),  # Hoặc theo ViT
])

In [27]:
training(model, optimizer, criterion, train_loader, val_loader, num_epochs=3, learning_rate=0.001, device='cuda')

[Epoch 1/3] Loss: 0.2134
Validation performance:
              precision    recall  f1-score   support

      normal       0.93      0.91      0.92       130
       spoof       0.91      0.93      0.92       130

    accuracy                           0.92       260
   macro avg       0.92      0.92      0.92       260
weighted avg       0.92      0.92      0.92       260

[Epoch 2/3] Loss: 0.2278
Validation performance:
              precision    recall  f1-score   support

      normal       0.88      0.94      0.91       130
       spoof       0.93      0.88      0.90       130

    accuracy                           0.91       260
   macro avg       0.91      0.91      0.91       260
weighted avg       0.91      0.91      0.91       260

[Epoch 3/3] Loss: 0.2521
Validation performance:
              precision    recall  f1-score   support

      normal       0.86      0.93      0.90       130
       spoof       0.93      0.85      0.89       130

    accuracy                       

# AutoEncoder

In [70]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class SimpleAutoEncoder(nn.Module):
    def __init__(self):
        super(SimpleAutoEncoder, self).__init__()
        # Encoder
        self.encoder = nn.Sequential(
            nn.Conv2d(3, 32, 3, stride=2, padding=1),  # [B, 32, 112, 112]
            nn.ReLU(),
            nn.Conv2d(32, 64, 3, stride=2, padding=1),  # [B, 64, 56, 56]
            nn.ReLU(),
            nn.Conv2d(64, 128, 3, stride=2, padding=1),  # [B, 128, 28, 28]
            nn.ReLU(),
        )
        # Decoder
        self.decoder = nn.Sequential(
            nn.ConvTranspose2d(128, 64, 3, stride=2, padding=1, output_padding=1),  # [B, 64, 56, 56]
            nn.ReLU(),
            nn.ConvTranspose2d(64, 32, 3, stride=2, padding=1, output_padding=1),  # [B, 32, 112, 112]
            nn.ReLU(),
            nn.ConvTranspose2d(32, 3, 3, stride=2, padding=1, output_padding=1),  # [B, 3, 224, 224]
            nn.Sigmoid()
        )

    def forward(self, x):
        x = self.encoder(x)
        x = self.decoder(x)
        return x


### Reconstruction error

In [82]:
from torchvision import transforms
from torch.utils.data import DataLoader

transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
])

# Chỉ dùng ảnh "normal" để huấn luyện AE
class NormalOnlyDataset(MultiImageLivenessDataset):
    def __init__(self, root_dir, transform=None):
        super().__init__(root_dir, transform)
        self.samples = [s for s in self.samples if s[1] == 0]

train_dataset = NormalOnlyDataset("dataset/train", transform=transform)
train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)

# Full dataset để đánh giá
val_dataset = MultiImageLivenessDataset("dataset/dev", transform=transform)
val_loader = DataLoader(val_dataset, batch_size=16, shuffle=False)


In [73]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = SimpleAutoEncoder().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
criterion = nn.MSELoss()

In [75]:
num_epochs = 2

for epoch in range(num_epochs):
    model.train()
    total_loss = 0
    for image_group, _ in train_loader:  # image_group: [B, 4, 3, H, W]
        images = image_group.view(-1, 3, 224, 224).to(device)  # [B*4, 3, H, W]
        outputs = model(images)
        loss = criterion(outputs, images)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    print(f"Epoch {epoch+1}, Loss: {total_loss/len(train_loader):.4f}")


Epoch 1, Loss: 0.0329
Epoch 2, Loss: 0.0129


In [85]:
from sklearn.metrics import classification_report

def evaluate_autoencoder(model, dataloader, threshold=None):
    model.eval()
    y_true, y_pred, scores = [], [], []

    with torch.no_grad():
        for image_group, labels in dataloader:
            batch_size = image_group.size(0)
            images = image_group.view(-1, 3, 224, 224).to(device)  # [B*4, 3, H, W]
            recons = model(images)

            # Tính lỗi từng ảnh: [B*4]
            mse = F.mse_loss(recons, images, reduction='none')
            mse = mse.view(batch_size, 4, -1).mean(dim=2)  # [B, 4]
            group_mse = mse.mean(dim=1)  # [B]

            # Phân loại
            if threshold is None:
                scores.extend(group_mse.cpu().numpy())
                y_true.extend(labels.numpy())
            else:
                pred_labels = (group_mse > threshold).long()
                y_pred.extend(pred_labels.cpu().numpy())
                y_true.extend(labels.numpy())

    if threshold is None:
        # Auto tìm threshold tốt nhất
        from sklearn.metrics import roc_curve
        fpr, tpr, thres = roc_curve(y_true, scores)
        best_idx = (tpr - fpr).argmax()
        best_threshold = thres[best_idx]
        print(f"[INFO] Best threshold: {best_threshold:.4f}")
        return best_threshold
    else:
        report = classification_report(y_true, y_pred, target_names=["normal", "spoof"])
        print(report)


In [87]:
# Tìm ngưỡng phân biệt tốt nhất
best_thresh = evaluate_autoencoder(model, val_loader, threshold=None)

# Đánh giá với ngưỡng này
evaluate_autoencoder(model, val_loader, threshold=best_thresh)


[INFO] Best threshold: 0.0109
              precision    recall  f1-score   support

      normal       0.58      0.82      0.68       130
       spoof       0.69      0.41      0.51       130

    accuracy                           0.61       260
   macro avg       0.63      0.61      0.59       260
weighted avg       0.63      0.61      0.59       260



### AE+Resnet Classifier

In [95]:
train_set = MultiImageLivenessDataset("dataset/train", transform=transform)
val_set = MultiImageLivenessDataset("dataset/dev", transform=transform)
train_loader = DataLoader(train_set, batch_size=8, shuffle=True)
val_loader = DataLoader(val_set, batch_size=8, shuffle=False)

In [88]:
from torchvision.models import resnet18

class ResNetClassifier(nn.Module):
    def __init__(self):
        super(ResNetClassifier, self).__init__()
        base = resnet18(pretrained=True)
        base.fc = nn.Identity()  # remove final FC
        self.backbone = base
        self.classifier = nn.Linear(512, 2)  # binary classification

    def forward(self, x):
        feat = self.backbone(x)
        out = self.classifier(feat)
        return out


In [90]:
class AE_ResNet_Pipeline(nn.Module):
    def __init__(self, autoencoder, classifier):
        super(AE_ResNet_Pipeline, self).__init__()
        self.autoencoder = autoencoder
        self.classifier = classifier

    def forward(self, x):
        # x: [B, 4, 3, 224, 224]
        b, s, c, h, w = x.size()
        x = x.view(-1, c, h, w)  # [B*4, 3, H, W]

        # AE reconstruct
        recon = self.autoencoder(x)  # [B*4, 3, H, W]

        # Classifier
        logits = self.classifier(recon)  # [B*4, 2]

        logits = logits.view(b, s, 2).mean(dim=1)  # [B, 2], avg over 4 imgs
        return logits


In [91]:
model = AE_ResNet_Pipeline(SimpleAutoEncoder(), ResNetClassifier()).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
criterion = nn.CrossEntropyLoss()



In [96]:
for epoch in range(3):
    model.train()
    total_loss = 0
    for image_group, labels in train_loader:  # [B, 4, 3, H, W]
        image_group = image_group.to(device)
        labels = labels.to(device)

        outputs = model(image_group)
        loss = criterion(outputs, labels)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        total_loss += loss.item()
    print(f"[Epoch {epoch+1}] Loss: {total_loss / len(train_loader):.4f}")


[Epoch 1] Loss: 0.7765
[Epoch 2] Loss: 0.5868
[Epoch 3] Loss: 0.5260


In [99]:
from sklearn.metrics import classification_report

def evaluate_pipeline(model, dataloader):
    model.eval()
    y_true, y_pred = [], []

    with torch.no_grad():
        for image_group, labels in dataloader:
            image_group = image_group.to(device)
            outputs = model(image_group)
            preds = torch.argmax(outputs, dim=1)

            y_true.extend(labels.numpy())
            y_pred.extend(preds.cpu().numpy())

    print(classification_report(y_true, y_pred, target_names=["normal", "spoof"]))


In [100]:
evaluate_pipeline(model, val_loader)

              precision    recall  f1-score   support

      normal       0.75      0.05      0.09       130
       spoof       0.51      0.98      0.67       130

    accuracy                           0.52       260
   macro avg       0.63      0.52      0.38       260
weighted avg       0.63      0.52      0.38       260

