In [None]:
# Step 1: Clone the PyTorch-Encoding repository and move into the checkout, so that the
# relative paths used below (encoding/lib, setup.py) resolve inside it
!git clone https://github.com/zhanghang1989/PyTorch-Encoding.git
%cd PyTorch-Encoding

# Step 2: Install dependencies (optional but good to have)
!pip install -q yacs

# Step 3: Patch the CUDA sources under encoding/lib in place, rewriting the deprecated
# .data<T>() accessor to .data_ptr<T>(). Only the <float> and <double> forms are rewritten.
!find encoding/lib -type f -name "*.cu" -exec sed -i 's/\.data<float>()/.data_ptr<float>()/g' {} \;
!find encoding/lib -type f -name "*.cu" -exec sed -i 's/\.data<double>()/.data_ptr<double>()/g' {} \;

# Step 4: Build the extension in-place
!python setup.py build_ext --inplace

In [None]:
!pip install timm
!pip install portalocker

In [9]:
import sys
# Put the cloned PyTorch-Encoding checkout on the module search path so that the later
# `import encoding` can find the package.
sys.path.append("/kaggle/working/PyTorch-Encoding")

In [10]:
# Import all required packages
from PIL import Image
from sklearn.metrics import recall_score, accuracy_score
from tqdm import tqdm
from torch.utils.data import DataLoader, Dataset
from torchvision.models import resnet50, ResNet50_Weights, mobilenet_v2, MobileNet_V2_Weights

import torch
import torch.nn as nn
import torchvision.transforms as transforms
import torch.optim as optim
import os
os.environ['TORCH_CUDA_ARCH_LIST'] = '6.0' #  Consider change that if you have GPU diff P100

import numpy as np
import matplotlib.pyplot as plt
import encoding
import timm

In [11]:
import warnings
# Silence every UserWarning and FutureWarning for the rest of the session to keep the
# training log readable.
# NOTE(review): the filters are global, so warnings of these categories raised by any
# library used below are hidden as well.
warnings.filterwarnings("ignore", category=UserWarning)
warnings.filterwarnings("ignore", category=FutureWarning)

# 1. Load & Pre-process the Dataset

In [12]:
class AntiSpoofingDataset(Dataset):
    '''
    Two-class image dataset read from a directory tree.

    ``root_dir`` must contain one sub-directory per class: ``normal`` (label 0) and
    ``spoof`` (label 1). Every regular file inside them is treated as one sample.
    '''

    def __init__(self, root_dir, transform):
        '''
        :param root_dir: Path to the directory containing either train or dev data.
        :param transform: Callable applied to each image after it is loaded as RGB.
        '''
        self.root_dir = root_dir
        self.classes = ["normal", "spoof"]
        self.class_to_idx = {"normal" : 0, "spoof" : 1}
        self.transform = transform

        # Contains the image file paths and their corresponding labels.
        self.files = []
        for class_name in self.classes:
            class_dir = os.path.join(root_dir, class_name)
            class_idx = self.class_to_idx[class_name]
            # os.listdir returns entries in arbitrary order; sorting makes the sample order
            # (and therefore every index-based access) reproducible across runs and machines.
            for file in sorted(os.listdir(class_dir)):
                file_dir = os.path.join(class_dir, file)
                # Nested directories cannot be opened as images, so leave them out.
                if not os.path.isfile(file_dir):
                    continue
                self.files.append((file_dir, class_idx))

    def __len__(self):
        '''Return the number of (file, label) samples found under ``root_dir``.'''
        return len(self.files)

    def __getitem__(self, idx):
        '''
        :param idx: Position of the sample in ``self.files``.
        :return: Tuple ``(transformed image, label)`` with label 0 for normal and 1 for spoof.
        '''
        file_dir, label = self.files[idx]
        img_rgb = Image.open(file_dir).convert("RGB")
        img_rgb_tensor = self.transform(img_rgb)
        return img_rgb_tensor, label

def load_data(root_dir, batch_size, transform, shuffle):
    '''
    Wrap the anti-spoofing images found under ``root_dir`` in a DataLoader.

    :param root_dir: Directory holding the ``normal`` and ``spoof`` sub-directories.
    :param batch_size: Number of samples per batch.
    :param transform: Callable applied to every loaded image.
    :param shuffle: Whether the DataLoader reshuffles the samples.
    :return: The configured DataLoader.
    '''
    return DataLoader(
        AntiSpoofingDataset(root_dir=root_dir, transform=transform),
        batch_size=batch_size,
        shuffle=shuffle,
    )

# 2. Custom Model for Binary Classification

In [13]:
class CustomResNet50(nn.Module):
    '''
    ImageNet-pretrained ResNet50 used as a frozen feature extractor, followed by a
    trainable head that outputs one sigmoid probability per image.
    '''

    def __init__(self, dropout_rate=0.5):
        '''
        :param dropout_rate: Dropout rate
        '''
        super().__init__()

        # Pre-trained ResNet50 with its classification layer replaced by a pass-through,
        # so the backbone emits the 2048 features consumed by the new head.
        feature_extractor = resnet50(weights=ResNet50_Weights.IMAGENET1K_V1)
        feature_extractor.fc = nn.Identity()

        # Freeze every backbone weight; only the head below keeps requires_grad=True.
        for weight in feature_extractor.parameters():
            weight.requires_grad = False
        self.backbone = feature_extractor

        # New head: Batch norm -> Dropout -> Linear -> Sigmoid
        head_layers = [
            nn.BatchNorm1d(num_features=2048),
            nn.Dropout(p=dropout_rate),
            nn.Linear(in_features=2048, out_features=1),
            nn.Sigmoid(),
        ]
        self.fc = nn.Sequential(*head_layers)

    def forward(self, x):
        '''Run the frozen backbone, then the head, and return the head output.'''
        return self.fc(self.backbone(x))

class CustomMobileNetV2(nn.Module):
    '''
    ImageNet-pretrained MobileNetV2 used as a frozen feature extractor, followed by a
    trainable head that outputs one sigmoid probability per image.
    '''

    def __init__(self, dropout_rate=0.5):
        '''
        :param dropout_rate: Dropout rate
        '''
        super(CustomMobileNetV2, self).__init__()
        # Pass the weights enum by keyword, exactly as CustomResNet50 does: torchvision
        # declares `weights` as keyword-only and accepts the positional form only through a
        # deprecation shim.
        self.backbone = mobilenet_v2(weights=MobileNet_V2_Weights.IMAGENET1K_V1)

        self.backbone.classifier = nn.Identity() # Drop the last layer
        # Freeze every backbone weight; only the head below keeps requires_grad=True.
        for param in self.backbone.parameters():
            param.requires_grad = False

        # Add new FC layer: Batch norm -> Dropout -> Linear -> Sigmoid
        self.fc = nn.Sequential(
            nn.BatchNorm1d(num_features=1280),
            nn.Dropout(p=dropout_rate),
            nn.Linear(in_features=1280, out_features=1),
            nn.Sigmoid()
        )

    def forward(self, x):
        '''Run the frozen backbone, then the head, and return the head output.'''
        x = self.backbone(x)
        x = self.fc(x)
        return x

class CustomViT(nn.Module):
    '''
    Pretrained ``vit_base_patch16_224`` from timm used as a frozen feature extractor,
    followed by a trainable head that outputs one sigmoid probability per image.
    '''

    def __init__(self, dropout_rate=0.5):
        '''
        :param dropout_rate: Dropout rate
        '''
        super().__init__()

        # Pretrained ViT with its head replaced by a pass-through, so the backbone emits
        # the 768 features consumed by the new head.
        vit = timm.create_model("vit_base_patch16_224", pretrained=True)
        vit.head = nn.Identity()

        # Freeze every backbone weight; only the head below keeps requires_grad=True.
        for weight in vit.parameters():
            weight.requires_grad = False
        self.backbone = vit

        # New head: Batch norm -> Dropout -> Linear -> Sigmoid
        head_layers = [
            nn.BatchNorm1d(num_features=768),
            nn.Dropout(p=dropout_rate),
            nn.Linear(in_features=768, out_features=1),
            nn.Sigmoid(),
        ]
        self.fc = nn.Sequential(*head_layers)

    def forward(self, x):
        '''Run the frozen backbone, then the head, and return the head output.'''
        return self.fc(self.backbone(x))


class CustomTextureModel(nn.Module):
    '''
    Pretrained ``deepten_resnet50_minc`` texture model from the ``encoding`` package with
    its last two head modules bypassed, followed by a trainable head that outputs one
    sigmoid probability per image.
    '''

    def __init__(self, dropout_rate=0.5):
        '''
        :param dropout_rate: Dropout rate
        '''
        super().__init__()

        # Load the pre-trained model (DeepTen with ResNet backbone)
        self.backbone = encoding.models.get_model("deepten_resnet50_minc", pretrained=True)

        # Replace head modules 5 and 6 with pass-throughs.
        # NOTE(review): presumably these are the final normalisation and classification
        # layers, leaving a 4096-feature output -- confirm against the DeepTen definition.
        for position in (5, 6):
            self.backbone.head[position] = nn.Identity()

        # Freeze only the `pretrained` sub-module of the backbone; the remaining head
        # modules of the backbone stay trainable.
        for weight in self.backbone.pretrained.parameters():
            weight.requires_grad = False

        # New head: Batch norm -> Dropout -> Linear -> Sigmoid
        head_layers = [
            nn.BatchNorm1d(num_features=4096),
            nn.Dropout(p=dropout_rate),
            nn.Linear(in_features=4096, out_features=1),
            nn.Sigmoid(),
        ]
        self.fc = nn.Sequential(*head_layers)

    def forward(self, x):
        '''Run the texture backbone, then the head, and return the head output.'''
        return self.fc(self.backbone(x))

In [30]:
# Fix the random seed to ensure fairness when comparing methods.
# torch.manual_seed seeds the CPU generator as well as every CUDA device. The CPU generator
# matters here because the models are constructed before being moved to the GPU (so the new
# heads are initialised on CPU) and because it is the default source of randomness for
# DataLoader shuffling; torch.cuda.manual_seed alone leaves it unseeded.
torch.manual_seed(42)

In [31]:
def validate_model(dev_dataloader, classify_model, texture_model, criterion, device, alpha):
    '''
    Evaluate the ensemble on the validation set without tracking gradients.

    :param dev_dataloader: Iterable of ``(images, labels)`` batches that supports ``len()``.
    :param classify_model: Classification model; switched to eval mode.
    :param texture_model: Texture model; switched to eval mode.
    :param criterion: Loss applied to the blended output and the float labels.
    :param device: Device the batches are moved to.
    :param alpha: Weight of the classification output; the texture output gets ``1 - alpha``.
    :return: Sum of the per-batch losses divided by the number of batches.
    '''
    for net in (classify_model, texture_model):
        net.eval()

    running_loss = 0
    with torch.no_grad():
        for images, targets in dev_dataloader:
            images = images.to(device)
            targets = targets.to(device)

            prob_classify = classify_model(images)
            prob_texture = texture_model(images)
            blended = alpha * prob_classify + (1 - alpha) * prob_texture

            # Labels arrive as a 1-D integer batch; the loss expects a float column.
            running_loss += criterion(blended, targets.unsqueeze(1).float()).item()

    return running_loss / len(dev_dataloader)

def train_per_epoch(classify_model, texture_model, train_dataloader, criterion, classify_optimizer, texture_optimizer, device, epoch, num_epochs, alpha):
    '''
    Train both models for one pass over the training set.

    :param classify_model: Classification model; switched to train mode.
    :param texture_model: Texture model; switched to train mode.
    :param train_dataloader: Iterable of ``(images, labels)`` batches that supports ``len()``.
    :param criterion: Loss applied to the blended output and the float labels.
    :param classify_optimizer: Optimizer stepping the classification model.
    :param texture_optimizer: Optimizer stepping the texture model.
    :param device: Device the batches are moved to.
    :param epoch: Zero-based epoch index, shown one-based in the progress bar.
    :param num_epochs: Total number of epochs, shown in the progress bar.
    :param alpha: Weight of the classification output; the texture output gets ``1 - alpha``.
    :return: Sum of the per-batch losses divided by the number of batches.
    '''
    classify_model.train()
    texture_model.train()

    optimizers = (classify_optimizer, texture_optimizer)
    running_loss = 0
    batches = tqdm(train_dataloader, desc=f"Epoch {epoch+1}/{num_epochs}", unit="batch")

    for images, targets in batches:
        images = images.to(device)
        targets = targets.to(device)

        for opt in optimizers:
            opt.zero_grad()

        # One loss on the blended output drives the gradients of both models.
        blended = alpha * classify_model(images) + (1 - alpha) * texture_model(images)
        batch_loss = criterion(blended, targets.unsqueeze(1).float())
        batch_loss.backward()

        for opt in optimizers:
            opt.step()

        batch_value = batch_loss.item()
        running_loss += batch_value
        batches.set_postfix(loss=f"{batch_value:.4f}")

    return running_loss / len(train_dataloader)

def save_plots(train_losses, val_losses, model_name, folder_ckpt, alpha):
    '''
    Plot the per-epoch training and validation losses and save the figure as a PNG.

    :param train_losses: Training loss of each epoch.
    :param val_losses: Validation loss of each epoch.
    :param model_name: Model name used in the title and the file name.
    :param folder_ckpt: Directory the image is written to.
    :param alpha: Ensemble weight used in the title and the file name.
    '''
    out_path = os.path.join(folder_ckpt, f"{alpha}_{model_name}_train_val_loss.png")
    epoch_axis = range(1, len(train_losses) + 1)  # epochs are shown one-based

    plt.figure(figsize=(10, 5))
    for series, legend_label in ((train_losses, "Train Loss"), (val_losses, "Validation Loss")):
        plt.plot(epoch_axis, series, label=legend_label, marker="o")

    plt.title(f"Train and Validation Loss - {model_name} with alpha = {alpha}")
    plt.xlabel("Epochs")
    plt.ylabel("Loss")
    plt.legend()
    plt.grid()

    plt.savefig(out_path, dpi=300, bbox_inches="tight")
    plt.close()

def get_model(model_name):
    '''
    Instantiate the classification model registered under ``model_name``.

    :param model_name: One of "ResNet50", "MobileNetV2" or "ViT".
    :return: A new instance of the matching model class, built with its default arguments.
    :raises ValueError: If ``model_name`` is not a registered name.
    '''
    models = {
        "ResNet50": CustomResNet50,
        "MobileNetV2": CustomMobileNetV2,
        "ViT": CustomViT
    }

    if model_name in models:
        return models[model_name]()
    raise ValueError(f"Model should be one of {list(models.keys())}, get {model_name}.")

def print_training_infor(model_name, num_epochs, lr, batch_size, alpha):
    '''
    Print a banner summarising the training configuration.

    :param model_name: Name of the classification model.
    :param num_epochs: Number of training epochs.
    :param lr: Initial learning rate.
    :param batch_size: Batch size.
    :param alpha: Ensemble weight; 1 and 0 are reported as single-model runs.
    '''
    rule = "-" * 60

    # alpha weights the classification model, so the extremes switch one model off.
    mode = (
        "Don't use Texture Model" if alpha == 1
        else "Don't use Classification Model" if alpha == 0
        else "Ensemble Classification and Texture Models"
    )

    for line in (
        rule,
        f"Training `{model_name}` with {mode} (alpha = {alpha})",
        f"Epochs: {num_epochs} | Batch size: {batch_size} | Init Learning rate: {lr}",
        rule,
    ):
        print(line)

# Train model
def train_model(train_root_dir, dev_root_dir, model_name, num_epochs, lr, batch_size, folder_ckpt, alpha, beta, theta):
    '''
    Train the classification/texture ensemble, checkpoint it and pick a decision threshold.

    Each epoch trains both models on the blended output, validates them, steps both
    learning-rate schedulers on the validation loss and saves the "last" checkpoints; the
    "best" checkpoints are overwritten whenever the validation loss improves. After the
    last epoch the loss curves are plotted and the threshold search is run on the dev set.

    :param train_root_dir: Directory with the training images (``normal`` / ``spoof``).
    :param dev_root_dir: Directory with the validation images (``normal`` / ``spoof``).
    :param model_name: Name of the classification model, resolved by ``get_model``.
    :param num_epochs: Number of training epochs.
    :param lr: Initial learning rate of both Adam optimizers.
    :param batch_size: Batch size of both dataloaders.
    :param folder_ckpt: Directory for checkpoints and plots; created if missing.
    :param alpha: Weight of the classification output; the texture output gets ``1 - alpha``.
    :param beta: Forwarded to the threshold search (upper bound on BPCER).
    :param theta: Forwarded to the threshold search (lower bound on accuracy).
    '''
    # Make a folder to save checkpoints, loss plots, ...
    os.makedirs(folder_ckpt, exist_ok=True)

    # Load classify model and texture model
    classify_model = get_model(model_name)
    texture_model = CustomTextureModel()

    # Prepare data: Data Augmentation for Training Set, Transform for Validation/Test Set
    train_transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ColorJitter(
            brightness=0.15,
            contrast=0.15
        ),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])

    dev_transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])

    train_dataloader = load_data(root_dir=train_root_dir, batch_size=batch_size, transform=train_transform, shuffle=True)
    dev_dataloader = load_data(root_dir=dev_root_dir, batch_size=batch_size, transform=dev_transform, shuffle=False)

    # Show training information
    print_training_infor(model_name, num_epochs, lr, batch_size, alpha)

    # Setting device, loss function and optimizer
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    classify_model = classify_model.to(device)
    texture_model = texture_model.to(device)

    # Both models end in a Sigmoid, so the blended output is a probability and BCELoss applies.
    criterion = nn.BCELoss()

    classify_optimizer = optim.Adam(classify_model.parameters(), lr)
    texture_optimizer = optim.Adam(texture_model.parameters(), lr)

    # Add ReduceLROnPlateau schedulers. The deprecated `verbose` argument is not passed:
    # the current learning rate is already printed after every epoch below.
    classify_scheduler = optim.lr_scheduler.ReduceLROnPlateau(classify_optimizer, mode='min', factor=0.9, patience=3)
    texture_scheduler = optim.lr_scheduler.ReduceLROnPlateau(texture_optimizer, mode='min', factor=0.9, patience=3)

    # Flow train and validate phase
    train_losses = []
    val_losses = []
    min_val_loss = float("inf")

    for epoch in range(num_epochs):
        # Training 1 epoch
        train_loss = train_per_epoch(
            classify_model, texture_model, train_dataloader, criterion, classify_optimizer, texture_optimizer, device, epoch, num_epochs, alpha
        )
        train_losses.append(train_loss)

        # Validate in validation set
        val_loss = validate_model(dev_dataloader, classify_model, texture_model, criterion, device, alpha)
        val_losses.append(val_loss)

        # Step both schedulers on the same val_loss; they share one configuration, so the
        # two optimizers keep the same learning rate.
        classify_scheduler.step(val_loss)
        texture_scheduler.step(val_loss)

        # Show information per epoch
        print(f"Train loss = {train_loss:.4f} | Val loss = {val_loss:.4f} | Learning rate = {classify_optimizer.param_groups[0]['lr']:.7f}")

        # Save the best model
        if val_loss < min_val_loss:
            min_val_loss = val_loss
            best_classify_model_dir = os.path.join(folder_ckpt, f"{alpha}_{model_name}_best.pt")
            torch.save(classify_model.state_dict(), best_classify_model_dir)

            best_texture_model_dir = os.path.join(folder_ckpt, f"{alpha}_texture_best.pt")
            torch.save(texture_model.state_dict(), best_texture_model_dir)

            print(f"Best model saved at Epoch {epoch+1}.\n")

        # Save the last model
        last_classify_model_dir = os.path.join(folder_ckpt, f"{alpha}_{model_name}_last.pt")
        torch.save(classify_model.state_dict(), last_classify_model_dir)

        last_texture_model_dir = os.path.join(folder_ckpt, f"{alpha}_texture_last.pt")
        torch.save(texture_model.state_dict(), last_texture_model_dir)

    # Save plot of train/validate loss
    save_plots(train_losses, val_losses, model_name, folder_ckpt, alpha)

    # Find the best threshold for the metrics, using the best checkpoints saved above
    find_best_threshold(model_name, dev_dataloader, device, folder_ckpt, alpha, beta, theta)

def plot_metric_follow_threshold(thresholds, metrics, color, title, xlabel, y_label, filename, folder_ckpt, theta=None):
    '''
    Plot one metric against the decision threshold and save the figure.

    :param thresholds: X values (decision thresholds).
    :param metrics: Y values (metric obtained at each threshold).
    :param color: Colour of the metric curve.
    :param title: Figure title.
    :param xlabel: Label of the x axis.
    :param y_label: Label of the y axis; also used in the legend entry of the reference line.
    :param filename: Name of the image file created inside ``folder_ckpt``.
    :param folder_ckpt: Directory the image is written to.
    :param theta: Optional reference level drawn as a dashed horizontal line; ``None``
        draws no line.
    '''
    plt.figure(figsize=(10, 5))
    plt.plot(thresholds, metrics, color=color, linewidth=2)
    # Test against None rather than truthiness: a reference level of 0 is a valid value
    # but is falsy.
    if theta is not None:
        plt.axhline(y=theta, color="cyan", linestyle="--", linewidth=2, label=f"{y_label} threshold")
        # The label of the reference line is only rendered when a legend is drawn.
        plt.legend()
    plt.title(title)
    plt.xlabel(xlabel)
    plt.ylabel(y_label)
    plt.grid(True)
    plt.tight_layout()
    plt.savefig(os.path.join(folder_ckpt, filename))
    plt.close()

# Find the threshold to get best metrics
def find_best_threshold(model_name, dev_dataloader, device, folder_ckpt, alpha, beta, theta):
    '''
    Sweep the decision threshold on the dev set and report the best operating point.

    The best checkpoints of both models are loaded from ``folder_ckpt``, the blended
    probability of every dev sample is computed once, and thresholds 0.00 .. 0.99 are
    evaluated. Among the thresholds with ``BPCER <= beta`` and ``accuracy >= theta`` the one
    with the highest recall wins (the first one on ties). Three metric-vs-threshold plots
    are saved to ``folder_ckpt``.

    :param model_name: Name of the classification model, resolved by ``get_model``.
    :param dev_dataloader: Dataloader over the dev set yielding ``(images, labels)``.
    :param device: Device used for inference.
    :param folder_ckpt: Directory holding the checkpoints and receiving the plots.
    :param alpha: Weight of the classification output; the texture output gets ``1 - alpha``.
    :param beta: Maximum accepted BPCER (share of real images predicted as spoof).
    :param theta: Minimum accepted accuracy.
    :return: Dict with keys ``threshold``, ``recall``, ``bpcer`` and ``accuracy`` for the
        selected threshold, or ``None`` when no threshold with a positive recall satisfies
        both constraints.
    '''
    classify_model = get_model(model_name)
    best_classify_model_dir = os.path.join(folder_ckpt, f"{alpha}_{model_name}_best.pt")
    classify_model.load_state_dict(torch.load(best_classify_model_dir, map_location=device, weights_only=True))

    texture_model = CustomTextureModel()
    best_texture_model_dir = os.path.join(folder_ckpt, f"{alpha}_texture_best.pt")
    texture_model.load_state_dict(torch.load(best_texture_model_dir, map_location=device, weights_only=True))

    classify_model.to(device)
    classify_model.eval()

    texture_model.to(device)
    texture_model.eval()

    thresholds = []
    recalls = []
    accus = []
    bpcers = []

    y_true = []
    y_probability = []

    # Run inference once; the threshold sweep below only re-thresholds these probabilities.
    with torch.no_grad():
        for img, label in dev_dataloader:
            img, label = img.to(device), label.to(device)
            out1, out2 = classify_model(img).squeeze(1), texture_model(img).squeeze(1)
            out = alpha * out1 + (1 - alpha) * out2
            y_probability.extend(out.cpu().numpy())
            y_true.extend(label.cpu().numpy())

    y_true = np.array(y_true)
    y_probability = np.array(y_probability)

    best_recall = 0 # maximum recall but keep bpcer <= beta, accuracy >= theta
    best_bpcer = 1
    best_accu = 0
    best_threshold = 0

    # The set of real images does not depend on the threshold, so build the mask once.
    is_real = (y_true == 0)
    total_real_images = np.sum(is_real)
    for i in range(0, 100, 1):
        threshold = i / 100
        thresholds.append(threshold)

        y_pred = (y_probability >= threshold).astype(int)

        # Recall: ability to correctly detect spoof (label=1)
        recall = recall_score(y_true, y_pred, pos_label=1)
        recalls.append(recall)

        # Accuracy
        accu = accuracy_score(y_true, y_pred)
        accus.append(accu)

        # BPCER: % real images (label=0) wrongly predicted as fake
        FP = np.count_nonzero(y_pred[is_real] == 1) # Total real images the model predicts as fake

        bpcer = 0
        if total_real_images != 0:
            bpcer = FP / total_real_images
        bpcers.append(bpcer)

        if  bpcer <= beta and accu >= theta:
            if recall > best_recall:
                best_recall, best_bpcer, best_accu, best_threshold = recall, bpcer, accu, threshold

    # best_recall is still 0 when no threshold with a positive recall met both constraints.
    if best_recall == 0:
        print("Don't have any value of threshold make best metric.")
        best_point = None
    else:
        print(f"At threshold = {best_threshold}: Recall = {best_recall:.4f}, BPCER = {best_bpcer:.4f}, Accuracy = {best_accu:.4f}")
        best_point = {
            "threshold": best_threshold,
            "recall": best_recall,
            "bpcer": best_bpcer,
            "accuracy": best_accu,
        }

    # Plot of metrics for every threshold values
    plot_metric_follow_threshold(thresholds, recalls, "blue", "Recall vs Threshold", "Threshold", "Recall", f"{alpha}_{model_name}_recall_threshold.png", folder_ckpt)
    plot_metric_follow_threshold(thresholds, bpcers, "red", "BPCER vs Threshold", "Threshold", "BPCER", f"{alpha}_{model_name}_bpcer_threshold.png", folder_ckpt, beta)
    plot_metric_follow_threshold(thresholds, accus, "yellow", "Accuracy vs Threshold", "Threshold", "Accuracy", f"{alpha}_{model_name}_accuracy_threshold.png", folder_ckpt, theta)

    return best_point

In [56]:
# Train the ViT + texture ensemble and search the decision threshold on the dev set.
train_model(
    train_root_dir="/kaggle/input/fas-dataset/train", 
    dev_root_dir="/kaggle/input/fas-dataset/dev", 
    model_name="ViT",  # one of the names accepted by get_model
    num_epochs=40, 
    lr=0.001,  # initial learning rate of both optimizers
    batch_size=128, 
    folder_ckpt="/kaggle/working/",  # checkpoints and plots are written here
    alpha=0.5,  # weight of the classification output; the texture output gets 1 - alpha
    beta=0.20,  # threshold search: accept only BPCER <= beta
    theta=0.85  # threshold search: accept only accuracy >= theta
)

------------------------------------------------------------
Training `ViT` with Ensemble Classification and Texture Models (alpha = 0.5)
Epochs: 40 | Batch size: 128 | Init Learning rate: 0.001
------------------------------------------------------------


Epoch 1/40: 100%|██████████| 25/25 [01:27<00:00,  3.50s/batch, loss=0.2484]


Train loss = 0.4188 | Val loss = 0.4371 | Learning rate = 0.0010000
Best model saved at Epoch 1.



Epoch 2/40: 100%|██████████| 25/25 [01:23<00:00,  3.35s/batch, loss=0.2225]


Train loss = 0.2729 | Val loss = 0.3577 | Learning rate = 0.0010000
Best model saved at Epoch 2.



Epoch 3/40: 100%|██████████| 25/25 [01:23<00:00,  3.33s/batch, loss=0.2155]


Train loss = 0.2246 | Val loss = 0.3290 | Learning rate = 0.0010000
Best model saved at Epoch 3.



Epoch 4/40: 100%|██████████| 25/25 [01:22<00:00,  3.31s/batch, loss=0.1619]


Train loss = 0.1888 | Val loss = 0.3089 | Learning rate = 0.0010000
Best model saved at Epoch 4.



Epoch 5/40: 100%|██████████| 25/25 [01:22<00:00,  3.32s/batch, loss=0.1640]


Train loss = 0.1615 | Val loss = 0.3031 | Learning rate = 0.0010000
Best model saved at Epoch 5.



Epoch 6/40: 100%|██████████| 25/25 [01:24<00:00,  3.36s/batch, loss=0.1222]


Train loss = 0.1488 | Val loss = 0.3018 | Learning rate = 0.0010000
Best model saved at Epoch 6.



Epoch 7/40: 100%|██████████| 25/25 [01:23<00:00,  3.34s/batch, loss=0.1056]


Train loss = 0.1449 | Val loss = 0.3004 | Learning rate = 0.0010000
Best model saved at Epoch 7.



Epoch 8/40: 100%|██████████| 25/25 [01:23<00:00,  3.34s/batch, loss=0.1955]


Train loss = 0.1349 | Val loss = 0.3007 | Learning rate = 0.0010000


Epoch 9/40: 100%|██████████| 25/25 [01:23<00:00,  3.33s/batch, loss=0.1667]


Train loss = 0.1341 | Val loss = 0.3148 | Learning rate = 0.0010000


Epoch 10/40: 100%|██████████| 25/25 [01:21<00:00,  3.24s/batch, loss=0.1420]


Train loss = 0.1325 | Val loss = 0.3044 | Learning rate = 0.0010000


Epoch 11/40: 100%|██████████| 25/25 [01:21<00:00,  3.26s/batch, loss=0.0885]


Train loss = 0.1274 | Val loss = 0.3226 | Learning rate = 0.0009000


Epoch 12/40: 100%|██████████| 25/25 [01:22<00:00,  3.28s/batch, loss=0.1336]


Train loss = 0.1225 | Val loss = 0.2794 | Learning rate = 0.0009000
Best model saved at Epoch 12.



Epoch 13/40: 100%|██████████| 25/25 [01:21<00:00,  3.27s/batch, loss=0.1197]


Train loss = 0.1183 | Val loss = 0.3073 | Learning rate = 0.0009000


Epoch 14/40: 100%|██████████| 25/25 [01:21<00:00,  3.27s/batch, loss=0.1177]


Train loss = 0.1183 | Val loss = 0.3129 | Learning rate = 0.0009000


Epoch 15/40: 100%|██████████| 25/25 [01:21<00:00,  3.27s/batch, loss=0.1043]


Train loss = 0.1107 | Val loss = 0.2847 | Learning rate = 0.0009000


Epoch 16/40: 100%|██████████| 25/25 [01:21<00:00,  3.26s/batch, loss=0.1098]


Train loss = 0.1142 | Val loss = 0.2973 | Learning rate = 0.0008100


Epoch 17/40: 100%|██████████| 25/25 [01:21<00:00,  3.24s/batch, loss=0.0693]


Train loss = 0.1130 | Val loss = 0.2954 | Learning rate = 0.0008100


Epoch 18/40: 100%|██████████| 25/25 [01:21<00:00,  3.24s/batch, loss=0.1078]


Train loss = 0.1131 | Val loss = 0.3023 | Learning rate = 0.0008100


Epoch 19/40: 100%|██████████| 25/25 [01:21<00:00,  3.25s/batch, loss=0.1308]


Train loss = 0.1085 | Val loss = 0.2942 | Learning rate = 0.0008100


Epoch 20/40: 100%|██████████| 25/25 [01:21<00:00,  3.25s/batch, loss=0.0824]


Train loss = 0.1113 | Val loss = 0.2973 | Learning rate = 0.0007290


Epoch 21/40: 100%|██████████| 25/25 [01:21<00:00,  3.25s/batch, loss=0.0808]


Train loss = 0.0989 | Val loss = 0.3004 | Learning rate = 0.0007290


Epoch 22/40: 100%|██████████| 25/25 [01:21<00:00,  3.25s/batch, loss=0.0658]


Train loss = 0.1047 | Val loss = 0.3001 | Learning rate = 0.0007290


Epoch 23/40: 100%|██████████| 25/25 [01:20<00:00,  3.23s/batch, loss=0.0631]


Train loss = 0.1010 | Val loss = 0.2934 | Learning rate = 0.0007290


Epoch 24/40: 100%|██████████| 25/25 [01:20<00:00,  3.23s/batch, loss=0.1081]


Train loss = 0.1048 | Val loss = 0.3121 | Learning rate = 0.0006561


Epoch 25/40: 100%|██████████| 25/25 [01:21<00:00,  3.27s/batch, loss=0.0991]


Train loss = 0.0999 | Val loss = 0.2849 | Learning rate = 0.0006561


Epoch 26/40: 100%|██████████| 25/25 [01:21<00:00,  3.24s/batch, loss=0.0637]


Train loss = 0.0976 | Val loss = 0.2987 | Learning rate = 0.0006561


Epoch 27/40: 100%|██████████| 25/25 [01:20<00:00,  3.23s/batch, loss=0.1331]


Train loss = 0.1023 | Val loss = 0.3011 | Learning rate = 0.0006561


Epoch 28/40: 100%|██████████| 25/25 [01:22<00:00,  3.28s/batch, loss=0.1287]


Train loss = 0.0977 | Val loss = 0.3145 | Learning rate = 0.0005905


Epoch 29/40: 100%|██████████| 25/25 [01:22<00:00,  3.28s/batch, loss=0.1160]


Train loss = 0.1017 | Val loss = 0.2923 | Learning rate = 0.0005905


Epoch 30/40: 100%|██████████| 25/25 [01:22<00:00,  3.29s/batch, loss=0.0615]


Train loss = 0.0968 | Val loss = 0.3007 | Learning rate = 0.0005905


Epoch 31/40: 100%|██████████| 25/25 [01:22<00:00,  3.32s/batch, loss=0.1038]


Train loss = 0.0934 | Val loss = 0.3039 | Learning rate = 0.0005905


Epoch 32/40: 100%|██████████| 25/25 [01:21<00:00,  3.27s/batch, loss=0.1232]


Train loss = 0.0958 | Val loss = 0.3103 | Learning rate = 0.0005314


Epoch 33/40: 100%|██████████| 25/25 [01:22<00:00,  3.30s/batch, loss=0.1045]


Train loss = 0.0905 | Val loss = 0.3142 | Learning rate = 0.0005314


Epoch 34/40: 100%|██████████| 25/25 [01:22<00:00,  3.32s/batch, loss=0.1157]


Train loss = 0.0949 | Val loss = 0.3108 | Learning rate = 0.0005314


Epoch 35/40: 100%|██████████| 25/25 [01:22<00:00,  3.31s/batch, loss=0.1123]


Train loss = 0.0999 | Val loss = 0.3087 | Learning rate = 0.0005314


Epoch 36/40: 100%|██████████| 25/25 [01:22<00:00,  3.31s/batch, loss=0.1053]


Train loss = 0.0983 | Val loss = 0.3126 | Learning rate = 0.0004783


Epoch 37/40: 100%|██████████| 25/25 [01:22<00:00,  3.28s/batch, loss=0.1148]


Train loss = 0.0933 | Val loss = 0.3120 | Learning rate = 0.0004783


Epoch 38/40: 100%|██████████| 25/25 [01:21<00:00,  3.25s/batch, loss=0.0976]


Train loss = 0.0966 | Val loss = 0.3001 | Learning rate = 0.0004783


Epoch 39/40: 100%|██████████| 25/25 [01:21<00:00,  3.27s/batch, loss=0.0590]


Train loss = 0.0945 | Val loss = 0.3047 | Learning rate = 0.0004783


Epoch 40/40: 100%|██████████| 25/25 [01:21<00:00,  3.25s/batch, loss=0.0907]


Train loss = 0.0924 | Val loss = 0.2964 | Learning rate = 0.0004305
At threshold = 0.27: Recall = 0.9585, BPCER = 0.1944, Accuracy = 0.8821


# 3. Measure end-to-end inference time

In [45]:
import time

In [46]:
def measure_inference_time_dataloader(model_name, root_dir, alpha, device='cuda', batch_size=128):
    '''
    Measure the average forward-pass time per sample of the ensemble over a dataset.

    Only the two forward passes and the blending are timed; image loading and the
    host-to-device copy are outside the timed region. The models are freshly constructed
    here (no trained checkpoint is loaded).

    :param model_name: Name of the classification model, resolved by ``get_model``.
    :param root_dir: Directory with the images (``normal`` / ``spoof``).
    :param alpha: Weight of the classification output; the texture output gets ``1 - alpha``.
    :param device: Requested device; used only when CUDA is available, otherwise CPU.
    :param batch_size: Batch size used for the measurement.
    :return: Average inference time per sample in seconds.
    :raises ZeroDivisionError: If the dataset yields no samples.
    '''
    device = torch.device(device if torch.cuda.is_available() else 'cpu')

    classify_model = get_model(model_name).to(device)
    classify_model.eval()

    texture_model = CustomTextureModel().to(device)
    texture_model.eval()

    total_time = 0.0
    total_samples = 0

    dev_transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])
    dataloader = load_data(root_dir, batch_size=batch_size, transform=dev_transform, shuffle=False)

    # Calculate time for all dataset
    with torch.no_grad():
        for img, _ in dataloader:
            img = img.to(device)

            # CUDA kernels run asynchronously: synchronize before reading the clock so
            # that pending work is not attributed to the timed region (and again after it
            # so that the forward pass has really finished).
            if device.type == 'cuda':
                torch.cuda.synchronize()
            # perf_counter is the monotonic, high-resolution clock meant for measuring
            # short durations; time.time() can jump when the system clock is adjusted.
            start_time = time.perf_counter()

            out1, out2 = classify_model(img), texture_model(img)
            out = alpha * out1 + (1 - alpha) * out2

            if device.type == 'cuda':
                torch.cuda.synchronize()
            end_time = time.perf_counter()

            batch_time = end_time - start_time
            total_time += batch_time
            total_samples += img.size(0)

    avg_time_per_sample = total_time / total_samples
    print(f"[{device}] Average inference time per sample: {avg_time_per_sample:.6f} seconds")
    return avg_time_per_sample

In [57]:
# Time the ensemble's forward pass on the dev set. The function builds fresh models and
# does not load the trained checkpoints, so this measures speed only.
measure_inference_time_dataloader(model_name="ViT", alpha=0.5, root_dir="/kaggle/input/fas-dataset/dev", device='cuda')

[cuda] Average inference time per sample: 0.007237 seconds


0.007237450625017236

In [55]:
!rm "/kaggle/working/1_ViT_accuracy_threshold.png"
!rm "/kaggle/working/1_ViT_best.pt"
!rm "/kaggle/working/1_ViT_bpcer_threshold.png"
!rm "/kaggle/working/1_ViT_last.pt"
!rm "/kaggle/working/1_ViT_recall_threshold.png"
!rm "/kaggle/working/1_ViT_train_val_loss.png"
!rm "/kaggle/working/1_ViT_best.pt"
!rm "/kaggle/working/1_ViT_last.pt"
!rm "/kaggle/working/1_texture_best.pt"
!rm "/kaggle/working/1_texture_last.pt"

rm: cannot remove '/kaggle/working/1_ViT_accuracy_threshold.png': No such file or directory
rm: cannot remove '/kaggle/working/1_ViT_best.pt': No such file or directory
rm: cannot remove '/kaggle/working/1_ViT_bpcer_threshold.png': No such file or directory
rm: cannot remove '/kaggle/working/1_ViT_last.pt': No such file or directory
rm: cannot remove '/kaggle/working/1_ViT_recall_threshold.png': No such file or directory
rm: cannot remove '/kaggle/working/1_ViT_train_val_loss.png': No such file or directory
rm: cannot remove '/kaggle/working/1_ViT_best.pt': No such file or directory
rm: cannot remove '/kaggle/working/1_ViT_last.pt': No such file or directory
