In [18]:
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets
from transformers import DeiTForImageClassification, DeiTFeatureExtractor
import albumentations as A
from albumentations.pytorch import ToTensorV2
import numpy as np
from sklearn.metrics import roc_curve
import pandas as pd
from tqdm import tqdm


In [19]:
# # small
# train_dir = '/kaggle/input/small-dataset/train'
# val_dir = '/kaggle/input/small-dataset/val'
# test_dir = '/kaggle/input/small-dataset/test'

# fm_dir = '/kaggle/input/mad-benchmark-small/FaceMorpher'
# mg1_dir = '/kaggle/input/mad-benchmark-small/MIPGAN_I'
# mg2_dir = '/kaggle/input/mad-benchmark-small/MIPGAN_II'
# oc_dir = '/kaggle/input/mad-benchmark-small/OpenCV'
# wm_dir = '/kaggle/input/mad-benchmark-small/Webmorph'


In [20]:
train_dir = '/kaggle/input/morph-splitted/train'
val_dir = '/kaggle/input/morph-splitted/val'
test_dir = '/kaggle/input/morph-splitted/test'


fm_dir = '/kaggle/input/mad-benchmark/FaceMorpher'
mg1_dir = '/kaggle/input/mad-benchmark/MIPGAN_I'
mg2_dir = '/kaggle/input/mad-benchmark/MIPGAN_II'
oc_dir = '/kaggle/input/mad-benchmark/OpenCV'
wm_dir = '/kaggle/input/mad-benchmark/Webmorph'


In [21]:
# Define transformations using Albumentations
train_transform = A.Compose([
    A.Resize(224, 224),
    A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
    ToTensorV2()
])

val_test_transform = A.Compose([
    A.Resize(224, 224),
    A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
    ToTensorV2()
])

# Create a custom dataset class to use Albumentations
class AlbumentationsDataset(datasets.ImageFolder):
    def __init__(self, root, transform=None):
        super(AlbumentationsDataset, self).__init__(root, transform=None)
        self.transform = transform

    def __getitem__(self, index):
        path, target = self.samples[index]
        sample = self.loader(path)
        if self.transform is not None:
            sample = np.array(sample)
            sample = self.transform(image=sample)['image']
        return sample, target

In [22]:
# Create datasets
train_dataset = AlbumentationsDataset(train_dir, transform=train_transform)
val_dataset = AlbumentationsDataset(val_dir, transform=val_test_transform)
test_dataset = AlbumentationsDataset(test_dir, transform=val_test_transform)

# Create data loaders
batch_size = 32
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

# For the evaluation datasets
fm_dataset = AlbumentationsDataset(fm_dir, transform=val_test_transform)
mg1_dataset = AlbumentationsDataset(mg1_dir, transform=val_test_transform)
mg2_dataset = AlbumentationsDataset(mg2_dir, transform=val_test_transform)
oc_dataset = AlbumentationsDataset(oc_dir, transform=val_test_transform)
wm_dataset = AlbumentationsDataset(wm_dir, transform=val_test_transform)

# Create data loaders for evaluation datasets
fm_loader = DataLoader(fm_dataset, batch_size=batch_size, shuffle=False)
mg1_loader = DataLoader(mg1_dataset, batch_size=batch_size, shuffle=False)
mg2_loader = DataLoader(mg2_dataset, batch_size=batch_size, shuffle=False)
oc_loader = DataLoader(oc_dataset, batch_size=batch_size, shuffle=False)
wm_loader = DataLoader(wm_dataset, batch_size=batch_size, shuffle=False)


In [23]:
model_name = 'facebook/deit-base-distilled-patch16-224'
deit_model = DeiTForImageClassification.from_pretrained(model_name)

# Modify the classifier head for binary classification
num_features = deit_model.classifier.in_features
deit_model.classifier = nn.Sequential(
    nn.Dropout(0.3),
    nn.Linear(num_features, 1)
)

# Initialize the classifier model
classifier_model = deit_model

# Move model to the device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
classifier_model.to(device)

# Define loss function and optimizer
criterion = nn.BCEWithLogitsLoss()
optimizer = torch.optim.AdamW(classifier_model.parameters(), lr=1e-4, weight_decay=1e-2)

Some weights of DeiTForImageClassification were not initialized from the model checkpoint at facebook/deit-base-distilled-patch16-224 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [24]:
# Training loop
num_epochs = 5
for epoch in range(num_epochs):
    classifier_model.train()
    running_loss = 0.0
    train_loader_tqdm = tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs} Training")
    for images, labels in train_loader_tqdm:
        images, labels = images.to(device), labels.to(device)
        labels = labels.float().unsqueeze(1)  # Convert labels to float and reshape for BCEWithLogitsLoss
        
        optimizer.zero_grad()
        outputs = classifier_model(images)
        logits = outputs.logits  # Extract logits from the output
        loss = criterion(logits, labels)
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item()
        train_loader_tqdm.set_postfix(loss=running_loss/len(train_loader))
    
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss/len(train_loader)}")
    
    # Validation
    classifier_model.eval()
    val_loss = 0.0
    correct = 0
    total = 0
    val_loader_tqdm = tqdm(val_loader, desc=f"Epoch {epoch+1}/{num_epochs} Validation")
    with torch.no_grad():
        for images, labels in val_loader_tqdm:
            images, labels = images.to(device), labels.to(device)
            labels = labels.float().unsqueeze(1)
            outputs = classifier_model(images)
            logits = outputs.logits  # Extract logits from the output
            loss = criterion(logits, labels)
            val_loss += loss.item()
            
            predicted = torch.sigmoid(logits).round()
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    
    val_accuracy = correct / total
    print(f'Validation Loss: {val_loss/len(val_loader)}, Accuracy: {val_accuracy * 100}%')

print("Training complete")


Epoch 1/5 Training: 100%|██████████| 750/750 [10:31<00:00,  1.19it/s, loss=0.00659]


Epoch [1/5], Loss: 0.006594981925173064


Epoch 1/5 Validation: 100%|██████████| 250/250 [01:55<00:00,  2.16it/s]


Validation Loss: 7.144523358874722e-06, Accuracy: 100.0%


Epoch 2/5 Training: 100%|██████████| 750/750 [08:51<00:00,  1.41it/s, loss=0.00543] 


Epoch [2/5], Loss: 0.005425669205408364


Epoch 2/5 Validation: 100%|██████████| 250/250 [01:21<00:00,  3.06it/s]


Validation Loss: 0.0002386216320173844, Accuracy: 99.9875%


Epoch 3/5 Training: 100%|██████████| 750/750 [08:52<00:00,  1.41it/s, loss=0.00277] 


Epoch [3/5], Loss: 0.0027671900411690635


Epoch 3/5 Validation: 100%|██████████| 250/250 [01:22<00:00,  3.05it/s]


Validation Loss: 0.0001686474694952267, Accuracy: 99.9875%


Epoch 4/5 Training: 100%|██████████| 750/750 [08:52<00:00,  1.41it/s, loss=1.31e-5]


Epoch [4/5], Loss: 1.3092213251638896e-05


Epoch 4/5 Validation: 100%|██████████| 250/250 [01:22<00:00,  3.02it/s]


Validation Loss: 0.00015412616501680532, Accuracy: 99.9875%


Epoch 5/5 Training: 100%|██████████| 750/750 [08:50<00:00,  1.41it/s, loss=2.47e-6]


Epoch [5/5], Loss: 2.4699177638467517e-06


Epoch 5/5 Validation: 100%|██████████| 250/250 [01:22<00:00,  3.04it/s]

Validation Loss: 9.59559011764668e-05, Accuracy: 99.9875%
Training complete





In [25]:
# Testing
classifier_model.eval()
correct = 0
total = 0
test_loader_tqdm = tqdm(test_loader, desc="Testing")
with torch.no_grad():
    for images, labels in test_loader_tqdm:
        images, labels = images.to(device), labels.to(device)
        labels = labels.float().unsqueeze(1)
        outputs = classifier_model(images)
        logits = outputs.logits  # Extract logits from the output
        predicted = torch.sigmoid(logits).round()
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

test_accuracy = correct / total
print(f'Test Accuracy: {test_accuracy * 100}%')


Testing: 100%|██████████| 250/250 [02:02<00:00,  2.04it/s]

Test Accuracy: 100.0%





In [26]:
# Define the evaluation function
def evaluate_model(model, data_loader, device):
    model.eval()
    true_labels = []
    predictions = []

    with torch.no_grad():
        for inputs, labels in tqdm(data_loader, desc='Evaluating', leave=False):
            inputs = inputs.to(device)
            outputs = model(inputs)
            logits = outputs.logits.squeeze()  # Extract logits and then squeeze
            probs = torch.sigmoid(logits).cpu().numpy()
            true_labels.extend(labels.numpy())
            predictions.extend(probs)

    return np.array(true_labels), np.array(predictions)

# Evaluate the model on multiple datasets
data_loaders = [fm_loader, mg1_loader, mg2_loader, oc_loader, wm_loader]
results = []

for loader in data_loaders:
    true_labels, predictions = evaluate_model(classifier_model, loader, device)
    accuracy = np.mean(np.array(true_labels) == (np.array(predictions) > 0.5))
    results.append(accuracy)

# Print the final results
print("\nFinal Results:")
names = ["FaceMorpher", "MIPGAN_I", "MIPGAN_II", "OpenCV", "Webmorph"]
for name, accuracy in zip(names, results):
    print(f"{name}: {accuracy:.4f}")


                                                           


Final Results:
FaceMorpher: 0.1694
MIPGAN_I: 0.1694
MIPGAN_II: 0.1696
OpenCV: 0.1717
Webmorph: 0.2898




In [27]:
# Evaluation function
def evaluate_model(model, data_loader, device):
    model.eval()
    true_labels = []
    predictions = []

    with torch.no_grad():
        for inputs, labels in tqdm(data_loader, desc='Evaluating', leave=False):
            inputs = inputs.to(device)
            outputs = model(inputs).logits
            probs = torch.sigmoid(outputs).cpu().numpy()
            true_labels.extend(labels.numpy())
            predictions.extend(probs)

    return np.array(true_labels), np.array(predictions)

# Define functions to calculate APCER, BPCER, EER, and accuracy
def calculate_apcer(true_labels, predictions, fixed_bpcer):
    fpr, tpr, _ = roc_curve(true_labels, predictions, pos_label=1)
    fpr_target = fixed_bpcer
    closest_fpr_index = np.argmin(np.abs(fpr - fpr_target))
    apcer = 1 - tpr[closest_fpr_index]
    return apcer

def calculate_bpcer(true_labels, predictions, fixed_apcer):
    fpr, tpr, _ = roc_curve(true_labels, predictions, pos_label=1)
    tpr_target = 1 - fixed_apcer
    closest_tpr_index = np.argmin(np.abs(tpr - tpr_target))
    bpcer = fpr[closest_tpr_index]
    return bpcer

def calculate_eer(true_labels, predictions):
    fpr, tpr, _ = roc_curve(true_labels, predictions, pos_label=1)
    eer_index = np.argmin(np.abs(fpr - (1 - tpr)))
    eer = fpr[eer_index]
    return eer

def calculate_accuracy(true_labels, predictions):
    binary_predictions = (predictions > 0.5).astype(int)
    accuracy = np.mean(true_labels == binary_predictions)
    return accuracy

# Compute metrics for a dataset
def compute_metrics_for_dataset(model, data_loader, device, fixed_bpcer_values, fixed_apcer_values):
    true_labels, predictions = evaluate_model(model, data_loader, device)
    metrics = {
        'APCER': {bpcer: calculate_apcer(true_labels, predictions, bpcer) for bpcer in fixed_bpcer_values},
        'BPCER': {apcer: calculate_bpcer(true_labels, predictions, apcer) for apcer in fixed_apcer_values},
        'EER': calculate_eer(true_labels, predictions),
        'Accuracy': calculate_accuracy(true_labels, predictions)
    }
    return metrics

# Define fixed values for APCER and BPCER calculations
fixed_bpcer_values = [0.01, 0.1, 0.2]
fixed_apcer_values = [0.01, 0.1, 0.2]

# Create a DataFrame to store results
results_df = pd.DataFrame(columns=['Dataset', 'APCER_0.01', 'APCER_0.1', 'APCER_0.2',
                                    'BPCER_0.01', 'BPCER_0.1', 'BPCER_0.2', 'EER', 'Accuracy'])

# Loaders dictionary
loaders = {
    'FaceMorpher': fm_loader,
    'MIPGAN_I': mg1_loader,
    'MIPGAN_II': mg2_loader,
    'OpenCV': oc_loader,
    'Webmorph': wm_loader
}

# Iterate through the loaders and compute metrics for each dataset
for dataset_name, loader in loaders.items():
    metrics = compute_metrics_for_dataset(classifier_model, loader, device, fixed_bpcer_values, fixed_apcer_values)
    
    # Extract APCER and BPCER values for each fixed threshold
    apcer_0_01 = metrics['APCER'][0.01]
    apcer_0_1 = metrics['APCER'][0.1]
    apcer_0_2 = metrics['APCER'][0.2]
    
    bpcer_0_01 = metrics['BPCER'][0.01]
    bpcer_0_1 = metrics['BPCER'][0.1]
    bpcer_0_2 = metrics['BPCER'][0.2]
    
    # Create a DataFrame for the current dataset metrics
    df = pd.DataFrame({'Dataset': [dataset_name],
                       'APCER_0.01': [apcer_0_01],
                       'APCER_0.1': [apcer_0_1],
                       'APCER_0.2': [apcer_0_2],
                       'BPCER_0.01': [bpcer_0_01],
                       'BPCER_0.1': [bpcer_0_1],
                       'BPCER_0.2': [bpcer_0_2],
                       'EER': [metrics['EER']],
                       'Accuracy': [metrics['Accuracy']]})

    # Concatenate the current dataset DataFrame with the results_df
    results_df = pd.concat([results_df, df], ignore_index=True)

# Print or display the final DataFrame
# print(results_df)

# Export the results_df DataFrame to a CSV file
# results_df.to_csv('evaluation_results.csv', index=False)

results_df.head()


  results_df = pd.concat([results_df, df], ignore_index=True)
                                                           

Unnamed: 0,Dataset,APCER_0.01,APCER_0.1,APCER_0.2,BPCER_0.01,BPCER_0.1,BPCER_0.2,EER,Accuracy
0,FaceMorpher,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.169435
1,MIPGAN_I,0.114,0.005,0.001,0.039216,0.009804,0.0,0.029412,0.169435
2,MIPGAN_II,0.035035,0.003003,0.001001,0.02451,0.0,0.0,0.019608,0.169576
3,OpenCV,0.653455,0.238821,0.088415,0.54902,0.176471,0.117647,0.142157,0.171717
4,Webmorph,0.76,0.29,0.122,0.558824,0.215686,0.137255,0.161765,0.289773


In [28]:
import torch
import numpy as np
import pandas as pd
from sklearn.metrics import roc_curve, auc
from tqdm import tqdm

# Evaluation function
def evaluate_model(model, data_loader, device):
    model.eval()
    true_labels = []
    predictions = []

    with torch.no_grad():
        for inputs, labels in tqdm(data_loader, desc='Evaluating', leave=False):
            inputs = inputs.to(device)
            outputs = model(inputs)
            
            # Adjust this line based on the actual structure of your model's output
            if hasattr(outputs, 'logits'):
                outputs = outputs.logits
            elif hasattr(outputs, 'predictions'):
                outputs = outputs.predictions

            probs = torch.sigmoid(outputs).cpu().numpy().squeeze()
            true_labels.extend(labels.numpy())
            predictions.extend(probs)

    return np.array(true_labels), np.array(predictions)

# Define functions to calculate APCER, BPCER, EER, and accuracy
def calculate_apcer(true_labels, predictions, fixed_bpcer):
    fpr, tpr, _ = roc_curve(true_labels, predictions, pos_label=1)
    closest_fpr_index = np.argmin(np.abs(fpr - fixed_bpcer))
    apcer = 1 - tpr[closest_fpr_index]
    return apcer

def calculate_bpcer(true_labels, predictions, fixed_apcer):
    fpr, tpr, _ = roc_curve(true_labels, predictions, pos_label=1)
    closest_tpr_index = np.argmin(np.abs(tpr - (1 - fixed_apcer)))
    bpcer = fpr[closest_tpr_index]
    return bpcer

def calculate_eer(true_labels, predictions):
    fpr, tpr, _ = roc_curve(true_labels, predictions, pos_label=1)
    eer_index = np.argmin(np.abs(fpr - (1 - tpr)))
    eer = fpr[eer_index]
    return eer

def calculate_accuracy(true_labels, predictions):
    binary_predictions = (predictions > 0.5).astype(int)
    accuracy = np.mean(true_labels == binary_predictions)
    return accuracy

# Compute metrics for a dataset
def compute_metrics_for_dataset(model, data_loader, device, fixed_bpcer_values, fixed_apcer_values):
    true_labels, predictions = evaluate_model(model, data_loader, device)
    metrics = {
        'APCER': {bpcer: calculate_apcer(true_labels, predictions, bpcer) for bpcer in fixed_bpcer_values},
        'BPCER': {apcer: calculate_bpcer(true_labels, predictions, apcer) for apcer in fixed_apcer_values},
        'EER': calculate_eer(true_labels, predictions),
        'Accuracy': calculate_accuracy(true_labels, predictions)
    }
    return metrics

# Define fixed values for APCER and BPCER calculations
fixed_bpcer_values = [0.01, 0.1, 0.2]
fixed_apcer_values = [0.01, 0.1, 0.2]

# Create a DataFrame to store results
results_df = pd.DataFrame(columns=['Dataset', 'APCER_0.01', 'APCER_0.1', 'APCER_0.2',
                                    'BPCER_0.01', 'BPCER_0.1', 'BPCER_0.2', 'EER', 'Accuracy'])

# Loaders dictionary (ensure these DataLoader objects are defined elsewhere in your code)
loaders = {
    'FaceMorpher': fm_loader,
    'MIPGAN_I': mg1_loader,
    'MIPGAN_II': mg2_loader,
    'OpenCV': oc_loader,
    'Webmorph': wm_loader
}

# Iterate through the loaders and compute metrics for each dataset
for dataset_name, loader in loaders.items():
    metrics = compute_metrics_for_dataset(classifier_model, loader, device, fixed_bpcer_values, fixed_apcer_values)
    
    # Extract APCER and BPCER values for each fixed threshold
    apcer_0_01 = metrics['APCER'][0.01]
    apcer_0_1 = metrics['APCER'][0.1]
    apcer_0_2 = metrics['APCER'][0.2]
    
    bpcer_0_01 = metrics['BPCER'][0.01]
    bpcer_0_1 = metrics['BPCER'][0.1]
    bpcer_0_2 = metrics['BPCER'][0.2]
    
    # Create a DataFrame for the current dataset metrics
    df = pd.DataFrame({'Dataset': [dataset_name],
                       'APCER_0.01': [apcer_0_01],
                       'APCER_0.1': [apcer_0_1],
                       'APCER_0.2': [apcer_0_2],
                       'BPCER_0.01': [bpcer_0_01],
                       'BPCER_0.1': [bpcer_0_1],
                       'BPCER_0.2': [bpcer_0_2],
                       'EER': [metrics['EER']],
                       'Accuracy': [metrics['Accuracy']]})

    # Concatenate the current dataset DataFrame with the results_df
    results_df = pd.concat([results_df, df], ignore_index=True)


results_df.head()


  results_df = pd.concat([results_df, df], ignore_index=True)
                                                           

Unnamed: 0,Dataset,APCER_0.01,APCER_0.1,APCER_0.2,BPCER_0.01,BPCER_0.1,BPCER_0.2,EER,Accuracy
0,FaceMorpher,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.169435
1,MIPGAN_I,0.114,0.005,0.001,0.039216,0.009804,0.0,0.029412,0.169435
2,MIPGAN_II,0.035035,0.003003,0.001001,0.02451,0.0,0.0,0.019608,0.169576
3,OpenCV,0.653455,0.238821,0.088415,0.54902,0.176471,0.117647,0.142157,0.171717
4,Webmorph,0.76,0.29,0.122,0.558824,0.215686,0.137255,0.161765,0.289773


In [29]:
# results_df.to_csv('evaluation_results.csv', index=False)