In [1]:
import sys
sys.setrecursionlimit(15000)
import torch
import torch.nn.functional as F
from torch import nn
import torch.backends.cudnn as cudnn
from torch.autograd import Variable
import torchvision.models as models

# Number of primary capsules: FeatureExtractor builds this many parallel
# branches, and CapsuleNet passes it to RoutingLayer as num_input_capsules.
NO_CAPS=10

class StatsNet(nn.Module):
    """Reduce every feature map to its spatial mean and standard deviation.

    ``forward`` takes a 4-D tensor ``[batch, channels, height, width]`` and
    returns ``[batch, 2, channels]``: index 0 along dim 1 holds the
    per-channel means, index 1 the per-channel standard deviations.
    """

    def __init__(self):
        super().__init__()

    def forward(self, x):
        n_batch, n_channels = x.size(0), x.size(1)
        # Merge the two spatial axes so the statistics cover every position.
        flat = x.view(n_batch, n_channels, x.size(2) * x.size(3))

        per_channel_stats = (flat.mean(dim=2), flat.std(dim=2))
        return torch.stack(per_channel_stats, dim=1)

class View(nn.Module):
    """Module form of ``Tensor.view`` with a shape fixed at construction."""

    def __init__(self, *shape):
        super().__init__()
        # Target shape, kept as the tuple of positional arguments.
        self.shape = shape

    def forward(self, input):
        reshaped = input.view(self.shape)
        return reshaped


class VggExtractor(nn.Module):
    """Truncated, pretrained VGG-19 used as a fixed feature extractor.

    Keeps ``vgg19.features`` children 0 through 18 (inclusive) in
    ``self.vgg_1``.

    Args:
        train: When ``True`` the sub-network is put in training mode and
            its first layers are frozen via :meth:`freeze_gradient`.
            When ``False`` (default) it is put in evaluation mode.
    """

    def __init__(self, train=False):
        super(VggExtractor, self).__init__()

        self.vgg_1 = self.Vgg(models.vgg19(pretrained=True), 0, 18)
        if train:
            self.vgg_1.train(mode=True)
            self.freeze_gradient()
        else:
            self.vgg_1.eval()

    def Vgg(self, vgg, begin, end):
        """Return children ``begin..end`` (inclusive) of ``vgg.features`` as an ``nn.Sequential``."""
        features = nn.Sequential(*list(vgg.features.children())[begin:(end+1)])
        return features

    def freeze_gradient(self, begin=0, end=9):
        """Stop gradient updates for layers ``begin..end`` (inclusive) of ``self.vgg_1``.

        ``requires_grad`` has to be cleared on each layer's parameters;
        assigning it on the module object itself only creates an unrelated
        attribute and leaves every weight trainable.
        """
        for i in range(begin, end+1):
            for param in self.vgg_1[i].parameters():
                param.requires_grad = False

    def forward(self, input):
        """Run ``input`` through the truncated VGG stack and return its feature maps."""
        return self.vgg_1(input)

class FeatureExtractor(nn.Module):
    """Bank of ``NO_CAPS`` parallel primary capsules.

    Every capsule takes a 256-channel feature map, reduces it to 16
    channels with two 3x3 conv blocks, replaces each channel by its
    (mean, std) via ``StatsNet`` and then maps those statistics to an
    8-element vector with two 1-D convolutions. ``forward`` stacks the
    capsule vectors along a new last axis and squashes them.
    """

    def __init__(self):
        super(FeatureExtractor, self).__init__()

        self.capsules = nn.ModuleList([
            nn.Sequential(
                nn.Conv2d(256, 64, kernel_size=3, stride=1, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.Conv2d(64, 16, kernel_size=3, stride=1, padding=1),
                nn.BatchNorm2d(16),
                nn.ReLU(),
                # [b, 16, h, w] -> [b, 2, 16]
                StatsNet(),

                # stride 2 halves the length: [b, 2, 16] -> [b, 8, 8]
                nn.Conv1d(2, 8, kernel_size=5, stride=2, padding=2),
                nn.BatchNorm1d(8),
                # [b, 8, 8] -> [b, 1, 8]
                nn.Conv1d(8, 1, kernel_size=3, stride=1, padding=1),
                nn.BatchNorm1d(1),
                # [b, 1, 8] -> [b, 8]
                View(-1, 8),
                )
                for _ in range(NO_CAPS)]
        )

    def squash(self, tensor, dim, eps=1e-8):
        """Apply the capsule squashing non-linearity along ``dim``.

        Computes ``|v|^2 / (1 + |v|^2) * v / |v|``.

        Args:
            tensor: Input tensor.
            dim: Axis over which the vector norm is taken.
            eps: Small constant added under the square root so an all-zero
                vector yields zeros (and finite gradients) instead of NaN.

        Returns:
            Tensor of the same shape as ``tensor``.
        """
        squared_norm = (tensor ** 2).sum(dim=dim, keepdim=True)
        scale = squared_norm / (1 + squared_norm)
        return scale * tensor / torch.sqrt(squared_norm + eps)

    def forward(self, x):
        """Run every capsule on ``x`` and return the squashed stack.

        The capsule outputs are stacked on a new last axis and the squash
        is taken over that axis.
        """
        outputs = [capsule(x) for capsule in self.capsules]
        output = torch.stack(outputs, dim=-1)

        return self.squash(output, dim=-1)

class RoutingLayer(nn.Module):
    """Dynamic routing from ``num_input_capsules`` to ``num_output_capsules``.

    Args:
        gpu_id: Stored on the instance for compatibility. Helper tensors
            created in ``forward`` follow the device of ``route_weights``
            rather than this value.
        num_input_capsules: Number of incoming capsules.
        num_output_capsules: Number of outgoing capsules.
        data_in: Length of each incoming capsule vector.
        data_out: Length of each outgoing capsule vector.
        num_iterations: Number of routing iterations (at least 1).

    Raises:
        ValueError: If ``num_iterations`` is smaller than 1, because the
            routing loop would then never produce an output.
    """

    def __init__(self, gpu_id, num_input_capsules, num_output_capsules, data_in, data_out, num_iterations):
        super(RoutingLayer, self).__init__()

        if num_iterations < 1:
            raise ValueError("num_iterations must be at least 1")

        self.gpu_id = gpu_id
        self.num_iterations = num_iterations
        self.route_weights = nn.Parameter(torch.randn(num_output_capsules, num_input_capsules, data_out, data_in))

    def squash(self, tensor, dim, eps=1e-8):
        """Apply the capsule squashing non-linearity along ``dim``.

        ``eps`` is added under the square root so an all-zero vector (for
        example after dropout) yields zeros instead of NaN.
        """
        squared_norm = (tensor ** 2).sum(dim=dim, keepdim=True)
        scale = squared_norm / (1 + squared_norm)
        return scale * tensor / torch.sqrt(squared_norm + eps)

    def forward(self, x, random, dropout):
        """Route the input capsules to the output capsules.

        Args:
            x: Tensor ``[b, data_in, in_caps]``.
            random: When truthy, Gaussian noise scaled by 0.01 is added to
                the routing weights for this call.
            dropout: Probability of zeroing each prior element; values
                ``<= 0.0`` disable it. Survivors are not rescaled.

        Returns:
            Tensor ``[b, data_out, out_caps]``.
        """
        # x[b, data, in_caps]

        x = x.transpose(2, 1)
        # x[b, in_caps, data]

        route_weights = self.route_weights
        if random:
            # randn_like keeps the noise on the parameter's device and dtype.
            route_weights = route_weights + 0.01 * torch.randn_like(route_weights)

        priors = route_weights[:, None, :, :, :] @ x[None, :, :, :, None]

        # route_weights [out_caps , 1 , in_caps , data_out , data_in]
        # x             [   1     , b , in_caps , data_in ,    1    ]
        # priors        [out_caps , b , in_caps , data_out,    1    ]

        priors = priors.transpose(1, 0)
        # priors[b, out_caps, in_caps, data_out, 1]

        if dropout > 0.0:
            # Each element is kept with probability (1 - dropout).
            keep_mask = torch.bernoulli(torch.full_like(priors, 1.0 - dropout))
            priors = priors * keep_mask

        logits = torch.zeros_like(priors)
        # logits[b, out_caps, in_caps, data_out, 1]

        for i in range(self.num_iterations):
            probs = F.softmax(logits, dim=2)
            outputs = self.squash((probs * priors).sum(dim=2, keepdim=True), dim=3)

            # The agreement update is skipped after the last iteration.
            if i != self.num_iterations - 1:
                delta_logits = priors * outputs
                logits = logits + delta_logits

        # outputs[b, out_caps, 1, data_out, 1]
        # Drop only the two known singleton axes; an unqualified squeeze()
        # would also remove batch, out_caps or data_out when they equal 1.
        outputs = outputs.squeeze(4).squeeze(2)
        # outputs[b, out_caps, data_out]

        outputs = outputs.transpose(2, 1).contiguous()
        # outputs[b, data_out, out_caps]

        return outputs


class CapsuleNet(nn.Module):
    """Capsule classifier: a ``FeatureExtractor`` followed by a ``RoutingLayer``.

    Args:
        num_class: Number of output capsules (classes).
        gpu_id: Forwarded to ``RoutingLayer``.
    """

    def __init__(self, num_class, gpu_id):
        super().__init__()

        self.num_class = num_class

        # Build the primary capsules, then re-draw their conv / batch-norm
        # weights with weights_init.
        feature_extractor = FeatureExtractor()
        feature_extractor.apply(self.weights_init)
        self.fea_ext = feature_extractor

        self.routing_stats = RoutingLayer(
            gpu_id=gpu_id,
            num_input_capsules=NO_CAPS,
            num_output_capsules=num_class,
            data_in=8,
            data_out=4,
            num_iterations=2,
        )

    def weights_init(self, m):
        """Initialise ``m`` in place, chosen by its class name.

        Classes whose name contains ``Conv`` get N(0, 0.02) weights; classes
        whose name contains ``BatchNorm`` get N(1, 0.02) weights and a zero
        bias. Anything else is left untouched.
        """
        layer_type = type(m).__name__
        if 'Conv' in layer_type:
            m.weight.data.normal_(0.0, 0.02)
        elif 'BatchNorm' in layer_type:
            m.weight.data.normal_(1.0, 0.02)
            m.bias.data.fill_(0)

    def forward(self, x, random=False, dropout=0.0):
        """Return the raw routing output and detached per-class scores.

        Returns:
            ``(z, class_)`` where ``z`` is the routing output
            ``[b, data, out_caps]`` and ``class_`` is the softmax of ``z``
            over its last axis, detached and averaged over dim 1.
        """
        capsule_features = self.fea_ext(x)
        z = self.routing_stats(capsule_features, random, dropout=dropout)
        # z[b, data, out_caps]

        class_ = F.softmax(z, dim=-1).detach().mean(dim=1)

        return z, class_


class CapsuleLoss(nn.Module):
    """Cross-entropy summed over every slice along dim 1 of the capsule output.

    Args:
        gpu_id: When ``>= 0`` the wrapped criterion is moved to that CUDA
            device.
    """

    def __init__(self, gpu_id):
        super().__init__()
        self.cross_entropy_loss = nn.CrossEntropyLoss()

        if gpu_id >= 0:
            self.cross_entropy_loss.cuda(gpu_id)

    def forward(self, classes, labels):
        """Sum ``CrossEntropyLoss(classes[:, i, :], labels)`` over all ``i``."""
        criterion = self.cross_entropy_loss

        # Seed the running total with slice 0, then add the remaining slices.
        total = criterion(classes[:, 0, :], labels)
        for row in range(1, classes.size(1)):
            total = total + criterion(classes[:, row, :], labels)

        return total

In [2]:
import sys
sys.setrecursionlimit(15000)
import os
import random
import torch
import torch.backends.cudnn as cudnn
import numpy as np
from torch.autograd import Variable
from torch.optim import Adam
import torch.utils.data
import torchvision.datasets as dset
import torchvision.transforms as transforms
from tqdm import tqdm
import argparse
from sklearn import metrics


# Run configuration (fixed values for this notebook)
dataset = '/kaggle/input/splitframes-uadfv/split_data'  # Path to root dataset
train_set = 'train'  # Train set
val_set = 'val'  # Validation set
workers = 4 # Number of data loading workers
batchSize = 32  # Batch size
imageSize = 300  # Input image size (height/width)
niter = 25  # Number of epochs to train for
lr = 0.0005  # Learning rate
beta1 = 0.9  # Beta1 for Adam optimizer
gpu_id = 0  # GPU ID
resume = 0  # Epoch to resume from (0 to train from scratch)
outf = 'checkpoints/binary_faceforensicspp'  # Folder to output model checkpoints
disable_random = False  # Disable randomness for routing matrix
dropout = 0.05  # Dropout percentage
manualSeed = None  # Manual seed

# Echo every setting, one "<label>: <value>" line each, in a fixed order.
for _label, _value in (
    ("Dataset", dataset),
    ("Train Set", train_set),
    ("Validation Set", val_set),
    ("Workers", workers),
    ("Batch Size", batchSize),
    ("Image Size", imageSize),
    ("Epochs", niter),
    ("Learning Rate", lr),
    ("Beta1", beta1),
    ("GPU ID", gpu_id),
    ("Resume Epoch", resume),
    ("Output Folder", outf),
    ("Disable Random", disable_random),
    ("Dropout", dropout),
    ("Manual Seed", manualSeed),
):
    print(f"{_label}: {_value}")

# Draw a seed in [1, 10000] when none was given, report it, then seed the
# Python and torch generators with it.
manualSeed = random.randint(1, 10000) if manualSeed is None else manualSeed
print(f"Random Seed: {manualSeed}")
for _seed_fn in (random.seed, torch.manual_seed):
    _seed_fn(manualSeed)

# GPU runs additionally seed the CUDA generators and enable the cuDNN
# benchmark flag.
if gpu_id >= 0:
    torch.cuda.manual_seed_all(manualSeed)
    cudnn.benchmark = True

# Create output directory if it doesn't exist
os.makedirs(outf, exist_ok=True)

# Open the CSV log: append when resuming so earlier epochs are kept,
# otherwise start a fresh file.
text_writer = open(os.path.join(outf, 'train.csv'), 'a' if resume > 0 else 'w')

# Initialize model components
vgg_ext = VggExtractor()
capnet = CapsuleNet(2, gpu_id)
capsule_loss = CapsuleLoss(gpu_id)

optimizer = Adam(capnet.parameters(), lr=lr, betas=(beta1, 0.999))

# Resume training if specified
if resume > 0:
    # Load onto the CPU first so a checkpoint written on a GPU can also be
    # restored on a machine without that device; the tensors are moved to
    # the target GPU below.
    capnet.load_state_dict(torch.load(os.path.join(outf, f'capsule_{resume}.pt'), map_location='cpu'))
    capnet.train(mode=True)
    optimizer.load_state_dict(torch.load(os.path.join(outf, f'optim_{resume}.pt'), map_location='cpu'))

    if gpu_id >= 0:
        # Move the restored optimizer state tensors to the training device.
        for state in optimizer.state.values():
            for k, v in state.items():
                if isinstance(v, torch.Tensor):
                    state[k] = v.cuda(gpu_id)

# Move models to GPU if specified
if gpu_id >= 0:
    capnet.cuda(gpu_id)
    vgg_ext.cuda(gpu_id)
    capsule_loss.cuda(gpu_id)

# Preprocessing applied to both splits: resize to a square of side
# imageSize, convert to a tensor, then normalize each channel with the
# mean / std given below.
transform_fwd = transforms.Compose([
    transforms.Resize((imageSize, imageSize)),
    transforms.ToTensor(),
    transforms.Normalize(
        (0.485, 0.456, 0.406),
        (0.229, 0.224, 0.225),
    ),
])

# Training split: one sub-folder per class, reshuffled by the loader.
dataset_train = dset.ImageFolder(
    root=os.path.join(dataset, train_set),
    transform=transform_fwd,
)
assert dataset_train, "Train dataset not found!"
dataloader_train = torch.utils.data.DataLoader(
    dataset_train,
    batch_size=batchSize,
    shuffle=True,
    num_workers=workers,
)

# Validation split: same layout, iterated in a fixed order.
dataset_val = dset.ImageFolder(
    root=os.path.join(dataset, val_set),
    transform=transform_fwd,
)
assert dataset_val, "Validation dataset not found!"
dataloader_val = torch.utils.data.DataLoader(
    dataset_val,
    batch_size=batchSize,
    shuffle=False,
    num_workers=workers,
)

# Training loop
# Each epoch: one pass over the training loader with optimizer updates, a
# checkpoint of model + optimizer, one pass over the validation loader, then
# a summary line printed and appended to the CSV log.
# Wrapped in try/finally so the CSV handle is closed even if an epoch raises.
try:
    for epoch in range(resume + 1, niter + 1):
        count = 0
        loss_train = 0
        loss_test = 0

        tol_label = np.array([], dtype=np.float64)
        tol_pred = np.array([], dtype=np.float64)

        for img_data, labels_data in tqdm(dataloader_train):
            # Collapse every class index above 1 into class 1.
            labels_data[labels_data > 1] = 1
            img_label = labels_data.numpy().astype(np.float64)
            optimizer.zero_grad()

            if gpu_id >= 0:
                img_data = img_data.cuda(gpu_id)
                labels_data = labels_data.cuda(gpu_id)

            # The VGG extractor's weights are not handed to the optimizer,
            # so there is no need to record an autograd graph through it.
            with torch.no_grad():
                x = vgg_ext(img_data)
            classes, class_ = capnet(x, random=not disable_random, dropout=dropout)

            loss_dis = capsule_loss(classes, labels_data)
            loss_dis_data = loss_dis.item()

            loss_dis.backward()
            optimizer.step()

            output_dis = class_.detach().cpu().numpy()
            # Predict class 1 when its score is at least that of class 0.
            output_pred = (output_dis[:, 1] >= output_dis[:, 0]).astype(np.float64)

            tol_label = np.concatenate((tol_label, img_label))
            tol_pred = np.concatenate((tol_pred, output_pred))

            loss_train += loss_dis_data
            count += 1

        acc_train = metrics.accuracy_score(tol_label, tol_pred)
        loss_train /= count

        # Save model checkpoint
        torch.save(capnet.state_dict(), os.path.join(outf, f'capsule_{epoch}.pt'))
        torch.save(optimizer.state_dict(), os.path.join(outf, f'optim_{epoch}.pt'))

        # Validation
        capnet.eval()

        tol_label = np.array([], dtype=np.float64)
        tol_pred = np.array([], dtype=np.float64)

        count = 0

        # No parameter updates happen here, so autograd is switched off to
        # avoid holding activation graphs in memory.
        with torch.no_grad():
            for img_data, labels_data in dataloader_val:
                labels_data[labels_data > 1] = 1
                img_label = labels_data.numpy().astype(np.float64)

                if gpu_id >= 0:
                    img_data = img_data.cuda(gpu_id)
                    labels_data = labels_data.cuda(gpu_id)

                x = vgg_ext(img_data)
                classes, class_ = capnet(x, random=False)

                loss_dis = capsule_loss(classes, labels_data)
                loss_dis_data = loss_dis.item()

                output_dis = class_.detach().cpu().numpy()
                output_pred = (output_dis[:, 1] >= output_dis[:, 0]).astype(np.float64)

                tol_label = np.concatenate((tol_label, img_label))
                tol_pred = np.concatenate((tol_pred, output_pred))

                loss_test += loss_dis_data
                count += 1

        acc_test = metrics.accuracy_score(tol_label, tol_pred)
        loss_test /= count

        print(f'[Epoch {epoch}] Train loss: {loss_train:.4f}   acc: {acc_train * 100:.2f} | Test loss: {loss_test:.4f}  acc: {acc_test * 100:.2f}')

        text_writer.write(f'{epoch},{loss_train:.4f},{acc_train * 100:.2f},{loss_test:.4f},{acc_test * 100:.2f}\n')
        text_writer.flush()

        # Back to training mode for the next epoch.
        capnet.train(mode=True)
finally:
    text_writer.close()

Dataset: /kaggle/input/splitframes-uadfv/split_data
Train Set: train
Validation Set: val
Workers: 4
Batch Size: 32
Image Size: 300
Epochs: 25
Learning Rate: 0.0005
Beta1: 0.9
GPU ID: 0
Resume Epoch: 0
Output Folder: checkpoints/binary_faceforensicspp
Disable Random: False
Dropout: 0.05
Manual Seed: None
Random Seed: 2971


Downloading: "https://download.pytorch.org/models/vgg19-dcbb9e9d.pth" to /root/.cache/torch/hub/checkpoints/vgg19-dcbb9e9d.pth
100%|██████████| 548M/548M [00:02<00:00, 210MB/s] 
100%|██████████| 68/68 [00:44<00:00,  1.53it/s]


[Epoch 1] Train loss: 1.8370   acc: 92.18 | Test loss: 1.8403  acc: 98.26


100%|██████████| 68/68 [00:28<00:00,  2.40it/s]


[Epoch 2] Train loss: 1.5613   acc: 97.67 | Test loss: 1.5428  acc: 98.70


100%|██████████| 68/68 [00:28<00:00,  2.40it/s]


[Epoch 3] Train loss: 1.5206   acc: 98.00 | Test loss: 1.5094  acc: 97.83


100%|██████████| 68/68 [00:28<00:00,  2.40it/s]


[Epoch 4] Train loss: 1.4970   acc: 98.09 | Test loss: 1.4711  acc: 98.48


100%|██████████| 68/68 [00:28<00:00,  2.40it/s]


[Epoch 5] Train loss: 1.4777   acc: 98.19 | Test loss: 1.4689  acc: 97.61


100%|██████████| 68/68 [00:28<00:00,  2.39it/s]


[Epoch 6] Train loss: 1.4616   acc: 98.60 | Test loss: 1.4709  acc: 98.48


100%|██████████| 68/68 [00:28<00:00,  2.39it/s]


[Epoch 7] Train loss: 1.4533   acc: 98.46 | Test loss: 1.4773  acc: 96.75


100%|██████████| 68/68 [00:28<00:00,  2.39it/s]


[Epoch 8] Train loss: 1.4404   acc: 98.42 | Test loss: 1.4227  acc: 98.48


100%|██████████| 68/68 [00:28<00:00,  2.40it/s]


[Epoch 9] Train loss: 1.4193   acc: 98.84 | Test loss: 1.4523  acc: 96.53


100%|██████████| 68/68 [00:28<00:00,  2.40it/s]


[Epoch 10] Train loss: 1.4186   acc: 98.65 | Test loss: 1.9595  acc: 85.03


100%|██████████| 68/68 [00:28<00:00,  2.40it/s]


[Epoch 11] Train loss: 1.4358   acc: 98.46 | Test loss: 1.4362  acc: 96.75


100%|██████████| 68/68 [00:28<00:00,  2.39it/s]


[Epoch 12] Train loss: 1.4073   acc: 98.37 | Test loss: 1.4354  acc: 97.40


100%|██████████| 68/68 [00:28<00:00,  2.40it/s]


[Epoch 13] Train loss: 1.3834   acc: 99.12 | Test loss: 1.4075  acc: 98.48


100%|██████████| 68/68 [00:28<00:00,  2.40it/s]


[Epoch 14] Train loss: 1.3821   acc: 98.84 | Test loss: 1.4347  acc: 96.53


100%|██████████| 68/68 [00:28<00:00,  2.40it/s]


[Epoch 15] Train loss: 1.3820   acc: 98.98 | Test loss: 1.3862  acc: 98.48


100%|██████████| 68/68 [00:28<00:00,  2.39it/s]


[Epoch 16] Train loss: 1.3662   acc: 99.12 | Test loss: 1.3959  acc: 97.61


100%|██████████| 68/68 [00:28<00:00,  2.40it/s]


[Epoch 17] Train loss: 1.3741   acc: 98.98 | Test loss: 1.4205  acc: 97.40


100%|██████████| 68/68 [00:28<00:00,  2.39it/s]


[Epoch 18] Train loss: 1.3596   acc: 99.12 | Test loss: 1.3634  acc: 98.48


100%|██████████| 68/68 [00:28<00:00,  2.40it/s]


[Epoch 19] Train loss: 1.3505   acc: 99.12 | Test loss: 1.3643  acc: 98.26


100%|██████████| 68/68 [00:28<00:00,  2.40it/s]


[Epoch 20] Train loss: 1.3451   acc: 99.30 | Test loss: 1.4034  acc: 97.18


100%|██████████| 68/68 [00:28<00:00,  2.40it/s]


[Epoch 21] Train loss: 1.3514   acc: 99.16 | Test loss: 1.3611  acc: 98.70


100%|██████████| 68/68 [00:28<00:00,  2.39it/s]


[Epoch 22] Train loss: 1.3422   acc: 99.30 | Test loss: 1.3593  acc: 98.70


100%|██████████| 68/68 [00:28<00:00,  2.39it/s]


[Epoch 23] Train loss: 1.3543   acc: 99.16 | Test loss: 1.3831  acc: 98.48


100%|██████████| 68/68 [00:28<00:00,  2.40it/s]


[Epoch 24] Train loss: 1.3423   acc: 99.21 | Test loss: 1.3610  acc: 98.48


100%|██████████| 68/68 [00:28<00:00,  2.40it/s]


[Epoch 25] Train loss: 1.3381   acc: 99.26 | Test loss: 1.3932  acc: 97.61


In [3]:
import os
import torch
import torch.backends.cudnn as cudnn
import numpy as np
from torch.autograd import Variable
import torch.utils.data
import torchvision.datasets as dset
import torchvision.transforms as transforms
from tqdm import tqdm
from sklearn import metrics
from scipy.optimize import brentq
from scipy.interpolate import interp1d
from sklearn.metrics import roc_curve


# Evaluation settings (edit these values for your own run)
dataset = '/kaggle/input/splitframes-uadfv/split_data/'  # Path to dataset
test_set = 'test'  # Test set folder
workers = 0  # Number of data loading workers
batchSize = 32  # Batch size
imageSize = 300  # Image height and width
gpu_id = 0  # GPU ID
outf = '/kaggle/working/checkpoints/binary_faceforensicspp'  # Folder for model checkpoints
random = False  # Randomness for routing matrix
checkpoint_id = 21  # Checkpoint ID

# Echo the settings, one "<label>: <value>" line each, in a fixed order.
for _label, _value in (
    ("Dataset", dataset),
    ("Test set", test_set),
    ("Batch size", batchSize),
    ("Image size", imageSize),
    ("GPU ID", gpu_id),
    ("Output folder", outf),
    ("Randomness enabled", random),
    ("Checkpoint ID", checkpoint_id),
):
    print(f"{_label}: {_value}")

# Define the transform for images: resize to a square of side imageSize,
# convert to a tensor, then normalize each channel.
transform_fwd = transforms.Compose([
    transforms.Resize((imageSize, imageSize)),
    transforms.ToTensor(),
    transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
])

# Load the test dataset (fixed order, no shuffling)
dataset_test = dset.ImageFolder(root=os.path.join(dataset, test_set), transform=transform_fwd)
assert dataset_test
dataloader_test = torch.utils.data.DataLoader(dataset_test, batch_size=batchSize, shuffle=False, num_workers=workers)

# Load model components
vgg_ext = VggExtractor()
capnet = CapsuleNet(2, gpu_id)

# Load the checkpoint
# map_location='cpu' lets a checkpoint written on a GPU be restored on a
# machine without that device; the model is moved to the GPU afterwards.
checkpoint_path = os.path.join(outf, f'capsule_{checkpoint_id}.pt')
capnet.load_state_dict(torch.load(checkpoint_path, map_location='cpu'))
capnet.eval()

# Move models to GPU if available
if gpu_id >= 0:
    vgg_ext.cuda(gpu_id)
    capnet.cuda(gpu_id)

# Initialize arrays for storing results
tol_label = np.array([], dtype=float)      # ground-truth labels
tol_pred = np.array([], dtype=float)       # hard 0/1 predictions
tol_pred_prob = np.array([], dtype=float)  # score of class 1

# Inference only: autograd is switched off so no activation graphs are kept.
with torch.no_grad():
    for img_data, labels_data in tqdm(dataloader_test):
        # Collapse every class index above 1 into class 1.
        labels_data[labels_data > 1] = 1
        img_label = labels_data.numpy().astype(float)

        if gpu_id >= 0:
            img_data = img_data.cuda(gpu_id)

        x = vgg_ext(img_data)
        classes, class_ = capnet(x, random=random)

        output_dis = class_.detach().cpu().numpy()
        # Predict class 1 when its score is at least that of class 0.
        output_pred = (output_dis[:, 1] >= output_dis[:, 0]).astype(float)

        tol_label = np.concatenate((tol_label, img_label))
        tol_pred = np.concatenate((tol_pred, output_pred))

        # class_ is already an average of softmax outputs, so its second
        # column is used directly as the class-1 score. A further softmax
        # would only rescale it monotonically and leave the ROC unchanged.
        tol_pred_prob = np.concatenate((tol_pred_prob, output_dis[:, 1]))

# Calculate accuracy and EER
acc_test = metrics.accuracy_score(tol_label, tol_pred)

fpr, tpr, thresholds = roc_curve(tol_label, tol_pred_prob, pos_label=1)
# EER is the false-positive rate at which FPR equals 1 - TPR; the ROC
# interpolant is built once and reused by the root finder.
roc_interp = interp1d(fpr, tpr)
eer = brentq(lambda rate: 1. - rate - roc_interp(rate), 0., 1.)

print(f'[Epoch {checkpoint_id}] Test acc: {acc_test*100:.2f}   EER: {eer*100:.2f}')

# Write results to text file. The file is opened only after evaluation has
# succeeded, so a failed run does not truncate an earlier result.
with open(os.path.join(outf, 'test.txt'), 'w') as text_writer:
    text_writer.write(f'{checkpoint_id},{acc_test*100:.2f},{eer*100:.2f}\n')

Dataset: /kaggle/input/splitframes-uadfv/split_data/
Test set: test
Batch size: 32
Image size: 300
GPU ID: 0
Output folder: /kaggle/working/checkpoints/binary_faceforensicspp
Randomness enabled: False
Checkpoint ID: 21


100%|██████████| 15/15 [00:06<00:00,  2.17it/s]

[Epoch 21] Test acc: 98.92   EER: 1.29



