In [1]:
# Load the image sequences and their labels
import os
from PIL import Image
import torch
from torchvision import transforms

# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Seed PyTorch's global RNG.
torch.manual_seed(42)

# Per-image preprocessing: resize, convert to a tensor, then normalise each
# channel with the mean/std below.
# When switching between the face and eye crops, the image size must be
# changed accordingly.
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

data_dir = './dataset/UADFV/face'

# Sequence ids run over range(49); these three ids are skipped.
skipped_sequence_ids = {5, 33, 43}

sequences = []
labels = []

# The class label is the position in this list: 'real' -> 0, 'fake' -> 1.
for label, class_name in enumerate(['real', 'fake']):
    label_dir = os.path.join(data_dir, class_name)

    for sequence_idx in range(49):
        if sequence_idx in skipped_sequence_ids:
            continue

        # Each sequence consists of 20 frames named '<sequence>_<frame>.png'.
        frames = [
            transform(
                Image.open(
                    os.path.join(label_dir, f'{sequence_idx}_{image_idx}.png')
                ).convert("RGB")
            )
            for image_idx in range(20)
        ]

        sequences.append(torch.stack(frames))
        labels.append(label)

# Collapse the Python lists into one tensor of sequences and one of labels.
sequences = torch.stack(sequences)
labels = torch.tensor(labels)
print(sequences.shape)
print(labels.shape)

torch.Size([92, 20, 3, 224, 224])
torch.Size([92])


In [2]:
# Per-image (frame-level) dataset and data loaders
import numpy as np
from torch.utils.data import DataLoader, SubsetRandomSampler, TensorDataset

# Flatten the (sequence, frame) axes into a single image axis so that the
# image model can be trained on individual frames.
# `reshape` returns a view when the memory layout allows it, which avoids the
# extra full-size copy made by appending every frame to a list and re-stacking.
frames_per_sequence = sequences.size(1)
images = sequences.reshape(-1, *sequences.shape[2:])

# Every frame inherits the label of the sequence it came from; the ordering
# matches the flattened images (all frames of sequence 0, then sequence 1, ...).
img_labels = labels.repeat_interleave(frames_per_sequence)
assert images.size(0) == img_labels.size(0), "every image needs exactly one label"

img_dataset_size = images.size(0)
img_indices = list(range(img_dataset_size))

# 80/20 split sizes. `img_split` is the size of the training part; the live
# split below only needs `img_test_size`.
img_split = int(np.floor(0.8 * img_dataset_size))
img_test_size = img_dataset_size - img_split

# The FIRST `img_test_size` indices form the test set, the rest is training.
# (Other folds can be obtained by sliding this window along `img_indices`.)
# NOTE(review): the indices are not shuffled and the data is loaded with all
# 'real' sequences before all 'fake' ones, so with the current dataset this
# test slice contains only label 0. The boundary index is also not a multiple
# of `frames_per_sequence`, so one sequence contributes frames to both sides.
# Confirm this is intended and consistent with the sequence-level split
# before trusting the reported accuracies.
img_test_indices, img_train_indices = img_indices[:img_test_size], img_indices[img_test_size:]

img_dataset = TensorDataset(images, img_labels)

# The samplers restrict each loader to its index subset and shuffle within it.
img_train_sampler = SubsetRandomSampler(img_train_indices)
img_test_sampler = SubsetRandomSampler(img_test_indices)

img_train_loader = DataLoader(img_dataset, batch_size=32, sampler=img_train_sampler)
img_test_loader = DataLoader(img_dataset, batch_size=32, sampler=img_test_sampler)

print("Training set length:", len(img_train_indices))
print("Test set length:", len(img_test_indices))

Training set length: 1472
Test set length: 368


In [3]:
from models import SwinModel

# Instantiate the image classifier, move its parameters to the selected
# device, and print the module structure for reference.
img_model = SwinModel()
img_model = img_model.to(device)
print(img_model)

SwinModel(
  (swin): SwinTransformer(
    (features): Sequential(
      (0): Sequential(
        (0): Conv2d(3, 96, kernel_size=(4, 4), stride=(4, 4))
        (1): Permute()
        (2): LayerNorm((96,), eps=1e-05, elementwise_affine=True)
      )
      (1): Sequential(
        (0): SwinTransformerBlock(
          (norm1): LayerNorm((96,), eps=1e-05, elementwise_affine=True)
          (attn): ShiftedWindowAttention(
            (qkv): Linear(in_features=96, out_features=288, bias=True)
            (proj): Linear(in_features=96, out_features=96, bias=True)
          )
          (stochastic_depth): StochasticDepth(p=0.0, mode=row)
          (norm2): LayerNorm((96,), eps=1e-05, elementwise_affine=True)
          (mlp): MLP(
            (0): Linear(in_features=96, out_features=384, bias=True)
            (1): GELU(approximate='none')
            (2): Dropout(p=0.0, inplace=False)
            (3): Linear(in_features=384, out_features=96, bias=True)
            (4): Dropout(p=0.0, inplace=Fa

In [4]:
import torch.nn as nn
import torch.optim as optim


# Loss and optimiser for the per-image classifier.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(img_model.parameters(), lr=0.001)
# Alternatives that were tried: SGD (lr=0.01, momentum=0.9) and gradient
# clipping via torch.nn.utils.clip_grad_norm_ / clip_grad_value_.

# Training
num_epochs = 10
for epoch in range(num_epochs):
    # Nothing inside the epoch switches the model to eval mode, so enabling
    # training mode once per epoch is sufficient.
    img_model.train()

    running_loss = 0.0
    correct = 0
    total = 0
    # The batch labels get their own name so that the global `labels` tensor
    # created while loading the sequences is not overwritten by this loop.
    for inputs, batch_labels in img_train_loader:
        inputs, batch_labels = inputs.to(device), batch_labels.to(device)

        optimizer.zero_grad()

        # Forward pass
        outputs = img_model(inputs)
        loss = criterion(outputs, batch_labels)

        # Backward pass and parameter update
        loss.backward()
        optimizer.step()

        # Weight each batch's mean loss by its size so the epoch figure is a
        # true per-sample mean even when the last batch is smaller.
        batch_size = batch_labels.size(0)
        running_loss += loss.item() * batch_size
        predicted = outputs.detach().argmax(dim=1)
        total += batch_size
        correct += (predicted == batch_labels).sum().item()

    epoch_loss = running_loss / total
    accuracy = 100 * correct / total
    print('Epoch %d, loss: %.3f, accuracy: %.2f %%' % (epoch + 1, epoch_loss, accuracy))

Epoch 1, loss: 0.693, accuracy: 58.83 %
Epoch 2, loss: 0.664, accuracy: 62.43 %
Epoch 3, loss: 0.655, accuracy: 62.84 %
Epoch 4, loss: 0.662, accuracy: 60.05 %
Epoch 5, loss: 0.655, accuracy: 63.25 %
Epoch 6, loss: 0.653, accuracy: 63.25 %
Epoch 7, loss: 0.656, accuracy: 62.64 %
Epoch 8, loss: 0.658, accuracy: 62.50 %
Epoch 9, loss: 0.654, accuracy: 61.55 %
Epoch 10, loss: 0.646, accuracy: 62.09 %


In [5]:
# Persist the whole trained model object (not just its state_dict).
# The target directory is created first so that a missing folder does not
# throw away a finished training run.
img_model_path = './model/UADFV/transformer_face_model.pth'
os.makedirs(os.path.dirname(img_model_path), exist_ok=True)
torch.save(img_model, img_model_path)

In [6]:
# To evaluate a previously saved checkpoint instead, uncomment:
# img_model = torch.load('./model/UADFV/transformer_face_model.pth')

# Per-image accuracy on the held-out image subset.
# NOTE(review): with the unshuffled "first 20%" split used to build
# img_test_loader, this subset appears to contain only 'real' frames, so the
# figure below measures recall on that class rather than balanced accuracy.
correct = 0
total = 0

img_model.eval()
with torch.no_grad():
    for batch_inputs, batch_labels in img_test_loader:
        batch_inputs = batch_inputs.to(device)
        batch_labels = batch_labels.to(device)

        # Predicted class = index of the largest score along dim 1.
        predicted = img_model(batch_inputs).max(dim=1).indices

        total += batch_labels.size(0)
        correct += (predicted == batch_labels).sum().item()

test_accuracy = 100 * correct / total
print("Test Accuracy: %.2f %%" % test_accuracy)

Test Accuracy: 58.97 %


In [7]:
# Sequence-level boost for per-image predictions: every frame's scores are
# augmented with the mean score over its whole sequence before the arg-max.
# NOTE(review): `seq_test_loader` is not defined in any cell visible above
# this one; make sure the cell that builds it runs first on a fresh kernel.
# Batches are assumed to be (batch, frames, C, H, W) with labels of shape
# (batch,), and the model is assumed to return (batch, classes) scores.
correct = 0
total = 0

img_model.eval()
with torch.no_grad():
    for clips, clip_labels in seq_test_loader:
        clips, clip_labels = clips.to(device), clip_labels.to(device)
        num_frames = clips.size(1)

        # Run the image model once per frame and keep the scores:
        # shape (frames, batch, classes).
        frame_scores = torch.stack(
            [img_model(clips[:, t]) for t in range(num_frames)], dim=0
        )

        # Sequence mean, broadcast back onto every frame of the same sample.
        boosted_scores = frame_scores + frame_scores.mean(dim=0, keepdim=True)

        predicted = boosted_scores.argmax(dim=-1)  # (frames, batch)
        correct += (predicted == clip_labels.reshape(1, -1)).sum().item()

        # Count the frames actually scored instead of assuming one 20-frame
        # sequence per batch.
        total += predicted.numel()

print(total)
test_accuracy = 100 * correct / total
print("Test Accuracy: %.2f %%" % test_accuracy)

380
Test Accuracy: 100.00 %


In [9]:
# SVM: sequence-level boost for per-image predictions (pseudo).
# Collects, for every frame of every training sequence, the frame's scores
# plus the mean score of its sequence; these become the SVM input features.
# NOTE(review): `seq_train_loader` is not defined in any cell visible above
# this one. Batches are assumed to be (batch, frames, C, H, W) with labels of
# shape (batch,), and the model is assumed to return (batch, classes) scores.
img_output = []
img_label = []

img_model.eval()
with torch.no_grad():
    for clips, clip_labels in seq_train_loader:
        clips, clip_labels = clips.to(device), clip_labels.to(device)
        num_frames = clips.size(1)

        # One forward pass per frame; the scores are reused for both the
        # sequence mean and the per-frame feature. Shape (frames, batch, classes).
        frame_scores = torch.stack(
            [img_model(clips[:, t]) for t in range(num_frames)], dim=0
        )
        mean_img_output = frame_scores.mean(dim=0)  # (batch, classes)

        for t in range(num_frames):
            # One (batch, classes) feature block and one (batch,) label block
            # per frame, appended in the same order.
            img_output.append(frame_scores[t] + mean_img_output)
            img_label.append(clip_labels)
print(len(img_output))

1460


In [10]:
from sklearn import svm
from sklearn.metrics import accuracy_score

# Concatenate the per-frame feature blocks and label blocks along the sample
# axis. Using `cat` for both keeps features and labels aligned row by row,
# whatever the batch size of the loader that produced them.
img_output_tensor = torch.cat(img_output)
img_label_tensor = torch.cat(img_label)

# scikit-learn expects a 2-D (samples, features) array and a 1-D label array.
img_output_flattened = img_output_tensor.reshape(img_output_tensor.size(0), -1).cpu().numpy()
img_label_flattened = img_label_tensor.cpu().numpy()
assert img_output_flattened.shape[0] == img_label_flattened.shape[0], \
    "feature rows and labels must line up one-to-one"

# Linear-kernel SVM. Other configurations that were tried:
# RBF (C=0.9, gamma='scale'), polynomial (degree=3, C=0.1) and
# sigmoid (coef0=0.0, C=0.9).
svm_classifier = svm.SVC(kernel='linear')

svm_classifier.fit(img_output_flattened, img_label_flattened)

# Accuracy on the data the SVM was just fitted on (training accuracy only).
y_pred = svm_classifier.predict(img_output_flattened)

accuracy = accuracy_score(img_label_flattened, y_pred)
print(f"Accuracy of SVM classifier on train dataset: {accuracy:.4f}")

Accuracy of SVM classifier on train dataset: 1.0000


In [11]:
import joblib
# Save the fitted SVM model to a file.
# NOTE(review): the file name says "cnn_eye" although this notebook loads the
# face dataset and uses SwinModel; confirm the name is intentional.
model_filename = './model/UADFV/svm/cnn_eye_1_l.pkl'
# Create the target directory first so the dump cannot fail on a missing folder.
os.makedirs(os.path.dirname(model_filename), exist_ok=True)
joblib.dump(svm_classifier, model_filename)

['./model/UADFV/svm/cnn_eye_1_l.pkl']

In [12]:
# SVM: sequence-level boost for per-image predictions (pseudo).
# Collects, for every frame of every test sequence, the frame's scores plus
# the mean score of its sequence; these are the features the SVM is
# evaluated on.
# NOTE(review): `seq_test_loader` is not defined in any cell visible above
# this one. Batches are assumed to be (batch, frames, C, H, W) with labels of
# shape (batch,), and the model is assumed to return (batch, classes) scores.
img_output = []
img_label = []

img_model.eval()
with torch.no_grad():
    for clips, clip_labels in seq_test_loader:
        clips, clip_labels = clips.to(device), clip_labels.to(device)
        num_frames = clips.size(1)

        # One forward pass per frame; the scores are reused for both the
        # sequence mean and the per-frame feature. Shape (frames, batch, classes).
        frame_scores = torch.stack(
            [img_model(clips[:, t]) for t in range(num_frames)], dim=0
        )
        mean_img_output = frame_scores.mean(dim=0)  # (batch, classes)

        for t in range(num_frames):
            # One (batch, classes) feature block and one (batch,) label block
            # per frame, appended in the same order.
            img_output.append(frame_scores[t] + mean_img_output)
            img_label.append(clip_labels)
print(len(img_output))

380


In [13]:
# Load the saved SVM model.
# joblib files are pickle-based: only load files you created yourself.
# To evaluate a different saved classifier, override the path first, e.g.:
# model_filename = './model/UADFV/svm/cnn_eye_2_l.pkl'
loaded_model = joblib.load(model_filename)

# Concatenate the per-frame feature blocks and label blocks along the sample
# axis. Using `cat` for both keeps features and labels aligned row by row,
# whatever the batch size of the loader that produced them.
img_output_tensor = torch.cat(img_output)
img_label_tensor = torch.cat(img_label)
print(len(img_label_tensor))

# scikit-learn expects a 2-D (samples, features) array and a 1-D label array.
img_output_flattened = img_output_tensor.reshape(img_output_tensor.size(0), -1).cpu().numpy()
img_label_flattened = img_label_tensor.cpu().numpy()
assert img_output_flattened.shape[0] == img_label_flattened.shape[0], \
    "feature rows and labels must line up one-to-one"

y_pred = loaded_model.predict(img_output_flattened)

accuracy = accuracy_score(img_label_flattened, y_pred)
print(f"Accuracy of SVM classifier on test dataset: {accuracy:.4f}")

380
Accuracy of SVM classifier on test dataset: 1.0000
