# Import Section

In [None]:
!pip install transformers datasets

Collecting datasets
  Downloading datasets-2.21.0-py3-none-any.whl.metadata (21 kB)
Collecting pyarrow>=15.0.0 (from datasets)
  Downloading pyarrow-17.0.0-cp310-cp310-manylinux_2_28_x86_64.whl.metadata (3.3 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting xxhash (from datasets)
  Downloading xxhash-3.5.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multiprocess (from datasets)
  Downloading multiprocess-0.70.16-py310-none-any.whl.metadata (7.2 kB)
Downloading datasets-2.21.0-py3-none-any.whl (527 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m527.3/527.3 kB[0m [31m13.9 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading dill-0.3.8-py3-none-any.whl (116 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m10.5 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pyarrow-17.0.0-cp310-cp310-manylinux_2_28_x86_64.whl (39.9 MB)
[

In [None]:
import torch
from torch import nn
from torch.utils.data import DataLoader,Dataset, Subset
from torchvision import models, transforms, datasets
from torchvision.datasets import ImageFolder
import torch.optim as optim

from tqdm import tqdm

from torch.nn import functional as F
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, classification_report

from transformers import TrainingArguments, Trainer, AutoImageProcessor, BeitModel

import random

from google.colab import drive

import zipfile
import os
import shutil
from PIL import Image

# Make Google Drive available under /content/drive (uses google.colab's drive helper).
drive.mount('/content/drive')

# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

Mounted at /content/drive


# Model Section

Define the model architecture

In [None]:
class VGG16_Beit(nn.Module):
    """Two-branch binary classifier that fuses VGG16 and BEiT features.

    Each image is fed to both backbones. The VGG16 output and the BEiT
    ``pooler_output`` are concatenated along the feature dimension and passed
    through a small two-layer head that produces 2 logits.

    Args:
        vgg16: Module whose ``classifier[-1].out_features`` gives the size of
            the feature vector it returns.
        beit: Module exposing ``config.hidden_size`` and returning an object
            with a ``pooler_output`` attribute.
    """

    def __init__(self, vgg16, beit):
        super().__init__()

        # Feature extractors
        self.vgg16 = vgg16
        self.beit = beit

        # Classification head: its input is the concatenation of both feature vectors
        beit_dim = beit.config.hidden_size
        vgg16_dim = vgg16.classifier[-1].out_features
        self.fc1 = nn.Linear(beit_dim + vgg16_dim, 256)
        self.fc2 = nn.Linear(256, 2)

    def forward(self, vgg16_images, beit_images):
        """Return the 2-class logits for a batch of paired inputs."""
        # Extract features from each branch
        vgg16_features = self.vgg16(vgg16_images)
        beit_pooled = self.beit(beit_images).pooler_output

        # Fuse along the feature axis, then classify
        fused = torch.cat((vgg16_features, beit_pooled), dim=1)
        hidden = F.relu(self.fc1(fused))
        return self.fc2(hidden)

Load the pretrained model from Google Drive

In [None]:
# Load the entire model object (not just a state dict) from the .pt file.
# - map_location=device remaps the stored tensors, so a checkpoint saved on a GPU
#   also loads on a CPU-only runtime.
# - weights_only=False is stated explicitly because a whole pickled module is
#   being restored; the VGG16_Beit class must already be defined in this session.
# NOTE(security): this unpickles arbitrary Python objects - only load checkpoints
# that come from a trusted source.
model = torch.load(
    'drive/MyDrive/classifier0609242212.pt',
    map_location=device,
    weights_only=False,
)

# Send the model to GPU if available
model = model.to(device)

  model = torch.load('drive/MyDrive/classifier0609242212.pt')


# Data Section

## Transform and Collate

Define two transformations: one for VGG16 and one for BEiT model

In [None]:
# VGG16 branch: resize to 224x224, convert to a tensor, then normalize each
# channel with the given mean/std.
transform_vgg = transforms.Compose(
    [
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]
)

# BEiT branch: the preprocessing is delegated to the checkpoint's own image processor.
beit_image_processor = AutoImageProcessor.from_pretrained("microsoft/beit-base-patch16-224-pt22k")


def transform_beit(image):
    """Run the BEiT image processor on one image and return its pixel tensor.

    The processor is asked for PyTorch tensors; size-1 dimensions of the
    resulting ``pixel_values`` entry are squeezed out before returning.
    """
    processed = beit_image_processor(image, return_tensors="pt")
    return torch.squeeze(processed['pixel_values'])

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


preprocessor_config.json:   0%|          | 0.00/276 [00:00<?, ?B/s]

Transform function wrapper; it is needed because we want both the image and the label as params

In [None]:
def dataset_transform(image, label):
    """Build one sample dict from an image and its label.

    The same image is preprocessed twice, once per backbone, and returned
    together with the untouched label under the keys ``'vgg16_image'``,
    ``'beit_image'`` and ``'label'``.
    """
    vgg16_input = transform_vgg(image)
    beit_input = transform_beit(image)
    return dict(vgg16_image=vgg16_input, beit_image=beit_input, label=label)


Collate

In [None]:
def collate_fn(batch):
    """Merge a list of sample dicts into one batch dict.

    Args:
        batch: Sequence of dicts with the keys ``'vgg16_image'``,
            ``'beit_image'`` and ``'label'``.

    Returns:
        Dict with the stacked ``'vgg16_images'`` and ``'beit_images'`` tensors
        and a ``'labels'`` tensor built from the individual labels.
    """
    vgg16_list, beit_list, label_list = [], [], []
    for sample in batch:
        vgg16_list.append(sample['vgg16_image'])
        beit_list.append(sample['beit_image'])
        label_list.append(sample['label'])

    return {
        'vgg16_images': torch.stack(vgg16_list),
        'beit_images': torch.stack(beit_list),
        'labels': torch.tensor(label_list),
    }

## FFHQ Dataset

Wrapper class for the FFHQ dataset; see the [FFHQ repository](https://github.com/NVlabs/ffhq-dataset) for further details about this dataset

In [None]:
class FFHQ_Dataset(Dataset):
    """Dataset over every image file found recursively under a directory.

    Each item is opened with PIL, converted to RGB and paired with the constant
    label 0, because all images in the folder belong to the same class.

    Args:
        img_dir: Root directory that is walked recursively for image files.
        transform: Optional callable invoked as ``transform(image, label)``;
            its return value becomes the item. When omitted, items are plain
            ``(image, 0)`` tuples.
    """

    # File extensions (compared case-insensitively) that count as images.
    IMAGE_EXTENSIONS = ('.png', '.jpg', '.jpeg')

    def __init__(self, img_dir, transform=None):
        self.img_dir = img_dir
        self.image_files = self._get_image_files(img_dir)
        self.transform = transform

        print(f"Found {len(self.image_files)} images in directory: {img_dir}")

    def _get_image_files(self, dir_path):
        """Return the sorted paths of all image files below ``dir_path``."""
        image_files = [
            os.path.join(root, file)
            for root, _, files in os.walk(dir_path)
            for file in files
            if file.lower().endswith(self.IMAGE_EXTENSIONS)
        ]
        # os.walk yields entries in an unspecified order; sorting keeps the
        # index -> file mapping identical across runs and machines.
        return sorted(image_files)

    def __len__(self):
        return len(self.image_files)

    def __getitem__(self, idx):
        # convert() returns a new, fully loaded image, so the underlying file
        # can be closed right away instead of staying open until garbage collection.
        with Image.open(self.image_files[idx]) as raw_image:
            image = raw_image.convert("RGB")

        label = 0  # Dummy label: all images belong to the same class
        if self.transform is None:
            return image, label
        return self.transform(image, label)


FFHQ dataset initialization

In [None]:
# Index the FFHQ images stored on Drive; every item goes through dataset_transform.
ffhq_dataset = FFHQ_Dataset(
    img_dir='drive/MyDrive/ComputerVision/datasets/ffhq-dataset/images1024x1024',
    transform=dataset_transform,
)

Found 70000 images in directory: drive/MyDrive/ComputerVision/datasets/ffhq-dataset/images1024x1024


Print a sample

In [None]:
# Index the dataset once to display a fully transformed sample
# (both image tensors and the label) as the cell output.
ffhq_dataset[0]

{'vgg16_image': tensor([[[-2.1179, -2.1179, -2.1008,  ..., -2.1179, -2.1179, -2.1179],
          [-2.1179, -2.1179, -2.1179,  ..., -2.1179, -2.1179, -2.1179],
          [-2.1179, -2.1179, -2.1179,  ..., -2.1179, -2.1179, -2.1179],
          ...,
          [-1.7412, -1.7240, -1.7412,  ...,  0.1426,  0.1426,  0.1426],
          [-1.7925, -1.7754, -1.7754,  ...,  0.1768,  0.1768,  0.1426],
          [-1.8439, -1.8439, -1.7925,  ...,  0.1254,  0.1939,  0.1597]],
 
         [[ 0.2402,  0.2227,  0.2402,  ...,  0.0301,  0.0126, -0.0224],
          [ 0.2227,  0.2227,  0.2227,  ..., -0.0224, -0.0399, -0.0399],
          [ 0.2227,  0.2052,  0.1877,  ..., -0.0749, -0.0574, -0.0574],
          ...,
          [ 0.6429,  0.6604,  0.6779,  ...,  0.7129,  0.6954,  0.6604],
          [ 0.5728,  0.6078,  0.6254,  ...,  0.7129,  0.6954,  0.6954],
          [ 0.5553,  0.5553,  0.5553,  ...,  0.6779,  0.7304,  0.7129]],
 
         [[ 0.7402,  0.7228,  0.7402,  ...,  0.8797,  0.8622,  0.8099],
          [ 0

Use only a random half of this dataset, because it contains too many images to process on the Colab free plan

In [None]:
# Keep a random half of FFHQ. The permutation is drawn from a dedicated, seeded
# generator so that the evaluated subset (and therefore the reported metrics)
# is the same on every run.
half_size = len(ffhq_dataset) // 2
subset_rng = torch.Generator().manual_seed(0)
subset_indices = torch.randperm(len(ffhq_dataset), generator=subset_rng)[:half_size].tolist()
ffhq_subset = Subset(ffhq_dataset, subset_indices)

# Create DataLoader
ffhq_dataloader = DataLoader(ffhq_subset, batch_size=64, shuffle=False, num_workers=8, collate_fn=collate_fn)



Print a sample of subsampled FFHQ

In [None]:
# Display element 1 of the subsampled dataset; the sample has the same
# dict structure as the items of ffhq_dataset.
ffhq_subset[1]

{'vgg16_image': tensor([[[-0.6623, -0.6623, -0.6794,  ...,  2.2489,  2.2489,  2.2318],
          [-0.6623, -0.6452, -0.6794,  ...,  2.2318,  2.2318,  2.2318],
          [-0.5938, -0.5938, -0.6281,  ...,  2.2489,  2.2489,  2.2318],
          ...,
          [ 1.4269,  1.4612,  1.4098,  ..., -1.3987, -1.4329, -1.4672],
          [ 1.4440,  1.4098,  1.4098,  ..., -1.4500, -1.4158, -1.3987],
          [ 1.4612,  1.3755,  1.3927,  ..., -1.4329, -1.4843, -1.3644]],
 
         [[-0.8452, -0.8452, -0.8452,  ...,  1.9384,  1.8508,  1.8683],
          [-0.8102, -0.7752, -0.8102,  ...,  1.9034,  1.9034,  1.9909],
          [-0.7752, -0.7752, -0.7927,  ...,  2.0434,  2.0259,  2.0259],
          ...,
          [ 1.3256,  1.3606,  1.2906,  ..., -1.5105, -1.4930, -1.5105],
          [ 1.3256,  1.3081,  1.2906,  ..., -1.5630, -1.4580, -1.4755],
          [ 1.3606,  1.2731,  1.2906,  ..., -1.5455, -1.5805, -1.4755]],
 
         [[-0.8633, -0.8458, -0.9156,  ...,  1.6465,  1.5594,  1.6465],
          [-0

## Deepfake Challenge Dataset

Extract the dataset zip file from Google Drive

In [None]:
# Archive location on Drive and the local directory it is unpacked into.
zip_path = 'drive/MyDrive/ComputerVision/datasets/deepfakechallenge/test-task1.zip'
extract_path = 'deepfakechallenge/'

# Unpack every member of the archive below extract_path.
with zipfile.ZipFile(zip_path, mode='r') as archive:
    archive.extractall(path=extract_path)

Organize the dataset according to the label file

In [None]:
# Paths
annotations_file = 'drive/MyDrive/ComputerVision/datasets/deepfakechallenge/label-task1.txt'
img_dir = extract_path + 'test-task1'
dataset_dir = 'deepfakechallenge/test-task1-organized'

# Read labels and move every image into a class_<label> sub-folder.
# Each non-empty line of the label file is expected to be "<image_name> <label>".
with open(annotations_file, 'r') as f:
    for line in f:
        fields = line.split()
        if not fields:
            # Tolerate blank lines (e.g. a trailing newline at the end of the file)
            continue
        image_name, label = fields

        label_dir = os.path.join(dataset_dir, f'class_{label}')
        os.makedirs(label_dir, exist_ok=True)

        source_path = os.path.join(img_dir, image_name)
        dest_path = os.path.join(label_dir, image_name)
        if not os.path.exists(source_path) and os.path.exists(dest_path):
            # Already moved by a previous run of this cell: keeps the cell re-runnable
            continue
        shutil.move(source_path, dest_path)

print(f"Images organized into {dataset_dir}")

Images organized into deepfakechallenge/test-task1-organized


Define a wrapper class for this dataset, because our custom transform also needs the label of the sample

In [None]:
class ChallengeDataset(ImageFolder):
    """ImageFolder variant whose transform receives both the image and its label.

    Args:
        root: Directory containing one sub-folder per class.
        transform: Optional callable invoked as ``transform(image, label)``;
            its return value becomes the item. When omitted, items are the
            plain ``(image, label)`` pairs produced by ``ImageFolder``.
    """

    def __init__(self, root, transform=None):
        # The callable is deliberately not forwarded to ImageFolder, which would
        # call it with the image only; it is stored and applied in __getitem__.
        super().__init__(root)
        self.map = transform

    def __getitem__(self, index):
        image, label = super().__getitem__(index)
        if self.map is None:
            # Without a transform, fall back to the (image, label) pair instead
            # of failing on an unbound local variable.
            return image, label
        return self.map(image, label)

Load the dataset in PyTorch

In [None]:
# Build the challenge dataset from the per-class folders; every item goes through dataset_transform.
deepfakechallenge_dataset = ChallengeDataset(
    root='deepfakechallenge/test-task1-organized',
    transform=dataset_transform,
)

Print a sample of DeepFakeChallenge dataset

In [None]:
# Index the dataset once to display a fully transformed sample
# (both image tensors and the label) as the cell output.
deepfakechallenge_dataset[0]

{'vgg16_image': tensor([[[ 2.2489,  2.2489,  2.2318,  ...,  2.2489,  2.2489,  2.2489],
          [ 2.2489,  2.2489,  2.2318,  ...,  2.2489,  2.2489,  2.2489],
          [ 2.2489,  2.2489,  2.2318,  ...,  2.2489,  2.2489,  2.2489],
          ...,
          [ 2.0263,  2.0263,  2.0263,  ...,  1.6324,  1.6324,  1.6495],
          [ 1.9920,  1.9920,  1.9920,  ...,  1.6495,  1.6495,  1.6667],
          [ 1.9749,  1.9749,  1.9749,  ...,  1.6667,  1.6667,  1.6838]],
 
         [[ 2.0609,  2.0609,  2.0609,  ...,  2.3060,  2.3060,  2.3060],
          [ 2.0609,  2.0609,  2.0609,  ...,  2.3060,  2.3060,  2.3060],
          [ 2.0609,  2.0609,  2.0609,  ...,  2.3060,  2.3060,  2.3060],
          ...,
          [ 1.8158,  1.8158,  1.8158,  ...,  1.0280,  0.9930,  0.9755],
          [ 1.7633,  1.7633,  1.7633,  ...,  1.0630,  1.0105,  0.9930],
          [ 1.7458,  1.7458,  1.7458,  ...,  1.0630,  1.0280,  1.0105]],
 
         [[-0.6715, -0.6715, -0.6715,  ..., -0.6018, -0.6018, -0.6018],
          [-0

Create Dataloader

In [None]:
# Batch the challenge samples in their stored order; collate_fn stacks the per-sample dicts.
challenge_dataloader = DataLoader(
    deepfakechallenge_dataset,
    batch_size=64,
    shuffle=False,
    num_workers=8,
    collate_fn=collate_fn,
)

# Evaluation Routine

In [None]:
def evaluate_model(model, dataloader):
    """Run the model over a dataloader and print classification metrics.

    Prints accuracy, weighted precision/recall/F1, the per-class classification
    report and the confusion matrix. Nothing is returned.

    Args:
        model: Module called as ``model(vgg16_images, beit_images)`` that
            returns one row of class scores per sample.
        dataloader: Iterable of batch dicts with the keys ``'vgg16_images'``,
            ``'beit_images'`` and ``'labels'``.

    Note:
        Inputs are moved to the module-level ``device``.
    """
    # Set the model to evaluation mode
    model.eval()

    # Ground-truth and predicted labels accumulated over all batches
    all_labels = []
    all_preds = []

    # Disable gradient computation for evaluation
    with torch.no_grad():
        for batch in tqdm(dataloader):
            # Only the model inputs have to live on the device
            vgg16_images = batch['vgg16_images'].to(device)
            beit_images = batch['beit_images'].to(device)

            # Predicted class = index of the highest score in each row
            outputs = model(vgg16_images, beit_images)
            preds = outputs.argmax(dim=1)

            # Labels are consumed by scikit-learn on the host, so they are
            # never transferred to the device.
            all_labels.extend(batch['labels'].cpu().numpy())
            all_preds.extend(preds.cpu().numpy())

    # Calculate accuracy
    accuracy = accuracy_score(all_labels, all_preds)
    print(f'Accuracy: {accuracy * 100:.2f}%')

    # Calculate precision, recall, and F1-score.
    # zero_division=0 yields the same values as the default but without the
    # undefined-metric warnings raised when a class has no true or predicted
    # samples (e.g. a single-class dataset).
    precision = precision_score(all_labels, all_preds, average='weighted', zero_division=0)
    recall = recall_score(all_labels, all_preds, average='weighted', zero_division=0)
    f1 = f1_score(all_labels, all_preds, average='weighted', zero_division=0)

    print(f'Precision: {precision * 100:.2f}%')
    print(f'Recall: {recall * 100:.2f}%')
    print(f'F1-Score: {f1 * 100:.2f}%')

    # Print the classification report
    print('\nClassification Report:')
    print(classification_report(all_labels, all_preds, zero_division=0))

    # Compute and print the confusion matrix
    conf_matrix = confusion_matrix(all_labels, all_preds)
    print('\nConfusion Matrix:')
    print(conf_matrix)

## Evaluating Model with FFHQ

In [None]:
# Evaluate on the FFHQ half-subset. Every FFHQ sample carries the label 0,
# so class 1 has no true samples in this run.
evaluate_model(model, ffhq_dataloader)

  self.pid = os.fork()
  self.pid = os.fork()
100%|██████████| 547/547 [56:27<00:00,  6.19s/it]
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 90.50%
Precision: 100.00%
Recall: 90.50%
F1-Score: 95.01%

Classification Report:
              precision    recall  f1-score   support

           0       1.00      0.91      0.95     35000
           1       0.00      0.00      0.00         0

    accuracy                           0.91     35000
   macro avg       0.50      0.45      0.48     35000
weighted avg       1.00      0.91      0.95     35000


Confusion Matrix:
[[31675  3325]
 [    0     0]]


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


## Evaluating Model with DeepfakeChallenge

In [None]:
# Evaluate on the DeepfakeChallenge test set; the metrics are printed by evaluate_model.
evaluate_model(model, challenge_dataloader)

  self.pid = os.fork()
100%|██████████| 110/110 [02:18<00:00,  1.26s/it]

Accuracy: 64.79%
Precision: 77.59%
Recall: 64.79%
F1-Score: 66.32%

Classification Report:
              precision    recall  f1-score   support

           0       0.44      0.86      0.58      2000
           1       0.91      0.56      0.70      5000

    accuracy                           0.65      7000
   macro avg       0.68      0.71      0.64      7000
weighted avg       0.78      0.65      0.66      7000


Confusion Matrix:
[[1722  278]
 [2187 2813]]



