This notebook generates predictions from two of the top deepfake detection models on Hugging Face.  


# Setup

In [None]:
# Mount Google Drive at /content/drive; the dataset paths configured in this
# notebook all live under that mount point.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Common imports
import numpy as np
import pandas as pd
import os

In [None]:
# Config
# Folder holding the split CSVs (train.csv / test.csv) and the prediction CSVs
# written by this notebook.
DATASET_FOLDER_PATH = "/content/drive/Shareddrives/CS152 Project/dataset/"
# Root folder of the image files; each "Image Path" value from the CSVs is
# resolved relative to it.
DATASET_TREE_PATH = "/content/drive/Shareddrives/CS152 Project/dataset/all/"

In [None]:
# Load the train/test splits. os.path.join keeps the paths valid whether or
# not DATASET_FOLDER_PATH ends with a slash.
train_df = pd.read_csv(os.path.join(DATASET_FOLDER_PATH, "train.csv"))
test_df = pd.read_csv(os.path.join(DATASET_FOLDER_PATH, "test.csv"))

In [None]:
def add_model_predictions(df, predict_function):
  """Run a model over every image listed in `df` and store its output in place.

  Args:
    df: DataFrame with an "Image Path" column whose values are paths relative
      to DATASET_TREE_PATH (with or without a leading slash).
    predict_function: Callable that takes a list of image file paths and
      returns a dict mapping new column names to per-image values, in the
      same order as the input paths.

  Returns:
    None. `df` is modified in place: one column is added (or overwritten)
    per key returned by `predict_function`.
  """
  # os.path.join discards DATASET_TREE_PATH when the next component is
  # absolute, so leading slashes are stripped first. lstrip (rather than
  # path[1:]) leaves paths that have no leading slash untouched.
  path_list = [os.path.join(DATASET_TREE_PATH, path.lstrip("/")) for path in df["Image Path"]]

  new_columns = predict_function(path_list)

  for col_name, col_data in new_columns.items():
    df[col_name] = col_data

# Huggingface Models

In [None]:
# Dependencies
!pip install -q transformers
from transformers import pipeline

In [None]:
# dima806 model: image-classification pipeline for the
# "dima806/deepfake_vs_real_image_detection" checkpoint, used by predict_dima806.

dima806_pipeline = pipeline("image-classification", model="dima806/deepfake_vs_real_image_detection")

def predict_dima806(path_list):
  """Score images with the dima806 deepfake classifier.

  Args:
    path_list: List of image file paths to classify.

  Returns:
    A dict {"dima806_score": scores} where scores[i] is the score the model
    gave to its "Fake" label for path_list[i] (1 is fake, 0 is real).

  Raises:
    ValueError: If the pipeline returns a different number of results than
      there are paths, or if the result for an image has no "Fake" label.
  """
  all_predictions = dima806_pipeline(path_list)

  # Explicit checks instead of assert: asserts are stripped under `python -O`
  # and would not say which image is the problem.
  if len(all_predictions) != len(path_list):
    raise ValueError(
        f"dima806 returned {len(all_predictions)} results for {len(path_list)} images")

  predictions = []
  for path, prediction_dicts in zip(path_list, all_predictions):
    # Each image yields a list of {"label", "score"} dicts; keep the "Fake" one.
    fake_score = next(
        (entry["score"] for entry in prediction_dicts if entry["label"] == "Fake"),
        None)
    if fake_score is None:
      raise ValueError(f'dima806 output has no "Fake" label for image: {path}')
    predictions.append(fake_score)

  # 1 is fake, 0 is real
  return {"dima806_score": predictions}

In [None]:
# wvolf model: image-classification pipeline for the
# "Wvolf/ViT_Deepfake_Detection" checkpoint, used by predict_wvolf.

wvolf_pipeline = pipeline("image-classification", model="Wvolf/ViT_Deepfake_Detection")

def predict_wvolf(path_list):
  """Score images with the Wvolf ViT deepfake classifier.

  Args:
    path_list: List of image file paths to classify.

  Returns:
    A dict {"wvolf_score": scores} where scores[i] is the score the model
    gave to its "Fake" label for path_list[i] (1 is fake, 0 is real).

  Raises:
    ValueError: If the pipeline returns a different number of results than
      there are paths, or if the result for an image has no "Fake" label.
  """
  all_predictions = wvolf_pipeline(path_list)

  # Explicit checks instead of assert: asserts are stripped under `python -O`
  # and would not say which image is the problem.
  if len(all_predictions) != len(path_list):
    raise ValueError(
        f"wvolf returned {len(all_predictions)} results for {len(path_list)} images")

  predictions = []
  for path, prediction_dicts in zip(path_list, all_predictions):
    # Each image yields a list of {"label", "score"} dicts; keep the "Fake" one.
    fake_score = next(
        (entry["score"] for entry in prediction_dicts if entry["label"] == "Fake"),
        None)
    if fake_score is None:
      raise ValueError(f'wvolf output has no "Fake" label for image: {path}')
    predictions.append(fake_score)

  # 1 is fake, 0 is real
  return {"wvolf_score": predictions}

# Run models

In [None]:
# Choose which split to score; work on a copy so the loaded split is not
# modified when prediction columns are added.
# df = train_df.copy()
df = test_df.copy()

# Use only a couple samples when testing
# df = df.sample(10)

In [None]:
# Score every image with the dima806 model; adds a "dima806_score" column to df.
add_model_predictions(df, predict_dima806)

In [None]:
# Save after the first model as a checkpoint. The final save in this notebook
# writes to the same file and overwrites it with both models' scores.
df.to_csv(os.path.join(DATASET_FOLDER_PATH, "test_with_huggingface.csv"), index=False)

In [None]:
# Score every image with the wvolf model; adds a "wvolf_score" column to df.
add_model_predictions(df, predict_wvolf)

In [None]:
# Save the dataframe, which at this point holds the score columns added by
# the model cells run so far (index is not written).
df.to_csv(os.path.join(DATASET_FOLDER_PATH, "test_with_huggingface.csv"), index=False)

# Evaluation

In [None]:
def eval_model(df, column_name):
  """Print confusion-matrix counts and summary metrics for one score column.

  A row is treated as predicted-positive when its score is strictly greater
  than 0.5, and as actually positive when its "is_ai" value equals 1.

  Args:
    df: DataFrame with an "is_ai" column (1 or 0) and a score column named
      `column_name`.
    column_name: Name of the score column to evaluate.

  Returns:
    None. Counts, accuracy, precision, recall and F1 are printed. Any metric
    whose denominator is zero is reported as 0.

  Raises:
    ValueError: If some row fits none of the four confusion-matrix cells
      (e.g. "is_ai" is neither 0 nor 1, or the score is NaN).
  """
  total = len(df)

  if total == 0:
    # An empty frame may not even have the expected columns, so skip indexing.
    true_positives = false_positives = true_negatives = false_negatives = 0
  else:
    # Boolean masks replace the row-by-row iterrows loop.
    actual_positive = df["is_ai"] == 1
    actual_negative = df["is_ai"] == 0
    predicted_positive = df[column_name] > 0.5
    predicted_negative = df[column_name] <= 0.5

    # int() keeps plain Python ints so the printed output format is unchanged.
    true_positives = int((actual_positive & predicted_positive).sum())
    false_negatives = int((actual_positive & predicted_negative).sum())
    false_positives = int((actual_negative & predicted_positive).sum())
    true_negatives = int((actual_negative & predicted_negative).sum())

  classified = true_positives + false_positives + true_negatives + false_negatives
  if classified != total:
    raise ValueError(
        f"{total - classified} of {total} rows could not be classified: "
        f"'is_ai' must be 0 or 1 and '{column_name}' must be a comparable number")

  print(f"Model: {column_name}")
  print(f"Dataset size {len(df)}")
  print()
  print(f"True Positives: {true_positives}")
  print(f"False Positives: {false_positives}")
  print(f"True Negatives: {true_negatives}")
  print(f"False Negatives: {false_negatives}")
  print()

  # Every ratio falls back to 0 when its denominator is zero.
  if total == 0:
    accuracy = 0
  else:
    accuracy = (true_positives + true_negatives) / total

  if true_positives + false_positives == 0:
    precision = 0
  else:
    precision = true_positives / (true_positives + false_positives)

  if true_positives + false_negatives == 0:
    recall = 0
  else:
    recall = true_positives / (true_positives + false_negatives)

  if precision + recall == 0:
    f1_score = 0
  else:
    f1_score = 2 * (precision * recall) / (precision + recall)

  print(f"Accuracy: {accuracy}")
  print(f"Precision: {precision}")
  print(f"Recall: {recall}")
  print(f"F1 Score: {f1_score}")
  print()
  print()


In [None]:
# Preview the first rows of the current dataframe
df.head()

In [None]:
# Load a saved predictions file.
# NOTE(review): this reads "train_with_huggingface.csv", while the save cells
# in this notebook write "test_with_huggingface.csv" as currently configured.
# Confirm the intended split before running.
df = pd.read_csv(os.path.join(DATASET_FOLDER_PATH, "train_with_huggingface.csv"))

# Filtering for looking at performance on subsets
# filtered_rows = [index_row[1] for index_row in df.iterrows() if "GAN" in index_row[1]["Image Path"]]
# filtered_rows = [index_row[1] for index_row in df.iterrows() if index_row[1]["Skin Tone"] > 8]
# df = pd.DataFrame(filtered_rows)

eval_model(df, "dima806_score")

eval_model(df, "wvolf_score")