In [None]:
# Directory holding the training videos (*.mp4) and their metadata.json.
TRAIN_DIR = 'train_sample_videos/'

# Number of frames handed to the face detector per call (DetectionPipeline batch_size).
BATCH_SIZE = 1
# Scale factor applied to each frame's dimensions before detection (DetectionPipeline resize).
SCALE = 0.25
# Number of frames sampled per video; None means every frame is used.
N_FRAMES = None

In [None]:
# Move the Kaggle API token (kaggle.json, expected in the working directory)
# into ~/.kaggle/ for the `kaggle` command used in the download step.
!mkdir -p ~/.kaggle
!mv kaggle.json ~/.kaggle/
# Restrict the token file to owner read/write only.
!chmod 600 ~/.kaggle/kaggle.json

In [None]:
# Download the deepfake-detection-challenge competition archive with the Kaggle CLI.
!kaggle competitions download -c deepfake-detection-challenge

Downloading deepfake-detection-challenge.zip to /content
100% 4.13G/4.13G [03:16<00:00, 21.9MB/s]
100% 4.13G/4.13G [03:16<00:00, 22.6MB/s]


In [None]:
# Extract the downloaded competition archive into the working directory.
!unzip deepfake-detection-challenge.zip

Archive:  deepfake-detection-challenge.zip
  inflating: sample_submission.csv   
  inflating: test_videos/aassnaulhq.mp4  
  inflating: test_videos/aayfryxljh.mp4  
  inflating: test_videos/acazlolrpz.mp4  
  inflating: test_videos/adohdulfwb.mp4  
  inflating: test_videos/ahjnxtiamx.mp4  
  inflating: test_videos/ajiyrjfyzp.mp4  
  inflating: test_videos/aktnlyqpah.mp4  
  inflating: test_videos/alrtntfxtd.mp4  
  inflating: test_videos/aomqqjipcp.mp4  
  inflating: test_videos/apedduehoy.mp4  
  inflating: test_videos/apvzjkvnwn.mp4  
  inflating: test_videos/aqrsylrzgi.mp4  
  inflating: test_videos/axfhbpkdlc.mp4  
  inflating: test_videos/ayipraspbn.mp4  
  inflating: test_videos/bcbqxhziqz.mp4  
  inflating: test_videos/bcvheslzrq.mp4  
  inflating: test_videos/bdshuoldwx.mp4  
  inflating: test_videos/bfdopzvxbi.mp4  
  inflating: test_videos/bfjsthfhbd.mp4  
  inflating: test_videos/bjyaxvggle.mp4  
  inflating: test_videos/bkcyglmfci.mp4  
  inflating: test_videos/bktkwbcawi.m

In [None]:
!pip install facenet_pytorch

from facenet_pytorch.models.inception_resnet_v1 import get_torch_home
torch_home = get_torch_home()

!mkdir -p $torch_home/checkpoints/

Collecting facenet_pytorch
  Downloading facenet_pytorch-2.5.3-py3-none-any.whl (1.9 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.9/1.9 MB[0m [31m30.4 MB/s[0m eta [36m0:00:00[0m
Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch==2.2.1->torchvision->facenet_pytorch)
  Downloading nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (23.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m23.7/23.7 MB[0m [31m57.3 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting nvidia-cuda-runtime-cu12==12.1.105 (from torch==2.2.1->torchvision->facenet_pytorch)
  Downloading nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (823 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m823.6/823.6 kB[0m [31m64.5 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting nvidia-cuda-cupti-cu12==12.1.105 (from torch==2.2.1->torchvision->facenet_pytorch)
  Downloading nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl 

In [None]:
import os
import glob
import json
import torch
import cv2
from PIL import Image
import numpy as np
import pandas as pd
from tqdm.notebook import tqdm
from facenet_pytorch import MTCNN, InceptionResnetV1

# Prefer the first CUDA device when one is available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda:0'
else:
    device = 'cpu'
print(f'Running on device: {device}')

Running on device: cuda:0


<a id="define_useful_classes"></a>
# Define useful classes
[Back to Table of Contents](#toc)

In [None]:
class DetectionPipeline:
    """Sample frames from a video file and run a face detector over them in batches."""

    def __init__(self, detector, n_frames=None, batch_size=60, resize=None):
        """Store the pipeline configuration.

        Args:
            detector: Callable applied to a list of PIL images; its result is
                appended to the output with ``list.extend``.
            n_frames: Number of evenly spaced frames to sample per video, or
                ``None`` to use every frame.
            batch_size: Number of frames handed to ``detector`` per call.
            resize: Optional scale factor applied to both frame dimensions
                before detection; ``None`` keeps the original size.
        """
        self.detector = detector
        self.n_frames = n_frames
        self.batch_size = batch_size
        self.resize = resize

    def __call__(self, filename):
        """Run the detector on the sampled frames of ``filename``.

        Args:
            filename: Path of the video file to open with ``cv2.VideoCapture``.

        Returns:
            A list with the detector outputs for every successfully decoded
            sampled frame, in frame order.
        """
        v_cap = cv2.VideoCapture(filename)
        try:
            v_len = int(v_cap.get(cv2.CAP_PROP_FRAME_COUNT))

            # Indices of the frames to decode; None means "every frame".
            # A set gives O(1) membership tests instead of scanning an array per frame.
            if self.n_frames is None:
                wanted = None
            else:
                wanted = set(
                    np.linspace(0, v_len - 1, self.n_frames).astype(int).tolist()
                )

            faces = []
            frames = []
            for j in range(v_len):
                # grab() advances the stream cheaply; only sampled frames are decoded.
                grabbed = v_cap.grab()
                if wanted is not None and j not in wanted:
                    continue
                if not grabbed:
                    continue

                # Load frame
                success, frame = v_cap.retrieve()
                if not success:
                    continue
                frame = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))

                if self.resize is not None:
                    frame = frame.resize(
                        tuple(int(d * self.resize) for d in frame.size)
                    )
                frames.append(frame)

                # Run the detector whenever a full batch has been collected.
                if len(frames) >= self.batch_size:
                    faces.extend(self.detector(frames))
                    frames = []

            # Flush the final partial batch. Doing this after the loop (instead of
            # when the last sampled index is reached) keeps pending frames from being
            # dropped when the last sampled frame fails to decode.
            if frames:
                faces.extend(self.detector(frames))
        finally:
            # Release the capture even if decoding or detection raised.
            v_cap.release()

        return faces

<a id="define_helper_functions"></a>
# Define helper functions
[Back to Table of Contents](#toc)

In [None]:
def process_faces(faces, feature_extractor):
    """Compute each face embedding's distance from the mean embedding.

    Args:
        faces: Iterable of per-frame detector outputs; entries that are ``None``
            (frames without a detected face) are skipped. The remaining entries
            are tensors that are concatenated with ``torch.cat``.
        feature_extractor: Callable mapping the concatenated face batch to a
            2-D tensor of embeddings (one row per face).

    Returns:
        A 1-D numpy array with one distance per face, or ``None`` when no
        frame contained a face.
    """
    # Filter out frames without faces
    detected = [f for f in faces if f is not None]
    if len(detected) == 0:
        return None

    # Inference only: disabling autograd here means the result can be converted
    # to numpy even when the caller did not wrap the call in torch.no_grad().
    with torch.no_grad():
        batch = torch.cat(detected).to(device)

        # Generate facial feature vectors using a pretrained model
        embeddings = feature_extractor(batch)

        # Centroid of the embeddings and the distance of each embedding from it
        centroid = embeddings.mean(dim=0)
        distances = (embeddings - centroid).norm(dim=1)

    return distances.cpu().numpy()

<a id="start_data_preparation_process"></a>
# Start data-preparation process
[Back to Table of Contents](#toc)

In [None]:
# Face detector (MTCNN), placed on the selected device and switched to eval mode.
face_detector = MTCNN(
    margin=14,
    keep_all=True,
    factor=0.5,
    device=device,
).eval()

# Embedding network (InceptionResnetV1 with the 'vggface2' pretrained weights), eval mode.
feature_extractor = InceptionResnetV1(
    pretrained='vggface2',
    device=device,
).eval()

# Frame-sampling pipeline wired to the detector, using the notebook-level settings.
detection_pipeline = DetectionPipeline(
    detector=face_detector,
    n_frames=N_FRAMES,
    batch_size=BATCH_SIZE,
    resize=SCALE,
)

  0%|          | 0.00/107M [00:00<?, ?B/s]

In [None]:

# Every training video directly under TRAIN_DIR.
all_train_videos = glob.glob(os.path.join(TRAIN_DIR, '*.mp4'))

# Build the path with os.path.join (as for the glob pattern) so it does not
# depend on TRAIN_DIR ending with a path separator.
metadata_path = os.path.join(TRAIN_DIR, 'metadata.json')

# metadata maps each video file name to a record; the 'label' field is read later.
with open(metadata_path, 'r') as f:
    metadata = json.load(f)

In [None]:
# Collect one (filename, distance, label) record per detected face and build the
# DataFrame once at the end, instead of growing it row by row with df.loc[len(df)].
records = []

with torch.no_grad():
    for path in tqdm(all_train_videos):
        # basename handles whichever path separator glob returned on this platform.
        file_name = os.path.basename(path)

        # Detect faces in the sampled frames of this video
        faces = detection_pipeline(path)

        # Distance of each face embedding from the video's centroid
        distances = process_faces(faces, feature_extractor)
        if distances is None:
            continue

        # The label is per video, so look it up once: 1 for 'FAKE', 0 otherwise.
        label = 1 if metadata[file_name]['label'] == 'FAKE' else 0
        records.extend((file_name, distance, label) for distance in distances)

df = pd.DataFrame(records, columns=['filename', 'distance', 'label'])

  0%|          | 0/400 [00:00<?, ?it/s]

In [None]:
# Preview the first rows of the per-face distance table.
df.head()

Unnamed: 0,filename,distance,label
0,eczrseixwq.mp4,0.494105,1
1,eczrseixwq.mp4,0.495248,1
2,eczrseixwq.mp4,0.518513,1
3,eczrseixwq.mp4,0.497832,1
4,eczrseixwq.mp4,0.507992,1


# Save prepared train data


In [None]:
# Write the table to train.csv without the DataFrame index column.
df.to_csv('train.csv', index=False)