### **Conexão com Google Drive**

In [None]:
# Mount Google Drive at /content/drive (the google.colab module is provided by
# the Colab runtime, so this cell is Colab-specific).
from google.colab import drive
drive.mount('/content/drive')

# Root folder on Drive for the extracted frames. process_technique() reads this
# global and writes JPEGs under <output_dir>/<technique>/<train|test>/.
output_dir = '/content/drive/MyDrive/face_forensic/output_frames'

Mounted at /content/drive


### **Installs**

In [None]:
!pip install opencv-python mtcnn numpy scikit-learn tqdm matplotlib tensorflow kagglehub

Collecting mtcnn
  Downloading mtcnn-1.0.0-py3-none-any.whl.metadata (5.8 kB)
Collecting lz4>=4.3.3 (from mtcnn)
  Downloading lz4-4.4.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.8 kB)
Downloading mtcnn-1.0.0-py3-none-any.whl (1.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.9/1.9 MB[0m [31m29.0 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading lz4-4.4.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m32.9 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: lz4, mtcnn
Successfully installed lz4-4.4.4 mtcnn-1.0.0


### **Imports**

In [None]:
import os
import cv2
from mtcnn import MTCNN
import random
import shutil
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelEncoder
from tqdm import tqdm
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow import keras
from keras import layers
import kagglehub
import glob

### **Função para extrair o rosto da imagem**

In [None]:
def extract_face(image, IMG_SIZE=224, detector=None):
    """Crop the first face MTCNN reports in ``image`` and resize it to a square.

    Parameters
    ----------
    image : numpy.ndarray
        Image indexed as ``image[row, col]`` (the caller in this notebook
        passes an RGB video frame).
    IMG_SIZE : int, default 224
        Side length, in pixels, of the square output.
    detector : MTCNN, optional
        Detector to run. When omitted, a single detector is built on the first
        call and reused afterwards, because constructing ``MTCNN()`` for every
        frame is very expensive.

    Returns
    -------
    numpy.ndarray
        The face crop resized to ``(IMG_SIZE, IMG_SIZE)``. If no face is
        detected, or the reported box does not overlap the image, the whole
        image is resized instead.
    """
    if detector is None:
        # Lazily create one shared detector and keep it on the function object
        # so the block stays self-contained (no extra module-level global).
        detector = getattr(extract_face, "_detector", None)
        if detector is None:
            detector = MTCNN()
            extract_face._detector = detector

    detections = detector.detect_faces(image)
    if detections:
        x, y, w, h = detections[0]['box']
        img_h, img_w = image.shape[:2]

        # Clamp the box to the image. The box origin can be negative; taking
        # abs() of it would shift the crop window instead of trimming it.
        left, top = max(0, x), max(0, y)
        right, bottom = min(img_w, x + w), min(img_h, y + h)

        # An empty crop would make cv2.resize raise, so only use a box that
        # still has a positive area after clamping.
        if right > left and bottom > top:
            face = image[top:bottom, left:right]
            return cv2.resize(face, (IMG_SIZE, IMG_SIZE))

    # No usable face: fall back to the full frame.
    return cv2.resize(image, (IMG_SIZE, IMG_SIZE))

### **Função para extrair frames aleatórios de um vídeo**

In [None]:
def video_to_frames(video_path, max_frames=5):
    """Sample random frames from a video and return their face crops.

    Up to ``max_frames`` distinct frame indices are drawn with
    ``random.sample`` and visited in ascending order. Each decoded frame is
    converted from BGR to RGB, passed through ``extract_face`` and scaled by
    ``1 / 255.0``.

    Parameters
    ----------
    video_path : str
        Path handed to ``cv2.VideoCapture``.
    max_frames : int, default 5
        Maximum number of frames to sample; values <= 0 yield an empty list.

    Returns
    -------
    list
        One float array per successfully read frame (possibly fewer than
        requested, since frames that fail to decode are skipped). Empty when
        the reported frame count is not positive.
    """
    cap = cv2.VideoCapture(video_path)
    frames = []
    try:
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

        # Nothing to sample; also avoids random.sample raising on a negative
        # sample size.
        if total_frames <= 0 or max_frames <= 0:
            return frames

        sample_size = min(max_frames, total_frames)
        frame_indices = sorted(random.sample(range(total_frames), sample_size))

        for frame_idx in frame_indices:
            cap.set(cv2.CAP_PROP_POS_FRAMES, frame_idx)
            ret, frame = cap.read()
            if not ret:
                # Frame could not be decoded; skip it.
                continue
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            face = extract_face(frame)
            frames.append(face / 255.0)
    finally:
        # Release the capture on every path, including the early return and
        # any exception raised while decoding or detecting.
        cap.release()

    return frames

### **Função Principal**

In [None]:
def process_technique(technique, num_videos, num_frames, split_ratio=0.7):
    """Extract face frames for one technique folder and save a train/test split.

    Downloads the ``xdxd003/ff-c23`` dataset through ``kagglehub``, picks
    ``num_videos`` random ``.mp4`` files from the ``technique`` sub-folder,
    splits them by video into train and test, and writes up to ``num_frames``
    face crops per video as JPEGs under
    ``<output_dir>/<technique>/<train|test>/``. Existing ``.jpg`` files in
    those two folders are deleted first.

    Parameters
    ----------
    technique : str
        Name of the sub-folder of ``FaceForensics++_C23`` to process.
    num_videos : int
        Maximum number of videos to sample from that folder.
    num_frames : int
        Maximum number of frames to extract from each video.
    split_ratio : float, default 0.7
        ``train_size`` passed to ``train_test_split``.

    Notes
    -----
    Reads the notebook-level global ``output_dir``. Returns ``None``; progress
    and a summary are printed.
    """
    dataset_name = "xdxd003/ff-c23"
    download_path = kagglehub.dataset_download(dataset_name)
    path = os.path.join(download_path, 'FaceForensics++_C23')

    technique_path = os.path.join(path, technique)
    # os.listdir returns entries in arbitrary order; sorting makes the sample
    # below reproducible when the `random` module is seeded.
    all_videos = sorted(f for f in os.listdir(technique_path) if f.endswith('.mp4'))
    sample_size = max(0, min(num_videos, len(all_videos)))
    selected_videos = random.sample(all_videos, sample_size)

    print(f"📁 Técnica: {technique}")
    print(f"🎥 Vídeos selecionados: {len(selected_videos)}")

    if not selected_videos:
        # Bail out before deleting previously extracted frames for nothing
        # (train_test_split would raise on an empty list anyway).
        print(f"⚠️ Nenhum vídeo selecionado em {technique_path}")
        return

    # Remove frames left over from a previous run.
    for subset in ['train', 'test']:
        subset_dir = os.path.join(output_dir, technique, subset)
        if os.path.exists(subset_dir):
            for old_jpg in glob.glob(os.path.join(subset_dir, '*.jpg')):
                os.remove(old_jpg)

    # Split by video (not by frame) so frames of one video never end up in
    # both subsets.
    if len(selected_videos) < 2:
        # train_test_split raises ValueError when a subset would be empty,
        # so a single video goes entirely to train.
        train_videos, test_videos = list(selected_videos), []
    else:
        train_videos, test_videos = train_test_split(
            selected_videos, train_size=split_ratio, random_state=42
        )

    def extract_and_save(videos, subset):
        """Write the sampled face frames of ``videos`` into the ``subset`` folder."""
        subset_dir = os.path.join(output_dir, technique, subset)
        os.makedirs(subset_dir, exist_ok=True)
        failed_writes = 0

        for video_file in tqdm(videos, desc=f"Extraindo para {subset}"):
            video_path = os.path.join(technique_path, video_file)
            frames = video_to_frames(video_path, max_frames=num_frames)
            video_stem = os.path.splitext(video_file)[0]

            for idx, frame in enumerate(frames):
                filename = f'{technique}_{video_stem}_frame{idx + 1}.jpg'
                img_path = os.path.join(subset_dir, filename)
                # Frames arrive scaled by 1/255; round (rather than truncate)
                # on the way back to uint8 so float round-off cannot lower a
                # pixel value by one.
                frame_u8 = np.clip(np.rint(frame * 255), 0, 255).astype('uint8')
                frame_bgr = cv2.cvtColor(frame_u8, cv2.COLOR_RGB2BGR)
                # cv2.imwrite signals failure through its return value.
                if not cv2.imwrite(img_path, frame_bgr):
                    failed_writes += 1

        if failed_writes:
            print(f"⚠️ {failed_writes} frame(s) não puderam ser salvos em {subset_dir}")

    extract_and_save(train_videos, 'train')
    extract_and_save(test_videos, 'test')

    print(f"\n✅ Finalizado: {technique}")
    print(f"Treino: {len(train_videos)} vídeos | Teste: {len(test_videos)} vídeos")


### **Exemplo de uso**

In [None]:
# Technique names that can be passed to process_technique(); each one is used
# as a sub-folder name inside the downloaded dataset.
techniques = ['Deepfakes', 'Face2Face', 'FaceSwap', 'NeuralTextures', 'original']

# Demo run: only the first technique (Deepfakes), 2 videos, up to 51 frames each.
process_technique(technique=techniques[0], num_videos=2, num_frames=51)

📁 Técnica: Deepfakes
🎥 Vídeos selecionados: 2


Extraindo para train: 100%|██████████| 1/1 [01:34<00:00, 94.34s/it]
Extraindo para test: 100%|██████████| 1/1 [00:40<00:00, 40.55s/it]


✅ Finalizado: Deepfakes
Treino: 1 vídeos | Teste: 1 vídeos



