# Frame Generator

In [None]:
import cv2
import os
import numpy as np
import dlib

# Default location of dlib's MMOD CNN face-detector weights.
_DEFAULT_FACE_MODEL = "/content/drive/MyDrive/Project Data/Capstone_Deepfake/mmod_human_face_detector.dat"

# Detectors already loaded in this session, keyed by model path. Loading the
# model file on every call (i.e. once per video) is wasted work, so each model
# file is loaded at most once and then reused.
_FACE_DETECTORS = {}


def extract_faces_dlib(video_path, output_folder, label, max_frames=None,
                       model_path=_DEFAULT_FACE_MODEL, face_size=(224, 224)):
    """Detect faces in a video and save each one as a resized JPEG.

    Frames are read sequentially from the start of the video. Every face
    found by dlib's CNN detector is cropped from the frame, resized to
    ``face_size`` and written to
    ``<output_folder>/<label>_<video name>_face_<n>.jpg``, where ``n`` counts
    the faces saved from this video.

    Args:
        video_path: Path of the video file to read.
        output_folder: Directory that receives the JPEGs; created if missing.
        label: Prefix used in every output file name (e.g. "real" / "fake").
        max_frames: Maximum number of frames to process, or None to process
            the whole video.
        model_path: Path to the dlib CNN face-detector model file.
        face_size: ``(width, height)`` passed to ``cv2.resize`` for each crop.

    Returns:
        The number of face images successfully written.
    """
    face_detector = _FACE_DETECTORS.get(model_path)
    if face_detector is None:
        face_detector = dlib.cnn_face_detection_model_v1(model_path)
        _FACE_DETECTORS[model_path] = face_detector

    # Ensure the output directory exists
    os.makedirs(output_folder, exist_ok=True)

    video_name = os.path.splitext(os.path.basename(video_path))[0]
    count = 0
    frame_number = 0

    cap = cv2.VideoCapture(video_path)
    try:
        # Check the frame budget before reading so no frame is decoded only
        # to be thrown away.
        while cap.isOpened() and (max_frames is None or frame_number < max_frames):
            ret, frame = cap.read()
            if not ret:
                break

            frame_height, frame_width = frame.shape[:2]

            # Convert frame to RGB as dlib expects RGB format
            rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

            # Detect faces in the frame (second argument: upsample the image once)
            detections = face_detector(rgb_frame, 1)

            for detection in detections:
                rect = detection.rect

                # Clip the box to the frame by clamping both corners. The far
                # corner is derived from the *unclamped* origin, so a box that
                # starts off-frame loses its out-of-frame part instead of
                # being shifted onto pixels outside the detection.
                x0 = max(0, rect.left())
                y0 = max(0, rect.top())
                x1 = min(frame_width, rect.left() + rect.width())
                y1 = min(frame_height, rect.top() + rect.height())

                if x1 <= x0 or y1 <= y0:
                    continue  # Box lies entirely outside the frame

                # Extract the face region
                face_img = frame[y0:y1, x0:x1]
                resized_face = cv2.resize(face_img, face_size)

                # Save the face image; count it only if the write succeeded
                output_path = os.path.join(output_folder, f"{label}_{video_name}_face_{count}.jpg")
                if cv2.imwrite(output_path, resized_face):
                    count += 1

            frame_number += 1
    finally:
        # Release the capture handle even if detection or writing raised
        cap.release()

    return count


In [None]:
%%time
# Run face extraction over every file in the Celeb-real folder, processing at
# most 15 frames per video, and report progress after each one.
test = "/content/drive/MyDrive/Project Data/Capstone_Deepfake/Celeb-real/"
og_folder = "/content/drive/MyDrive/Project Data/Capstone_Deepfake/real_images_15/"

# List the directory once so the reported total always matches the files
# that are actually iterated.
video_files = os.listdir(test)
k = len(video_files)
for i, video_file in enumerate(video_files, start=1):
    video_path = os.path.join(test, video_file)
    extract_faces_dlib(video_path, og_folder, label="real", max_frames=15)
    print(f'{i} out of {k} videos processed')

In [None]:
%%time
# Run face extraction over every file in the Celeb-synthesis folder,
# processing at most 2 frames per video, and report progress after each one.
test = "/content/drive/MyDrive/Project Data/Capstone_Deepfake/Celeb-synthesis/"
og_folder = "/content/drive/MyDrive/Project Data/Capstone_Deepfake/fake_images_2/"

# List the directory once so the reported total always matches the files
# that are actually iterated.
video_files = os.listdir(test)
k = len(video_files)
for i, video_file in enumerate(video_files, start=1):
    video_path = os.path.join(test, video_file)
    extract_faces_dlib(video_path, og_folder, label="fake", max_frames=2)
    print(f'{i} out of {k} videos processed')

# Creating CSV with Image names and labels

In [None]:
import os
import pandas as pd
# Build a CSV index of the training images: one row per file with its full
# path, the name of the sub-directory it lives in, and a binary label
# (1 when the sub-directory name starts with "real", otherwise 0).
path = "/content/drive/MyDrive/Project Data/Capstone_Deepfake/train/"
data = []

# Sorted so the row order of the CSV is reproducible between runs;
# os.listdir returns entries in arbitrary order.
for class_dir in sorted(os.listdir(path)):
    frame_path = os.path.join(path, class_dir)
    if not os.path.isdir(frame_path):
        continue  # Skip stray files sitting next to the class folders

    # The label depends only on the directory, so compute it once per directory
    label = 1 if class_dir.startswith("real") else 0
    for image_name in sorted(os.listdir(frame_path)):
        data.append([os.path.join(frame_path, image_name), class_dir, label])

# Create a DataFrame from the accumulated rows and write it out
df = pd.DataFrame(data, columns=['filename', 'classname', 'label'])
df.to_csv("f.csv", index=False)