In [1]:
pip install mtcnn

Collecting mtcnn
  Downloading mtcnn-1.0.0-py3-none-any.whl.metadata (5.8 kB)
Collecting lz4>=4.3.3 (from mtcnn)
  Downloading lz4-4.3.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.7 kB)
Downloading mtcnn-1.0.0-py3-none-any.whl (1.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.9/1.9 MB[0m [31m32.2 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hDownloading lz4-4.3.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m45.9 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: lz4, mtcnn
Successfully installed lz4-4.3.3 mtcnn-1.0.0
Note: you may need to restart the kernel to use updated packages.


In [None]:
import os
import json
import numpy as np
from PIL import Image
from mtcnn import MTCNN
import shutil
from tqdm.notebook import tqdm

# Initialize paths
root_path = '/kaggle/working'
data_dir = "/kaggle/input/deepfake/DFWILD/train_fake/fake"
output_file = "/kaggle/working/faces_data_train_fake.json"

# Remove any output left over from a previous run so this cell starts clean
if os.path.exists(output_file):
    os.remove(output_file)
    print(f"{output_file} has been deleted successfully.")
else:
    print(f"{output_file} does not exist.")


# Initialize detector on device "GPU:0"
detector = MTCNN(device="GPU:0")

# Prepare results dictionary and collect the fake-image paths.
# os.listdir returns entries in arbitrary order, so the listing is sorted to
# make batch composition and the JSON key order reproducible across runs.
# Extensions are compared case-insensitively so files such as "x.JPG" are kept.
final_results = {}
images = [
    os.path.join(data_dir, f)
    for f in sorted(os.listdir(data_dir))
    if f.lower().endswith((".png", ".jpg", ".jpeg"))
]

# Define batch size
batch_size = 64  # Adjust batch size based on available resources

# Helper function to process a batch
def process_batch(image_names):
    """Detect faces in a batch of images and keep one detection per image.

    Args:
        image_names: List of image file paths. The whole list is handed to
            ``detector.detect_faces`` in a single call.

    Returns:
        dict: Maps each image path that produced at least one detection to a
        record with keys ``'box'`` (list of ints), ``'confidence'`` (float)
        and ``'keypoints'`` (landmark name -> list of ints). Images with no
        detection are omitted.

    Raises:
        KeyError: If a detection lacks ``'box'``, ``'confidence'``,
            ``'keypoints'`` or one of the five landmark names.
    """
    # Landmark names, in the order they are written to the output record.
    keypoint_names = ('nose', 'mouth_right', 'mouth_left', 'right_eye', 'left_eye')

    batch_results = {}
    results = detector.detect_faces(image_names)
    for image, result in zip(image_names, results):
        # Nothing detected for this image: leave it out of the output.
        if len(result) == 0:
            continue
        # Only the first detection is kept.
        # NOTE(review): confirm the detector's ordering if the most confident
        # face is the one that should be recorded.
        face = result[0]
        keypoints = face['keypoints']
        # Cast everything to built-in int/float so the record holds only plain
        # Python numbers (it is later written out with json.dump).
        batch_results[image] = {
            'box': [int(c) for c in face['box']],
            'confidence': float(face['confidence']),
            'keypoints': {
                name: [int(c) for c in keypoints[name]]
                for name in keypoint_names
            },
        }
    return batch_results

# Process images in batches, merging each batch's detections into final_results
for i in tqdm(range(0, len(images), batch_size), desc="Processing Batches"):
    batch_filenames = images[i:i + batch_size]
    batch_results = process_batch(batch_filenames)
    final_results.update(batch_results)

# Save results to JSON file.
# The accumulated dictionary is final_results; `results` is a local variable of
# process_batch and is not defined at this level, so dumping it raises NameError.
with open(output_file, "w") as f:
    json.dump(final_results, f, indent=4)

print(f"Results saved to {output_file}")

/kaggle/working/faces_data_train_fake.json does not exist.


Processing Batches:   0%|          | 0/3430 [00:00<?, ?it/s]

In [None]:
import os
import json
import numpy as np
from PIL import Image
from mtcnn import MTCNN
import shutil
from tqdm.notebook import tqdm

# Initialize paths
# NOTE(review): the fake split is read from a nested "train_fake/fake" folder;
# confirm the real images sit directly in "train_real", since the listing
# below does not descend into sub-directories.
root_path = '/kaggle/working'
data_dir = "/kaggle/input/deepfake/DFWILD/train_real"
output_file = "/kaggle/working/faces_data_train_real.json"

# Remove any output left over from a previous run so this cell starts clean
if os.path.exists(output_file):
    os.remove(output_file)
    print(f"{output_file} has been deleted successfully.")
else:
    print(f"{output_file} does not exist.")


# Initialize detector on device "GPU:0"
detector = MTCNN(device="GPU:0")

# Prepare results dictionary and collect the real-image paths.
# os.listdir returns entries in arbitrary order, so the listing is sorted to
# make batch composition and the JSON key order reproducible across runs.
# Extensions are compared case-insensitively so files such as "x.JPG" are kept.
final_results = {}
images = [
    os.path.join(data_dir, f)
    for f in sorted(os.listdir(data_dir))
    if f.lower().endswith((".png", ".jpg", ".jpeg"))
]

# Define batch size
batch_size = 64  # Adjust batch size based on available resources

# Helper function to process a batch
def process_batch(image_names):
    """Detect faces in a batch of images and keep one detection per image.

    Args:
        image_names: List of image file paths. The whole list is handed to
            ``detector.detect_faces`` in a single call.

    Returns:
        dict: Maps each image path that produced at least one detection to a
        record with keys ``'box'`` (list of ints), ``'confidence'`` (float)
        and ``'keypoints'`` (landmark name -> list of ints). Images with no
        detection are omitted.

    Raises:
        KeyError: If a detection lacks ``'box'``, ``'confidence'``,
            ``'keypoints'`` or one of the five landmark names.
    """
    # Landmark names, in the order they are written to the output record.
    keypoint_names = ('nose', 'mouth_right', 'mouth_left', 'right_eye', 'left_eye')

    batch_results = {}
    results = detector.detect_faces(image_names)
    for image, result in zip(image_names, results):
        # Nothing detected for this image: leave it out of the output.
        if len(result) == 0:
            continue
        # Only the first detection is kept.
        # NOTE(review): confirm the detector's ordering if the most confident
        # face is the one that should be recorded.
        face = result[0]
        keypoints = face['keypoints']
        # Cast everything to built-in int/float so the record holds only plain
        # Python numbers (it is later written out with json.dump).
        batch_results[image] = {
            'box': [int(c) for c in face['box']],
            'confidence': float(face['confidence']),
            'keypoints': {
                name: [int(c) for c in keypoints[name]]
                for name in keypoint_names
            },
        }
    return batch_results

# Feed the image list through the detector one batch at a time, folding each
# batch's detections into the running final_results dictionary
batch_starts = range(0, len(images), batch_size)
for i in tqdm(batch_starts, desc="Processing Batches"):
    final_results.update(process_batch(images[i:i + batch_size]))

# Write the accumulated detections to disk as indented JSON
with open(output_file, "w") as f:
    json.dump(final_results, f, indent=4)

print(f"Results saved to {output_file}")