In [10]:
from google.colab import drive
import os

# Mounting opens a pop-up window that asks for permission to access Drive.
drive.mount('/content/drive')

# Everything for this project lives under one root folder in Google Drive.
base_dir = '/content/drive/MyDrive/Dataset-Skripsi'
dataset_dir, output_dir = (
    os.path.join(base_dir, sub_folder) for sub_folder in ('Dataset', 'Output')
)

# Create both folders when they do not exist yet.
for folder in (dataset_dir, output_dir):
    os.makedirs(folder, exist_ok=True)

print("Google Drive terhubung dan folder siap.")
for label, folder in (("Dataset", dataset_dir), ("Output", output_dir)):
    print(f"Folder {label}: {folder}")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Google Drive terhubung dan folder siap.
Folder Dataset: /content/drive/MyDrive/Dataset-Skripsi/Dataset
Folder Output: /content/drive/MyDrive/Dataset-Skripsi/Output


In [11]:
# Install MTCNN untuk deteksi wajah
!pip install mtcnn



In [12]:
# Deep learning (TensorFlow & Keras)
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, Conv2D, MaxPooling2D, Dropout
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Numeric and image processing
import numpy as np
import cv2  # OpenCV
import matplotlib.pyplot as plt

# System and file utilities
import os
import glob
import random

# Face detection (the mtcnn package installed by the pip cell above)
from mtcnn.mtcnn import MTCNN

# Model evaluation
from sklearn.metrics import classification_report, confusion_matrix

print("Semua library dasar berhasil di-impor.")

Semua library dasar berhasil di-impor.


In [13]:
# Output root. Same value as built in the first cell; repeated here so this
# cell can be read on its own.
output_dir = '/content/drive/MyDrive/Dataset-Skripsi/Output'

# One sub-folder per data split: train, validation and test.
train_dir, val_dir, test_dir = (
    os.path.join(output_dir, split_name)
    for split_name in ('train', 'validation', 'test')
)

# Every split folder gets a REAL and a FAKE class folder.
for dir_path in (train_dir, val_dir, test_dir):
    for class_name in ('REAL', 'FAKE'):
        os.makedirs(os.path.join(dir_path, class_name), exist_ok=True)

print("Struktur folder train/validation/test telah dibuat di dalam Output.")

Struktur folder train/validation/test telah dibuat di dalam Output.


In [14]:
# Create the MTCNN face detector; the face-extraction code in a later cell
# reads it through the module-level name `detector`.
detector = MTCNN()
print("Detektor MTCNN siap digunakan.")

Detektor MTCNN siap digunakan.


In [15]:
import cv2
import os
from mtcnn.mtcnn import MTCNN
import glob

# Make sure a detector exists even when the cell that normally creates it
# has not been run in this session.
if 'detector' not in globals():
    detector = MTCNN()

# Input and output locations (same values as built in the first cell).
dataset_dir, output_dir = (
    '/content/drive/MyDrive/Dataset-Skripsi/Dataset',
    '/content/drive/MyDrive/Dataset-Skripsi/Output',
)

# Number of frames sampled per video. With 20, each video yields at most
# 20 face images. Tune this to the size of the dataset.
FRAMES_PER_VIDEO = 20

def extract_faces_from_video(video_path, output_folder, frames_to_extract):
    """Sample frames evenly from a video and save one cropped face per frame.

    Frames are visited at a fixed interval of
    ``total_frames // frames_to_extract`` (at least 1). Each visited frame is
    passed to the module-level MTCNN ``detector``. When at least one face is
    returned, the first detection is cropped from the original BGR frame,
    resized to 224x224 and written to ``output_folder`` as
    ``<video>_frame_<frame index>_face_<n>.jpg``.

    Args:
        video_path: Path of the video file to read.
        output_folder: Directory that receives the JPEG crops. It is created
            when it does not exist.
        frames_to_extract: Maximum number of face images to save for this
            video. Values <= 0 save nothing.

    Returns:
        None. An unreadable video, a video without frames, or a failed image
        write is reported with ``print`` instead of raising.
    """
    video_capture = cv2.VideoCapture(video_path)
    try:
        # Video could not be opened.
        if not video_capture.isOpened():
            print(f"Error: Tidak bisa membuka video {video_path}")
            return

        total_frames = int(video_capture.get(cv2.CAP_PROP_FRAME_COUNT))
        if total_frames <= 0:
            print(f"Error: Video {video_path} tidak memiliki frame.")
            return

        # Nothing requested; this also prevents a division by zero below.
        if frames_to_extract <= 0:
            return

        # cv2.imwrite does not create missing directories.
        os.makedirs(output_folder, exist_ok=True)

        # splitext keeps dots inside the name ("a.b.mp4" -> "a.b"), so two
        # different videos cannot collapse onto the same output prefix.
        video_name = os.path.splitext(os.path.basename(video_path))[0]

        frame_interval = max(1, total_frames // frames_to_extract)
        saved_face_count = 0

        for frame_index in range(0, total_frames, frame_interval):
            if saved_face_count >= frames_to_extract:
                break

            # Seek to the wanted frame, then decode it.
            video_capture.set(cv2.CAP_PROP_POS_FRAMES, frame_index)
            success, frame = video_capture.read()
            if not success:
                # Unreadable frame: move on to the next sampled position.
                continue

            # OpenCV delivers BGR; the detector is given RGB.
            frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            faces = detector.detect_faces(frame_rgb)
            if not faces:
                continue

            # Use the first detection.
            # NOTE(review): the original comment assumed the first entry is
            # usually the largest/clearest face; the ordering of MTCNN results
            # is not verified here.
            x, y, w, h = faces[0]['box']

            # Detected boxes can start at negative coordinates. Compute the
            # far edges BEFORE clamping so the crop does not shift or grow,
            # and clamp to the frame size.
            frame_height, frame_width = frame.shape[:2]
            left, top = max(0, x), max(0, y)
            right, bottom = min(frame_width, x + w), min(frame_height, y + h)
            if right <= left or bottom <= top:
                # Degenerate box: an empty crop would make cv2.resize raise.
                continue

            # Crop from the ORIGINAL (BGR) frame so imwrite stores the
            # correct colours, then resize to the fixed 224x224 size.
            face_image = frame[top:bottom, left:right]
            face_image_resized = cv2.resize(face_image, (224, 224))

            output_filename = os.path.join(
                output_folder,
                f"{video_name}_frame_{frame_index}_face_{saved_face_count}.jpg",
            )

            # imwrite reports failure through its return value; count only
            # images that were actually written.
            if cv2.imwrite(output_filename, face_image_resized):
                saved_face_count += 1
            else:
                print(f"Error: Gagal menyimpan gambar {output_filename}")
    finally:
        # Always free the capture handle, including on the early returns.
        video_capture.release()

print("Fungsi extract_faces_from_video() telah didefinisikan.")

Fungsi extract_faces_from_video() telah didefinisikan.


In [16]:
import random

# Split ratios. The test set receives whatever remains after the train and
# validation slices are taken, so the three values must add up to 1.
TRAIN_RATIO = 0.7
VAL_RATIO = 0.15
TEST_RATIO = 0.15
if abs(TRAIN_RATIO + VAL_RATIO + TEST_RATIO - 1.0) > 1e-9:
    raise ValueError("Jumlah TRAIN_RATIO + VAL_RATIO + TEST_RATIO harus sama dengan 1.")

# Fixed seed so that re-running this cell assigns every video to the same
# split. Face crops from earlier runs are never deleted, so a different
# shuffle on a re-run could leave faces of one video in more than one split.
SPLIT_SEED = 42

# Output folders per split (same layout as created in the folder-setup cell).
train_dir = os.path.join(output_dir, 'train')
val_dir = os.path.join(output_dir, 'validation')
test_dir = os.path.join(output_dir, 'test')

# Input folders (Dataset), one per class.
real_videos_path = os.path.join(dataset_dir, 'REAL')
fake_videos_path = os.path.join(dataset_dir, 'FAKE')

# Process the REAL class, then the FAKE class.
for (video_type, source_path) in [("REAL", real_videos_path), ("FAKE", fake_videos_path)]:
    print(f"\nMemulai memproses video kategori: {video_type}...")

    # glob gives no ordering guarantee; sort first so the seeded shuffle
    # below produces the same order on every run.
    video_files = sorted(glob.glob(os.path.join(source_path, "*.mp4")))

    if not video_files:
        print(f"PERINGATAN: Tidak ada file .mp4 ditemukan di {source_path}")
        continue

    # A fresh generator per class keeps each class's split independent of
    # whether the other class had any files.
    random.Random(SPLIT_SEED).shuffle(video_files)

    # Number of videos per split (int() truncates; the remainder goes to test).
    total_videos = len(video_files)
    train_count = int(total_videos * TRAIN_RATIO)
    val_count = int(total_videos * VAL_RATIO)

    # Split at video level so frames of one video never span two splits.
    train_videos = video_files[:train_count]
    val_videos = video_files[train_count : train_count + val_count]
    test_videos = video_files[train_count + val_count:]

    print(f"Total video {video_type}: {total_videos}")
    print(f"Data latih (train): {len(train_videos)} video")
    print(f"Data validasi (val): {len(val_videos)} video")
    print(f"Data uji (test): {len(test_videos)} video")

    # Extract faces for each split into <split folder>/<class>.
    for split_label, split_dir, split_videos in [
        ("TRAIN", train_dir, train_videos),
        ("VALIDATION", val_dir, val_videos),
        ("TEST", test_dir, test_videos),
    ]:
        print(f"Memproses {video_type} untuk {split_label}...")
        split_output_folder = os.path.join(split_dir, video_type)
        for video_file in split_videos:
            extract_faces_from_video(video_file, split_output_folder, FRAMES_PER_VIDEO)

print("\n--- PROSES EKSTRAKSI SELESAI ---")


Memulai memproses video kategori: REAL...
PERINGATAN: Tidak ada file .mp4 ditemukan di /content/drive/MyDrive/Dataset-Skripsi/Dataset/REAL

Memulai memproses video kategori: FAKE...
PERINGATAN: Tidak ada file .mp4 ditemukan di /content/drive/MyDrive/Dataset-Skripsi/Dataset/FAKE

--- PROSES EKSTRAKSI SELESAI ---
