In [None]:
from pathlib import Path

# Direct-download links for the two video archives.
# NOTE(review): both links carry `uuid`/`at` query parameters that look
# session-specific - confirm they are still valid before relying on them.
URL_ORIG = "https://drive.usercontent.google.com/download?id=1StM28VCyWsO_MQnxNn5lNm7xugqq-FDs&export=download&authuser=0&confirm=t&uuid=e073f0ba-18a3-4d64-bb61-2f7aabe5aadc&at=AKSUxGPlb1TnAwAeu18-3oUtk25f%3A1762002831851"
URL_FAKE = "https://drive.usercontent.google.com/download?id=1YIRG3BMOCvSNYqN5llRe7EUZn0yMq4GI&export=download&authuser=1&confirm=t&uuid=9b598e74-500e-4b68-be6e-2a190884ece7&at=AKSUxGPSeC79xkvr9ZU-X2SzXtIV%3A1762003073715"

# Cap on the number of videos extracted per class (keeps the run light).
LIMIT_REAL = 10
LIMIT_FAKE = 10

# Target directories: ROOT receives the videos, OUT_DIR the derived project data.
ROOT    = "/content/FFPP_LOCAL"
OUT_DIR = "/content/project"

# Create both directories up front; a no-op when they already exist.
for p in (ROOT, OUT_DIR):
    Path(p).mkdir(parents=True, exist_ok=True)
print("ROOT:", ROOT, "| OUT_DIR:", OUT_DIR)
print("LIMIT_REAL:", LIMIT_REAL, "| LIMIT_FAKE:", LIMIT_FAKE)


In [None]:
%%bash -s "$URL_ORIG" "$URL_FAKE" "$LIMIT_REAL" "$LIMIT_FAKE"
set -euo pipefail
# Download the two archives, unpack only the first N .mp4 entries of each, and
# copy them into the directory layout under $ROOT that the later cells read.
# Positional arguments (supplied on the %%bash -s line):
#   $1 / $2 = archive URLs (real / fake), $3 / $4 = per-class video limits.
TMP="/content/_zip_tmp"
ROOT="/content/FFPP_LOCAL"
URL_ORIG="$1"
URL_FAKE="$2"
LIM_REAL="$3"
LIM_FAKE="$4"

REAL_DIR="$ROOT/original_sequences/youtube/c23/videos"
FAKE_DIR="$ROOT/manipulated_sequences/Deepfakes/c23/videos"

mkdir -p "$TMP" "$REAL_DIR" "$FAKE_DIR"

# fetch URL DEST: download URL to DEST and make sure the result is a ZIP whose
# listing can be read. A failed/expired link can still produce a file (e.g. an
# HTML page), which would otherwise only surface later as "no videos found".
fetch() {
  local url="$1" dest="$2"
  wget -O "$dest" --no-verbose --timeout=180 "$url"
  if ! unzip -Z1 "$dest" >/dev/null 2>&1; then
    echo "ERROR: $dest is not a readable ZIP archive (check the download URL)." >&2
    exit 1
  fi
}

# extract_some ZIP DEST LIMIT: unpack at most LIMIT .mp4 entries of ZIP into a
# freshly emptied DEST. A failure on a single entry is reported, not fatal.
extract_some() {
  local zip="$1" dest="$2" limit="$3" entry
  local -a entries=()
  rm -rf "$dest"; mkdir -p "$dest"
  mapfile -t entries < <(unzip -Z1 "$zip" | grep -i '\.mp4$' | head -n "$limit")
  if [ "${#entries[@]}" -eq 0 ]; then
    echo "WARNING: no .mp4 entries selected from $zip" >&2
    return 0
  fi
  for entry in "${entries[@]}"; do
    unzip -o "$zip" "$entry" -d "$dest" >/dev/null \
      || echo "WARNING: could not extract '$entry' from $zip" >&2
  done
}

echo "[Download]"
fetch "$URL_ORIG" "$TMP/original.zip"
fetch "$URL_FAKE" "$TMP/deepfakes.zip"

echo "[Selective unzip REAL]"
extract_some "$TMP/original.zip" "$TMP/original" "$LIM_REAL"

echo "[Selective unzip FAKE]"
extract_some "$TMP/deepfakes.zip" "$TMP/deepfakes" "$LIM_FAKE"

echo "[Move MP4s -> FF++]"
# -iname already matches case-insensitively; cp -n never overwrites an existing file.
find "$TMP/original"  -type f -iname "*.mp4" -exec cp -n "{}" "$REAL_DIR"/ \;
find "$TMP/deepfakes" -type f -iname "*.mp4" -exec cp -n "{}" "$FAKE_DIR"/ \;

echo "[Counts]"
# Count only video files so stray entries do not inflate the numbers.
echo "real: $(find "$REAL_DIR" -maxdepth 1 -type f -iname '*.mp4' | wc -l)"
echo "fake: $(find "$FAKE_DIR" -maxdepth 1 -type f -iname '*.mp4' | wc -l)"

In [None]:
import glob
ROOT = "/content/FFPP_LOCAL"
# Count the extracted videos of each class; both classes must be present
# before the pipeline can continue.
n_real, n_fake = (
    len(glob.glob(pattern))
    for pattern in (
        f"{ROOT}/original_sequences/youtube/c23/videos/*.mp4",
        f"{ROOT}/manipulated_sequences/Deepfakes/c23/videos/*.mp4",
    )
)
print("real:", n_real, "| fake:", n_fake)
assert n_real > 0 and n_fake > 0, "Video real/fake tidak ditemukan (cek URL/ZIP atau LIMIT terlalu kecil)."


In [None]:
!pip install -U ultralytics opencv-python scikit-learn tensorflow


In [None]:
%%writefile prepare_data.py
import os, glob, random, cv2
from pathlib import Path
from ultralytics import YOLO
from sklearn.model_selection import train_test_split

# Lightweight settings
FPS_SAMPLE = 1               # frames kept per second of video (sparse sampling)
IMG_SIZE = (299, 299)        # size every face crop is resized to
CONF_THRES = 0.25            # confidence threshold passed to the detector
MAX_CROPS_PER_VIDEO = 20     # upper bound on crops collected from one video
random.seed(42)              # fixed seed for the frame shuffle in crop_faces

def _yolo():
    """Load the detector used for face cropping.

    Prefers a local ``yolov8n-face.pt`` checkpoint in the working directory.
    When that file is absent the generic ``yolov8n.pt`` checkpoint is loaded
    instead and a warning is printed, because that checkpoint is a general
    object detector and the resulting crops are then not guaranteed to be faces.

    Returns:
        The loaded ``YOLO`` model.
    """
    if Path("yolov8n-face.pt").exists():
        return YOLO("yolov8n-face.pt")
    print("[warn] yolov8n-face.pt not found; falling back to yolov8n.pt, "
          "a generic object detector - crops may not be faces.")
    return YOLO("yolov8n.pt")

def extract_frames(vpath:Path, out_dir:Path, fps_sample=FPS_SAMPLE):
    """Save roughly ``fps_sample`` frames per second of a video as JPEG files.

    Args:
        vpath: path of the video file to read.
        out_dir: directory that receives ``frame_<index>.jpg`` files; created
            (with parents) if it does not exist.
        fps_sample: target number of saved frames per second of video.

    Returns:
        Number of frames actually written to disk; 0 when the video cannot be
        opened.
    """
    out_dir.mkdir(parents=True, exist_ok=True)
    cap=cv2.VideoCapture(str(vpath))
    try:
        if not cap.isOpened(): return 0
        fps=cap.get(cv2.CAP_PROP_FPS)
        # Fall back to 30 when no usable rate is reported (0, negative or NaN;
        # `not fps > 0` is also true for NaN).
        if not fps > 0: fps=30
        # Keep one frame out of every `interval` decoded frames.
        interval=max(1, round(fps/fps_sample))
        i=saved=0
        while True:
            ret,frame=cap.read()
            if not ret: break
            # imwrite returns False on failure; only count frames that were written.
            if i%interval==0 and cv2.imwrite(str(out_dir/f"frame_{i:06d}.jpg"), frame):
                saved+=1
            i+=1
        return saved
    finally:
        # Release the capture even if reading or writing raised.
        cap.release()

def crop_faces(frames_dir:Path, yolo, conf=CONF_THRES, max_per_video=MAX_CROPS_PER_VIDEO):
    """Collect resized detection crops from the JPEG frames of one video.

    Frames are visited in shuffled order; every box returned by the detector
    is cut out of the frame and resized to ``IMG_SIZE``. Collection stops as
    soon as ``max_per_video`` crops have been gathered.

    Args:
        frames_dir: directory containing the ``*.jpg`` frames.
        yolo: detector; called as ``yolo(img, conf=..., verbose=False)`` and
            expected to return results whose first item has ``.boxes.xyxy``.
        conf: confidence threshold forwarded to the detector.
        max_per_video: number of crops at which collection stops.

    Returns:
        List of cropped images (arrays as produced by ``cv2.resize``).
    """
    frame_paths = sorted(frames_dir.glob("*.jpg"))
    random.shuffle(frame_paths)
    collected = []
    for frame_path in frame_paths:
        image = cv2.imread(str(frame_path))
        if image is None:
            # Unreadable frame file: skip it.
            continue
        detection = yolo(image, conf=conf, verbose=False)[0]
        for box in detection.boxes.xyxy.cpu().numpy():
            left, top, right, bottom = (int(v) for v in box[:4])
            region = image[top:bottom, left:right]
            if region.size == 0:
                # Degenerate box with no pixels.
                continue
            collected.append(
                cv2.resize(region, IMG_SIZE, interpolation=cv2.INTER_LANCZOS4)
            )
            if len(collected) >= max_per_video:
                return collected
    return collected

def run(root:str, out:str):
    """Build the train/val/test crop dataset from the downloaded videos.

    Videos are read from ``<root>/original_sequences/youtube/c23/videos``
    (real) and ``<root>/manipulated_sequences/Deepfakes/c23/videos`` (fake).
    Each class is split per video (30% held out, then halved into val/test).
    Frames go to ``<out>/data_frames/<class>/<video id>`` (extraction is
    skipped when JPEGs already exist there) and crops are written to
    ``<out>/data_crops/<split>/<class>/<video id>_<index>.jpg``.

    Args:
        root: directory holding the video tree described above.
        out: directory that receives ``data_frames`` and ``data_crops``.

    Raises:
        AssertionError: if either class has no ``*.mp4`` file.

    Note:
        ``train_test_split`` raises if a class has too few videos to fill all
        three splits.
    """
    root=Path(root); out=Path(out)
    frames_dir=out/"data_frames"; crops_dir=out/"data_crops"
    for sp in ("train","val","test"):
        for c in ("real","fake"):
            (crops_dir/sp/c).mkdir(parents=True, exist_ok=True)

    # glob() yields files in filesystem order; sorting first keeps the seeded
    # split below identical from one machine/run to the next.
    real_videos=sorted((root/"original_sequences/youtube/c23/videos").glob("*.mp4"))
    fake_videos=sorted((root/"manipulated_sequences/Deepfakes/c23/videos").glob("*.mp4"))
    real = [(p, p.stem) for p in real_videos]
    fake = [(p, "Deepfakes_"+p.stem) for p in fake_videos]
    assert real and fake, "Video real/fake tidak ditemukan."

    def split(ids):
        """Map every video id to "train", "val" or "test"."""
        tr,tmp=train_test_split(ids, test_size=0.3, random_state=42)
        va,te=train_test_split(tmp, test_size=0.5, random_state=42)
        assignment={vid:"test" for vid in te}
        assignment.update((vid,"val") for vid in va)
        assignment.update((vid,"train") for vid in tr)
        return assignment
    split_of={"real":split([vid for _,vid in real]),
              "fake":split([vid for _,vid in fake])}

    yolo=_yolo()
    counts={"train":0,"val":0,"test":0}
    for name,lst in [("real",real),("fake",fake)]:
        for vpath,vid in lst:
            sp=split_of[name][vid]
            fdir=frames_dir/name/vid
            # Reuse frames from an earlier run when any are present.
            if not fdir.exists() or not any(fdir.glob("*.jpg")):
                n=extract_frames(vpath,fdir,FPS_SAMPLE); print(f"[extract] {vpath.name} -> {n} frames")
            faces=crop_faces(fdir,yolo,CONF_THRES,MAX_CROPS_PER_VIDEO)
            written=0
            for i,im in enumerate(faces):
                # imwrite returns False on failure; count only files that were written.
                if cv2.imwrite(str((crops_dir/sp/name/f"{vid}_{i:03d}.jpg")), im):
                    written+=1
            counts[sp]+=written; print(f"[{sp}/{name}] {vid}: {written} crops")
    print("Selesai. Counts:", counts)


In [None]:
import importlib
import prepare_data
# %%writefile only rewrites prepare_data.py on disk. Reload so that re-running
# this cell uses the current file rather than the module object cached by an
# earlier import in the same kernel.
prepare_data = importlib.reload(prepare_data)
prepare_data.run(root="/content/FFPP_LOCAL", out="/content/project")


In [None]:
import os, glob
base = "/content/project/data_crops"
print("data_crops exists:", os.path.isdir(base))
# One line per (split, class) pair with the number of JPEG crops found there.
for sp, c in [(s, k) for s in ["train","val","test"] for k in ["real","fake"]]:
    print(f"{sp:5s} {c:5s} ->", len(glob.glob(f"{base}/{sp}/{c}/*.jpg")))


In [None]:
%%writefile train_xception_tf.py
import tensorflow as tf, os
from tensorflow.keras import layers, models
# Pastikan Anda mengimpor preprocess_input yang benar
from tensorflow.keras.applications.xception import Xception, preprocess_input
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau

# --- 1. CONFIGURATION AND SETUP ---
# BATCH: 32 is a good starting point. EPOCHS: 100
IMG_SIZE = (299, 299)
BATCH = 32
EPOCHS = 100
# Both directories can be overridden through environment variables.
DATA_DIR = os.environ.get("DATA_DIR", "/content/project/data_crops")
MODEL_DIR = os.environ.get("MODEL_DIR", "/content/project/models")
os.makedirs(MODEL_DIR, exist_ok=True)

# --- 2. DATASET FUNCTION ---
def ds(split, shuffle=True):
    """Build a batched, prefetched tf.data pipeline for one dataset split.

    Args:
        split: sub-directory of DATA_DIR to read (e.g. "train", "val", "test").
        shuffle: forwarded to ``image_dataset_from_directory``.

    Returns:
        Dataset yielding (float32 image batch, binary label batch) pairs.

    NOTE(review): no ``class_names`` is passed, so per the Keras docs label
    indices follow the alphanumeric order of the class sub-directories -
    confirm which class the model's sigmoid output refers to before
    interpreting predictions.
    """
    images = tf.keras.utils.image_dataset_from_directory(
        f"{DATA_DIR}/{split}",
        image_size=IMG_SIZE,
        batch_size=BATCH,
        label_mode="binary",
        shuffle=shuffle,
        interpolation="bilinear",
    )

    def _as_float(x, y):
        # Make sure images are float32 before augmentation/preprocessing.
        return tf.cast(x, tf.float32), y

    return images.map(_as_float).prefetch(tf.data.AUTOTUNE)

tr = ds("train")
va = ds("val", False)
te = ds("test", False)

# --- 3. MODEL CONSTRUCTION ---

# Augmentation stack: geometric transforms plus lighting variation
# (brightness is configured for pixel values in the 0-255 range).
data_augmentation = tf.keras.Sequential(
    [
        layers.RandomFlip("horizontal"),
        layers.RandomRotation(0.1),
        layers.RandomZoom(0.1),
        layers.RandomBrightness(0.1, value_range=(0, 255)),
        layers.RandomContrast(0.1),
    ],
    name="data_augmentation",
)

input_shape = IMG_SIZE + (3,)

# ImageNet-pretrained Xception without its classification head.
# base.trainable is set separately by each training phase.
base = Xception(weights="imagenet", include_top=False, input_shape=input_shape)

inp = layers.Input(shape=input_shape)
augmented = data_augmentation(inp)                   # augmentation
scaled = preprocess_input(augmented)                 # Xception preprocessing, part of the model graph
features = base(scaled, training=False)              # base is always called with training=False
pooled = layers.GlobalAveragePooling2D()(features)
regularized = layers.Dropout(0.5)(pooled)
out = layers.Dense(1, activation="sigmoid")(regularized)

model = models.Model(inp, out)

# --- 4. CALLBACKS ---

# All three callbacks track validation AUC, where higher is better. Each one
# states mode="max" explicitly instead of relying on the "auto" default to
# infer the direction from the metric name.
ck=ModelCheckpoint(f"{MODEL_DIR}/xception_deepfake_best.keras", save_best_only=True, monitor="val_auc", mode="max")
es=EarlyStopping(patience=10, restore_best_weights=True, monitor="val_auc", mode="max")
# LR scheduler: halve the learning rate after 5 epochs without val_auc improvement.
reduce_lr = ReduceLROnPlateau(monitor='val_auc', mode="max", factor=0.5, patience=5, min_lr=1e-6)

# --- 5. TRAINING PHASE 1: train the head with the base frozen ---
print("--- STARTING PHASE 1: Training Head (Freeze) ---")

# Freeze Xception so only the layers on top of it receive updates.
base.trainable = False

phase1_optimizer = tf.keras.optimizers.Adam(1e-4)
phase1_metrics = [tf.keras.metrics.AUC(name="auc"), "accuracy"]
model.compile(
    optimizer=phase1_optimizer,
    loss="binary_crossentropy",
    metrics=phase1_metrics,
)

# Up to 10 epochs for the head (early stopping may end the phase sooner).
history_phase1 = model.fit(
    tr,
    validation_data=va,
    epochs=10,
    callbacks=[ck, es, reduce_lr],
)

# --- 6. TRAINING PHASE 2: fine-tuning with the base unfrozen ---
# This phase always runs, whether or not early stopping ended phase 1.
print("\n--- STARTING PHASE 2: Fine Tuning Base Model (Unfreeze) ---")

# Unfreeze Xception and recompile with a much smaller learning rate (1e-5).
base.trainable = True
phase2_optimizer = tf.keras.optimizers.Adam(1e-5)
phase2_metrics = [tf.keras.metrics.AUC(name="auc"), "accuracy"]
model.compile(
    optimizer=phase2_optimizer,
    loss="binary_crossentropy",
    metrics=phase2_metrics,
)

# Continue epoch numbering from the number of epochs phase 1 actually ran.
initial_epoch = len(history_phase1.history['loss'])
model.fit(
    tr,
    validation_data=va,
    epochs=EPOCHS,
    initial_epoch=initial_epoch,
    callbacks=[ck, es, reduce_lr],
)

# --- 7. EVALUATION AND FINAL SAVE ---

# Reload the best checkpoint written by ModelCheckpoint.
best_model = tf.keras.models.load_model(f"{MODEL_DIR}/xception_deepfake_best.keras")

# Report the metrics of that checkpoint on the test split.
test_metrics = best_model.evaluate(te, return_dict=True)
print("TEST:", test_metrics)

# Store the best model under its final name, in .keras format.
best_model.save(f"{MODEL_DIR}/xception_deepfake_final.keras")

In [None]:
import os
os.environ["DATA_DIR"] = "/content/project/data_crops"
os.environ["MODEL_DIR"] = "/content/project/models"

%run -i train_xception_tf.py


In [None]:
import tensorflow as tf, numpy as np, cv2, glob, os
from tensorflow.keras.applications.xception import preprocess_input

# Final model written by train_xception_tf.py (it saves
# xception_deepfake_final.keras; no .h5 file is produced).
MODEL="/content/project/models/xception_deepfake_final.keras"
assert os.path.exists(MODEL), "Model belum ada. Pastikan training selesai."
model=tf.keras.models.load_model(MODEL)

cands=glob.glob("/content/project/data_crops/test/real/*.jpg")+glob.glob("/content/project/data_crops/test/fake/*.jpg")
assert cands, "Tidak ada sampel test."
img_path=cands[0]
img=cv2.imread(img_path)
assert img is not None, f"Gagal membaca gambar: {img_path}"
img=img[:,:,::-1]; img=cv2.resize(img,(299,299))   # BGR -> RGB, then model input size
# The saved model already applies Xception's preprocess_input internally, so
# it must be fed raw 0-255 pixel values; preprocessing here would scale twice.
x=np.expand_dims(img.astype(np.float32),0)
# Training labels come from image_dataset_from_directory without class_names,
# i.e. alphanumeric directory order: fake=0, real=1. The sigmoid output is
# therefore P(real); convert it to P(fake) for reporting.
p_real=float(model.predict(x, verbose=0)[0][0])
p=1.0-p_real
print(os.path.basename(img_path), "| fake_prob:", round(p,4), "| pred:", "FAKE" if p>=0.5 else "REAL")
