In [10]:
from pathlib import Path
import zipfile
import random
import tensorflow as tf
import os
from PIL import Image, UnidentifiedImageError
import numpy as np

#Funkcja do rozpakowania plików zip do nowej ścieżki
def unzip_file(zip_path: Path, extract_path: Path) -> Path:
    extract_path.mkdir(parents=True, exist_ok=True)
    # Jeśli katalog nie jest pusty, zakładamy, że już rozpakowane
    if any(extract_path.iterdir()):
        return extract_path

    with zipfile.ZipFile(zip_path, "r") as z:
        z.extractall(extract_path)
    return extract_path



In [11]:
#https://www.kaggle.com/datasets/ayushmandatta1/deepdetect-2025/data
# Archive-> ddata-> {test,train}-> {real,fake} 
#--------------------------------------------------------
#https://www.kaggle.com/datasets/manjilkarki/deepfake-and-real-images
# Dataset-> {test,train,validation}->{real,fake}
#--------------------------------------------------------
#https://www.kaggle.com/datasets/saurabhbagchi/deepfake-image-detection
#indian_deepfakes-> {}
#_____________________________________________________________________

#Ścieżki do danych w .zip
zip_path_A = Path("Dane/archive.zip")
zip_path_B = Path("Dane/Dataset.zip")
zip_path_C = Path("Dane/indian_deepfakes.zip")

#Nowe ścieżki do rozpakowania zip
extract_path_A = Path("Dane/archive_extract")
extract_path_B = Path("Dane/Dataset_extract")
extract_path_C = Path("Dane/indian_deepfakes_extract")

#Rozpakowanie
unzip_file(zip_path_A,extract_path_A)
unzip_file(zip_path_B,extract_path_B)
unzip_file(zip_path_C,extract_path_C)

train_path_A = extract_path_A / "ddata" / "train"
test_path_A = extract_path_A / "ddata" / "test"

train_path_B = extract_path_B / "Train"
test_path_B = extract_path_B / "Test"

train_path_C = extract_path_C /"indian_deepfakes" /"train"
test_path_C = extract_path_C / "indian_deepfakes"/"test" / "test"

In [12]:
#Akceptowane formaty plików
EXTS = {".jpg",".jpeg",".png",".bmp",".webp"}
LABEL_MAP = {"fake": 0.0, "real": 1.0}

#Sprawdzanie czy tensorflow potrafi poprawnie odczytać zdjęcie
def tf_ok(p: Path) -> bool:
    try:
        b = tf.io.read_file(str(p))
        _ = tf.io.decode_image(b, channels=3, expand_animations=False)
        return True
    except (tf.errors.InvalidArgumentError, tf.errors.DataLossError, tf.errors.NotFoundError):
        return False

#przetworzenie plików ze ścieżek w listy poprawnych zdjęć i ich etykiet oraz odpadów
def collect_good(root: Path, fake_dir="fake", real_dir="real"):
    fake_folder = root / fake_dir
    real_folder = root / real_dir
    if not fake_folder.exists() or not real_folder.exists():
        raise FileNotFoundError(f"Expected {fake_folder} and {real_folder}")

    paths, labels, bad = [], [], 0
    for folder, lab in [(fake_folder, LABEL_MAP["fake"]), (real_folder, LABEL_MAP["real"])]:
        for p in folder.rglob("*"):
            if p.is_file() and p.suffix.lower() in EXTS:
                if tf_ok(p):
                    paths.append(str(p))
                    labels.append(lab)
                else:
                    bad += 1
    return paths, labels, bad


#Listy train/test i odpady
train_dir_A, train_labels_A, bad_train_A = collect_good(train_path_A)
test_dir_A,  test_labels_A,  bad_test_A  = collect_good(test_path_A)

train_dir_B, train_labels_B, bad_train_B = collect_good(train_path_B, "Fake", "Real")
test_dir_B,  test_labels_B,  bad_test_B  = collect_good(test_path_B, "Fake", "Real")

train_dir_C, train_labels_C, bad_train_C = collect_good(train_path_C)
test_dir_C,  test_labels_C,  bad_test_C  = collect_good(test_path_C)

#Raport ile odpadów
print("Bad A train/test:", bad_train_A, bad_test_A)
print("Bad B train/test:", bad_train_B, bad_test_B)
print("Bad C train/test:", bad_train_C, bad_test_C)


Bad A train/test: 2 0
Bad B train/test: 0 0
Bad C train/test: 0 0


Bad A train/test: 2 0
Bad B train/test: 0 0
Bad C train/test: 0 0

In [13]:
#Deklarowanie parametrów
IMG_SIZE=(256,256);
BATCH=32;
SEED=42;
VAL_SPLIT=0.2
AUTOTUNE=tf.data.AUTOTUNE #Wbudowana optymalizacja CPU/GPU dla tensorflow

#Tworzenie tensorflow dataset do szybkiego czytania obrazów 
def make_ds(paths, labels, training: bool):
    ds = tf.data.Dataset.from_tensor_slices((paths, labels))
    #Tasowanie danych dla treningu
    if training:
        ds = ds.shuffle(min(len(paths), 20000), seed=SEED, reshuffle_each_iteration=True)

        
    def load(path, label):
        #Dekoduje obraz ze ścieżki w RGB i zamraża GIF
        img = tf.io.decode_image(tf.io.read_file(path), channels=3, expand_animations=False)
        #Resize do 256x256
        img = tf.image.resize(img, IMG_SIZE)
        #konwersja na float na potrzeby uczenia mdelu
        img = tf.cast(img, tf.float32)  # keep 0..255 for EfficientNet preprocess_input
        #Reshpe dla Dense(1, sigmoid) i loss binary_crossentropy
        return img, tf.reshape(label, (1,))
    #Wielowątkowe ładowanie dla batcha po 32 obrazy
    return ds.map(load, num_parallel_calls=AUTOTUNE).batch(BATCH).prefetch(AUTOTUNE)

#Split na dane treningowe i walidacyjne
def split_train_val(paths, labels):
    n = len(paths)
    idx = tf.random.shuffle(tf.range(n), seed=SEED).numpy()
    cut = int(n*(1-VAL_SPLIT))
    tr, va = idx[:cut], idx[cut:]
    tr_paths  = [paths[i] for i in tr]; tr_labels = [labels[i] for i in tr]
    va_paths  = [paths[i] for i in va]; va_labels = [labels[i] for i in va]
    return tr_paths, tr_labels, va_paths, va_labels

#Tworzenie folda na potrzeby LODO
def build_fold(train_pairs, test_pair):
    # train_pairs: list of (train_paths, train_labels) for 2 datasets
    # test_pair:   (test_paths, test_labels) for held-out dataset
    train_paths = sum([p for p,_ in train_pairs], [])
    train_labels= sum([y for _,y in train_pairs], [])

    trp, try_, vap, vay = split_train_val(train_paths, train_labels)

    train_ds = make_ds(trp, try_, True)
    val_ds   = make_ds(vap, vay, False)

    test_paths, test_labels = test_pair
    test_ds  = make_ds(test_paths, test_labels, False)

    return train_ds, val_ds, test_ds


In [14]:
from tensorflow.keras.applications.efficientnet import EfficientNetB0, preprocess_input
from sklearn.metrics import roc_auc_score, balanced_accuracy_score, f1_score, precision_score, recall_score
import numpy as np

def probs_and_labels(ds, model):
    y_true = np.concatenate([y.numpy().reshape(-1) for _, y in ds], axis=0).astype(int)
    y_prob = model.predict(ds, verbose=0).reshape(-1)
    return y_true, y_prob

def best_threshold_on_val(model, val_ds):
    yv, pv = probs_and_labels(val_ds, model)
    ths = np.linspace(0.05, 0.95, 91)
    best = {"t": 0.5, "f1": -1}
    for t in ths:
        pred = (pv >= t).astype(int)
        f1 = f1_score(yv, pred)
        if f1 > best["f1"]:
            best = {
                "t": float(t),
                "f1": float(f1),
                "precision": float(precision_score(yv, pred)),
                "recall": float(recall_score(yv, pred)),
                "bal_acc": float(balanced_accuracy_score(yv, pred)),
            }
    return best

def run_fold(train_ds, val_ds, test_ds):
    base = EfficientNetB0(include_top=False, weights="imagenet", input_shape=(256,256,3))
    base.trainable = False

    model = tf.keras.Sequential([
        tf.keras.layers.Input((256,256,3)),
        tf.keras.layers.Lambda(preprocess_input),
        base,
        tf.keras.layers.GlobalAveragePooling2D(),
        tf.keras.layers.Dropout(0.3),
        tf.keras.layers.Dense(1, activation="sigmoid")
    ])

    def compile_model(lr):
        model.compile(
            optimizer=tf.keras.optimizers.Adam(lr),
            loss="binary_crossentropy",
            metrics=[tf.keras.metrics.AUC(name="auc"),
                     tf.keras.metrics.BinaryAccuracy(name="acc", threshold=0.5)]
        )

    # stage 1
    compile_model(1e-3)
    model.fit(train_ds, validation_data=val_ds, epochs=3,
              callbacks=[tf.keras.callbacks.EarlyStopping(monitor="val_auc", mode="max", patience=1, restore_best_weights=True)])

    # stage 2 fine-tune
    base.trainable = True
    for layer in base.layers[:-30]:
        layer.trainable = False

    compile_model(1e-5)
    model.fit(train_ds, validation_data=val_ds, epochs=3,
              callbacks=[tf.keras.callbacks.EarlyStopping(monitor="val_auc", mode="max", patience=1, restore_best_weights=True)])

    # threshold on val
    th = best_threshold_on_val(model, val_ds)

    # test metrics
    yt, pt = probs_and_labels(test_ds, model)
    pred = (pt >= th["t"]).astype(int)

    return {
        "model": model,
        "auc": float(roc_auc_score(yt, pt)),
        "bal_acc": float(balanced_accuracy_score(yt, pred)),
        "f1": float(f1_score(yt, pred)),
        "precision": float(precision_score(yt, pred)),
        "recall": float(recall_score(yt, pred)),
        "thr": th
    }


In [15]:
import numpy as np
from sklearn.metrics import confusion_matrix

def get_y_true(ds):
    return np.concatenate([y.numpy().reshape(-1) for _, y in ds]).astype(int)

def get_y_prob(model, ds):
    return model.predict(ds, verbose=0).reshape(-1)

# Dataset units
A_train = (train_dir_A, train_labels_A)
A_test  = (test_dir_A,  test_labels_A)
B_train = (train_dir_B, train_labels_B)
B_test  = (test_dir_B,  test_labels_B)
C_train = (train_dir_C, train_labels_C)
C_test  = (test_dir_C,  test_labels_C)

folds = [
    ([A_train, B_train], C_test, "AB->C"),
    ([A_train, C_train], B_test, "AC->B"),
    ([B_train, C_train], A_test, "BC->A"),
]

results = []
for train_pairs, test_pair, name in folds:
    train_ds, val_ds, test_ds = build_fold(train_pairs, test_pair)

    r = run_fold(train_ds, val_ds, test_ds)   # <- r must include r["model"] and r["thr"]["t"] (or pick 0.5)
    r["fold"] = name

    model = r["model"]
    thr = r["thr"]["t"] if isinstance(r.get("thr"), dict) and "t" in r["thr"] else 0.5

    y_true = get_y_true(test_ds)
    y_prob = get_y_prob(model, test_ds)
    y_pred = (y_prob >= thr).astype(int)

    cm = confusion_matrix(y_true, y_pred, labels=[0,1])
    r["cm"] = cm

    results.append(r)

    print("\n", name, {k: r[k] for k in ["auc","bal_acc","f1","precision","recall"] if k in r})
    print("threshold:", thr)
    print("confusion matrix [[TN FP],[FN TP]]:\n", cm)

# average
for k in ["auc","bal_acc","f1","precision","recall"]:
    vals = [r[k] for r in results]
    print(k, "mean±std =", float(np.mean(vals)), "±", float(np.std(vals)))



Epoch 1/3
[1m5761/5761[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3270s[0m 566ms/step - acc: 0.7094 - auc: 0.7826 - loss: 0.5599 - val_acc: 0.7539 - val_auc: 0.8382 - val_loss: 0.5051
Epoch 2/3
[1m5761/5761[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3197s[0m 555ms/step - acc: 0.7229 - auc: 0.7982 - loss: 0.5442 - val_acc: 0.7608 - val_auc: 0.8423 - val_loss: 0.4973
Epoch 3/3
[1m5761/5761[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3211s[0m 557ms/step - acc: 0.7232 - auc: 0.7995 - loss: 0.5429 - val_acc: 0.7559 - val_auc: 0.8422 - val_loss: 0.4994
Epoch 1/3
[1m5761/5761[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3821s[0m 662ms/step - acc: 0.7797 - auc: 0.8646 - loss: 0.4569 - val_acc: 0.8823 - val_auc: 0.9525 - val_loss: 0.2865
Epoch 2/3
[1m5761/5761[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3740s[0m 649ms/step - acc: 0.8743 - auc: 0.9473 - loss: 0.2941 - val_acc: 0.9172 - val_auc: 0.9755 - val_loss: 0.2057
Epoch 3/3
[1m5761/5761[0m [32m━━━━━━━━━━━━━━━━━

Epoch 1/3
5761/5761 ━━━━━━━━━━━━━━━━━━━━ 3270s 566ms/step - acc: 0.7094 - auc: 0.7826 - loss: 0.5599 - val_acc: 0.7539 - val_auc: 0.8382 - val_loss: 0.5051
Epoch 2/3
5761/5761 ━━━━━━━━━━━━━━━━━━━━ 3197s 555ms/step - acc: 0.7229 - auc: 0.7982 - loss: 0.5442 - val_acc: 0.7608 - val_auc: 0.8423 - val_loss: 0.4973
Epoch 3/3
5761/5761 ━━━━━━━━━━━━━━━━━━━━ 3211s 557ms/step - acc: 0.7232 - auc: 0.7995 - loss: 0.5429 - val_acc: 0.7559 - val_auc: 0.8422 - val_loss: 0.4994
Epoch 1/3
5761/5761 ━━━━━━━━━━━━━━━━━━━━ 3821s 662ms/step - acc: 0.7797 - auc: 0.8646 - loss: 0.4569 - val_acc: 0.8823 - val_auc: 0.9525 - val_loss: 0.2865
Epoch 2/3
5761/5761 ━━━━━━━━━━━━━━━━━━━━ 3740s 649ms/step - acc: 0.8743 - auc: 0.9473 - loss: 0.2941 - val_acc: 0.9172 - val_auc: 0.9755 - val_loss: 0.2057
Epoch 3/3
5761/5761 ━━━━━━━━━━━━━━━━━━━━ 3767s 654ms/step - acc: 0.9037 - auc: 0.9670 - loss: 0.2331 - val_acc: 0.9338 - val_auc: 0.9833 - val_loss: 0.1685

 AB->C {'auc': 0.4099789670483758, 'bal_acc': 0.5068357092778687, 'f1': 0.3614457831325301, 'precision': 0.2229299363057325, 'recall': 0.9545454545454546}
threshold: 0.43999999999999995
confusion matrix [[TN FP],[FN TP]]:
 [[ 23 366]
 [  5 105]]
Epoch 1/3
2273/2273 ━━━━━━━━━━━━━━━━━━━━ 1279s 560ms/step - acc: 0.7757 - auc: 0.8587 - loss: 0.4707 - val_acc: 0.8243 - val_auc: 0.9117 - val_loss: 0.4005
Epoch 2/3
2273/2273 ━━━━━━━━━━━━━━━━━━━━ 1274s 560ms/step - acc: 0.8066 - auc: 0.8870 - loss: 0.4234 - val_acc: 0.8284 - val_auc: 0.9244 - val_loss: 0.3840
Epoch 3/3
2273/2273 ━━━━━━━━━━━━━━━━━━━━ 1282s 564ms/step - acc: 0.8098 - auc: 0.8901 - loss: 0.4176 - val_acc: 0.8467 - val_auc: 0.9243 - val_loss: 0.3627
Epoch 1/3
2273/2273 ━━━━━━━━━━━━━━━━━━━━ 1570s 686ms/step - acc: 0.8249 - auc: 0.9053 - loss: 0.3890 - val_acc: 0.9021 - val_auc: 0.9657 - val_loss: 0.2496
Epoch 2/3
2273/2273 ━━━━━━━━━━━━━━━━━━━━ 1538s 677ms/step - acc: 0.8882 - auc: 0.9564 - loss: 0.2694 - val_acc: 0.9313 - val_auc: 0.9814 - val_loss: 0.1841
Epoch 3/3
2273/2273 ━━━━━━━━━━━━━━━━━━━━ 1522s 670ms/step - acc: 0.9134 - auc: 0.9729 - loss: 0.2139 - val_acc: 0.9429 - val_auc: 0.9879 - val_loss: 0.1507

 AC->B {'auc': 0.5179217232017711, 'bal_acc': 0.5132683126820073, 'f1': 0.5412313271738192, 'precision': 0.50810635538262, 'recall': 0.5789765379641604}
threshold: 0.3499999999999999
confusion matrix [[TN FP],[FN TP]]:
 [[2458 3034]
 [2279 3134]]
Epoch 1/3
3512/3512 ━━━━━━━━━━━━━━━━━━━━ 2000s 568ms/step - acc: 0.7692 - auc: 0.8514 - loss: 0.4771 - val_acc: 0.8074 - val_auc: 0.8899 - val_loss: 0.4234
Epoch 2/3
3512/3512 ━━━━━━━━━━━━━━━━━━━━ 1959s 558ms/step - acc: 0.7842 - auc: 0.8665 - loss: 0.4549 - val_acc: 0.8115 - val_auc: 0.8940 - val_loss: 0.4184
Epoch 3/3
3512/3512 ━━━━━━━━━━━━━━━━━━━━ 1971s 561ms/step - acc: 0.7866 - auc: 0.8688 - loss: 0.4513 - val_acc: 0.8134 - val_auc: 0.8963 - val_loss: 0.4142
Epoch 1/3
3512/3512 ━━━━━━━━━━━━━━━━━━━━ 2393s 679ms/step - acc: 0.8261 - auc: 0.9101 - loss: 0.3784 - val_acc: 0.9056 - val_auc: 0.9675 - val_loss: 0.2345
Epoch 2/3
3512/3512 ━━━━━━━━━━━━━━━━━━━━ 2324s 662ms/step - acc: 0.8980 - auc: 0.9636 - loss: 0.2442 - val_acc: 0.9296 - val_auc: 0.9810 - val_loss: 0.1781
Epoch 3/3
3512/3512 ━━━━━━━━━━━━━━━━━━━━ 2371s 675ms/step - acc: 0.9159 - auc: 0.9745 - loss: 0.2040 - val_acc: 0.9394 - val_auc: 0.9859 - val_loss: 0.1529

 BC->A {'auc': 0.9723292243594156, 'bal_acc': 0.9270961282602148, 'f1': 0.9278915229885057, 'precision': 0.9484167049105094, 'recall': 0.9082359145644722}
threshold: 0.41999999999999993
confusion matrix [[TN FP],[FN TP]]:
 [[ 9837   562]
 [ 1044 10333]]
auc mean±std = 0.6334099715365208 ± 0.24367000171272593
bal_acc mean±std = 0.6490667167400302 ± 0.19661402094685215
f1 mean±std = 0.6101895444316184 ± 0.2363353843527968
precision mean±std = 0.559817665532954 ± 0.29842733068900634
recall mean±std = 0.813919302358029 ± 0.1672019144210321

In [17]:
# --- FINAL TRAIN POOL: A+B+C (train splits only) ---
ALL_train_paths  = train_dir_A + train_dir_B + train_dir_C
ALL_train_labels = train_labels_A + train_labels_B + train_labels_C

# internal split for early stopping + threshold calibration
trp, try_, vap, vay = split_train_val(ALL_train_paths, ALL_train_labels)

final_train_ds = make_ds(trp, try_, training=True)
final_val_ds   = make_ds(vap, vay, training=False)


from tensorflow.keras.applications.efficientnet import EfficientNetB0, preprocess_input

base = EfficientNetB0(include_top=False, weights="imagenet", input_shape=(256,256,3))
base.trainable = False

final_model = tf.keras.Sequential([
    tf.keras.layers.Input((256,256,3)),
    tf.keras.layers.Lambda(preprocess_input),
    base,
    tf.keras.layers.GlobalAveragePooling2D(),
    tf.keras.layers.Dropout(0.3),
    tf.keras.layers.Dense(1, activation="sigmoid")
])

def compile_final(lr):
    final_model.compile(
        optimizer=tf.keras.optimizers.Adam(lr),
        loss="binary_crossentropy",
        metrics=[tf.keras.metrics.AUC(name="auc"),
                 tf.keras.metrics.BinaryAccuracy(name="acc", threshold=0.5)]
    )

# stage 1
compile_final(1e-3)
final_model.fit(
    final_train_ds,
    validation_data=final_val_ds,
    epochs=3,
    callbacks=[tf.keras.callbacks.EarlyStopping(monitor="val_auc", mode="max", patience=1, restore_best_weights=True)]
)

# stage 2 fine-tune
base.trainable = True
for layer in base.layers[:-30]:
    layer.trainable = False

compile_final(1e-5)
final_model.fit(
    final_train_ds,
    validation_data=final_val_ds,
    epochs=3,
    callbacks=[tf.keras.callbacks.EarlyStopping(monitor="val_auc", mode="max", patience=1, restore_best_weights=True)]
)


# threshold calibrated on internal validation set
final_thr = best_threshold_on_val(final_model, final_val_ds)
print("FINAL threshold:", final_thr)

from pathlib import Path
import json

Path("Models").mkdir(exist_ok=True)

# save model
final_model.save("Models/deepfake_detector.keras")

# save threshold + val metrics (so your app can use the same threshold)
with open("Models/threshold.json", "w", encoding="utf-8") as f:
    json.dump(final_thr, f, indent=2)

print("Saved:",
      Path("Models/deepfake_detector.keras").resolve(),
      Path("Models/threshold.json").resolve())


Epoch 1/3
[1m5773/5773[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3260s[0m 564ms/step - acc: 0.7071 - auc: 0.7808 - loss: 0.5617 - val_acc: 0.7588 - val_auc: 0.8400 - val_loss: 0.5007
Epoch 2/3
[1m5773/5773[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3199s[0m 554ms/step - acc: 0.7229 - auc: 0.7968 - loss: 0.5459 - val_acc: 0.7607 - val_auc: 0.8426 - val_loss: 0.4959
Epoch 3/3
[1m5773/5773[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3099s[0m 537ms/step - acc: 0.7246 - auc: 0.7990 - loss: 0.5438 - val_acc: 0.7646 - val_auc: 0.8456 - val_loss: 0.4936
Epoch 1/3
[1m5773/5773[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3723s[0m 643ms/step - acc: 0.7795 - auc: 0.8629 - loss: 0.4596 - val_acc: 0.8801 - val_auc: 0.9513 - val_loss: 0.2896
Epoch 2/3
[1m5773/5773[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3698s[0m 641ms/step - acc: 0.8749 - auc: 0.9476 - loss: 0.2935 - val_acc: 0.9166 - val_auc: 0.9745 - val_loss: 0.2098
Epoch 3/3
[1m5773/5773[0m [32m━━━━━━━━━━━━━━━━━

Epoch 1/3
5773/5773 ━━━━━━━━━━━━━━━━━━━━ 3260s 564ms/step - acc: 0.7071 - auc: 0.7808 - loss: 0.5617 - val_acc: 0.7588 - val_auc: 0.8400 - val_loss: 0.5007
Epoch 2/3
5773/5773 ━━━━━━━━━━━━━━━━━━━━ 3199s 554ms/step - acc: 0.7229 - auc: 0.7968 - loss: 0.5459 - val_acc: 0.7607 - val_auc: 0.8426 - val_loss: 0.4959
Epoch 3/3
5773/5773 ━━━━━━━━━━━━━━━━━━━━ 3099s 537ms/step - acc: 0.7246 - auc: 0.7990 - loss: 0.5438 - val_acc: 0.7646 - val_auc: 0.8456 - val_loss: 0.4936
Epoch 1/3
5773/5773 ━━━━━━━━━━━━━━━━━━━━ 3723s 643ms/step - acc: 0.7795 - auc: 0.8629 - loss: 0.4596 - val_acc: 0.8801 - val_auc: 0.9513 - val_loss: 0.2896
Epoch 2/3
5773/5773 ━━━━━━━━━━━━━━━━━━━━ 3698s 641ms/step - acc: 0.8749 - auc: 0.9476 - loss: 0.2935 - val_acc: 0.9166 - val_auc: 0.9745 - val_loss: 0.2098
Epoch 3/3
5773/5773 ━━━━━━━━━━━━━━━━━━━━ 3699s 641ms/step - acc: 0.9030 - auc: 0.9665 - loss: 0.2349 - val_acc: 0.9332 - val_auc: 0.9826 - val_loss: 0.1707
FINAL threshold: {'t': 0.43999999999999995, 'f1': 0.9367072789753202, 'precision': 0.9324156231860021, 'recall': 0.9410386240950747, 'bal_acc': 0.9339410615201822}
Saved: C:\Users\blend\Inzynierka\Models\deepfake_detector.keras C:\Users\blend\Inzynierka\Models\threshold.json

In [24]:
from pathlib import Path

Path("Modele").mkdir(exist_ok=True)

# TensorFlow checkpoint format (recommended; avoids .h5 / h5py issues)
final_model.save_weights("Modele/deepfake_model_weights.weights.h5")
with open("Modele/threshold.json", "w", encoding="utf-8") as f:
    json.dump(final_thr, f, indent=2)

print("Saved weights:")
print(Path("Models").resolve())


Saved weights:
C:\Users\blend\Inzynierka\Models


In [22]:
import sys, tensorflow as tf, keras
print("python:", sys.executable)
print("tf:", tf.__version__)
print("keras:", keras.__version__)


python: C:\Users\blend\anaconda3\python.exe
tf: 2.20.0
keras: 3.13.1


In [28]:
# ===== CNN (baseline) =====
TH = 0.5  # próg klasyfikacji

model = tf.keras.Sequential([
    tf.keras.layers.Input(shape=(256, 256, 3)),

    tf.keras.layers.Conv2D(32, 3, padding="same", activation="relu"),
    tf.keras.layers.MaxPooling2D(),

    tf.keras.layers.Conv2D(64, 3, padding="same", activation="relu"),
    tf.keras.layers.MaxPooling2D(),

    tf.keras.layers.Conv2D(128, 3, padding="same", activation="relu"),
    tf.keras.layers.MaxPooling2D(),

    tf.keras.layers.GlobalAveragePooling2D(),
    tf.keras.layers.Dropout(0.3),
    tf.keras.layers.Dense(1, activation="sigmoid")
])

model.compile(
    optimizer=tf.keras.optimizers.Adam(1e-3),
    loss=tf.keras.losses.BinaryCrossentropy(),
    metrics=[
        tf.keras.metrics.BinaryAccuracy(name="acc", threshold=TH),
        tf.keras.metrics.Precision(name="precision", thresholds=TH),
        tf.keras.metrics.Recall(name="recall", thresholds=TH),
        tf.keras.metrics.AUC(name="auc"),
    ]
)

model.summary()

# ===== Trening (na start krótko) =====
history = model.fit(
    model_train,
    validation_data=model_val,
    epochs=3,
    callbacks=[
        tf.keras.callbacks.EarlyStopping(monitor="val_loss", patience=1, restore_best_weights=True)
    ]
)

# ===== Ewaluacja na teście =====
print("\nTEST (Keras metrics):")
print(dict(zip(model.metrics_names, model.evaluate(model_test, return_dict=True, verbose=0))))

# ===== Dodatkowe metryki: confusion matrix, report, ROC-AUC =====
y_true = np.concatenate([y.numpy().reshape(-1) for _, y in model_test], axis=0)
y_prob = model.predict(model_test, verbose=0).reshape(-1)
y_pred = (y_prob >= TH).astype(int)

try:
    from sklearn.metrics import confusion_matrix, classification_report, roc_auc_score

    cm = confusion_matrix(y_true, y_pred)
    print("\nConfusion matrix:\n", cm)

    # class_names: np. ['fake','real'] => 0->fake, 1->real
    print("\nClassification report:\n",
          classification_report(y_true, y_pred, target_names=class_names, digits=4))

    print("ROC-AUC:", roc_auc_score(y_true, y_prob))
except Exception as e:
    print("Brak sklearn lub błąd w metrykach dodatkowych:", e)


Epoch 1/3
[1m2261/2261[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m853s[0m 377ms/step - acc: 0.5680 - auc: 0.5908 - loss: 0.6810 - precision: 0.5655 - recall: 0.8652 - val_acc: 0.5925 - val_auc: 0.6338 - val_loss: 0.6744 - val_precision: 0.6447 - val_recall: 0.5411
Epoch 2/3
[1m2261/2261[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m863s[0m 382ms/step - acc: 0.6122 - auc: 0.6464 - loss: 0.6612 - precision: 0.6290 - recall: 0.6880 - val_acc: 0.6326 - val_auc: 0.6925 - val_loss: 0.6503 - val_precision: 0.6102 - val_recall: 0.8789
Epoch 3/3
[1m2261/2261[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m864s[0m 382ms/step - acc: 0.6526 - auc: 0.7003 - loss: 0.6313 - precision: 0.6686 - recall: 0.7082 - val_acc: 0.6061 - val_auc: 0.7240 - val_loss: 0.6571 - val_precision: 0.5808 - val_recall: 0.9643

TEST (Keras metrics):
{'loss': 0.7267244458198547, 'compile_metrics': 0.4897134602069855}

Confusion matrix:
 [[3630 6769]
 [4343 7034]]

Classification report:
               precision 

Model: "sequential"
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓
┃ Layer (type)                    ┃ Output Shape           ┃       Param # ┃
┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩
│ conv2d (Conv2D)                 │ (None, 256, 256, 32)   │           896 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ max_pooling2d (MaxPooling2D)    │ (None, 128, 128, 32)   │             0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ conv2d_1 (Conv2D)               │ (None, 128, 128, 64)   │        18,496 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ max_pooling2d_1 (MaxPooling2D)  │ (None, 64, 64, 64)     │             0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ conv2d_2 (Conv2D)               │ (None, 64, 64, 128)    │        73,856 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ max_pooling2d_2 (MaxPooling2D)  │ (None, 32, 32, 128)    │             0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ global_average_pooling2d        │ (None, 128)            │             0 │
│ (GlobalAveragePooling2D)        │                        │               │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ dropout (Dropout)               │ (None, 128)            │             0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ dense (Dense)                   │ (None, 1)              │           129 │
└─────────────────────────────────┴────────────────────────┴───────────────┘
 Total params: 93,377 (364.75 KB)
 Trainable params: 93,377 (364.75 KB)
 Non-trainable params: 0 (0.00 B)
Epoch 1/3
2261/2261 ━━━━━━━━━━━━━━━━━━━━ 853s 377ms/step - acc: 0.5680 - auc: 0.5908 - loss: 0.6810 - precision: 0.5655 - recall: 0.8652 - val_acc: 0.5925 - val_auc: 0.6338 - val_loss: 0.6744 - val_precision: 0.6447 - val_recall: 0.5411
Epoch 2/3
2261/2261 ━━━━━━━━━━━━━━━━━━━━ 863s 382ms/step - acc: 0.6122 - auc: 0.6464 - loss: 0.6612 - precision: 0.6290 - recall: 0.6880 - val_acc: 0.6326 - val_auc: 0.6925 - val_loss: 0.6503 - val_precision: 0.6102 - val_recall: 0.8789
Epoch 3/3
2261/2261 ━━━━━━━━━━━━━━━━━━━━ 864s 382ms/step - acc: 0.6526 - auc: 0.7003 - loss: 0.6313 - precision: 0.6686 - recall: 0.7082 - val_acc: 0.6061 - val_auc: 0.7240 - val_loss: 0.6571 - val_precision: 0.5808 - val_recall: 0.9643

TEST (Keras metrics):
{'loss': 0.7267244458198547, 'compile_metrics': 0.4897134602069855}

Confusion matrix:
 [[3630 6769]
 [4343 7034]]

Classification report:
               precision    recall  f1-score   support

        fake     0.4553    0.3491    0.3952     10399
        real     0.5096    0.6183    0.5587     11377

    accuracy                         0.4897     21776
   macro avg     0.4824    0.4837    0.4769     21776
weighted avg     0.4837    0.4897    0.4806     21776

ROC-AUC: 0.4987999138496348

In [9]:
from tensorflow.keras.applications.efficientnet import EfficientNetB0, preprocess_input
from sklearn.metrics import confusion_matrix, classification_report, roc_auc_score

TH = 0.45

base = EfficientNetB0(include_top=False, weights="imagenet", input_shape=(256,256,3))
base.trainable = False

model = tf.keras.Sequential([
    tf.keras.layers.Input((256,256,3)),
    tf.keras.layers.Lambda(preprocess_input),
    base,
    tf.keras.layers.GlobalAveragePooling2D(),
    tf.keras.layers.Dropout(0.3),
    tf.keras.layers.Dense(1, activation="sigmoid")
])

def compile_model(lr):
    model.compile(
        optimizer=tf.keras.optimizers.Adam(lr),
        loss="binary_crossentropy",
        metrics=[
            tf.keras.metrics.BinaryAccuracy(name="acc", threshold=TH),
            tf.keras.metrics.Precision(name="precision", thresholds=TH),
            tf.keras.metrics.Recall(name="recall", thresholds=TH),
            tf.keras.metrics.AUC(name="auc"),
        ]
    )

# ---- Stage 1: train head (feature extractor) ----
compile_model(1e-3)
history_1 = model.fit(
    model_train,
    validation_data=model_val,
    epochs=3,
    callbacks=[tf.keras.callbacks.EarlyStopping(monitor="val_auc", mode="max", patience=1, restore_best_weights=True)]
)

# ---- Stage 2: fine-tuning ----
base.trainable = True
for layer in base.layers[:-30]:
    layer.trainable = False

compile_model(1e-5)
history_2 = model.fit(
    model_train,
    validation_data=model_val,
    epochs=3,
    callbacks=[tf.keras.callbacks.EarlyStopping(monitor="val_auc", mode="max", patience=1, restore_best_weights=True)]
)

# ---- Final test evaluation ----
print("Test (Keras):", model.evaluate(model_test, return_dict=True, verbose=0))

y_true = np.concatenate([y.numpy().reshape(-1) for _, y in model_test], axis=0)
y_prob = model.predict(model_test, verbose=0).reshape(-1)
y_pred = (y_prob >= TH).astype(int)

print("ROC-AUC:", roc_auc_score(y_true, y_prob))
print("Confusion matrix:\n", confusion_matrix(y_true, y_pred))
print(classification_report(y_true, y_pred, target_names=class_names, digits=4))



Epoch 1/6
[1m2261/2261[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1253s[0m 552ms/step - acc: 0.7688 - auc: 0.8620 - loss: 0.4654 - precision: 0.7410 - recall: 0.8793 - val_acc: 0.8344 - val_auc: 0.9120 - val_loss: 0.3980 - val_precision: 0.8422 - val_recall: 0.8522
Epoch 2/6
[1m2261/2261[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1193s[0m 528ms/step - acc: 0.8017 - auc: 0.8900 - loss: 0.4174 - precision: 0.7816 - recall: 0.8784 - val_acc: 0.8428 - val_auc: 0.9222 - val_loss: 0.3664 - val_precision: 0.8284 - val_recall: 0.8932
Epoch 3/6
[1m2261/2261[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1252s[0m 554ms/step - acc: 0.8072 - auc: 0.8936 - loss: 0.4104 - precision: 0.7877 - recall: 0.8806 - val_acc: 0.8525 - val_auc: 0.9280 - val_loss: 0.3586 - val_precision: 0.8524 - val_recall: 0.8783
Epoch 4/6
[1m2261/2261[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1247s[0m 551ms/step - acc: 0.8074 - auc: 0.8950 - loss: 0.4075 - precision: 0.7899 - recall: 0.8766 - val_acc: 0

Epoch 1/6
2261/2261 ━━━━━━━━━━━━━━━━━━━━ 1253s 552ms/step - acc: 0.7688 - auc: 0.8620 - loss: 0.4654 - precision: 0.7410 - recall: 0.8793 - val_acc: 0.8344 - val_auc: 0.9120 - val_loss: 0.3980 - val_precision: 0.8422 - val_recall: 0.8522
Epoch 2/6
2261/2261 ━━━━━━━━━━━━━━━━━━━━ 1193s 528ms/step - acc: 0.8017 - auc: 0.8900 - loss: 0.4174 - precision: 0.7816 - recall: 0.8784 - val_acc: 0.8428 - val_auc: 0.9222 - val_loss: 0.3664 - val_precision: 0.8284 - val_recall: 0.8932
Epoch 3/6
2261/2261 ━━━━━━━━━━━━━━━━━━━━ 1252s 554ms/step - acc: 0.8072 - auc: 0.8936 - loss: 0.4104 - precision: 0.7877 - recall: 0.8806 - val_acc: 0.8525 - val_auc: 0.9280 - val_loss: 0.3586 - val_precision: 0.8524 - val_recall: 0.8783
Epoch 4/6
2261/2261 ━━━━━━━━━━━━━━━━━━━━ 1247s 551ms/step - acc: 0.8074 - auc: 0.8950 - loss: 0.4075 - precision: 0.7899 - recall: 0.8766 - val_acc: 0.8543 - val_auc: 0.9300 - val_loss: 0.3519 - val_precision: 0.8502 - val_recall: 0.8855
Epoch 5/6
2261/2261 ━━━━━━━━━━━━━━━━━━━━ 1231s 545ms/step - acc: 0.8095 - auc: 0.8955 - loss: 0.4067 - precision: 0.7917 - recall: 0.8785 - val_acc: 0.8527 - val_auc: 0.9297 - val_loss: 0.3486 - val_precision: 0.8416 - val_recall: 0.8951
Epoch 1/6
2261/2261 ━━━━━━━━━━━━━━━━━━━━ 1502s 660ms/step - acc: 0.8188 - auc: 0.9052 - loss: 0.3917 - precision: 0.7989 - recall: 0.8882 - val_acc: 0.9061 - val_auc: 0.9682 - val_loss: 0.2370 - val_precision: 0.9051 - val_recall: 0.9225
Epoch 2/6
2261/2261 ━━━━━━━━━━━━━━━━━━━━ 1474s 652ms/step - acc: 0.8838 - auc: 0.9559 - loss: 0.2683 - precision: 0.8709 - recall: 0.9215 - val_acc: 0.9344 - val_auc: 0.9831 - val_loss: 0.1764 - val_precision: 0.9396 - val_recall: 0.9385
Epoch 3/6
2261/2261 ━━━━━━━━━━━━━━━━━━━━ 1493s 661ms/step - acc: 0.9109 - auc: 0.9720 - loss: 0.2149 - precision: 0.9005 - recall: 0.9388 - val_acc: 0.9520 - val_auc: 0.9899 - val_loss: 0.1383 - val_precision: 0.9562 - val_recall: 0.9546
Epoch 4/6
2261/2261 ━━━━━━━━━━━━━━━━━━━━ 1469s 650ms/step - acc: 0.9295 - auc: 0.9815 - loss: 0.1755 - precision: 0.9207 - recall: 0.9514 - val_acc: 0.9606 - val_auc: 0.9928 - val_loss: 0.1148 - val_precision: 0.9629 - val_recall: 0.9640
Epoch 5/6
2261/2261 ━━━━━━━━━━━━━━━━━━━━ 1425s 630ms/step - acc: 0.9394 - auc: 0.9863 - loss: 0.1505 - precision: 0.9310 - recall: 0.9589 - val_acc: 0.9656 - val_auc: 0.9945 - val_loss: 0.1001 - val_precision: 0.9694 - val_recall: 0.9666
Epoch 6/6
2261/2261 ━━━━━━━━━━━━━━━━━━━━ 1469s 650ms/step - acc: 0.9468 - auc: 0.9893 - loss: 0.1327 - precision: 0.9392 - recall: 0.9638 - val_acc: 0.9697 - val_auc: 0.9958 - val_loss: 0.0896 - val_precision: 0.9759 - val_recall: 0.9677
Test (Keras): {'acc': 0.5729243159294128, 'auc': 0.6681026220321655, 'loss': 1.969002604484558, 'precision': 0.9001926779747009, 'recall': 0.20532654225826263}
ROC-AUC: 0.6697694612203459
Confusion matrix:
 [[10140   259]
 [ 9041  2336]]
              precision    recall  f1-score   support

        fake     0.5286    0.9751    0.6856     10399
        real     0.9002    0.2053    0.3344     11377

    accuracy                         0.5729     21776
   macro avg     0.7144    0.5902    0.5100     21776
weighted avg     0.7228    0.5729    0.5021     21776