
# Unmasking Deepfakes: Robust and Interpretable ML Approaches
**Author:** Gaurav Dalvi 
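**Use:** Set `DATASET_ROOT` in Section 2 to your dataset folder (containing `train/`, `val/` and `test/` splits, each with one sub-folder per class), then run the notebook top-to-bottom.  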

## 1. Setup & Environment

In [1]:
import sys, subprocess, pkgutil

print("Installing into:", sys.executable)

# Packages we need (TF 2.16 works with Python 3.11)
pkgs = [
    "numpy",
    "pandas",
    "scikit-learn",
    "scikit-image",
    "opencv-python",
    "matplotlib",
    "joblib",
    "tensorflow==2.16.*",  # regular TF; 'tensorflow-metal' is optional
]

# pip refuses `--user` installs inside a virtual environment (unless it exposes
# system site-packages), so the per-user fallback only makes sense when the
# kernel runs on a system interpreter.
in_virtualenv = sys.prefix != getattr(sys, "base_prefix", sys.prefix)

for p in pkgs:
    try:
        subprocess.check_call([sys.executable, "-m", "pip", "install", p])
    except subprocess.CalledProcessError:
        if in_virtualenv:
            # Surface the real install failure instead of replacing it with
            # pip's unrelated "--user is not allowed in a virtualenv" error.
            raise
        # fallback for locked systems (no write access to site-packages)
        subprocess.check_call([sys.executable, "-m", "pip", "install", "--user", p])

print("Done. Restart the kernel if imports still fail (but try the next cell first).")


Installing into: /Users/zaddyd/Desktop/DeepFake Detection Project/.venv/bin/python
Done. Restart the kernel if imports still fail (but try the next cell first).


In [2]:
import sys, subprocess

print("Installing into:", sys.executable)

# Version pins chosen to stay compatible with TF 2.16; the unpinned entries
# are left for pip to resolve.
pkgs = [
    "numpy==1.26.4",
    "tensorflow==2.16.*",
    "opencv-python==4.10.0.84",
    "scikit-image==0.22.0",   # works well with numpy 1.26
    "pandas",
    "scikit-learn",
    "matplotlib",
    "joblib",
]

# Upgrade one requirement at a time with the kernel's own interpreter so the
# packages land in the environment this notebook is running in.
for p in pkgs:
    install_cmd = [sys.executable, "-m", "pip", "install", "-U"]
    install_cmd.append(p)
    subprocess.check_call(install_cmd)

print("Done. If imports still use old versions, restart the kernel and run the next cell.")

Installing into: /Users/zaddyd/Desktop/DeepFake Detection Project/.venv/bin/python
Done. If imports still use old versions, restart the kernel and run the next cell.


In [3]:
import os, sys, math, json, time, random, shutil, gc, itertools
from pathlib import Path
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (accuracy_score, precision_recall_fscore_support, roc_auc_score,
                             confusion_matrix, roc_curve, auc, classification_report)
from sklearn.model_selection import train_test_split

from skimage.feature import local_binary_pattern, hog

# Prefer new location + American spelling
try:
    from skimage.feature.texture import graycomatrix, graycoprops  # skimage >= 0.20
except Exception:
    try:
        from skimage.feature import graycomatrix, graycoprops      # sometimes re-exported
    except Exception:
        # Last resort: older API (British spelling)
        from skimage.feature import greycomatrix as graycomatrix, greycoprops as graycoprops
import cv2

import tensorflow as tf
from tensorflow.keras.applications import vgg16, resnet50
from tensorflow.keras.applications.vgg16 import VGG16, preprocess_input as vgg_preprocess
from tensorflow.keras.applications.resnet50 import ResNet50, preprocess_input as resnet_preprocess
from tensorflow.keras.preprocessing import image as kimage
from tensorflow.keras import Model

# Seed NumPy, the stdlib RNG and TensorFlow (in that order) with the same value.
for seed_fn in (np.random.seed, random.seed, tf.random.set_seed):
    seed_fn(42)

# Report the interpreter and library versions this run is using.
import skimage, sklearn
print("Python:", sys.version)
for label, module in (
    ("NumPy", np),
    ("Pandas", pd),
    ("scikit-image", skimage),
    ("scikit-learn", sklearn),
    ("OpenCV", cv2),
    ("TensorFlow", tf),
):
    print(f"{label}:", module.__version__)

# Create artefacts dir (figures, tables, models and indices are written here)
ARTE_DIR = Path("artefacts")
ARTE_DIR.mkdir(exist_ok=True)
print("Artefacts dir:", ARTE_DIR.resolve())

Python: 3.11.13 (main, Jun  3 2025, 18:38:25) [Clang 17.0.0 (clang-1700.0.13.3)]
NumPy: 1.26.4
Pandas: 2.3.2
scikit-image: 0.22.0
scikit-learn: 1.7.2
OpenCV: 4.10.0
TensorFlow: 2.16.2
Artefacts dir: /Users/zaddyd/Desktop/DeepFake Detection Project/artefacts


## 2. Configuration

In [4]:
# Dataset root. build_index() looks for "train", "val" and "test" sub-folders
# here, and scan_split() expects one folder per entry of CLASS_NAMES in each.
DATASET_ROOT = Path("/Users/zaddyd/Desktop/DeepFake Detection Project/PhDPeterDetaset")

# If your class folder names differ (e.g., 'fake' vs 'real'), update here:
# The list position becomes the integer label (index 0 -> label 0, index 1 -> label 1).
CLASS_NAMES = ["fake", "real"]  # order matters for label encoding

# Image sizing for feature extraction
IMG_SIZE = (224, 224)  # for CNN backbones
# NOTE: Section 4 re-assigns GRAY_SIZE (to (128, 128)) and the four handcrafted
# USE_* flags below; the values set there are the ones the extractors actually use.
GRAY_SIZE = (256, 256) # for handcrafted features; kept slightly larger for textures

# Feature flags (handcrafted descriptors)
USE_LBP = True
USE_HOG = True
USE_GLCM = True
USE_DCT = True

# Feature flags (CNN embeddings); features are cached per split in CACHE_DIR,
# so delete the cache after changing any feature setting.
USE_VGG16 = True
USE_RESNET50 = False  # You can enable later if you want to concatenate both CNNs

# Compute controls
MAX_IMAGES_PER_CLASS_PER_SPLIT = None   # e.g., 150 to cap; set None for all
CACHE_DIR = Path("cache_features")  # holds one "<split>_features.npz" per split
CACHE_DIR.mkdir(exist_ok=True)

# SVM / RF configs (passed to SVC and RandomForestClassifier in Section 8)
SVM_C = 10.0
SVM_KERNEL = "rbf"
SVM_GAMMA = "scale"

RF_TREES = 300
RF_MAX_DEPTH = None

# Robustness perturbations, keyed by a tag used in file names and result rows.
#   jpeg_quality: quality value handed to OpenCV's JPEG writer
#   blur_ksize:   Gaussian blur kernel size (an even value is bumped to the next odd one)
#   gauss_sigma:  noise standard deviation as a fraction of 255
ROBUSTNESS = {
    "jpeg_50": {"jpeg_quality": 50},
    "blur_3": {"blur_ksize": 3},
    "gauss_noise_0.02": {"gauss_sigma": 0.02},
}

print("Dataset root:", DATASET_ROOT.resolve())

Dataset root: /Users/zaddyd/Desktop/DeepFake Detection Project/PhDPeterDetaset


## 3. Dataset Scan & Indexing

In [5]:
def scan_split(split_dir: Path, class_names, max_per_class=None):
    """Index every image under ``split_dir/<class>`` for each class name.

    Parameters
    ----------
    split_dir : Path
        Folder of one split; it is expected to hold one sub-folder per class.
    class_names : sequence of str
        Class folder names. The position in this sequence is the integer label.
    max_per_class : int or None
        If truthy, keep at most this many images per class.

    Returns
    -------
    pandas.DataFrame
        Columns ``path`` (str), ``label`` (int) and ``class`` (str). The columns
        are present even when no image is found, so callers can index them safely.

    Notes
    -----
    Files are matched by extension case-insensitively and returned in sorted
    order, so the index (and any ``max_per_class`` truncation) is the same on
    every run and every filesystem.
    """
    image_exts = {".jpg", ".jpeg", ".png", ".bmp", ".tif", ".tiff", ".webp"}
    rows = []
    for lbl, cname in enumerate(class_names):
        cdir = split_dir / cname
        if not cdir.exists():
            # A missing class folder would otherwise silently yield a one-class split.
            print(f"[WARN] Missing class folder: {cdir}")
            continue
        images = sorted(
            p for p in cdir.rglob("*")
            if p.is_file() and p.suffix.lower() in image_exts
        )
        if max_per_class:
            images = images[:max_per_class]
        rows.extend({"path": str(p), "label": lbl, "class": cname} for p in images)
    return pd.DataFrame(rows, columns=["path", "label", "class"])

def build_index(root: Path, class_names, max_per_class=None):
    """Build one image index per split ("train", "val", "test") under ``root``.

    Returns a dict mapping split name to a DataFrame with columns
    ``path``/``label``/``class``. A split whose folder does not exist gets an
    empty frame (and a warning is printed); existing splits are scanned with
    ``scan_split`` and their shape is printed.
    """
    idx = {}
    for split in ("train", "val", "test"):
        split_dir = root / split
        if split_dir.exists():
            frame = scan_split(split_dir, class_names, max_per_class)
            idx[split] = frame
            print(split, ":", frame.shape)
            continue
        # Folder absent: keep the key so downstream lookups still work.
        print(f"[WARN] Missing split: {split_dir}")
        idx[split] = pd.DataFrame(columns=["path","label","class"])
    return idx

index = build_index(
    DATASET_ROOT,
    CLASS_NAMES,
    MAX_IMAGES_PER_CLASS_PER_SPLIT,
)

# Persist each split's index next to the other artefacts as "<split>_index.csv".
for k, v in index.items():
    index_csv = ARTE_DIR / f"{k}_index.csv"
    v.to_csv(index_csv, index=False)
print("Saved split indices to artefacts/.")

train : (14000, 3)
val : (6000, 3)
test : (2000, 3)
Saved split indices to artefacts/.


## 4. Handcrafted Feature Extractors (LBP, HOG, GLCM, DCT)

In [6]:
import numpy as np
import cv2
import matplotlib.pyplot as plt
from skimage import io, color, transform, util
from skimage.feature import local_binary_pattern, hog, graycomatrix, graycoprops

# NOTE: these assignments are unconditional. They replace the values chosen in
# Section 2 (for example GRAY_SIZE (256, 256) becomes (128, 128)) for every cell
# that runs after this one, including the config snapshot in Section 14.
GRAY_SIZE = (128, 128)  # default target size used by read_gray(); adjust to your project
USE_LBP = True
USE_HOG = True
USE_GLCM = True
USE_DCT = True


def read_gray(path, size=GRAY_SIZE):
    """Load an image as a float32 grayscale array resized to ``size``.

    Parameters
    ----------
    path : str or path-like
        Image file to read.
    size : tuple of int
        Output (rows, cols). The default is the value ``GRAY_SIZE`` had when
        this function was defined; re-run this cell after changing it.

    Returns
    -------
    numpy.ndarray
        2-D float32 image.

    Notes
    -----
    ``rgb2gray`` only accepts 3-channel input, so an alpha channel is dropped
    first and single/dual-channel (gray or gray+alpha) arrays use their first
    channel directly.
    """
    img = io.imread(path)
    if img.ndim == 3:
        channels = img.shape[-1]
        if channels in (1, 2):
            # Gray, or gray + alpha: the first channel already is the luminance.
            img = img[..., 0]
        else:
            # RGB, or RGBA with the alpha channel discarded.
            img = color.rgb2gray(img[..., :3])
    img = transform.resize(img, size, anti_aliasing=True)
    img = util.img_as_float32(img)
    return img

def feat_lbp(gray, P=8, R=1):
    """Return the density-normalised histogram of "uniform" LBP codes.

    ``gray`` is converted to uint8 before the LBP call (the LBP routine warns on
    float input). The histogram has ``P + 2`` unit-width bins and is returned
    as float32.
    """
    as_uint8 = util.img_as_ubyte(gray)
    codes = local_binary_pattern(as_uint8, P=P, R=R, method="uniform")
    # Bin edges 0, 1, ..., P + 2 give P + 2 unit-width bins.
    edges = np.arange(0, P + 3)
    hist = np.histogram(codes.ravel(), bins=edges, density=True)[0]
    return hist.astype(np.float32)

def feat_hog(gray):
    """Return the flattened HOG descriptor of ``gray`` as float32.

    Uses 9 orientations, 16x16-pixel cells, 2x2-cell blocks, L2-Hys block
    normalisation and square-root compression of the input.
    """
    hog_params = dict(
        orientations=9,
        pixels_per_cell=(16, 16),
        cells_per_block=(2, 2),
        block_norm="L2-Hys",
        transform_sqrt=True,
        feature_vector=True,
    )
    descriptor = hog(gray, **hog_params)
    return descriptor.astype(np.float32)

def feat_glcm(gray):
    """Return GLCM texture statistics of ``gray`` as one float32 vector.

    A symmetric, normalised 256-level co-occurrence matrix is built for
    distances 1, 2, 4 and angles 0, 45, 90, 135 degrees. Six properties are read
    from it and concatenated in a fixed order.
    """
    quantised = util.img_as_ubyte(gray)  # graycomatrix is called with levels=256
    cooc = graycomatrix(
        quantised,
        distances=[1, 2, 4],
        angles=[0, np.pi/4, np.pi/2, 3*np.pi/4],
        levels=256,
        symmetric=True,
        normed=True,
    )
    prop_names = ("contrast", "dissimilarity", "homogeneity", "ASM", "energy", "correlation")
    stats = [graycoprops(cooc, prop_name).ravel() for prop_name in prop_names]
    return np.concatenate(stats).astype(np.float32)

def feat_dct(gray, keep=32):
    """Return the top-left ``keep`` x ``keep`` DCT coefficients, flattened.

    The image is first resized to a fixed 128x128 square so the coefficient
    grid is the same for every input. The top-left corner of the transform
    holds the lowest frequencies. Output is float32 with ``keep * keep`` values
    (for ``keep`` <= 128).
    """
    square = transform.resize(gray, (128, 128), anti_aliasing=True)
    coeffs = cv2.dct(np.float32(square))
    low_freq = coeffs[:keep, :keep]
    return low_freq.flatten().astype(np.float32)

def handcrafted_features(path):
    """Concatenate every enabled handcrafted descriptor for one image.

    The image is loaded once with ``read_gray``. Descriptors are appended in the
    order LBP, HOG, GLCM, DCT, each only if its ``USE_*`` flag is set. With all
    flags off, an empty float32 vector is returned.
    """
    gray = read_gray(path)
    # (flag, extractor) pairs in output order; flags are read at call time.
    extractors = (
        (USE_LBP, feat_lbp),
        (USE_HOG, feat_hog),
        (USE_GLCM, feat_glcm),
        (USE_DCT, lambda g: feat_dct(g, keep=32)),
    )
    feats = [extract(gray) for enabled, extract in extractors if enabled]
    if not feats:
        return np.empty((0,), dtype=np.float32)
    return np.concatenate(feats)

# Optional smoke test: run the extractor on the first image of the first
# non-empty split and report the resulting vector length.
for split in ("train", "val", "test"):
    if len(index[split]) == 0:
        continue
    sample_path = index[split]["path"].iloc[0]
    print("Sample handcrafted feat length:", handcrafted_features(sample_path).shape[0])
    break

Sample handcrafted feat length: 2870


## 5. CNN Embeddings (VGG16 / ResNet50)

In [7]:

# Embedding models stay None when their backbone is disabled; cnn_embed_batch
# checks for that.
vgg_model = None
resnet_model = None

# Each backbone is an ImageNet network without its classifier head, followed by
# global average pooling so every image maps to a single vector.
if USE_VGG16:
    base = VGG16(
        weights="imagenet",
        include_top=False,
        input_shape=(IMG_SIZE[0], IMG_SIZE[1], 3),
    )
    vgg_pooled = tf.keras.layers.GlobalAveragePooling2D()(base.output)
    vgg_model = Model(inputs=base.input, outputs=vgg_pooled)
    print("VGG16 embedding output:", vgg_model.output_shape)

if USE_RESNET50:
    base_r = ResNet50(
        weights="imagenet",
        include_top=False,
        input_shape=(IMG_SIZE[0], IMG_SIZE[1], 3),
    )
    resnet_pooled = tf.keras.layers.GlobalAveragePooling2D()(base_r.output)
    resnet_model = Model(inputs=base_r.input, outputs=resnet_pooled)
    print("ResNet50 embedding output:", resnet_model.output_shape)

def read_rgb_for_cnn(path, size=IMG_SIZE):
    """Load ``path`` resized to ``size`` and return it with a leading batch axis.

    The result has shape ``(1, height, width, channels)``; no backbone-specific
    preprocessing is applied here.
    """
    pil_img = kimage.load_img(path, target_size=size)
    pixels = kimage.img_to_array(pil_img)
    return pixels[np.newaxis, ...]

def cnn_embed_batch(paths, backbone="vgg", batch_size=64):
    """Embed images with the selected CNN backbone, a chunk at a time.

    Parameters
    ----------
    paths : sequence of str
        Image files to embed.
    backbone : str
        ``"vgg"`` selects the VGG16 model; any other value selects ResNet50.
    batch_size : int
        Number of images decoded and held in memory per forward pass. Chunking
        keeps peak memory at ``batch_size`` images instead of the whole split.

    Returns
    -------
    numpy.ndarray
        float32 array with one row per path. It has zero columns when the
        selected backbone is disabled (its model is ``None``).

    Raises
    ------
    ValueError
        If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be >= 1")

    if backbone == "vgg":
        preprocess, model = vgg_preprocess, vgg_model
    else:
        preprocess, model = resnet_preprocess, resnet_model

    paths = list(paths)
    # guard: backbone disabled
    if model is None:
        return np.zeros((len(paths), 0), dtype=np.float32)
    # guard: nothing to embed (np.stack would fail on an empty list)
    if len(paths) == 0:
        return np.zeros((0, model.output_shape[-1]), dtype=np.float32)

    chunks = []
    for start in range(0, len(paths), batch_size):
        chunk_paths = paths[start:start + batch_size]
        batch = np.stack([read_rgb_for_cnn(p)[0] for p in chunk_paths], axis=0)
        # `batch` is a fresh array, so the preprocessing function may modify it in place.
        batch = preprocess(batch)
        chunks.append(model.predict(batch, verbose=0).astype(np.float32))
    return np.concatenate(chunks, axis=0)

VGG16 embedding output: (None, 512)


## 6. Extract & Cache Features

In [8]:

def extract_split_features(df, split, cache_dir=CACHE_DIR):
    """Return ``(X_hand, X_vgg, X_resnet, y)`` for one split, using a disk cache.

    Parameters
    ----------
    df : pandas.DataFrame
        Split index with ``path`` and ``label`` columns.
    split : str
        Split name; the cache file is ``<cache_dir>/<split>_features.npz``.
    cache_dir : Path
        Folder holding the cache files.

    Returns
    -------
    tuple of numpy.ndarray
        Handcrafted features, VGG16 embeddings, ResNet50 embeddings and labels.
        A disabled CNN contributes an array with zero columns.

    Notes
    -----
    A cached file is reused only if its labels equal the current index labels
    and its CNN blocks match the current ``USE_VGG16``/``USE_RESNET50`` flags;
    otherwise the features are recomputed and the cache overwritten. Changes to
    the handcrafted settings are NOT detected, so delete the cache after changing them.
    """
    cache_path = cache_dir / f"{split}_features.npz"
    paths = df["path"].tolist()
    y = df["label"].astype(int).to_numpy()

    if cache_path.exists():
        # The cache holds plain numeric arrays, so pickle support is not needed;
        # the context manager closes the underlying zip file.
        with np.load(cache_path, allow_pickle=False) as data:
            cached = (data["X_hand"], data["X_vgg"], data["X_resnet"], data["y"])
        Xh_c, Xv_c, Xr_c, y_c = cached
        cache_is_fresh = (
            np.array_equal(y_c, y)
            and len(Xh_c) == len(Xv_c) == len(Xr_c) == len(y)
            and (Xv_c.shape[-1] > 0) == bool(USE_VGG16)
            and (Xr_c.shape[-1] > 0) == bool(USE_RESNET50)
        )
        if cache_is_fresh:
            return cached
        print(f"[WARN] {split}: cache {cache_path} does not match the current index/config; recomputing.")

    # Handcrafted
    Xh = []
    for i,p in enumerate(paths):
        Xh.append(handcrafted_features(p))
        if (i+1)%100==0:
            print(f"{split}: handcrafted {i+1}/{len(paths)}")
    Xh = np.stack(Xh, axis=0) if len(Xh)>0 else np.zeros((len(paths),0), dtype=np.float32)

    # CNNs
    Xv = cnn_embed_batch(paths, backbone="vgg") if USE_VGG16 else np.zeros((len(paths),0), dtype=np.float32)
    Xr = cnn_embed_batch(paths, backbone="resnet") if USE_RESNET50 else np.zeros((len(paths),0), dtype=np.float32)

    np.savez_compressed(cache_path, X_hand=Xh, X_vgg=Xv, X_resnet=Xr, y=y)
    return (Xh, Xv, Xr, y)

# Features per split as (handcrafted, vgg, resnet, labels); an empty split gets
# empty placeholder arrays so later unpacking still works.
feat = {}
for split in ("train", "val", "test"):
    if len(index[split]) > 0:
        feat[split] = extract_split_features(index[split], split)
        print(split, "handcrafted:", feat[split][0].shape, "vgg:", feat[split][1].shape, "resnet:", feat[split][2].shape, "labels:", feat[split][3].shape)
    else:
        feat[split] = (np.zeros((0,0)), np.zeros((0,0)), np.zeros((0,0)), np.zeros((0,)))


train handcrafted: (14000, 2870) vgg: (14000, 512) resnet: (14000, 0) labels: (14000,)
val handcrafted: (6000, 2870) vgg: (6000, 512) resnet: (6000, 0) labels: (6000,)
test handcrafted: (2000, 2870) vgg: (2000, 512) resnet: (2000, 0) labels: (2000,)


## 7. Feature Fusion & Scaling

In [9]:
def fuse_features(Xh, Xv, Xr):
    """Concatenate the non-empty feature blocks column-wise.

    Blocks that are ``None`` or have no elements are skipped. Raises
    ``ValueError`` when no block is left.
    """
    parts = []
    for block in (Xh, Xv, Xr):
        if block is None or block.size == 0:
            continue
        parts.append(block)
    if not parts:
        raise ValueError("No features enabled. Enable at least one handcrafted or CNN feature.")
    return np.concatenate(parts, axis=1)

def split_trainval(feat_dict):
    """Fuse the per-split feature blocks into single matrices.

    ``feat_dict`` maps "train"/"val"/"test" to ``(X_hand, X_vgg, X_resnet, y)``.
    Returns ``(X_tr, y_tr, X_va, y_va, X_te, y_te)``; the val/test matrices are
    ``None`` when all of that split's blocks are empty. The train split is
    always fused, so it raises if it has no features.
    """
    def _fuse_or_none(hand, vgg, resnet):
        # Skip fusing when every block of the split is empty.
        if hand.size + vgg.size + resnet.size > 0:
            return fuse_features(hand, vgg, resnet)
        return None

    hand_tr, vgg_tr, res_tr, y_tr = feat_dict["train"]
    hand_va, vgg_va, res_va, y_va = feat_dict["val"]
    hand_te, vgg_te, res_te, y_te = feat_dict["test"]

    X_tr = fuse_features(hand_tr, vgg_tr, res_tr)
    X_va = _fuse_or_none(hand_va, vgg_va, res_va)
    X_te = _fuse_or_none(hand_te, vgg_te, res_te)
    return X_tr, y_tr, X_va, y_va, X_te, y_te

X_tr, y_tr, X_va, y_va, X_te, y_te = split_trainval(feat)

# Fit the scaler on the training matrix only; val/test reuse its statistics.
scaler = StandardScaler()
X_trs = scaler.fit_transform(X_tr)

X_vas = None
if X_va is not None and X_va.size > 0:
    X_vas = scaler.transform(X_va)

X_tes = None
if X_te is not None and X_te.size > 0:
    X_tes = scaler.transform(X_te)

print("Train fused shape:", X_trs.shape)
if X_vas is not None:
    print("Val fused shape:", X_vas.shape)
if X_tes is not None:
    print("Test fused shape:", X_tes.shape)

Train fused shape: (14000, 3382)
Val fused shape: (6000, 3382)
Test fused shape: (2000, 3382)


## 8. Train Classifiers (SVM, RandomForest)

In [None]:
# Registry of fitted classifiers, keyed by the short name used in reports.
models = {}

# SVM (probability=True so predict_proba is available for the ROC/AUC step)
svm_clf = SVC(
    C=SVM_C,
    kernel=SVM_KERNEL,
    gamma=SVM_GAMMA,
    probability=True,
    random_state=42,
)
models["SVM"] = svm_clf.fit(X_trs, y_tr)

# Random Forest
rf_clf = RandomForestClassifier(
    n_estimators=RF_TREES,
    max_depth=RF_MAX_DEPTH,
    random_state=42,
    n_jobs=-1,
)
models["RF"] = rf_clf.fit(X_trs, y_tr)

print("Trained models:", list(models.keys()))

## 9. Evaluation Utilities

In [None]:
def eval_model(name, clf, X, y, split_name):
    """Score one classifier on one split.

    Parameters
    ----------
    name : str
        Model name stored in the metrics row.
    clf : estimator
        Fitted classifier with ``predict``; ``predict_proba`` is optional.
    X, y : array-like
        Features and true labels.
    split_name : str
        Split label stored in the metrics row.

    Returns
    -------
    (metrics, cm, (y, y_pred, y_proba))
        ``metrics`` is a dict with model, split, accuracy, precision, recall,
        f1 and auc. ``cm`` is the 2x2 confusion matrix for labels [0, 1].
        ``y_proba`` is the column-1 probability or ``None``.

    Notes
    -----
    Precision/recall/F1 treat label 1 as the positive class, and the AUC uses
    the probability of label 1. Probability and AUC failures stay best-effort
    (AUC becomes NaN) but are reported instead of being silently swallowed.
    """
    y_pred = clf.predict(X)

    y_proba = None
    try:
        y_proba = clf.predict_proba(X)[:,1]
    except Exception as exc:
        print(f"[eval] {name} ({split_name}): class probabilities unavailable ({exc!r}); AUC set to NaN.")

    acc = accuracy_score(y, y_pred)
    prec, rec, f1, _ = precision_recall_fscore_support(y, y_pred, average="binary", pos_label=1, zero_division=0)
    metrics = {"model":name, "split":split_name, "accuracy":acc, "precision":prec, "recall":rec, "f1":f1}

    auc_roc = np.nan
    if y_proba is not None:
        try:
            auc_roc = roc_auc_score(y, y_proba)
        except Exception as exc:
            print(f"[eval] {name} ({split_name}): AUC could not be computed ({exc!r}); set to NaN.")
            auc_roc = np.nan
    metrics["auc"] = auc_roc

    # Confusion matrix
    cm = confusion_matrix(y, y_pred, labels=[0,1])
    return metrics, cm, (y, y_pred, y_proba)

def plot_confusion_matrix(cm, classes, title, fname):
    """Draw an annotated confusion-matrix heat map, save it and show it.

    ``cm`` is an integer matrix, ``classes`` the tick labels for both axes, and
    ``fname`` the file name written inside the artefacts directory.
    """
    fig, ax = plt.subplots(figsize=(4, 4))
    heat = ax.imshow(cm, interpolation='nearest')
    ax.set_title(title)
    fig.colorbar(heat, ax=ax)

    tick_marks = np.arange(len(classes))
    ax.set_xticks(tick_marks)
    ax.set_xticklabels(classes, rotation=45)
    ax.set_yticks(tick_marks)
    ax.set_yticklabels(classes)

    # Cell counts: white text on cells above half the maximum, black otherwise.
    thresh = cm.max() / 2.
    for i in range(cm.shape[0]):
        for j in range(cm.shape[1]):
            text_color = "white" if cm[i, j] > thresh else "black"
            ax.text(j, i, format(cm[i, j], 'd'),
                    horizontalalignment="center",
                    color=text_color)

    ax.set_ylabel('True')
    ax.set_xlabel('Predicted')
    fig.tight_layout()
    fig.savefig(ARTE_DIR / fname, dpi=150, bbox_inches="tight")
    plt.show()

def plot_roc(y_true, y_proba, title, fname):
    """Plot the ROC curve with its AUC in the legend, save it and show it.

    Does nothing except print a notice when ``y_proba`` is ``None``.
    """
    if y_proba is None:
        print("[ROC] Skipping (no probabilities available)")
        return

    fpr, tpr, _ = roc_curve(y_true, y_proba)
    roc_auc = auc(fpr, tpr)

    fig, ax = plt.subplots(figsize=(5, 4))
    ax.plot(fpr, tpr, label=f"AUC={roc_auc:.3f}")
    ax.plot([0,1],[0,1],'--')  # diagonal reference line
    ax.set_xlabel("False Positive Rate")
    ax.set_ylabel("True Positive Rate")
    ax.set_title(title)
    ax.legend(loc="lower right")
    fig.savefig(ARTE_DIR / fname, dpi=150, bbox_inches="tight")
    plt.show()


## 10. In-dataset Evaluation (Val/Test)

In [None]:

# One metrics row per (model, split); splits without features are skipped.
results = []
for name, clf in models.items():
    for split_name, X_split, y_split in (("val", X_vas, y_va), ("test", X_tes, y_te)):
        if X_split is None:
            continue
        m, cm, triplet = eval_model(name, clf, X_split, y_split, split_name)
        results.append(m)
        plot_confusion_matrix(
            cm,
            classes=CLASS_NAMES,
            title=f"{name} Confusion Matrix ({split_name})",
            fname=f"{name}_cm_{split_name}.png",
        )
        plot_roc(
            triplet[0],
            triplet[2],
            title=f"{name} ROC ({split_name})",
            fname=f"{name}_roc_{split_name}.png",
        )

res_df = pd.DataFrame(results)
res_df.to_csv(ARTE_DIR / "results_indataset.csv", index=False)
print(res_df)


## 11. Robustness Evaluation (JPEG, Blur, Noise)

In [None]:

def apply_perturbations_to_paths(paths, tag):
    """Write a perturbed copy of every image and return the new file paths.

    Parameters
    ----------
    paths : sequence of str
        Source image files.
    tag : str
        Key into ``ROBUSTNESS`` selecting the perturbation (JPEG re-encoding,
        Gaussian blur, or additive Gaussian noise).

    Returns
    -------
    list of str
        One output path per input, in the same order.

    Raises
    ------
    KeyError
        If ``tag`` is not in ``ROBUSTNESS``.
    ValueError
        If the spec for ``tag`` has none of the known keys.
    OSError
        If OpenCV fails to write an output file.

    Notes
    -----
    ``tmp_perturb/<tag>`` is wiped and recreated on every call. Output names
    carry the input's position as a prefix, so images that share a file stem
    (for example the same name in two class folders) cannot overwrite each
    other, which would misalign features and labels.
    """
    spec = ROBUSTNESS[tag]

    # returns list of temp file paths after applying a perturbation
    tmp_dir = Path("tmp_perturb") / tag
    if tmp_dir.exists():
        shutil.rmtree(tmp_dir)
    tmp_dir.mkdir(parents=True, exist_ok=True)

    out_paths = []
    for i, p in enumerate(paths):
        img = io.imread(p)
        if img.ndim == 2:
            img = color.gray2rgb(img)
        elif img.shape[-1] in (1, 2):
            # Gray or gray + alpha stored with a channel axis.
            img = color.gray2rgb(img[..., 0])
        elif img.shape[-1] == 4:
            # Drop the alpha channel; the RGB->BGR conversion needs 3 channels.
            img = img[..., :3]
        img = util.img_as_ubyte(img)

        stem = f"{i:06d}_{Path(p).stem}"
        if "jpeg_quality" in spec:
            q = spec["jpeg_quality"]
            tmp = tmp_dir / (stem + f"_jpeg{q}.jpg")
            ok = cv2.imwrite(str(tmp), cv2.cvtColor(img, cv2.COLOR_RGB2BGR), [int(cv2.IMWRITE_JPEG_QUALITY), q])
        elif "blur_ksize" in spec:
            k = spec["blur_ksize"]
            if k%2==0: k += 1  # the blur kernel size must be odd
            bl = cv2.GaussianBlur(cv2.cvtColor(img, cv2.COLOR_RGB2BGR), (k,k), 0)
            tmp = tmp_dir / (stem + f"_blur{k}.png")
            ok = cv2.imwrite(str(tmp), bl)
        elif "gauss_sigma" in spec:
            s = spec["gauss_sigma"]
            # sigma is given as a fraction of the 8-bit range
            noise = np.random.normal(0, s*255, img.shape).astype(np.int16)
            noisy = np.clip(img.astype(np.int16) + noise, 0, 255).astype(np.uint8)
            tmp = tmp_dir / (stem + f"_noise{s}.png")
            ok = cv2.imwrite(str(tmp), cv2.cvtColor(noisy, cv2.COLOR_RGB2BGR))
        else:
            raise ValueError("Unknown perturbation spec.")

        if not ok:
            # imwrite reports failure through its return value, not an exception.
            raise OSError(f"Failed to write perturbed image: {tmp}")
        out_paths.append(str(tmp))
    return out_paths

def features_from_paths(paths):
    """Extract, fuse and scale features for arbitrary image paths.

    Runs the handcrafted and CNN extractors in the same way as the split
    extraction (without caching), then applies the already-fitted global
    ``scaler``.
    """
    total = len(paths)

    # handcrafted
    rows = []
    for done, p in enumerate(paths, start=1):
        rows.append(handcrafted_features(p))
        if done % 100 == 0:
            print(f"perturb: handcrafted {done}/{total}")
    if rows:
        Xh = np.stack(rows, axis=0)
    else:
        Xh = np.zeros((total, 0), dtype=np.float32)

    # cnn
    if USE_VGG16:
        Xv = cnn_embed_batch(paths, backbone="vgg")
    else:
        Xv = np.zeros((total, 0), dtype=np.float32)
    if USE_RESNET50:
        Xr = cnn_embed_batch(paths, backbone="resnet")
    else:
        Xr = np.zeros((total, 0), dtype=np.float32)

    return scaler.transform(fuse_features(Xh, Xv, Xr))

# Re-evaluate every trained model on perturbed copies of the test images.
if X_tes is None or len(index["test"]) == 0:
    print("[Robustness] Skipped (no test set).")
else:
    test_paths = index["test"]["path"].tolist()
    rob_results = []
    for tag in ROBUSTNESS:
        ppaths = apply_perturbations_to_paths(test_paths, tag)
        Xp = features_from_paths(ppaths)
        y = index["test"]["label"].astype(int).to_numpy()
        for name, clf in models.items():
            m, cm, triplet = eval_model(name, clf, Xp, y, f"test_{tag}")
            rob_results.append(m)
            plot_confusion_matrix(
                cm,
                classes=CLASS_NAMES,
                title=f"{name} Confusion Matrix (test {tag})",
                fname=f"{name}_cm_test_{tag}.png",
            )
            plot_roc(
                triplet[0],
                triplet[2],
                title=f"{name} ROC (test {tag})",
                fname=f"{name}_roc_test_{tag}.png",
            )
    rob_df = pd.DataFrame(rob_results)
    rob_df.to_csv(ARTE_DIR / "results_robustness.csv", index=False)
    print(rob_df)


## 12. Best Model Selection + Report Tables

In [None]:
import sys, subprocess
import importlib.util

# DataFrame.to_markdown() (used for the report tables below) needs the optional
# `tabulate` package. Install it only when it is missing, so re-runs and offline
# runs do not call pip needlessly.
if importlib.util.find_spec("tabulate") is None:
    subprocess.check_call([sys.executable, "-m", "pip", "install", "tabulate"])

def pick_best_by(res_df, metric="f1", split="val", fallback_split="test"):
    """Return the results row with the highest ``metric`` on ``split``.

    Parameters
    ----------
    res_df : pandas.DataFrame
        Results table with at least a ``split`` column and the metric column.
    metric : str
        Column to rank by (descending).
    split : str
        Preferred split to select rows from.
    fallback_split : str or None
        Split used when ``split`` has no rows; ``None`` disables the fallback.

    Returns
    -------
    dict or None
        The best row as a dict, or ``None`` when the table is missing, empty,
        has no ``split`` column (for example a bare ``pd.DataFrame()`` created
        when no results file exists), or has no row for either split.
    """
    if res_df is None or len(res_df) == 0 or "split" not in res_df.columns:
        return None
    sub = res_df[res_df["split"]==split]
    if len(sub)==0 and fallback_split is not None:
        sub = res_df[res_df["split"]==fallback_split]
    if len(sub)==0:
        return None
    row = sub.sort_values(by=metric, ascending=False).iloc[0].to_dict()
    return row

# Load whichever result files exist; a missing file becomes an empty frame.
tables = {
    key: (
        pd.read_csv(ARTE_DIR / csv_name)
        if (ARTE_DIR / csv_name).exists()
        else pd.DataFrame()
    )
    for key, csv_name in (
        ("in_dataset", "results_indataset.csv"),
        ("robustness", "results_robustness.csv"),
    )
}

best = pick_best_by(tables["in_dataset"], metric="f1", split="val", fallback_split="test")
print("Best (by F1):", best)

# Export nicely formatted tables (for dissertation Appendix A / Chapter 5)
def format_table(df):
    """Round the metric columns of ``df`` to 4 decimals, in place.

    Only the columns that are present are touched. The same DataFrame object is
    returned, so pass a copy to keep the original unrounded.
    """
    for col in ("auc", "accuracy", "precision", "recall", "f1"):
        if col not in df.columns:
            continue
        df[col] = df[col].round(4)
    return df

# Write each non-empty table as CSV and as a Markdown table.
for name, df in tables.items():
    if len(df) == 0:
        continue
    df2 = format_table(df.copy())  # round a copy; the loaded table stays untouched
    df2.to_csv(ARTE_DIR / f"table_{name}.csv", index=False)
    md_path = ARTE_DIR / f"table_{name}.md"
    with open(md_path, "w") as f:
        f.write(df2.to_markdown(index=False))
    print(f"[Saved] artefacts/table_{name}.csv and .md")


## 13. Save Artefacts (Models, Scaler)

In [None]:

import joblib

# Persist the fitted scaler first, then every trained classifier under its
# registry key ("model_<name>.joblib").
joblib.dump(scaler, ARTE_DIR / "scaler.joblib")
for name in models:
    clf = models[name]
    joblib.dump(clf, ARTE_DIR / f"model_{name}.joblib")
print("Saved scaler + models to artefacts/.")


## 14. Appendix: Environment & Config Snapshot

In [None]:

# Snapshot of library versions and the configuration values in effect when
# this cell runs.
env_info = {
    "python": sys.version,
    "numpy": np.__version__,
    "pandas": pd.__version__,
    "scikit-image": skimage.__version__,
    "scikit-learn": sklearn.__version__,
    "opencv": cv2.__version__,
    "tensorflow": tf.__version__,
    "class_names": CLASS_NAMES,
    "img_size": IMG_SIZE,
    "gray_size": GRAY_SIZE,
    "use_features": dict(
        LBP=USE_LBP, HOG=USE_HOG, GLCM=USE_GLCM, DCT=USE_DCT,
        VGG16=USE_VGG16, RESNET50=USE_RESNET50,
    ),
    "svm": dict(C=SVM_C, kernel=SVM_KERNEL, gamma=SVM_GAMMA),
    "rf": dict(trees=RF_TREES, max_depth=RF_MAX_DEPTH),
    "robustness": ROBUSTNESS,
}

# Serialise once so the file and the printed text are the same string.
env_json = json.dumps(env_info, indent=2)
with open(ARTE_DIR / "environment_config.json","w") as f:
    f.write(env_json)
print(env_json)
