# 🔬 Step 1: Semantic Signal Proof
## Testing Content-Level Signals That Survive CRF23 Compression

**Previous finding:** Pixel-level noise signals (HLL, gradient sharpness, autocorrelation) → all failed.  
**Root cause:** H.264 CRF23 compression homogenizes the noise floor of real and fake videos alike.

**New hypothesis:** The signal is in the *content*, not the noise.  
Signals that live in low-frequency content survive compression.

### Two Tests This Session

| Test | Signal | Method | Needs Pretrained? |
|------|--------|--------|-------------------|
| A | Color statistics inconsistency (face vs background) | Wasserstein distance on histograms | ❌ No |
| B | Optical flow boundary inconsistency | OpenCV Farneback flow | ❌ No |

### Go/No-Go
- **Either test separates real/fake with p < 0.05** → we have a content-level anchor → add to V8.0 on top of pretrained backbone
- **Both fail** → signal requires pretrained semantic features → skip straight to baseline training

## Section 1 — Setup

In [None]:
import os, json, random, warnings
from pathlib import Path
from typing import Optional, Dict, List, Tuple
import numpy as np
import cv2
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
from scipy import stats
from scipy.stats import wasserstein_distance
warnings.filterwarnings('ignore')

# Sampling / extraction parameters shared by the cells that follow.
N_SAMPLE  = 25    # videos drawn per class
N_FRAMES  = 32    # frames taken from each video
FACE_SIZE = 224   # side length (px) each extracted frame is resized to

# Seed both the stdlib and NumPy generators with the same value.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)

# Output locations; creating the plots directory with parents=True also
# creates OUTPUT_DIR, so a single call covers both.
OUTPUT_DIR = Path('/kaggle/working/semantic_signal')
PLOTS_DIR  = OUTPUT_DIR / 'plots'
PLOTS_DIR.mkdir(parents=True, exist_ok=True)

print(f"NumPy {np.__version__}, OpenCV {cv2.__version__}")
print(f"Outputs ‚Üí {OUTPUT_DIR}")


## Section 2 — Dataset

In [None]:
# Search root handed to the dataset locators (locate_ff_root / locate_celeb_root).
KAGGLE_INPUT = Path('/kaggle/input')

def locate_ff_root(base):
    """Find the FaceForensics++ root directory underneath *base*.

    The hard-coded dataset location is tried first. Otherwise every
    directory below *base* is visited in sorted order and the first one
    that contains at least two of the four manipulation-method entries
    is returned. Returns ``None`` when nothing qualifies.
    """
    preferred = base / 'datasets' / 'xdxd003' / 'ff-c23' / 'FaceForensics++_C23'
    if preferred.exists():
        return preferred

    method_names = ('Deepfakes', 'Face2Face', 'FaceSwap', 'NeuralTextures')
    for candidate in sorted(base.rglob('*')):
        if not candidate.is_dir():
            continue
        present = [name for name in method_names if (candidate / name).exists()]
        if len(present) >= 2:
            return candidate
    return None

def locate_celeb_root(base):
    """Find the Celeb-DF root directory underneath *base*.

    The hard-coded dataset location is tried first. Otherwise the first
    directory (in sorted order) that contains both ``Celeb-real`` and
    ``Celeb-synthesis`` is returned, or ``None`` if there is none.
    """
    preferred = base / 'datasets' / 'reubensuju' / 'celeb-df-v2'
    if preferred.exists():
        return preferred

    required = ('Celeb-real', 'Celeb-synthesis')
    matches = (
        candidate
        for candidate in sorted(base.rglob('*'))
        if candidate.is_dir() and all((candidate / sub).exists() for sub in required)
    )
    return next(matches, None)

# Resolve dataset roots (either may be None when the dataset is not attached).
FF_ROOT    = locate_ff_root(KAGGLE_INPUT)
CELEB_ROOT = locate_celeb_root(KAGGLE_INPUT)
print(f"FF++    : {FF_ROOT}")
print(f"Celeb-DF: {CELEB_ROOT}")

FF_METHODS = ['Deepfakes', 'Face2Face', 'FaceSwap', 'NeuralTextures']
FF_VIDEOS  = {}      # label ('real' or a method name) -> sorted list of video paths
CELEB_VIDEOS = {}    # 'real' / 'fake' -> sorted list of video paths

if FF_ROOT:
    # Real videos: prefer an "original*" directory; otherwise fall back to
    # any .mp4 whose path mentions "original".
    real = list(FF_ROOT.rglob('original*/*.mp4'))
    if not real:
        real = [p for p in FF_ROOT.rglob('*.mp4') if 'original' in str(p).lower()]
    FF_VIDEOS['real'] = sorted(real)
    # Manipulated videos: one entry per method directory that has videos.
    for m in FF_METHODS:
        paths = list((FF_ROOT / m).glob('*.mp4')) if (FF_ROOT / m).exists() else []
        if paths:
            FF_VIDEOS[m] = sorted(paths)
    for k, v in FF_VIDEOS.items():
        print(f"  FF++/{k:20s}: {len(v):4d} videos")

if CELEB_ROOT:
    # Sorted like the FF++ lists: glob order is filesystem-dependent, and the
    # seeded sampler only picks the same videos when the input order is stable.
    CELEB_VIDEOS['real'] = sorted(list((CELEB_ROOT/'Celeb-real').glob('*.mp4')) +
                                  list((CELEB_ROOT/'YouTube-real').glob('*.mp4')))
    CELEB_VIDEOS['fake'] = sorted((CELEB_ROOT/'Celeb-synthesis').glob('*.mp4'))
    for k, v in CELEB_VIDEOS.items():
        print(f"  Celeb-DF/{k:10s}: {len(v):4d} videos")


In [None]:
def extract_frames(video_path: str, n_frames: int = N_FRAMES,
                   size: int = FACE_SIZE) -> Optional[np.ndarray]:
    """Extract ``n_frames`` evenly spaced frames from a video.

    Each frame is converted BGR -> RGB and resized (not cropped) to
    ``size`` x ``size``, so both the face and its surroundings are kept.

    Args:
        video_path: path of the video file, as a string.
        n_frames: number of frames to return.
        size: output width and height in pixels.

    Returns:
        Array of shape (n_frames, size, size, 3), or ``None`` when the video
        cannot be opened, reports fewer than ``n_frames`` frames, or fewer
        than half of the requested frames could be read (including none).
    """
    cap = cv2.VideoCapture(video_path)
    try:
        if not cap.isOpened():
            return None
        total = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        if total < n_frames:
            return None

        frames = []
        for idx in np.linspace(0, total - 1, n_frames, dtype=int):
            cap.set(cv2.CAP_PROP_POS_FRAMES, int(idx))
            ret, frame = cap.read()
            if not ret:
                continue   # unreadable frame: skip, padded below
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            # Keep the full frame: face AND background are needed downstream
            frames.append(cv2.resize(frame, (size, size)))
    finally:
        # Release the capture on every path, including exceptions.
        cap.release()

    # `not frames` guards the padding step when nothing could be read.
    if not frames or len(frames) < n_frames // 2:
        return None
    # Pad by repeating the last successfully read frame.
    frames.extend([frames[-1]] * (n_frames - len(frames)))
    return np.stack(frames[:n_frames], axis=0)

def sample(lst, n=N_SAMPLE, seed=SEED):
    """Draw up to ``n`` items from ``lst`` without replacement.

    A private ``random.Random(seed)`` instance is used, so the draw does not
    touch the global RNG state. If ``lst`` has fewer than ``n`` items, all of
    them are returned (in shuffled order).
    """
    k = min(n, len(lst))
    return random.Random(seed).sample(lst, k)

# ─── Define face and background regions ───────────────────────────────────────
# Face: central box spanning the middle half of each axis (where the swapped face lives)
# Background: outer ring around that box (where the original video content remains)
# This split is the key to color inconsistency detection

def get_regions(H, W):
    """Return the bounds of the central face box as ``(cy1, cy2, cx1, cx2)``.

    The box spans rows ``H//4 .. 3*H//4`` and columns ``W//4 .. 3*W//4``
    (end-exclusive when used as slice bounds). Only the face box is
    returned; callers derive the background as everything outside it.
    """
    top, left = H // 4, W // 4
    bottom, right = 3 * H // 4, 3 * W // 4
    return (top, bottom, left, right)

# Smoke test: load the first real FF++ video (if any) and report the regions.
frames = None
if FF_VIDEOS.get('real'):
    frames = extract_frames(str(FF_VIDEOS['real'][0]))

if frames is None:
    print("‚ö†Ô∏è  Loader failed ‚Äî check dataset path")
else:
    H, W = frames.shape[1:3]
    cy1, cy2, cx1, cx2 = get_regions(H, W)
    print(f"‚úÖ Loader OK ‚Äî frames: {frames.shape}")
    face_span = f"[{cy1}:{cy2}, {cx1}:{cx2}]"
    face_size = f"({cy2-cy1}√ó{cx2-cx1} px)"
    print(f"   Face region:       {face_span}  {face_size}")
    print("   Background region: outer ring")


## Section 3 — Test A: Color Statistics Inconsistency

### The Hypothesis
In a deepfake, the swapped face region comes from a different video (different camera,
different lighting session, different color grading) than the background.

Even after compression, the **color distribution** of the face center vs the background
should be more inconsistent in fakes than in real videos.

**Metric:** Wasserstein distance between the color histogram of the face region  
and the color histogram of the background region, averaged across all frames.

- Real video: face and background have similar color stats (same camera, same scene)  
- Deepfake: face has different color stats (different source video)

We also measure the **temporal consistency** of this distance — in fakes it should be stable
(persistent color mismatch), while in real videos it should fluctuate naturally.

In [None]:
def color_inconsistency(frames_rgb: np.ndarray, n_bins: int = 32) -> Dict:
    """
    Measure color-distribution distance between face center and background.

    For every frame and channel, the intensities (scaled to [0, 1]) of the
    central face box and of the surrounding border are binned into ``n_bins``
    equal-width bins. The 1-D Wasserstein distance between the two binned
    distributions is then taken along the intensity axis: bin centers are the
    support, bin counts are the weights. Channel distances are averaged per
    frame.

    Args:
        frames_rgb: array unpacked as (T, H, W, C); values are divided by 255,
            so 0..255 input is assumed (as produced by extract_frames).
        n_bins: number of histogram bins over [0, 1].

    Returns:
        wdist_mean:           mean per-frame distance
        wdist_median:         median per-frame distance
        wdist_std:            std of the per-frame distance
        temporal_consistency: 1 / (wdist_std + 1e-8)
        per_frame:            list of per-frame distances
    """
    T, H, W, C = frames_rgb.shape
    cy1, cy2, cx1, cx2 = get_regions(H, W)

    # Loop-invariant: bin edges, their centers, and the border mask.
    bins = np.linspace(0, 1, n_bins + 1)
    centers = 0.5 * (bins[:-1] + bins[1:])
    is_background = np.ones((H, W), dtype=bool)
    is_background[cy1:cy2, cx1:cx2] = False

    per_frame = []

    for t in range(T):
        frame = frames_rgb[t].astype(np.float32) / 255.0

        face_flat  = frame[cy1:cy2, cx1:cx2].reshape(-1, C)   # center box
        background = frame[is_background]                     # everything else, (N, C)

        channel_dists = []
        for c in range(C):
            face_hist, _ = np.histogram(face_flat[:, c], bins=bins)
            bg_hist,   _ = np.histogram(background[:, c], bins=bins)
            if face_hist.sum() == 0 or bg_hist.sum() == 0:
                # Degenerate (empty) region: no distribution to compare.
                channel_dists.append(np.nan)
                continue
            # Values = bin centers, weights = counts. Passing the histograms
            # as *values* would compare the distributions of bin heights and
            # ignore where on the intensity axis the mass sits.
            wd = wasserstein_distance(centers, centers,
                                      u_weights=face_hist, v_weights=bg_hist)
            channel_dists.append(wd)

        per_frame.append(float(np.mean(channel_dists)))

    per_frame = np.array(per_frame)

    return {
        'wdist_mean':      float(per_frame.mean()),
        'wdist_median':    float(np.median(per_frame)),
        'wdist_std':       float(per_frame.std()),
        # Temporal consistency: low std = persistent mismatch = fake signal
        'temporal_consistency': float(1.0 / (per_frame.std() + 1e-8)),
        'per_frame':       per_frame.tolist(),
    }

# Quick check of Test A on the frames loaded by the loader smoke test.
if frames is not None:
    result = color_inconsistency(frames)
    print("\n".join([
        "‚úÖ color_inconsistency test:",
        f"   wdist_mean={result['wdist_mean']:.5f}",
        f"   wdist_std ={result['wdist_std']:.5f}",
        f"   temporal_consistency={result['temporal_consistency']:.3f}",
    ]))


## Section 4 — Test B: Optical Flow Boundary Inconsistency

### The Hypothesis
In a real video, the face moves coherently with the scene — the optical flow vectors
at the face boundary are smooth and continuous.

In a deepfake, the generated face has slightly different motion than the background it's
composited into. The flow vectors at the **face boundary** should be more discontinuous
in fakes than in real videos.

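**Metric:** Mean flow magnitude discontinuity at the face boundary  
(absolute difference between the mean flow magnitude in a thin ring along the edge of the face region and the mean flow magnitude in the face interior; the interior/background flow ratio is reported alongside it).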

This signal survives compression because optical flow operates on content,
not on pixel-level noise statistics.

In [None]:
def optical_flow_inconsistency(frames_rgb: np.ndarray) -> Dict:
    """
    Compare dense optical-flow magnitude across face interior, face edge and background.

    Farneback flow is computed between each pair of consecutive frames. For
    each pair the mean flow magnitude is taken over three fixed masks: an
    8-px ring along the inner edge of the face box, the face interior inside
    that ring, and everything outside the face box.

    Args:
        frames_rgb: array unpacked as (T, H, W, C), converted with
            COLOR_RGB2GRAY (presumably uint8 RGB as produced by extract_frames).

    Returns:
        boundary_discontinuity_mean / _std: stats of |ring - interior| per pair
        interior_flow_mean:  mean of the per-pair interior magnitude
        exterior_flow_mean:  mean of the per-pair background magnitude
        flow_ratio_mean / _std: stats of interior / (exterior + 1e-8) per pair
        per_frame_interior, per_frame_exterior, per_frame_boundary_disc:
            the per-pair series as lists (length T - 1)
    """
    n_frames, height, width, _ = frames_rgb.shape
    top, bottom, left, right = get_regions(height, width)
    ring = 8   # ring thickness in pixels

    def _empty_mask():
        return np.zeros((height, width), dtype=bool)

    def _masked_mean(values, mask):
        # 0.0 for an empty mask, otherwise the mean over the selected pixels
        return float(values[mask].mean() if mask.any() else 0.0)

    # Ring along the inner edge of the face box: top, bottom, left, right strips
    boundary_mask = _empty_mask()
    edge_strips = (
        (slice(top, top + ring),       slice(left, right)),
        (slice(bottom - ring, bottom), slice(left, right)),
        (slice(top, bottom),           slice(left, left + ring)),
        (slice(top, bottom),           slice(right - ring, right)),
    )
    for rows, cols in edge_strips:
        boundary_mask[rows, cols] = True

    # Face interior: the face box shrunk by the ring thickness on every side
    face_mask = _empty_mask()
    face_mask[top + ring:bottom - ring, left + ring:right - ring] = True

    # Background: every pixel outside the face box
    bg_mask = np.ones((height, width), dtype=bool)
    bg_mask[top:bottom, left:right] = False

    # Optical flow runs on grayscale frames
    gray_frames = [cv2.cvtColor(frame, cv2.COLOR_RGB2GRAY) for frame in frames_rgb]

    ring_series, interior_series, exterior_series = [], [], []
    for prev_gray, next_gray in zip(gray_frames[:-1], gray_frames[1:]):
        flow = cv2.calcOpticalFlowFarneback(
            prev_gray, next_gray, None,
            pyr_scale=0.5, levels=3, winsize=15,
            iterations=3, poly_n=5, poly_sigma=1.2,
            flags=0
        )  # (H, W, 2): per-pixel displacement
        mag = np.sqrt(flow[..., 0]**2 + flow[..., 1]**2)

        ring_series.append(_masked_mean(mag, boundary_mask))
        interior_series.append(_masked_mean(mag, face_mask))
        exterior_series.append(_masked_mean(mag, bg_mask))

    interior = np.array(interior_series)
    exterior = np.array(exterior_series)
    boundary = np.array(ring_series)

    # Discontinuity: how far the edge ring's motion departs from the interior's
    boundary_vs_interior = np.abs(boundary - interior)
    # Ratio > 1 means the face interior moves more than the background
    flow_ratio = interior / (exterior + 1e-8)

    return {
        'boundary_discontinuity_mean': float(boundary_vs_interior.mean()),
        'boundary_discontinuity_std':  float(boundary_vs_interior.std()),
        'interior_flow_mean':          float(interior.mean()),
        'exterior_flow_mean':          float(exterior.mean()),
        'flow_ratio_mean':             float(flow_ratio.mean()),
        'flow_ratio_std':              float(flow_ratio.std()),
        'per_frame_interior':          interior.tolist(),
        'per_frame_exterior':          exterior.tolist(),
        'per_frame_boundary_disc':     boundary_vs_interior.tolist(),
    }

# Quick check of Test B on the frames loaded by the loader smoke test.
if frames is not None:
    result = optical_flow_inconsistency(frames)
    print("\n".join([
        "‚úÖ optical_flow_inconsistency test:",
        f"   boundary_discontinuity={result['boundary_discontinuity_mean']:.5f}",
        f"   flow_ratio={result['flow_ratio_mean']:.4f}",
        f"   interior={result['interior_flow_mean']:.5f}, "
        f"exterior={result['exterior_flow_mean']:.5f}",
    ]))


## Section 5 — Run on FF++ c23

In [None]:
# Plot color (hex) per FF++ class label; the plotting code falls back to
# '#95a5a6' for labels that are not listed here.
COLORS = {
    'real':           '#2ecc71',
    'Deepfakes':      '#e74c3c',
    'Face2Face':      '#e67e22',
    'FaceSwap':       '#9b59b6',
    'NeuralTextures': '#3498db',
}

def analyze_class(video_paths, label, n=N_SAMPLE, verbose=True):
    """Run Test A and Test B on a sample of videos from one class.

    Args:
        video_paths: candidate video paths for the class.
        label: class name stored in every result row and shown in progress lines.
        n: maximum number of videos to sample.
        verbose: print a progress line after every fifth sampled video that
            was processed successfully.

    Returns:
        One dict per video whose frames could be extracted, holding the
        scalar metrics of both tests plus the per-frame series. Videos that
        fail to load are skipped, so the list may be shorter than the sample.
    """
    results = []
    sampled = sample(video_paths, n)
    # Progress denominator: the number actually sampled, which is smaller
    # than `n` when the class has fewer videos.
    total = len(sampled)

    for i, vpath in enumerate(sampled, start=1):
        frames = extract_frames(str(vpath))
        if frames is None:
            continue

        color = color_inconsistency(frames)
        flow  = optical_flow_inconsistency(frames)

        results.append({
            'label':  label,
            'video':  Path(vpath).name,
            # Test A
            'wdist_mean':              color['wdist_mean'],
            'wdist_std':               color['wdist_std'],
            'temporal_consistency':    color['temporal_consistency'],
            'per_frame_wdist':         color['per_frame'],
            # Test B
            'boundary_disc':           flow['boundary_discontinuity_mean'],
            'flow_ratio':              flow['flow_ratio_mean'],
            'flow_ratio_std':          flow['flow_ratio_std'],
            'interior_flow':           flow['interior_flow_mean'],
            'exterior_flow':           flow['exterior_flow_mean'],
            'per_frame_boundary_disc': flow['per_frame_boundary_disc'],
        })

        if verbose and i % 5 == 0:
            r = results[-1]
            print(f"  [{label:15s}] {i:2d}/{total} | "
                  f"wdist={r['wdist_mean']:.5f} | "
                  f"flow_disc={r['boundary_disc']:.5f} | "
                  f"flow_ratio={r['flow_ratio']:.4f}")
    return results

print("=" * 70)
print("RUNNING SEMANTIC SIGNAL TESTS ON FF++ c23")
print("=" * 70)

FF_RESULTS = {}
# Only classes that were actually discovered are run. 'real' is filtered like
# the method names, so a missing FF++ dataset no longer raises KeyError here.
run_order  = [m for m in ['real'] + FF_METHODS if m in FF_VIDEOS]
if not run_order:
    print("  FF++ videos not found - nothing to analyze")

for label in run_order:
    print(f"\n[{label}]")
    FF_RESULTS[label] = analyze_class(FF_VIDEOS[label], label)
    print(f"  ‚Üí {len(FF_RESULTS[label])} videos done")

print("\n‚úÖ FF++ analysis complete")


## Section 6 — Results & Statistics

In [None]:
def get_metric(rd, key):
    """Pull one field out of every result row, grouped by class.

    ``rd`` maps a class label to a list of result dicts; the return value
    maps each label (same order) to the list of ``row[key]`` values.
    """
    per_group = {}
    for group, rows in rd.items():
        per_group[group] = [row[key] for row in rows]
    return per_group

def boxplot_row(ax, data_dict, title, ylabel, colors=COLORS):
    """Draw one boxplot panel on ``ax``: one colored box per group.

    ``data_dict`` maps group label -> list of values. Boxes are filled with
    ``colors[label]`` (grey fallback) and annotated with their median.
    """
    groups = list(data_dict)
    data = list(data_dict.values())

    bp = ax.boxplot(data, patch_artist=True,
                    medianprops={'color': 'black', 'linewidth': 2.5})
    for box, group in zip(bp['boxes'], groups):
        box.set_facecolor(colors.get(group, '#95a5a6'))
        box.set_alpha(0.75)

    tick_labels = [name.replace('NeuralTextures', 'NeuralTex.') for name in groups]
    ax.set_xticklabels(tick_labels, rotation=30, ha='right')
    ax.set_ylabel(ylabel)
    ax.set_title(title, fontweight='bold')
    ax.grid(True, alpha=0.3)

    # Boxes sit at x = 1, 2, ...; write each median just above its line
    for position, vals in enumerate(data, start=1):
        med = np.median(vals)
        ax.text(position, med, f'{med:.4f}', ha='center', va='bottom',
                fontsize=8, fontweight='bold')

# ─── Figure 1: Test A — Color inconsistency ───────────────────────────────────
# Three side-by-side boxplot panels, one per Test A metric, each showing one
# box per FF++ class in FF_RESULTS. The figure is written to PLOTS_DIR.
fig, axes = plt.subplots(1, 3, figsize=(18, 6))
fig.suptitle('Test A: Color Statistics Inconsistency (Face vs Background)\n'
             'Higher Wasserstein distance = more color mismatch = fake?',
             fontsize=13, fontweight='bold')

# Parallel lists: result key, panel title, y-axis label.
for ax, key, title, ylabel in zip(
    axes,
    ['wdist_mean', 'wdist_std', 'temporal_consistency'],
    ['W-Distance (mean)', 'W-Distance (std across frames)', 'Temporal Consistency (1/std)'],
    ['Wasserstein distance', 'Std', '1/std (higher = more stable)']
):
    boxplot_row(ax, get_metric(FF_RESULTS, key), title, ylabel)

plt.tight_layout()
plt.savefig(PLOTS_DIR / 'test_a_color.png', dpi=150, bbox_inches='tight')
plt.show()
print("‚úÖ test_a_color.png")


In [None]:
# ─── Figure 2: Test B — Optical flow ──────────────────────────────────────────
# Three side-by-side boxplot panels, one per Test B metric, each showing one
# box per FF++ class in FF_RESULTS. The figure is written to PLOTS_DIR.
fig, axes = plt.subplots(1, 3, figsize=(18, 6))
fig.suptitle('Test B: Optical Flow Boundary Inconsistency\n'
             'Higher discontinuity at face boundary = face moving differently from background?',
             fontsize=13, fontweight='bold')

# Parallel lists: result key, panel title, y-axis label.
for ax, key, title, ylabel in zip(
    axes,
    ['boundary_disc', 'flow_ratio', 'flow_ratio_std'],
    ['Boundary Discontinuity', 'Flow Ratio (interior/exterior)', 'Flow Ratio Std'],
    ['|boundary - interior| flow', 'Ratio', 'Std across frames']
):
    boxplot_row(ax, get_metric(FF_RESULTS, key), title, ylabel)

plt.tight_layout()
plt.savefig(PLOTS_DIR / 'test_b_flow.png', dpi=150, bbox_inches='tight')
plt.show()
print("‚úÖ test_b_flow.png")


In [None]:
# ─── Statistics table ─────────────────────────────────────────────────────────
# Per metric: median / mean of every class, the median relative to the real
# class, and a two-sided Mann-Whitney U test of each fake class against real.
rule = "=" * 80
print("\n" + rule)
print("STATISTICAL RESULTS ‚Äî FF++ c23")
print(rule)

metrics = [
    ('wdist_mean',           'Test A ‚Äî W-Distance Mean'),
    ('temporal_consistency', 'Test A ‚Äî Temporal Consistency'),
    ('boundary_disc',        'Test B ‚Äî Boundary Discontinuity'),
    ('flow_ratio',           'Test B ‚Äî Flow Ratio (interior/exterior)'),
]

# The column header and separator do not depend on the metric.
table_header = f"  {'Class':<18} {'Median':>9} {'Mean':>9} {'vs Real':>9} {'p-value':>10} {'Signal?':>10}"
table_rule   = f"  {'-'*68}"

for metric_key, metric_name in metrics:
    real_vals = [r[metric_key] for r in FF_RESULTS.get('real', [])]
    # Without a real class, ratios are reported against 1.0
    real_med  = 1.0 if not real_vals else np.median(real_vals)
    denom     = max(real_med, 1e-10)

    print(f"\n{metric_name}:")
    print(table_header)
    print(table_rule)

    for label, results in FF_RESULTS.items():
        vals   = [r[metric_key] for r in results]
        median, mean = np.median(vals), np.mean(vals)
        ratio  = median / denom

        # The real row, a missing real class, or a single-sample class gets
        # the placeholder instead of a test.
        testable = label != 'real' and bool(real_vals) and len(vals) > 1
        if not testable:
            p, sig = 1.0, '‚Äî'
        else:
            p = stats.mannwhitneyu(real_vals, vals, alternative='two-sided')[1]
            if p < 0.05:
                sig = '‚úÖ YES'
            elif p < 0.20:
                sig = '‚ö†Ô∏è  WEAK'
            else:
                sig = '‚ùå NO'

        print(f"  {label:<18} {median:>9.5f} {mean:>9.5f} {ratio:>9.2f}x "
              f"{p:>10.4f} {sig:>10}")


In [None]:
# ─── Figure 3: Temporal trajectories (per-frame wdist) ────────────────────────
# One panel per class ('real' plus up to three FF++ methods): the per-frame
# color distance of up to 8 videos, with their mean +/- std overlaid.
methods_show = ['real'] + [m for m in FF_METHODS if m in FF_RESULTS][:3]

fig, axes = plt.subplots(1, len(methods_show), figsize=(5*len(methods_show), 5))
# With a single panel, subplots() returns a bare Axes rather than an array;
# normalize so that axes[col] works for any panel count.
axes = np.atleast_1d(axes)
fig.suptitle('Test A: Temporal Profile of Color Distance\n'
             'Real: fluctuates randomly | Fake: persistently elevated?',
             fontsize=13, fontweight='bold')

for col, method in enumerate(methods_show):
    ax  = axes[col]
    res = FF_RESULTS.get(method, [])[:8]
    color = COLORS.get(method, '#95a5a6')

    # Faint line per video
    all_trajs = []
    for r in res:
        traj = np.array(r['per_frame_wdist'])
        ax.plot(traj, alpha=0.3, linewidth=1, color=color)
        all_trajs.append(traj)

    # Bold mean with a +/- 1 std band, truncated to the shortest trajectory
    if all_trajs:
        min_len = min(len(t) for t in all_trajs)
        stacked = np.stack([t[:min_len] for t in all_trajs])
        mean_t  = stacked.mean(axis=0)
        std_t   = stacked.std(axis=0)
        x = np.arange(min_len)
        ax.plot(mean_t, color='black', linewidth=2.5, label='Mean')
        ax.fill_between(x, mean_t-std_t, mean_t+std_t, alpha=0.2, color='black')

    # Title figures: mean of per-video means, and mean of per-video stds
    overall_mean = np.mean([np.mean(r['per_frame_wdist']) for r in res]) if res else 0
    variation    = np.mean([np.std(r['per_frame_wdist'])  for r in res]) if res else 0
    ax.set_title(f'{method}\nmean={overall_mean:.4f}, var={variation:.4f}',
                 color=color, fontweight='bold')
    ax.set_xlabel('Frame index'); ax.set_ylabel('W-Distance (color)')
    ax.grid(True, alpha=0.3)

plt.tight_layout()
plt.savefig(PLOTS_DIR / 'test_a_temporal.png', dpi=150, bbox_inches='tight')
plt.show()
print("‚úÖ test_a_temporal.png")


## Section 7 — Cross-Dataset Validation: Celeb-DF

In [None]:
# Run both tests on Celeb-DF, keyed 'CelebDF_real' / 'CelebDF_fake'.
CELEB_RESULTS = {}

if not CELEB_VIDEOS:
    print("‚ö†Ô∏è  Celeb-DF not found")
else:
    print("=" * 70)
    print("RUNNING ON CELEB-DF v2")
    print("=" * 70)
    for split in ['real', 'fake']:
        if not CELEB_VIDEOS.get(split):
            continue   # split missing or empty
        tag = f'CelebDF_{split}'
        print(f"\n[{tag}]")
        CELEB_RESULTS[tag] = analyze_class(CELEB_VIDEOS[split], tag)
        print(f"  ‚Üí {len(CELEB_RESULTS[tag])} done")


In [None]:
# Cross-dataset figure: FF++ (real, Deepfakes) next to Celeb-DF (real, fake)
# for two Test A and two Test B metrics.
if CELEB_RESULTS:
    # Keys must match the group labels used in `compare` below; otherwise the
    # lookup misses and the Celeb-DF boxes fall back to the grey default.
    CELEB_COLORS = {'CDF_real':'#27ae60', 'CDF_fake':'#c0392b',
                    'FF_real': COLORS['real'], 'FF_Dfakes': COLORS['Deepfakes']}

    compare = {
        'FF_real':  FF_RESULTS.get('real', []),
        'FF_Dfakes':FF_RESULTS.get('Deepfakes', []),
        'CDF_real': CELEB_RESULTS.get('CelebDF_real', []),
        'CDF_fake': CELEB_RESULTS.get('CelebDF_fake', []),
    }

    fig, axes = plt.subplots(1, 4, figsize=(22, 6))
    fig.suptitle('Cross-Dataset Validation ‚Äî FF++ c23 vs Celeb-DF\n'
                 'Does color/flow signal transfer across datasets?',
                 fontsize=13, fontweight='bold')

    for ax_idx, (key, title) in enumerate([
        ('wdist_mean',    'Test A: W-Distance Mean'),
        ('temporal_consistency', 'Test A: Temporal Consistency'),
        ('boundary_disc', 'Test B: Boundary Discontinuity'),
        ('flow_ratio',    'Test B: Flow Ratio'),
    ]):
        ax = axes[ax_idx]
        # Groups without any results are dropped from the panel
        data_d = {k: [r[key] for r in v] for k, v in compare.items() if v}

        groups = list(data_d.keys())
        data   = [data_d[g] for g in groups]
        cols   = [CELEB_COLORS.get(g, '#95a5a6') for g in groups]

        bp = ax.boxplot(data, patch_artist=True,
                        medianprops=dict(color='black', linewidth=2))
        for patch, c in zip(bp['boxes'], cols):
            patch.set_facecolor(c); patch.set_alpha(0.75)
        ax.set_xticklabels(groups, rotation=30, ha='right', fontsize=9)
        ax.set_title(title, fontweight='bold')
        ax.grid(True, alpha=0.3)
        # Separator between the two FF++ boxes and the two Celeb-DF boxes
        # (positioned for the full four-group layout)
        ax.axvline(2.5, color='gray', linestyle='--', alpha=0.6)

        for i, (g, vals) in enumerate(zip(groups, data)):
            ax.text(i+1, np.median(vals), f'{np.median(vals):.4f}',
                    ha='center', va='bottom', fontsize=8)

    plt.tight_layout()
    plt.savefig(PLOTS_DIR / 'cross_dataset.png', dpi=150, bbox_inches='tight')
    plt.show()
    print("‚úÖ cross_dataset.png")


## Section 8 — Go/No-Go Decision

In [None]:
print("=" * 70)
print("SEMANTIC SIGNAL TEST ‚Äî GO / NO-GO DECISION")
print("=" * 70)

metrics_to_check = [
    ('wdist_mean',           'Test A: Color W-Distance'),
    ('temporal_consistency', 'Test A: Temporal Consistency'),
    ('boundary_disc',        'Test B: Boundary Discontinuity'),
    ('flow_ratio',           'Test B: Flow Ratio'),
]

# Real-class values of every metric, collected once
real_vals_all = {k: [r[k] for r in FF_RESULTS.get('real', [])]
                 for k, _ in metrics_to_check}

# A metric counts as a signal when at least one FF++ method differs from
# real at p < 0.05 (two-sided Mann-Whitney U, no multiple-test correction).
signals_found = []
for metric_key, metric_name in metrics_to_check:
    real_vals  = real_vals_all[metric_key]
    n_sig = 0
    details = []
    for label in FF_METHODS:
        vals = [r[metric_key] for r in FF_RESULTS.get(label, [])]
        if not vals or not real_vals:
            continue
        _, p = stats.mannwhitneyu(real_vals, vals, alternative='two-sided')
        ratio = np.median(vals) / max(np.median(real_vals), 1e-10)
        if p < 0.05:
            n_sig += 1
        details.append(f"{label}={ratio:.2f}x(p={p:.3f})")

    verdict = 'üü¢ SIGNAL' if n_sig >= 2 else ('üü° WEAK' if n_sig >= 1 else 'üî¥ NONE')
    if n_sig >= 1:
        signals_found.append(metric_key)
    print(f"\n{metric_name}")
    print(f"  Significant: {n_sig}/{len(FF_METHODS)} methods ‚Üí {verdict}")
    for d in details:
        print(f"  {d}")

print("\n" + "=" * 70)
print("FINAL VERDICT")
print("=" * 70)

# Two or more signalling metrics -> GO; exactly one -> weak; none -> no signal
if len(signals_found) >= 2:
    print("üü¢ GO ‚Äî Content-level signal found")
    print(f"   Signals: {signals_found}")
    print("   V8.0 plan: add these as explicit features on top of pretrained backbone")
elif len(signals_found) >= 1:
    print("üü° WEAK SIGNAL ‚Äî Partial evidence")
    print(f"   Signal: {signals_found}")
    print("   V8.0 plan: include as auxiliary feature, not primary signal")
else:
    print("üî¥ NO SIGNAL ‚Äî Content-level analysis insufficient on FF++ c23")
    print("   Conclusion: semantic signal requires pretrained backbone")
    print("   Next step: Train EfficientNet-B0 baseline (Step 2)")
    print("   The pretrained backbone IS the feature extractor ‚Äî no handcrafted signal needed")

# Save per-metric, per-class median and mean. Classes with no processed
# videos are left out: their median/mean would be NaN, which json.dump
# writes as a bare NaN token that strict JSON parsers reject.
summary = {}
for metric_key, _ in metrics_to_check:
    summary[metric_key] = {
        label: {
            'median': float(np.median([r[metric_key] for r in res])),
            'mean':   float(np.mean([r[metric_key] for r in res])),
        }
        for label, res in FF_RESULTS.items()
        if res
    }

with open(OUTPUT_DIR / 'semantic_results.json', 'w') as f:
    json.dump(summary, f, indent=2)
print(f"\n‚úÖ Results saved ‚Üí {OUTPUT_DIR / 'semantic_results.json'}")
print(f"‚úÖ Plots saved   ‚Üí {PLOTS_DIR}")
