In [None]:
# Mount Google Drive at /content/drive; later cells read model weights and
# test media from paths under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
!pip install --upgrade numpy scipy torchvision

In [None]:
!pip install "numpy<2.0"

In [None]:
# Pin numpy to exactly 1.26.4 (force-reinstalling over whatever is already installed) to avoid version conflicts
%pip install numpy==1.26.4 --force-reinstall

In [None]:
# Final Consolidated Pipeline - GenConViT Ensemble Model (AI-Only Scoring)
# This script uses a dual autoencoder (AE and VAE) model for deepfake
# detection. The final verdict is based exclusively on the AI model's
# prediction, with forensic analysis provided for context only.

# --- 1. SETUP AND INSTALLATIONS ---
import sys
import subprocess
import importlib
import os
import json
import warnings
from datetime import datetime
from io import BytesIO
from collections import OrderedDict

def install_packages(packages):
    """Upgrade pip, then pip-install every requirement listed in ``packages``.

    Args:
        packages: list of pip requirement strings (e.g. ``"numpy<2.0"``).

    Both pip invocations run with the current interpreter
    (``sys.executable -m pip``). If either one exits with a non-zero status,
    an error is printed and the process exits with status 1.
    """
    pip_install = [sys.executable, "-m", "pip", "install", "--quiet"]

    print("Upgrading pip...")
    try:
        subprocess.run(pip_install + ["--upgrade", "pip"], check=True)

        requirement_list = ', '.join(packages)
        print(f"Installing/Updating packages: {requirement_list}")
        subprocess.run(pip_install + ["--no-cache-dir"] + packages, check=True)

        # Make freshly installed modules visible to the import system.
        importlib.invalidate_caches()
    except subprocess.CalledProcessError as install_error:
        print(f"ERROR: Failed to install packages. {install_error}")
        sys.exit(1)
    else:
        print("All required packages are installed.")

print("Checking for required packages...")
# A comprehensive list of dependencies for all forensic analyses.
# numpy is constrained to the 1.x series ("numpy<2.0"); every other package is
# installed unpinned. This runs before the third-party imports below so that
# they resolve against the freshly installed packages.
install_packages([
    "numpy<2.0", "torch", "torchvision", "pandas", "opencv-python",
    "Pillow", "moviepy", "mediapipe", "scikit-image", "scipy",
    "exifread", "librosa", "dlib", "face_alignment"
])

# --- Imports (post-install) ---
import torch
import torch.nn as nn
import torchvision.transforms as transforms
import torchvision.transforms.functional as F
import numpy as np
import cv2
import exifread
import librosa
import dlib
import face_alignment
import pandas as pd
from PIL import Image, ImageChops
from moviepy.editor import VideoFileClip
from scipy.signal import welch
from scipy.spatial.distance import euclidean

# Silence UserWarning-category warnings for the rest of the session.
warnings.filterwarnings("ignore", category=UserWarning)
# Ensure the device is set correctly for PyTorch operations.
# DEVICE is CUDA when available, otherwise CPU; model loading and inference
# below move tensors and weights onto it.
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {DEVICE}")


# --- 2. GENCONVIT MODEL ARCHITECTURE ---
# Defines the neural network components for the GenConViT autoencoders.

class ED_Encoder(nn.Module):
    """Convolutional encoder of the GenConViT autoencoders.

    Five stages of ``Conv2d(3x3, stride 1, padding 1) -> ReLU -> MaxPool2d(2, 2)``
    take the channel count through 3 -> 16 -> 32 -> 64 -> 128 -> 256 while
    halving the spatial size at every stage (32x reduction overall).
    """
    def __init__(self):
        super().__init__()
        channel_plan = [3, 16, 32, 64, 128, 256]
        stages = []
        # Layers are appended in the same order as a hand-written Sequential,
        # so state_dict keys stay features.0, features.3, features.6, ...
        for in_channels, out_channels in zip(channel_plan[:-1], channel_plan[1:]):
            stages.append(nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=1, padding=1))
            stages.append(nn.ReLU(inplace=True))
            stages.append(nn.MaxPool2d(kernel_size=2, stride=2))
        self.features = nn.Sequential(*stages)

    def forward(self, x):
        """Return the 256-channel feature map for the image batch ``x``."""
        return self.features(x)

class ED_Decoder_Config1(nn.Module):
    """Decoder used for the AE model.

    Five ``ConvTranspose2d(kernel 2, stride 2) -> ReLU`` stages take the channel
    count through 256 -> 128 -> 64 -> 32 -> 16 -> 3 and double the spatial size
    at every stage (32x overall, i.e. 7x7 features back to 224x224).
    """
    def __init__(self):
        super().__init__()
        channel_plan = [256, 128, 64, 32, 16, 3]
        stages = []
        # Same layer order as a hand-written Sequential: transposed convs sit
        # at even indices (features.0, features.2, ...).
        for in_channels, out_channels in zip(channel_plan[:-1], channel_plan[1:]):
            stages.append(nn.ConvTranspose2d(in_channels, out_channels, kernel_size=2, stride=2))
            stages.append(nn.ReLU(inplace=True))
        self.features = nn.Sequential(*stages)

    def forward(self, x):
        """Return the 3-channel reconstruction for the feature map ``x``."""
        return self.features(x)

class ED_Decoder_Config2(nn.Module):
    """Decoder used for the VAE model.

    Four ``ConvTranspose2d(kernel 2, stride 2) -> ReLU`` stages take the channel
    count through 256 -> 64 -> 32 -> 16 -> 3 and double the spatial size at
    every stage (16x overall, i.e. 7x7 features to 112x112 - half resolution).
    """
    def __init__(self):
        super().__init__()
        channel_plan = [256, 64, 32, 16, 3]
        stages = []
        # Same layer order as a hand-written Sequential: transposed convs sit
        # at even indices (features.0, features.2, ...).
        for in_channels, out_channels in zip(channel_plan[:-1], channel_plan[1:]):
            stages.append(nn.ConvTranspose2d(in_channels, out_channels, kernel_size=2, stride=2))
            stages.append(nn.ReLU(inplace=True))
        self.features = nn.Sequential(*stages)

    def forward(self, x):
        """Return the 3-channel reconstruction for the feature map ``x``."""
        return self.features(x)

class GenConViT(nn.Module):
    """Encoder/decoder pair used as one member of the reconstruction ensemble.

    Args:
        decoder_config: ``1`` selects ``ED_Decoder_Config1``; any other value
            selects ``ED_Decoder_Config2``.
    """
    def __init__(self, decoder_config=1):
        super().__init__()
        self.encoder = ED_Encoder()
        if decoder_config == 1:
            self.decoder = ED_Decoder_Config1()
        else:
            self.decoder = ED_Decoder_Config2()

    def forward(self, images):
        """Encode ``images`` and return the decoder's reconstruction."""
        latent = self.encoder(images)
        return self.decoder(latent)


# --- 3. CONFIGURE DIRECTORIES & LOAD ASSETS ---
# Mount Google Drive when running in Colab and /content/drive is not present
# yet; outside Colab the import fails and the notebook falls back to local
# directories.
try:
    from google.colab import drive
    if not os.path.exists('/content/drive'):
        print("Mounting Google Drive...")
        drive.mount('/content/drive')
        print("Drive mounted.")
except ImportError:
    print("Not in a Colab environment. Using local directories.")
    pass

# Reports live on Drive when it is mounted, otherwise under the user's home directory.
BASE_DIR = '/content/drive/MyDrive/AuraVerity-Reports' if os.path.exists('/content/drive/MyDrive') else os.path.expanduser('~/AuraVerity-Reports')
REPORTS_DIR = os.path.join(BASE_DIR, 'reports')  # JSON reports written by run_ultimate_pipeline
EXPLAIN_DIR = os.path.join(BASE_DIR, 'explainability_outputs')  # reconstruction error maps
TEMP_DIR = os.path.join(BASE_DIR, 'temp')  # scratch files (extracted frames / audio)
# Create all three output directories up front (no error if they already exist).
for path in [REPORTS_DIR, EXPLAIN_DIR, TEMP_DIR]:
    os.makedirs(path, exist_ok=True)
print(f"\nProject directories configured in: {BASE_DIR}")

# Path of dlib's 68-point facial landmark model, relative to the working directory.
DLIB_MODEL_PATH = "shape_predictor_68_face_landmarks.dat"
if not os.path.exists(DLIB_MODEL_PATH):
    print("Downloading dlib facial landmark predictor...")
    dlib_archive = DLIB_MODEL_PATH + ".bz2"
    # subprocess.run(check=True) instead of os.system: a failed download or
    # decompression is reported here instead of being announced as a success
    # and only surfacing later when dlib cannot open the model file.
    try:
        subprocess.run(["wget", "-q", "http://dlib.net/files/" + dlib_archive], check=True)
        subprocess.run(["bzip2", "-df", dlib_archive], check=True)
    except (subprocess.CalledProcessError, FileNotFoundError) as e:
        # FileNotFoundError covers wget/bzip2 not being installed.
        print(f"ERROR: Could not download the dlib landmark model. {e}")
        sys.exit(1)
    print("Dlib model downloaded.")
# Shared face detector and landmark predictor used by the forensic analyses.
face_detector = dlib.get_frontal_face_detector()
landmark_predictor = dlib.shape_predictor(DLIB_MODEL_PATH)


# --- 4. AI MODEL LOADING & PREDICTION ---
def load_genconvit_models(ed_path, vae_path):
    """Load the AE ("ed") and VAE GenConViT models from checkpoint files.

    Args:
        ed_path: checkpoint for the AE model (built with decoder config 1).
        vae_path: checkpoint for the VAE model (built with decoder config 2).

    Returns:
        dict with keys ``'ed'`` and ``'vae'`` mapping to models moved to
        ``DEVICE`` and switched to eval mode.

    Raises:
        FileNotFoundError: if either checkpoint file does not exist.

    Weights are loaded with ``strict=False``, so a checkpoint whose keys do not
    match the architecture does not raise. Any parameters the checkpoint did
    not supply are reported with a WARNING, because they keep their random
    initialisation and would make the reconstruction error meaningless.
    """
    loaded_models = {}

    def _load_single_model(weight_path, decoder_config):
        if not os.path.exists(weight_path):
            raise FileNotFoundError(f"Model file not found: {weight_path}")
        model = GenConViT(decoder_config=decoder_config)
        # NOTE(security): torch.load unpickles the file; only load checkpoints
        # from a trusted source.
        checkpoint = torch.load(weight_path, map_location=DEVICE)
        if isinstance(checkpoint, nn.Module):
            # A whole pickled model rather than a dict of weights.
            state_dict = checkpoint.state_dict()
        else:
            # Either {'state_dict': {...}, ...} or a bare state dict.
            state_dict = checkpoint.get('state_dict', checkpoint)

        # Drop wrapper prefixes (e.g. added by DataParallel) from the key names.
        new_state_dict = OrderedDict()
        for k, v in state_dict.items():
            name = k.replace('module.', '').replace('model.', '')
            new_state_dict[name] = v

        load_result = model.load_state_dict(new_state_dict, strict=False)
        missing = list(load_result.missing_keys)
        unexpected = list(load_result.unexpected_keys)
        if missing:
            preview = ', '.join(missing[:5])
            print(f"WARNING: {len(missing)} model parameter(s) were not found in {weight_path} "
                  f"and keep their random initialisation (e.g. {preview}).")
        if unexpected:
            print(f"NOTE: ignored {len(unexpected)} checkpoint entr(y/ies) in {weight_path} "
                  f"with no matching layer in this architecture.")

        model.to(DEVICE)
        model.eval()
        return model

    print("\nLoading GenConViT-ED model...")
    loaded_models['ed'] = _load_single_model(ed_path, decoder_config=1)
    print("Loading GenConViT-VAE model...")
    loaded_models['vae'] = _load_single_model(vae_path, decoder_config=2)

    return loaded_models

# --- Model Paths (USER-PROVIDED) ---
# Checkpoint locations on the mounted Google Drive; edit these to point at
# your own copies of the weights.
ED_MODEL_PATH = '/content/drive/MyDrive/ThruthChain-v2-Models/weights/genconvit_ed_inference.pth'
VAE_MODEL_PATH = '/content/drive/MyDrive/ThruthChain-v2-Models/weights/genconvit_vae_inference.pth'

# Load both models once at module level; `loaded_models` is passed to
# run_ultimate_pipeline below. Only a missing checkpoint file is handled
# here (message + exit); any other loading error propagates.
try:
    loaded_models = load_genconvit_models(ED_MODEL_PATH, VAE_MODEL_PATH)
    print("Successfully loaded GenConViT ensemble models.")
except FileNotFoundError as e:
    print(f"ERROR: Could not load models. {e}")
    sys.exit(1)


def predict_with_genconvit_ensemble(image_path, models):
    """Score an image by how badly the AE and VAE models reconstruct it.

    The image is resized to 224x224 and normalised, both models reconstruct
    it (the VAE output is bilinearly resized to the input size), and the two
    mean-squared errors are averaged. The average is mapped to a probability
    with ``1 - exp(-0.5 * avg_mse)``.

    Args:
        image_path: path of the image to score.
        models: dict with callable models under ``'ed'`` and ``'vae'``.

    Returns:
        dict with ``fake_probability``, ``avg_mse`` and the tensors
        ``original_tensor``, ``recon_ed``, ``recon_vae``; or ``None`` if any
        step raises (the error is printed).
    """
    preprocess = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])
    # Steepness of the error -> probability mapping.
    k = 0.5

    try:
        rgb_image = Image.open(image_path).convert('RGB')
        batch = preprocess(rgb_image).unsqueeze(0).to(DEVICE)

        with torch.no_grad():
            ae_output = models['ed'](batch)
            # The VAE decoder outputs a smaller image; bring it to the input size.
            vae_output = F.resize(
                models['vae'](batch),
                batch.shape[-2:],
                interpolation=transforms.InterpolationMode.BILINEAR,
            )

            criterion = nn.MSELoss()
            ae_error = criterion(ae_output, batch).item()
            vae_error = criterion(vae_output, batch).item()

        mean_error = (ae_error + vae_error) / 2.0
        probability = 1 - np.exp(-k * mean_error)

        result = {}
        result["fake_probability"] = float(probability)
        result["avg_mse"] = float(mean_error)
        result["original_tensor"] = batch
        result["recon_ed"] = ae_output
        result["recon_vae"] = vae_output
        return result

    except Exception as e:
        print(f"Prediction error: {e}")
        return None

def generate_error_map(pred_data, output_path):
    """Save a heatmap of the reconstruction error blended over the input image.

    Args:
        pred_data: dict with tensors under ``original_tensor``, ``recon_ed``
            and ``recon_vae`` (as returned by predict_with_genconvit_ensemble;
            assumed to be (1, 3, H, W) - TODO confirm for other callers).
        output_path: file path the blended image is written to.

    Returns:
        ``{"status": "performed", "output_path": ...}`` on success, otherwise
        ``{"status": "error", "message": ...}`` - including when OpenCV
        reports that the file could not be written.
    """
    print("-> Generating reconstruction error map...")
    try:
        mean = np.array([0.485, 0.456, 0.406])
        std = np.array([0.229, 0.224, 0.225])

        def _to_display(tensor):
            # Drop the batch axis, go channels-last and undo the Normalize()
            # applied at prediction time, clipping into [0, 1].
            chw = tensor.detach().squeeze(0).cpu().numpy()
            return np.clip(std * chw.transpose(1, 2, 0) + mean, 0, 1)

        original = _to_display(pred_data['original_tensor'])
        recon_ed = _to_display(pred_data['recon_ed'])
        recon_vae = _to_display(pred_data['recon_vae'])

        avg_error = (np.abs(original - recon_ed) + np.abs(original - recon_vae)) / 2.0
        error_gray = np.mean(avg_error, axis=2)

        # Scale to 0-255 by the peak error. A perfectly reconstructed image has
        # peak 0; dividing by it would produce NaNs, so emit an all-zero map.
        peak = float(np.max(error_gray))
        if peak > 0:
            error_map = (error_gray / peak * 255).astype(np.uint8)
        else:
            error_map = np.zeros(error_gray.shape, dtype=np.uint8)
        heatmap = cv2.applyColorMap(error_map, cv2.COLORMAP_HOT)

        original_uint8 = (original * 255).astype(np.uint8)
        superimposed = cv2.addWeighted(heatmap, 0.6, cv2.cvtColor(original_uint8, cv2.COLOR_RGB2BGR), 0.4, 0)

        # cv2.imwrite signals failure through its return value, not an exception.
        if not cv2.imwrite(output_path, superimposed):
            return {"status": "error", "message": f"Could not write error map to {output_path}"}
        return {"status": "performed", "output_path": output_path}
    except Exception as e:
        return {"status": "error", "message": str(e)}


# --- 5. FORENSIC ANALYSIS FUNCTIONS ---
# These functions are independent of the core AI model; their findings are reported as context only.

def analyze_prnu(image_path):
    """Measure the variance of the noise residual left by a 3x3 median filter.

    Args:
        image_path: path of the image, read as grayscale.

    Returns:
        ``{"status": "performed", "residual_variance": float}``, or
        ``{"status": "error", "message": ...}`` if the image cannot be read
        or any step raises.
    """
    print("-> Running PRNU Analysis...")
    try:
        gray = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
        if gray is None:
            return {"status": "error", "message": "Cannot read image"}

        # Residual = image minus its denoised version.
        smoothed = cv2.medianBlur(gray, 3)
        noise = gray.astype(float) - smoothed.astype(float)
        variance = float(np.var(noise))
        return {"status": "performed", "residual_variance": variance}
    except Exception as e:
        return {"status": "error", "message": str(e)}

def analyze_eye_reflections(image_path):
    """Compare the grayscale intensity histograms of the two eye regions.

    The first detected face is used. Each eye region is the bounding box of
    landmarks 36-41 and 42-47 respectively, clamped to the image so that
    landmarks predicted outside the frame cannot produce a wrapped-around or
    empty crop.

    Args:
        image_path: path of the image to analyse.

    Returns:
        - ``{"status": "error", "message": ...}`` if the image cannot be read
          or any step raises;
        - ``{"status": "performed", "face_detected": False}`` if no face is found;
        - ``{..., "face_detected": True, "roi_quality": "low"}`` if either eye
          region is under 6 pixels in width or height;
        - ``{..., "face_detected": True, "reflection_correlation": float}``
          with the histogram correlation rounded to 4 decimals.
    """
    print("-> Running Eye Reflection Consistency Analysis...")
    try:
        img = cv2.imread(image_path)
        # cv2.imread returns None instead of raising; report it the same way
        # analyze_prnu does.
        if img is None: return {"status": "error", "message": "Cannot read image"}
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        faces = face_detector(gray)
        if not faces: return {"status": "performed", "face_detected": False}
        landmarks = landmark_predictor(gray, faces[0])
        height, width = gray.shape[:2]

        def _eye_region(landmark_ids):
            # Bounding box of the given landmarks, clamped to the image bounds.
            pts = np.array([(landmarks.part(i).x, landmarks.part(i).y) for i in landmark_ids])
            x, y, w, h = cv2.boundingRect(pts)
            x0, y0 = max(x, 0), max(y, 0)
            x1, y1 = min(x + w, width), min(y + h, height)
            return gray[y0:max(y1, y0), x0:max(x1, x0)]

        left_eye = _eye_region(range(36, 42))
        right_eye = _eye_region(range(42, 48))
        if min(left_eye.shape[0], left_eye.shape[1], right_eye.shape[0], right_eye.shape[1]) < 6:
            return {"status": "performed", "face_detected": True, "roi_quality": "low"}
        left_hist = cv2.calcHist([left_eye], [0], None, [256], [0, 256])
        right_hist = cv2.calcHist([right_eye], [0], None, [256], [0, 256])
        corr = cv2.compareHist(left_hist, right_hist, cv2.HISTCMP_CORREL)
        return {"status": "performed", "face_detected": True, "reflection_correlation": round(float(corr), 4)}
    except Exception as e: return {"status": "error", "message": str(e)}

def analyze_pulse(video_path):
    """Estimate a pulse rate from per-frame colour changes in a face region.

    Up to the first 8 seconds of video are scanned. For every frame with a
    detected face, the mean of channel index 1 (green in OpenCV's BGR order)
    is taken over the bounding box of landmarks 19, 24 and 27. The dominant
    frequency of that series (Welch PSD) is converted to beats per minute.

    Args:
        video_path: path of the video to analyse.

    Returns:
        - ``{"status": "not_enough_data"}`` if fewer than ``fps`` samples
          were collected;
        - ``{"status": "performed", "estimated_bpm": float, "is_plausible": bool}``
          where plausible means 40 <= bpm <= 180;
        - ``{"status": "error", "message": ...}`` if any step raises.
    """
    print("-> Running Physiological Signal (Pulse) Analysis...")
    cap = None
    try:
        cap = cv2.VideoCapture(video_path)
        fps = cap.get(cv2.CAP_PROP_FPS) or 30
        analysis_window = fps * 8
        reported_frames = cap.get(cv2.CAP_PROP_FRAME_COUNT)
        # The reported frame count may be non-positive when the container does
        # not provide one; in that case fall back to the 8-second window and
        # rely on read() failing at end of stream.
        if reported_frames > 0:
            max_frames = int(min(analysis_window, reported_frames))
        else:
            max_frames = int(analysis_window)

        means = []
        for _ in range(max_frames):
            ret, frame = cap.read()
            if not ret: break
            gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            faces = face_detector(gray)
            if not faces: continue
            lm = landmark_predictor(gray, faces[0])
            pts = np.array([(lm.part(i).x, lm.part(i).y) for i in [19, 24, 27]])
            (x, y, w, h) = cv2.boundingRect(pts)
            # Landmarks can fall outside the frame; a negative slice start
            # would wrap around and sample the wrong pixels, so clamp to 0.
            x0, y0 = max(x, 0), max(y, 0)
            roi = frame[y0:max(y + h, y0), x0:max(x + w, x0), 1]
            if roi.size > 0: means.append(np.mean(roi))

        if len(means) < fps: return {"status": "not_enough_data"}
        means_array = np.array(means) - np.mean(means)
        freqs, psd = welch(means_array, fs=fps, nperseg=min(len(means_array), 256))
        bpm = float(freqs[np.argmax(psd)] * 60)
        is_plausible = bool(40 <= bpm <= 180)
        return {"status": "performed", "estimated_bpm": round(bpm, 2), "is_plausible": is_plausible}
    except Exception as e: return {"status": "error", "message": str(e)}
    finally:
        # Release the capture on every path, including errors.
        if cap is not None: cap.release()

def analyze_lip_sync(video_path):
    """Correlate audio loudness with mouth opening over the course of a video.

    The audio track is written to a temporary wav file and reduced to an RMS
    energy envelope (one value per 512-sample hop). Mouth opening is measured
    per video frame as the distance between two mouth landmarks of the first
    detected face (0 when no face is found). Because the two series are
    sampled at different rates, the energy envelope is interpolated at each
    video frame's timestamp before the Pearson correlation is computed.

    Args:
        video_path: path of the video to analyse.

    Returns:
        - ``{"status": "no_audio"}`` if the clip has no audio track;
        - ``{"status": "not_enough_data"}`` if fewer than 5 samples of either
          series (or of frames covered by audio) are available;
        - ``{"status": "performed", "correlation_score": float}`` rounded to
          4 decimals;
        - ``{"status": "error", "message": ...}`` if any step raises.
    """
    print("-> Running Audio-Visual (Lip-Sync) Analysis...")
    audio_path = os.path.join(TEMP_DIR, "temp_lipsync_audio.wav")
    cap = None
    try:
        with VideoFileClip(video_path) as clip:
            if not clip.audio: return {"status": "no_audio"}
            clip.audio.write_audiofile(audio_path, codec='pcm_s16le', logger=None)
        y, sr = librosa.load(audio_path, sr=None, mono=True)
        hop_length = 512  # librosa's default, made explicit for the timestamps below
        audio_energy = librosa.feature.rms(y=y, hop_length=hop_length)[0]

        fa = face_alignment.FaceAlignment(face_alignment.LandmarksType.TWO_D, device=str(DEVICE), flip_input=False)
        cap = cv2.VideoCapture(video_path)
        fps = cap.get(cv2.CAP_PROP_FPS) or 30
        mouth_openings = []
        while True:
            ret, frame = cap.read()
            if not ret: break
            preds = fa.get_landmarks(frame)
            if preds:
                mouth_pts = preds[0][48:68]
                mouth_height = euclidean(mouth_pts[13], mouth_pts[19])
                mouth_openings.append(mouth_height)
            else:
                mouth_openings.append(0)
        if len(mouth_openings) < 5 or len(audio_energy) < 5: return {"status": "not_enough_data"}

        # Put both series on the video-frame time base. Truncating them to a
        # common length would compare samples taken at different instants.
        audio_times = np.arange(len(audio_energy)) * hop_length / sr
        frame_times = np.arange(len(mouth_openings)) / fps
        covered = frame_times <= audio_times[-1]  # drop frames past the end of the audio
        if int(np.count_nonzero(covered)) < 5: return {"status": "not_enough_data"}
        energy_at_frames = np.interp(frame_times[covered], audio_times, audio_energy)
        mouth_series = np.asarray(mouth_openings, dtype=float)[covered]

        a = (energy_at_frames - np.mean(energy_at_frames)) / (np.std(energy_at_frames) + 1e-8)
        m = (mouth_series - np.mean(mouth_series)) / (np.std(mouth_series) + 1e-8)
        corr = np.corrcoef(a, m)[0, 1]
        return {"status": "performed", "correlation_score": round(float(corr), 4)}
    except Exception as e: return {"status": "error", "message": str(e)}
    finally:
        # Release the capture and delete the temp wav on every path.
        if cap is not None: cap.release()
        if os.path.exists(audio_path): os.remove(audio_path)

def analyze_ela(image_path, quality=90):
    """Report the largest per-channel difference after a JPEG re-save.

    Args:
        image_path: path of the image to analyse (converted to RGB).
        quality: JPEG quality used for the in-memory re-save.

    Returns:
        ``{"status": "performed", "max_ela_difference": int}``, or
        ``{"status": "error", "message": ...}`` if any step raises.
    """
    print("-> Running Error Level Analysis (ELA)...")
    try:
        source = Image.open(image_path).convert('RGB')

        # Recompress in memory and reload the result.
        jpeg_bytes = BytesIO()
        source.save(jpeg_bytes, format='JPEG', quality=quality)
        recompressed = Image.open(jpeg_bytes)

        difference = ImageChops.difference(source, recompressed)
        # getextrema() yields one (min, max) pair per band.
        band_maxima = [band_range[1] for band_range in difference.getextrema()]
        return {"status": "performed", "max_ela_difference": max(band_maxima)}
    except Exception as e:
        return {"status": "error", "message": str(e)}

def analyze_metadata(media_path):
    """Look for known editing/generation software in the EXIF 'Image Software' tag.

    Args:
        media_path: path of the file whose EXIF tags are read.

    Returns:
        - ``{"status": "performed", "anomalies_detected": True, "software": str}``
          when the tag mentions photoshop, gimp or stable diffusion
          (case-insensitive);
        - ``{"status": "performed", "anomalies_detected": False}`` otherwise;
        - ``{"status": "no_exif_data"}`` if reading the file or tags raises.
    """
    print("-> Running Metadata analysis...")
    editor_markers = ("photoshop", "gimp", "stable diffusion")
    try:
        with open(media_path, 'rb') as media_file:
            exif_tags = exifread.process_file(media_file, details=False)

        software_tag = exif_tags.get('Image Software')
        if software_tag:
            lowered = str(software_tag).lower()
            for marker in editor_markers:
                if marker in lowered:
                    return {"status": "performed", "anomalies_detected": True, "software": str(software_tag)}
        return {"status": "performed", "anomalies_detected": False}
    except Exception:
        return {"status": "no_exif_data"}

def analyze_audio_spectrogram(audio_path):
    """Compare energy in the top frequency bins with the overall spectrogram energy.

    The ratio is the mean STFT magnitude of the highest 10% of frequency bins
    divided by the mean magnitude of the whole spectrogram. A ratio below 0.1
    is flagged as suspicious.

    Args:
        audio_path: path of the audio file, loaded at its native sample rate.

    Returns:
        ``{"status": "performed", "freq_cutoff_strength": float, "is_suspicious": bool}``
        with the ratio rounded to 4 decimals, or
        ``{"status": "error", "message": ...}`` if any step raises.
    """
    print("-> Running Audio Spectrogram Analysis...")
    try:
        samples, _sample_rate = librosa.load(audio_path, sr=None)
        magnitude = np.abs(librosa.stft(samples))

        top_band_rows = int(magnitude.shape[0] * 0.1)
        top_band_mean = np.mean(magnitude[-top_band_rows:, :])
        overall_mean = np.mean(magnitude) + 1e-8  # epsilon avoids division by zero
        freq_cutoff_strength = top_band_mean / overall_mean

        outcome = {"status": "performed"}
        outcome["freq_cutoff_strength"] = round(float(freq_cutoff_strength), 4)
        outcome["is_suspicious"] = freq_cutoff_strength < 0.1
        return outcome
    except Exception as e:
        return {"status": "error", "message": str(e)}


# --- 6. HOLISTIC SCORING & REPORTING ---
def calculate_holistic_score(ai_fake_prob, fr):
    """Turn the AI probability into truth/fake scores and list forensic findings.

    The scores come from the AI probability alone; forensic results only add
    lines to the evidence list.

    Args:
        ai_fake_prob: fake probability from the AI model, or ``None`` when no
            prediction is available (treated as 0.5). Clipped to [0, 1].
        fr: dict of forensic results keyed by analysis name; missing analyses
            and missing fields are skipped.

    Returns:
        dict with ``truth_score``, ``fake_score`` and ``evidence`` (list of str).
    """
    if ai_fake_prob is None:
        fake_score = 0.5
    else:
        fake_score = np.clip(ai_fake_prob, 0.0, 1.0)
    truth_score = 1.0 - fake_score

    def _field(section, key, default):
        # Value of `key` in the given analysis result, or `default` if absent.
        return fr.get(section, {}).get(key, default)

    # (triggered?, message) pairs, in reporting order.
    findings = [
        (_field('prnu_analysis', 'residual_variance', 0) > 2.0,
         "[Forensic Finding] High PRNU residual variance detected."),
        (_field('eye_reflections', 'reflection_correlation', 1.0) < 0.7,
         "[Forensic Finding] Inconsistent eye reflections detected."),
        (not _field('pulse_analysis', 'is_plausible', True),
         "[Forensic Finding] Anomalous physiological pulse detected."),
        (_field('metadata', 'anomalies_detected', False),
         "[Forensic Finding] Editing software detected in metadata."),
        (_field('lip_sync_analysis', 'correlation_score', 1.0) < 0.2,
         "[Forensic Finding] Poor lip-sync correlation detected."),
        (_field('audio_spectrogram', 'is_suspicious', False),
         "[Forensic Finding] Suspicious audio spectrogram characteristics detected."),
    ]

    evidence = [f"AI Model Fake Probability: {fake_score:.2%}"]
    evidence.extend(message for triggered, message in findings if triggered)

    return {"truth_score": truth_score, "fake_score": fake_score, "evidence": evidence}

def build_final_report(media_path, holistic, ai_results, forensics, visuals, inconclusive_reason=None):
    """Assemble the final report as a dict of plain JSON-compatible values.

    Args:
        media_path: path of the analysed file; only its basename is reported.
        holistic: dict with ``truth_score``, ``fake_score`` and ``evidence``.
        ai_results: AI model outputs to embed in the report.
        forensics: forensic analysis results to embed in the report.
        visuals: explainability outputs to embed in the report.
        inconclusive_reason: when truthy, the verdict is "INCONCLUSIVE";
            otherwise it is "REAL" for a truth score >= 0.5 and "FAKE" below.

    Returns:
        The report after a JSON round trip, so numpy integers, floats and
        booleans come back as native Python values.

    Raises:
        TypeError: if a value cannot be serialised to JSON.
    """
    def _to_builtin(value):
        # Fallback for values the json module cannot serialise natively.
        if isinstance(value, (np.integer, np.floating, np.bool_)):
            return value.item()
        raise TypeError(f"Object of type {value.__class__.__name__} is not JSON serializable")

    truth_score = holistic["truth_score"]
    if inconclusive_reason:
        verdict = "INCONCLUSIVE"
    elif truth_score >= 0.5:
        verdict = "REAL"
    else:
        verdict = "FAKE"

    report = {
        "report_timestamp": datetime.now().isoformat(),
        "media_analyzed": os.path.basename(media_path),
        "holistic_truth_score": truth_score,
        "holistic_fake_score": holistic["fake_score"],
        "final_verdict": verdict,
        "inconclusive_reason": inconclusive_reason,
        "evidence_summary": holistic["evidence"],
        "ai_model_prediction": ai_results,
        "detailed_forensic_findings": forensics,
        "visual_explainability": visuals
    }
    serialized = json.dumps(report, default=_to_builtin, indent=2)
    return json.loads(serialized)


# --- 7. MAIN PIPELINE ORCHESTRATION ---
def run_ultimate_pipeline(media_path, models):
    """Analyse one media file, print the report and save it as JSON.

    Images and videos get the AI reconstruction score (videos are scored on
    their first frame) plus the image forensics; videos additionally get
    pulse, lip-sync and audio-spectrogram analysis. Audio files get the
    spectrogram analysis only and are always reported as inconclusive.

    Args:
        media_path: path of the file to analyse. Must exist, be at most 50MB
            and have a supported image/video/audio extension.
        models: dict of loaded models, passed to predict_with_genconvit_ensemble.

    Returns:
        The report dict that was printed and written to REPORTS_DIR, or
        ``None`` if the file was rejected (missing, too large, unsupported
        extension, or a video whose first frame cannot be read).

    If the AI prediction fails for an image or video, the report is marked
    INCONCLUSIVE rather than falling through to a default 0.5 score, which
    would otherwise be reported as "REAL".
    """
    print(f"\n{'='*30}\nRunning Pipeline for: {os.path.basename(media_path)}\n{'='*30}")

    if not os.path.exists(media_path):
        print("ERROR: File not found.")
        return None
    if os.path.getsize(media_path) > 50 * 1024 * 1024:
        print("ERROR: File exceeds 50MB size limit.")
        return None

    ext = os.path.splitext(media_path)[1].lower()
    img_fmts, vid_fmts, aud_fmts = ['.jpg', '.jpeg', '.png', '.webp', '.bmp'], ['.mp4', '.mov', '.avi', '.webm', '.mkv'], ['.wav', '.mp3', '.flac', '.ogg']
    media_type = 'image' if ext in img_fmts else 'video' if ext in vid_fmts else 'audio' if ext in aud_fmts else 'unsupported'

    if media_type == 'unsupported':
        print(f"ERROR: Unsupported file format '{ext}'.")
        return None

    ai_results, forensics, visuals, inconclusive_reason = {}, {}, {}, None
    # File name without directory or extension; used to name the outputs.
    media_stem = os.path.splitext(os.path.basename(media_path))[0]

    if media_type in ['image', 'video']:
        frame_path = media_path
        if media_type == 'video':
            # Videos are scored on their first frame, saved as a temporary JPEG.
            cap = cv2.VideoCapture(media_path)
            try:
                ret, frame = cap.read()
            finally:
                cap.release()
            if not ret:
                print("ERROR: Could not read first frame of video.")
                return None
            frame_path = os.path.join(TEMP_DIR, "temp_frame.jpg")
            cv2.imwrite(frame_path, frame)

        try:
            pred_data = predict_with_genconvit_ensemble(frame_path, models)
            if pred_data:
                ai_results = {"raw_fake_probability": pred_data["fake_probability"], "avg_mse": pred_data["avg_mse"]}
                err_map_path = os.path.join(EXPLAIN_DIR, f"{media_stem}_error_map.jpg")
                visuals['reconstruction_error_map'] = generate_error_map(pred_data, err_map_path)
            else:
                # Without a model score there is nothing to base a verdict on.
                inconclusive_reason = "AI model prediction failed; no fake probability available."

            forensics['metadata'] = analyze_metadata(frame_path)
            forensics['prnu_analysis'] = analyze_prnu(frame_path)
            forensics['eye_reflections'] = analyze_eye_reflections(frame_path)
            forensics['ela'] = analyze_ela(frame_path)

            if media_type == 'video':
                forensics['pulse_analysis'] = analyze_pulse(media_path)
                forensics['lip_sync_analysis'] = analyze_lip_sync(media_path)
                audio_path = os.path.join(TEMP_DIR, "temp_audio.wav")
                try:
                    with VideoFileClip(media_path) as clip:
                        if clip.audio:
                            clip.audio.write_audiofile(audio_path, codec='pcm_s16le', logger=None)
                            forensics['audio_spectrogram'] = analyze_audio_spectrogram(audio_path)
                except Exception as e: print(f"Could not process audio from video: {e}")
                finally:
                    if os.path.exists(audio_path): os.remove(audio_path)
        finally:
            # Remove the extracted frame even if an analysis step raised.
            if media_type == 'video' and os.path.exists(frame_path): os.remove(frame_path)

    elif media_type == 'audio':
        inconclusive_reason = "AI model analysis skipped (audio-only input)."
        forensics['audio_spectrogram'] = analyze_audio_spectrogram(media_path)

    ai_prob = ai_results.get("raw_fake_probability") if ai_results else None
    holistic = calculate_holistic_score(ai_prob, forensics)
    report = build_final_report(media_path, holistic, ai_results, forensics, visuals, inconclusive_reason)

    print("\n--- FINAL HOLISTIC REPORT ---")
    print(json.dumps(report, indent=2))

    report_path = os.path.join(REPORTS_DIR, f"{media_stem}_holistic_report.json")
    with open(report_path, 'w') as f: json.dump(report, f, indent=4)
    print(f"\nReport saved to {report_path}")
    return report


# Default test inputs on the mounted Google Drive.
image_test_path = '/content/drive/MyDrive/test-image.jpg'
video_test_path = '/content/drive/MyDrive/test-video.mp4'
audio_test_path = '/content/drive/MyDrive/test-audio.wav'

# Create a placeholder image if it is missing, so at least the image path can run.
# The upper bound of randint is exclusive, so 256 is needed to cover 0-255.
if not os.path.exists(image_test_path):
    print(f"Creating placeholder image at: {image_test_path}")
    cv2.imwrite(image_test_path, np.random.randint(0, 256, (224, 224, 3), dtype=np.uint8))

# Run the pipeline on every input that exists. Missing files are reported
# once, here, instead of once before the loop and again inside it.
for test_media_path in [image_test_path, video_test_path, audio_test_path]:
    if os.path.exists(test_media_path):
        run_ultimate_pipeline(test_media_path, loaded_models)
    else:
        print(f'\nSKIPPING: File not found at "{test_media_path}"')



In [None]:
# Alternative inputs; uncomment and add them to the list below to analyse them too.
# image_test_path = '/content/ChatGPT Image Jul 14, 2025, 03_21_14 PM.png'
video_test_path = '/content/id20_0001.mp4'
# audio_test_path = '/content/file100.wav_16k.wav_norm.wav_mono.wav_silence.wav_2sec.wav'

# Run the pipeline on the specified files. A missing file is reported once,
# by the loop, rather than once before it and again inside it.
for selected_media_path in [video_test_path]:
    if os.path.exists(selected_media_path):
        run_ultimate_pipeline(selected_media_path, loaded_models)
    else:
        print(f'\nSKIPPING: File not found at "{selected_media_path}"')

