# 🎬 Trailer AI — Rewritten Notebook
End-to-end: CSV → Download (yt_dlp API) → Features → Quick Train → Evaluate → Trailer.


In [None]:
# 0) Install dependencies (librosa is pinned to avoid binary issues; numpy is left unpinned)
!pip install -q yt-dlp opencv-python librosa==0.10.1 numpy pandas tqdm scikit-learn joblib matplotlib python-dotenv

In [1]:
# 1) Imports & working directory + .env
import os, sys, re, json, shutil, subprocess
from dataclasses import dataclass, asdict
from typing import List, Tuple, Optional, Dict

import numpy as np, pandas as pd
import cv2, librosa, yt_dlp
import matplotlib; matplotlib.use("Agg")
import matplotlib.pyplot as plt
from tqdm import tqdm
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import (
    accuracy_score, precision_score, recall_score, f1_score,
    roc_auc_score, confusion_matrix, ConfusionMatrixDisplay
)
import joblib
from dotenv import load_dotenv

# Always write in ./data (next to this notebook)
WORKDIR = os.path.join(os.getcwd(), "data")
for d in ("raw", "features", "models", "out"):
    os.makedirs(os.path.join(WORKDIR, d), exist_ok=True)
print("Working dir:", WORKDIR)

# .env (for cookies/API etc.) — optional
ENV_PATH = os.path.join(os.getcwd(), ".env")
if not os.path.exists(ENV_PATH):
    # First run: drop a template the user can fill in later.
    with open(ENV_PATH, "w", encoding="utf-8") as f:
        f.write(
            "TRAILER_API_TOKEN=\n"
            "TRAILER_WRITE_SUBS=0\n"
            "TRAILER_COOKIES=\n"
            "TRAILER_SLEEP_REQUESTS=10\n"
            "TRAILER_MAX_SLEEP_INTERVAL=20\n"
        )
    print("Created .env template at", ENV_PATH)
else:
    print(".env found at", ENV_PATH)

load_dotenv(ENV_PATH)


def _env_flag(name: str, default: bool = False) -> bool:
    """Read a boolean environment variable.

    Unset or blank values yield *default*; "0", "false", "no" and "off"
    (case-insensitive) are false; anything else is true.
    """
    raw = (os.getenv(name) or "").strip().lower()
    if not raw:
        return default
    return raw not in ("0", "false", "no", "off")


def _env_int(name: str, default: int) -> int:
    """Read an integer environment variable, falling back to *default* when unset or invalid."""
    raw = (os.getenv(name) or "").strip()
    if not raw:
        return default
    try:
        return int(raw)
    except ValueError:
        print(f"Ignoring non-integer {name}={raw!r}; using {default}")
        return default


API_TOKEN = os.getenv("TRAILER_API_TOKEN", "").strip() or None
# A blank TRAILER_WRITE_SUBS (as in the generated template if edited to empty)
# must not silently enable subtitle downloads.
WRITE_SUBS = _env_flag("TRAILER_WRITE_SUBS", default=False)
COOKIES_PATH = os.getenv("TRAILER_COOKIES", "").strip() or None
SLEEP_REQUESTS = _env_int("TRAILER_SLEEP_REQUESTS", 10)
MAX_SLEEP_INTERVAL = _env_int("TRAILER_MAX_SLEEP_INTERVAL", 20)

print("Config → WRITE_SUBS:", WRITE_SUBS, "| cookies:", COOKIES_PATH or "None")

Working dir: /Users/dhruvpatel/Desktop/trailer_ai_package/data
.env found at /Users/dhruvpatel/Desktop/trailer_ai_package/.env
Config → WRITE_SUBS: False | cookies: None


In [2]:
# 2) Helpers
def basename_noext(p: str) -> str:
    """Return the last path component of *p* with its final extension removed."""
    filename = os.path.basename(p)
    stem, _extension = os.path.splitext(filename)
    return stem

def seconds_from_vtt_ts(ts: str) -> float:
    """Convert a VTT/SRT-style timestamp to seconds.

    Accepts ``HH:MM:SS.mmm`` and the hour-less ``MM:SS.mmm`` form; a comma is
    accepted as the decimal separator as well. The fractional part is kept.

    Args:
        ts: Timestamp text, e.g. ``"00:01:02.500"``.

    Returns:
        The timestamp in seconds, or ``0.0`` when *ts* does not have two or
        three colon-separated fields.

    Raises:
        ValueError: If a field is not numeric.
    """
    # Only ":" separates fields; "." / "," belong to the seconds field, so
    # splitting on them would throw the milliseconds away.
    fields = ts.strip().replace(",", ".").split(":")
    if len(fields) == 3:
        hours, minutes, seconds = fields
    elif len(fields) == 2:
        hours = "0"
        minutes, seconds = fields
    else:
        return 0.0
    return int(hours) * 3600 + int(minutes) * 60 + float(seconds)

def _cue_from_vtt_block(block: List[str]) -> Optional[Tuple[float, float, str]]:
    """Turn one blank-line-delimited VTT block into ``(start, end, text)``, or ``None``."""
    for i, ln in enumerate(block):
        if "-->" not in ln:
            continue
        # Only the first timing line of a block is considered. partition()
        # cannot fail on a line that contains "-->" more than once.
        left, _, right = ln.partition("-->")
        try:
            s = seconds_from_vtt_ts(left.strip())
            # Cue settings (e.g. "align:start") may follow the end timestamp.
            e = seconds_from_vtt_ts(right.strip().split(" ")[0])
        except ValueError:
            # Malformed timestamp: skip this cue rather than abort the file.
            return None
        if e <= s:
            return None
        return (s, e, " ".join(block[i+1:]).strip())
    return None


def parse_vtt(vtt_path: str) -> List[Tuple[float, float, str]]:
    """Parse a WebVTT file into a list of ``(start_sec, end_sec, text)`` cues.

    Blocks are separated by blank lines. Blocks without a ``-->`` timing line
    (header, notes), cues with unparsable timestamps, and cues whose end is
    not after their start are skipped.

    Args:
        vtt_path: Path to the ``.vtt`` file; may be empty/None.

    Returns:
        Cues in file order; ``[]`` when the path is falsy or does not exist.
    """
    if not vtt_path or not os.path.exists(vtt_path):
        return []
    entries = []
    block = []
    with open(vtt_path, "r", encoding="utf-8", errors="ignore") as f:
        for line in f:
            line = line.rstrip("\n")
            if line.strip():
                block.append(line)
                continue
            cue = _cue_from_vtt_block(block)
            if cue is not None:
                entries.append(cue)
            block = []
    # The file may end without a trailing blank line; flush the last block.
    cue = _cue_from_vtt_block(block)
    if cue is not None:
        entries.append(cue)
    return entries

def caption_overlap(captions, start: float, end: float) -> float:
    """Return the share of the window ``[start, end]`` covered by caption cues.

    Per-cue intersections are summed and the sum is capped at the window
    length, so the result lies in ``[0, 1]``. No captions gives ``0.0``.
    """
    if not captions:
        return 0.0
    window = max(1e-6, end - start)
    total = 0.0
    for cue_start, cue_end, _text in captions:
        total += max(0.0, min(end, cue_end) - max(start, cue_start))
    return min(total, window) / window

def caption_keyword_density(captions, start: float, end: float) -> float:
    """Return caption words per second inside the window ``[start, end]``.

    Each cue contributes its word count scaled by the fraction of the cue
    that falls inside the window; the total is divided by the window length.
    """
    window = max(1e-6, end - start)
    weighted_words = 0.0
    for cue_start, cue_end, cue_text in captions:
        shared = max(0.0, min(end, cue_end) - max(start, cue_start))
        if not shared > 0:
            continue
        n_words = len(re.findall(r"\w+", cue_text.lower()))
        # The small epsilon keeps zero-length cues from dividing by zero.
        weighted_words += n_words * (shared / (cue_end - cue_start + 1e-6))
    return float(weighted_words / window)

def normalize(vals: List[float]) -> np.ndarray:
    """Min-max scale *vals* into ``[0, 1]`` as a float32 array.

    An empty input is returned as an empty array; a (near-)constant input
    maps to all zeros.
    """
    arr = np.asarray(vals, dtype=np.float32)
    if arr.size == 0:
        return arr
    lo = float(np.min(arr))
    hi = float(np.max(arr))
    span = hi - lo
    if span < 1e-12:
        return np.zeros_like(arr, dtype=np.float32)
    return (arr - lo) / span

In [3]:
# 3) Download via yt_dlp Python API (no shell)
def download_video(url: str, raw_dir: str, write_subs: bool=False, cookies_path: Optional[str]=None,
                   sleep_requests: Optional[int]=10, max_sleep_interval: Optional[int]=20) -> Tuple[str, Optional[str]]:
    """Download one video through the yt_dlp Python API and locate its English VTT subtitles.

    Args:
        url: Video URL handed to yt_dlp.
        raw_dir: Output directory (created if missing); files are named ``<id>.<ext>``.
        write_subs: Also request English manual/automatic subtitles in VTT format.
        cookies_path: Optional cookie file; only used when the file exists.
        sleep_requests: Seconds to sleep between extraction requests.
        max_sleep_interval: Upper bound, in seconds, of the randomized sleep
            before the download itself.

    Returns:
        ``(filepath, vtt_path)`` where ``vtt_path`` is ``None`` when no
        ``<id>.en.vtt`` / ``<id>.vtt`` file sits next to the video.

    Raises:
        RuntimeError: If yt_dlp returns no info dict. Errors raised by yt_dlp
            itself propagate unchanged.
    """
    os.makedirs(raw_dir, exist_ok=True)
    ydl_opts = {
        "outtmpl": os.path.join(raw_dir, "%(id)s.%(ext)s"),
        "format": "mp4/bestvideo[ext=mp4]+bestaudio[ext=m4a]/best",
        "merge_output_format": "mp4",
        "noplaylist": True,
        "quiet": True,
        "retries": 3,
    }
    if write_subs:
        ydl_opts.update({
            "writesubtitles": True,
            "writeautomaticsub": True,
            "subtitleslangs": ["en"],
            "subtitlesformat": "vtt",
        })
    if cookies_path and os.path.exists(cookies_path):
        ydl_opts["cookiefile"] = cookies_path
    if sleep_requests and max_sleep_interval:
        ydl_opts["sleep_interval_requests"] = sleep_requests
        # yt_dlp has no "max_sleep_interval_requests" option (unknown keys are
        # silently ignored). The upper bound is "max_sleep_interval", which is
        # only honoured together with a "sleep_interval" lower bound.
        ydl_opts["sleep_interval"] = min(sleep_requests, max_sleep_interval)
        ydl_opts["max_sleep_interval"] = max_sleep_interval

    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        info = ydl.extract_info(url, download=True)
        if info is None:
            raise RuntimeError("yt_dlp failed to fetch info")
        downloads = info.get("requested_downloads")
        if downloads:
            filepath = downloads[0]["filepath"]
        else:
            # Fall back to reconstructing the name from the output template.
            vid = info.get("id")
            ext = info.get("ext", "mp4")
            filepath = os.path.join(raw_dir, f"{vid}.{ext}")

    # locate VTT if present
    vtt_path = None
    vid = os.path.splitext(os.path.basename(filepath))[0]
    for cand in (os.path.join(raw_dir, f"{vid}.en.vtt"), os.path.join(raw_dir, f"{vid}.vtt")):
        if os.path.exists(cand):
            vtt_path = cand
            break
    return filepath, vtt_path

In [4]:
# 4) Feature extraction (motion, audio RMS, captions) + baseline score
@dataclass
class Chunk:
    """One candidate trailer segment of a video together with its features and score."""
    # Identifier of the source video (compute_features_for_video passes the
    # file's basename without extension).
    video_id: str
    # Segment boundaries on the video timeline, in seconds.
    start: float
    end: float
    # Mean frame-to-frame histogram difference inside the segment (avg_motion).
    motion: float = 0.0
    # RMS amplitude of the segment's audio (audio_rms).
    audio: float = 0.0
    # Fraction of the segment covered by caption cues (caption_overlap).
    cap_overlap: float = 0.0
    # Caption words per second inside the segment (caption_keyword_density).
    kw_density: float = 0.0
    # Ranking score: the baseline blend at extraction time, later overwritten
    # with model-based scores before selection.
    score: float = 0.0

def sample_video_histograms(mp4_path: str, fps_sample: float = 2.0):
    """Sample frames at roughly *fps_sample* per second and measure histogram change.

    For every sampled frame a normalized 3x64-bin HSV histogram is built; the
    value recorded for the frame is the L1 distance to the previous sampled
    frame's histogram (``0.0`` for the first sample).

    Args:
        mp4_path: Path of the video to read with OpenCV.
        fps_sample: Target number of sampled frames per second of video.

    Returns:
        ``(ts, diffs)``: float32 arrays of equal length holding the sampled
        timestamps in seconds and the histogram differences.

    Raises:
        RuntimeError: If the video cannot be opened.
    """
    cap = cv2.VideoCapture(mp4_path)
    if not cap.isOpened():
        raise RuntimeError(f"Cannot open video: {mp4_path}")
    ts_list, diffs = [], []
    try:
        fps = cap.get(cv2.CAP_PROP_FPS)
        # Some containers report 0 or NaN; NaN is truthy and would crash round().
        if not np.isfinite(fps) or fps <= 0:
            fps = 30.0
        step = max(1, int(round(fps / fps_sample)))
        prev_hist = None
        frame_idx = 0
        while True:
            # grab() advances without decoding; only sampled frames are decoded.
            if not cap.grab():
                break
            if frame_idx % step == 0:
                ret, frame = cap.retrieve()
                if not ret:
                    break
                ts = cap.get(cv2.CAP_PROP_POS_MSEC) / 1000.0
                hsv = cv2.cvtColor(frame, cv2.COLOR_BGR2HSV)
                h0 = cv2.calcHist([hsv], [0], None, [64], [0, 180])
                h1 = cv2.calcHist([hsv], [1], None, [64], [0, 256])
                h2 = cv2.calcHist([hsv], [2], None, [64], [0, 256])
                hist = np.concatenate([h0.flatten(), h1.flatten(), h2.flatten()]).astype(np.float32)
                hist /= (np.sum(hist) + 1e-6)
                diffs.append(0.0 if prev_hist is None else float(np.sum(np.abs(hist - prev_hist))))
                prev_hist = hist
                ts_list.append(ts)
            frame_idx += 1
    finally:
        # Release the decoder even when frame processing raises.
        cap.release()
    ts = np.array(ts_list, dtype=np.float32)
    diffs = np.array(diffs, dtype=np.float32)
    return ts, diffs

def detect_scenes(ts: np.ndarray, diffs: np.ndarray, thresh: float = 0.55):
    """Split the sampled timeline into scenes at large histogram jumps.

    A cut is placed at every sample (after the first) whose difference exceeds
    *thresh*. Returns ``(start_sec, end_sec)`` pairs with positive duration,
    or ``[(0.0, 0.0)]`` when there are no samples at all.
    """
    if len(ts) == 0:
        return [(0.0, 0.0)]
    jump_idx = [i for i in range(1, len(diffs)) if diffs[i] > thresh]
    cut_points = [0] + jump_idx + [len(ts) - 1]
    scenes = []
    for left, right in zip(cut_points, cut_points[1:]):
        begin, finish = float(ts[left]), float(ts[right])
        if finish > begin:
            scenes.append((begin, finish))
    return scenes

def make_chunks(bounds: List[Tuple[float, float]], min_len=2.0, max_len=6.0, video_id="vid"):
    """Cut each scene into candidate chunks using a sliding window.

    Inside every ``(start, end)`` scene a window start advances in steps of
    *min_len*; each chunk runs for up to *max_len* seconds, clipped at the
    scene end. Scenes shorter than *min_len* produce no chunk.

    Args:
        bounds: Scene boundaries in seconds.
        min_len: Step between chunk starts and minimum remaining scene length.
        max_len: Maximum chunk duration.
        video_id: Identifier stored on every chunk.

    Returns:
        The list of ``Chunk`` objects in scene order.

    Raises:
        ValueError: If *min_len* is not positive (the window would never advance).
    """
    if min_len <= 0:
        raise ValueError(f"min_len must be positive, got {min_len!r}")
    chunks: List[Chunk] = []
    for s, e in bounds:
        cur = s
        while cur + min_len <= e:
            end = min(cur + max_len, e)
            chunks.append(Chunk(video_id=video_id, start=cur, end=end))
            cur += min_len
    return chunks

def avg_motion(diffs: np.ndarray, ts: np.ndarray, start: float, end: float) -> float:
    """Return the mean of *diffs* over samples whose timestamp lies in ``[start, end]``.

    Gives ``0.0`` when no sample falls inside the window.
    """
    in_window = (ts >= start) & (ts <= end)
    if np.any(in_window):
        return float(np.mean(diffs[in_window]))
    return 0.0

def audio_rms(mp4_path: str, start: float, end: float) -> float:
    """Return the RMS amplitude of the audio between *start* and *end* seconds.

    Best-effort: an empty/negative window, an empty decode, or any error
    while loading or computing yields ``0.0``.
    """
    span = max(0.0, end - start)
    if span <= 0.0:
        return 0.0
    try:
        samples, _rate = librosa.load(
            mp4_path, sr=None, offset=max(0.0, start), duration=span
        )
        if samples.size == 0:
            return 0.0
        mean_square = np.mean(samples**2)
        return float(np.sqrt(mean_square))
    except Exception:
        # Deliberate best-effort: a chunk without decodable audio scores 0.
        return 0.0

def compute_features_for_video(mp4_path: str, vtt_path: Optional[str], min_seg: float, max_seg: float, scene_thresh: float) -> List[Chunk]:
    """Build scored candidate chunks for one video.

    Samples frame histograms, splits the timeline into scenes, cuts the scenes
    into chunks, fills in motion/audio/caption features per chunk and finally
    assigns a baseline score (0.4 motion + 0.4 audio + 0.2 text, each
    min-max normalized across the video's chunks).

    Raises:
        RuntimeError: If no frame could be sampled from the video.
    """
    video_id = basename_noext(mp4_path)
    captions = parse_vtt(vtt_path) if vtt_path else []

    ts, diffs = sample_video_histograms(mp4_path, fps_sample=2.0)
    if len(ts) == 0:
        raise RuntimeError("Failed to sample frames.")

    bounds = detect_scenes(ts, diffs, thresh=scene_thresh)
    if not bounds:
        # No usable scene: treat the whole sampled timeline as a single scene.
        bounds = [(0.0, float(ts[-1]))]
    chunks = make_chunks(bounds, min_len=min_seg, max_len=max_seg, video_id=video_id)

    for chunk in tqdm(chunks, desc=f"Features {video_id}"):
        chunk.motion = avg_motion(diffs, ts, chunk.start, chunk.end)
        chunk.audio = audio_rms(mp4_path, chunk.start, chunk.end)
        if captions:
            chunk.cap_overlap = caption_overlap(captions, chunk.start, chunk.end)
            chunk.kw_density = caption_keyword_density(captions, chunk.start, chunk.end)
        else:
            chunk.cap_overlap = 0.0
            chunk.kw_density = 0.0

    # baseline score (used for pseudo-labels later)
    motion_n = normalize([chunk.motion for chunk in chunks])
    audio_n = normalize([chunk.audio for chunk in chunks])
    text_n = normalize([0.5*chunk.cap_overlap + 0.5*chunk.kw_density for chunk in chunks])
    for chunk, m, a, t in zip(chunks, motion_n, audio_n, text_n):
        chunk.score = float(0.4*m + 0.4*a + 0.2*t)
    return chunks

def greedy_select(chunks: List[Chunk], target_len: float, min_gap: float) -> List[Chunk]:
    """Greedily pick the highest-scoring chunks until about *target_len* seconds are filled.

    Candidates are visited in descending score order. A candidate is skipped
    when its start lies within *min_gap* seconds of an already chosen start,
    when its time span overlaps an already chosen chunk (chunks come from a
    sliding window, so neighbours share footage), or when adding it would
    exceed ``target_len + 2.0`` seconds. Selection stops once 98% of the
    target is reached.

    Args:
        chunks: Candidates exposing ``start``, ``end`` and ``score``.
        target_len: Desired total duration in seconds.
        min_gap: Minimum distance between chosen start times, in seconds.

    Returns:
        The chosen chunks in descending score order.
    """
    chosen = []
    total = 0.0
    for cand in sorted(chunks, key=lambda x: x.score, reverse=True):
        if total >= target_len * 0.98:
            break
        clashes = any(
            abs(cand.start - picked.start) < min_gap
            # Interval overlap: the same footage would appear twice.
            or (cand.start < picked.end and picked.start < cand.end)
            for picked in chosen
        )
        if clashes:
            continue
        dur = cand.end - cand.start
        if total + dur > target_len + 2.0:
            continue
        chosen.append(cand)
        total += dur
    return chosen

def render_trailer(mp4_path: str, chunks: List[Chunk], out_mp4: str, target_len: float, min_seg: float):
    """Cut the selected chunks out of *mp4_path* and concatenate them into *out_mp4*.

    Chunks are chosen with ``greedy_select`` (``min_gap = min_seg / 2``). Each
    one is re-encoded to a temporary part file with ffmpeg, then the parts are
    joined with ffmpeg's concat demuxer. The temporary directory is removed
    whether or not rendering succeeds.

    Args:
        mp4_path: Source video.
        chunks: Scored candidate chunks.
        out_mp4: Destination file; its directory is created if needed.
        target_len: Desired trailer duration in seconds.
        min_seg: Minimum segment length used to derive the selection gap.

    Raises:
        RuntimeError: If no chunk is selected, ffmpeg cannot be launched, or
            ffmpeg fails while cutting a segment.
        subprocess.CalledProcessError: If the final concat step fails.
    """
    selected = greedy_select(chunks, target_len=target_len, min_gap=min_seg/2.0)
    if not selected:
        raise RuntimeError("No chunks selected for trailer.")
    out_dir = os.path.dirname(out_mp4)
    tmp_dir = os.path.join(out_dir, "_tmp")
    os.makedirs(tmp_dir, exist_ok=True)
    try:
        parts = []
        for i, c in enumerate(selected):
            part = os.path.join(tmp_dir, f"part_{i:03d}.mp4")
            cmd = [
                "ffmpeg","-y","-ss",f"{c.start:.3f}","-to",f"{c.end:.3f}",
                "-i", mp4_path, "-c:v","libx264","-preset","veryfast","-crf","23",
                "-c:a","aac","-b:a","128k", part
            ]
            try:
                subprocess.run(cmd, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
            except OSError as e:
                # The executable itself could not be started.
                raise RuntimeError("ffmpeg is required on PATH. Please install ffmpeg.") from e
            except subprocess.CalledProcessError as e:
                # ffmpeg ran but failed: surface the tail of its own diagnostics.
                detail = (e.stderr or b"").decode("utf-8", errors="replace").strip()[-500:]
                raise RuntimeError(
                    f"ffmpeg failed on segment {i} ({c.start:.3f}-{c.end:.3f}s): {detail}"
                ) from e
            parts.append(part)
        filelist = os.path.join(tmp_dir, "files.txt")
        with open(filelist,"w",encoding="utf-8") as f:
            for p in parts:
                # Inside single quotes the concat demuxer needs ' written as '\''.
                quoted = os.path.abspath(p).replace("'", "'\\''")
                f.write(f"file '{quoted}'\n")
        if out_dir:
            # dirname is "" for a bare filename; makedirs("") would raise.
            os.makedirs(out_dir, exist_ok=True)
        subprocess.run(["ffmpeg","-y","-safe","0","-f","concat","-i",filelist,"-c","copy", out_mp4], check=True)
    finally:
        shutil.rmtree(tmp_dir, ignore_errors=True)
    print("🎬 Trailer saved:", out_mp4)

In [5]:
import pandas as pd, os

# Use your uploaded CSV path (update if needed)
csv_path = os.path.join(WORKDIR, "video_ids.csv")  # or move it into your project folder
if not os.path.exists(csv_path):
    raise FileNotFoundError(f"Could not find CSV at {csv_path}")

# Read file and build YouTube URLs list
df_ids = pd.read_csv(csv_path)
urls = []

if "url" in df_ids.columns:
    # Strip whitespace and drop blanks so padded cells neither defeat the
    # de-duplication below nor reach the downloader as invalid URLs.
    urls.extend(
        cell for cell in df_ids["url"].dropna().astype(str).str.strip() if cell
    )

if "video_id" in df_ids.columns:
    for v in df_ids["video_id"].dropna().astype(str).str.strip():
        if not v:
            continue
        if v.startswith(("http://", "https://")):
            # The column may already hold a full URL.
            urls.append(v)
        else:
            urls.append(f"https://www.youtube.com/watch?v={v}")

# Remove duplicates (dict keys keep first-seen order)
urls = list(dict.fromkeys(urls))

print(f"✅ Loaded {len(urls)} video(s) from {os.path.basename(csv_path)}")
for u in urls[:10]:
    print("  ", u)

✅ Loaded 100 video(s) from video_ids.csv
   https://www.youtube.com/watch?v=vH1Dj4_JKVU
   https://www.youtube.com/watch?v=Ipm6V_tOFHc
   https://www.youtube.com/watch?v=IcxZa7HOW1o
   https://www.youtube.com/watch?v=AJ1-WE1B2Ss
   https://www.youtube.com/watch?v=kjsqQPMTbd4
   https://www.youtube.com/watch?v=iaFl3AMRvEs
   https://www.youtube.com/watch?v=mgmVOuLgFB0
   https://www.youtube.com/watch?v=dip7rRT79gk
   https://www.youtube.com/watch?v=43gpODUhTCQ
   https://www.youtube.com/watch?v=FhTsFjsofrw


In [None]:
# 6) Download & extract features (for ALL videos in CSV)
# Chunk length bounds (seconds) and the scene-cut threshold applied to every video.
min_seg, max_seg, scene_thresh = 2.0, 6.0, 0.55
# These two directories are also read by the training and trailer cells below.
raw_dir = os.path.join(WORKDIR, "raw")
features_dir = os.path.join(WORKDIR, "features")

for url in tqdm(urls):
    try:
        mp4, vtt = download_video(
            url, raw_dir,
            write_subs=WRITE_SUBS, cookies_path=COOKIES_PATH,
            sleep_requests=SLEEP_REQUESTS, max_sleep_interval=MAX_SLEEP_INTERVAL
        )
        chunks = compute_features_for_video(mp4, vtt, min_seg, max_seg, scene_thresh)
        # One feature CSV per video, named after the downloaded file.
        out_csv = os.path.join(features_dir, f"{basename_noext(mp4)}.csv")
        pd.DataFrame([asdict(c) for c in chunks]).to_csv(out_csv, index=False)
        print("✅ Saved features →", out_csv)
    except Exception as e:
        # Best-effort batch: report the failing URL and move on to the next one.
        print("⚠️ Error on URL:", url, "→", e)

In [None]:
# 7) Use the whole dataset from data/features/*.csv → auto-label → train & evaluate

def load_full_dataset(features_dir: str) -> pd.DataFrame:
    """Concatenate every ``*.csv`` feature file in *features_dir* into one DataFrame.

    Files are read in sorted filename order: ``os.listdir`` returns entries in
    arbitrary order, which would otherwise change the row order (and so any
    seeded split of the result) from machine to machine.

    Args:
        features_dir: Directory holding the per-video feature CSVs.

    Returns:
        All rows of all files, re-indexed from 0.

    Raises:
        RuntimeError: If the directory contains no ``.csv`` file.
    """
    files = sorted(
        os.path.join(features_dir, f)
        for f in os.listdir(features_dir)
        if f.endswith(".csv")
    )
    if not files:
        raise RuntimeError("No feature CSV files found. Run the extraction step first.")
    df = pd.concat([pd.read_csv(f) for f in files], ignore_index=True)
    print(f"Loaded {len(df)} total chunks from {len(files)} videos.")
    return df

# Load all feature files
full_df = load_full_dataset(features_dir)

# Automatically generate pseudo-labels: top 30% score as highlight (label=1)
# NOTE(review): `score` is the baseline blend that compute_features_for_video
# derives from the very features used for training below, so the metrics show
# how well the model reproduces that heuristic, not real highlight quality.
threshold = full_df["score"].quantile(0.70)
full_df["label"] = (full_df["score"] >= threshold).astype(int)
print(f"Top 30% clips labeled as 1 (highlight) → {full_df['label'].sum()} positives / {len(full_df)} total")

# Train/test split
# The column order in feat_cols must match the order used when scoring chunks
# in the trailer cells.
feat_cols = ["motion", "audio", "cap_overlap", "kw_density"]
X_train, X_test, y_train, y_test = train_test_split(
    full_df[feat_cols], full_df["label"], test_size=0.2, random_state=42, stratify=full_df["label"]
)

# Train quick model
clf = LogisticRegression(max_iter=1000, class_weight="balanced")
clf.fit(X_train, y_train)

# Predict
y_pred = clf.predict(X_test)
# Probability of the positive (highlight) class, used for ROC-AUC.
y_prob = clf.predict_proba(X_test)[:, 1]

# Evaluate
acc = accuracy_score(y_test, y_pred)
prec = precision_score(y_test, y_pred, zero_division=0)
rec = recall_score(y_test, y_pred, zero_division=0)
f1 = f1_score(y_test, y_pred, zero_division=0)
auc = roc_auc_score(y_test, y_prob)

print("✅ Model Evaluation (auto-labeled highlights)")
print(f"Accuracy : {acc:.3f}")
print(f"Precision: {prec:.3f}")
print(f"Recall   : {rec:.3f}")
print(f"F1-score : {f1:.3f}")
print(f"ROC-AUC  : {auc:.3f}")

# Save model
# The interactive trailer cell reloads this file when `clf` is not in memory.
model_path = os.path.join(WORKDIR, "models", "highlight_model.pkl")
os.makedirs(os.path.dirname(model_path), exist_ok=True)
joblib.dump(clf, model_path)
print("Model saved →", model_path)

# Confusion matrix
# Figures are written to files rather than shown (the Agg backend is selected at import time).
cm = confusion_matrix(y_test, y_pred)
disp = ConfusionMatrixDisplay(confusion_matrix=cm)
disp.plot()
plt.title("Confusion Matrix")
plt.tight_layout()
plt.savefig(os.path.join(WORKDIR, "models", "confusion_matrix.png"))
plt.close()

# Feature importance (coefs)
# Coefficients are on the raw feature scales (no standardization is applied above).
importance = pd.Series(clf.coef_[0], index=feat_cols).sort_values()
plt.figure(figsize=(6,3.5))
importance.plot(kind="barh")
plt.title("Feature Importance (LogReg Coefficients)")
plt.tight_layout()
plt.savefig(os.path.join(WORKDIR, "models", "feature_importance.png"))
plt.close()

In [None]:
# 8) Build a trailer for one video (requires ffmpeg)
# Pick the alphabetically last mp4 in raw/ (sorted by filename, not by download time; change as needed)
mp4_list = sorted([p for p in os.listdir(raw_dir) if p.endswith(".mp4")])
if not mp4_list:
    raise RuntimeError("No MP4 found in raw/. Download step may have failed.")
mp4_path = os.path.join(raw_dir, mp4_list[-1])

# Recompute features (no captions by default)
# Passing None as the VTT path leaves the caption features at 0.0.
chunks = compute_features_for_video(mp4_path, None, 2.0, 6.0, 0.55)

# Score with the trained model
# Column order matches feat_cols from the training cell.
Xf = np.array([[c.motion, c.audio, c.cap_overlap, c.kw_density] for c in chunks], dtype=np.float32)
scores = clf.predict_proba(Xf)[:, 1]
# Min-max rescale the probabilities; the epsilon avoids dividing by zero when all scores are equal.
mn, mx = float(np.min(scores)), float(np.max(scores))
norm_scores = (scores - mn) / (mx - mn + 1e-9)
for i, c in enumerate(chunks):
    c.score = float(norm_scores[i])

# Render trailer
out_mp4 = os.path.join(WORKDIR, "out", f"{basename_noext(mp4_path)}_trailer.mp4")
render_trailer(mp4_path, chunks, out_mp4, target_len=20.0, min_seg=2.0)
print("Trailer:", out_mp4)

In [6]:
# A) Preference helpers (fast, 3–4 knobs) to blend with model scores

def compute_pref_weights(style="action", focus="both"):
    """Return normalized ``(motion, audio, text)`` weights for a style/focus preference.

    *style* selects a preset (unknown styles fall back to 0.4/0.4/0.2);
    *focus* "dialogue" adds 0.15 to text and "visuals" adds 0.15 to motion.
    Both are matched case-insensitively after trimming; falsy values mean
    "action" and "both". The returned weights sum to 1.
    """
    # simple weights (sum to 1) — only 3 signals: motion, audio, text
    presets = {
        "action": (0.55, 0.35, 0.10),
        "emotional": (0.20, 0.30, 0.50),
        "funny": (0.30, 0.25, 0.45),
        "informative": (0.10, 0.20, 0.70),
    }
    style_key = (style or "action").strip().lower()
    focus_key = (focus or "both").strip().lower()

    w_motion, w_audio, w_text = presets.get(style_key, (0.4, 0.4, 0.2))

    if focus_key == "dialogue":
        w_text += 0.15
    elif focus_key == "visuals":
        w_motion += 0.15

    total = w_motion + w_audio + w_text
    return (w_motion/total, w_audio/total, w_text/total)

def blend_model_with_prefs(chunks, model_scores, style="action", focus="both", blend_alpha=0.7):
    """
    Blend model scores with preference-weighted feature scores and store the
    result on each chunk's ``score``.

    blend_alpha in [0,1]: 1.0 uses only model, 0.0 uses only prefs.
    Returns the array of blended scores, one per chunk.
    """
    def _rescale(values):
        # Min-max to [0, 1]; the epsilon guards against a zero range.
        return (values - values.min()) / (values.max() - values.min() + 1e-9)

    w_motion, w_audio, w_text = compute_pref_weights(style, focus)
    motion_part = normalize([c.motion for c in chunks])
    audio_part = normalize([c.audio for c in chunks])
    text_part = normalize([0.5*c.cap_overlap + 0.5*c.kw_density for c in chunks])
    pref_scores = (w_motion*motion_part) + (w_audio*audio_part) + (w_text*text_part)

    # Normalize both streams then blend
    model_stream = _rescale(np.asarray(model_scores, dtype=np.float32))
    pref_stream = _rescale(pref_scores)
    final = blend_alpha*model_stream + (1.0-blend_alpha)*pref_stream

    # Write scores back into chunks for downstream selection
    for chunk, blended in zip(chunks, final):
        chunk.score = float(blended)
    return final

In [7]:
# B) Take user input → build trailer with trained model + preferences
# Paste this AFTER training so highlight_model.pkl exists.

# Load model if needed
# Reuse the in-memory classifier when the training cell ran in this session;
# otherwise fall back to the file saved by that cell.
model_path = os.path.join(WORKDIR, "models", "highlight_model.pkl")
if "clf" not in globals():
    if not os.path.exists(model_path):
        raise RuntimeError("Trained model not found. Run the training cell first.")
    clf = joblib.load(model_path)

# 1) Let user choose the source video
print("Choose a source video:")
print("  1) Enter a YouTube URL now")
print("  2) Pick by index from your CSV list (data/video_ids.csv)")
choice = input("Enter 1 or 2 [default=2]: ").strip() or "2"

if choice == "1":
    url = input("Paste YouTube URL: ").strip()
    if not (url.startswith("http://") or url.startswith("https://")):
        raise ValueError("Please provide a full YouTube URL.")
else:
    # Any answer other than "1" takes the CSV route.
    # Read from your CSV in WORKDIR
    csv_path = os.path.join(WORKDIR, "video_ids.csv")
    if not os.path.exists(csv_path):
        # fallback: try to copy if you used /mnt/data upload earlier
        import shutil
        src = "/mnt/data/video_ids.csv"
        if os.path.exists(src):
            shutil.copy(src, csv_path)
    # If neither location has the file, read_csv raises here.
    df_pick = pd.read_csv(csv_path)
    url_list = []
    if "url" in df_pick.columns:
        url_list += df_pick["url"].dropna().astype(str).str.strip().tolist()
    if "video_id" in df_pick.columns:
        for v in df_pick["video_id"].dropna().astype(str).str.strip():
            # Bare IDs are expanded to watch URLs; values starting with "http" are kept as-is.
            url_list.append(v if v.startswith("http") else f"https://www.youtube.com/watch?v={v}")
    # dedupe
    # Order-preserving: set.add returns None, so the filter keeps first occurrences only.
    seen = set(); url_list = [u for u in url_list if not (u in seen or seen.add(u))]
    if not url_list:
        raise RuntimeError("No rows found in CSV. Add at least one video_id or url.")

    print(f"\nFound {len(url_list)} entries:")
    for i, u in enumerate(url_list[:20]):  # show first 20
        print(f"  [{i}] {u}")
    idx = input(f"Pick an index [0..{len(url_list)-1}, default=0]: ").strip() or "0"
    # A non-numeric answer raises ValueError from int().
    idx = int(idx)
    if not (0 <= idx < len(url_list)):
        raise ValueError("Index out of range.")
    url = url_list[idx]

print("\nPreferences (leave blank for defaults)")
style = (input("Style [action/emotional/funny/informative] (default: action): ").strip() or "action")
focus = (input("Focus [dialogue/visuals/both] (default: both): ").strip() or "both")
# Unparsable numeric answers silently fall back to the defaults.
try:
    target_len = float(input("Target trailer length in seconds (default: 20): ").strip() or 20.0)
except Exception:
    target_len = 20.0
try:
    alpha = float(input("Blend α (0=prefs only, 1=model only) (default: 0.7): ").strip() or 0.7)
    # Clamp into [0, 1].
    alpha = min(max(alpha, 0.0), 1.0)
except Exception:
    alpha = 0.7

# 2) Download the chosen video (no subtitles by default to avoid 429)
raw_dir = os.path.join(WORKDIR, "raw")
mp4_path, vtt_path = download_video(
    url, raw_dir,
    write_subs=WRITE_SUBS,
    cookies_path=COOKIES_PATH,
    sleep_requests=SLEEP_REQUESTS,
    max_sleep_interval=MAX_SLEEP_INTERVAL
)

# 3) Extract features
# A VTT file found on disk is ignored unless subtitles are enabled for this session.
chunks = compute_features_for_video(mp4_path, vtt_path if WRITE_SUBS else None, min_seg=2.0, max_seg=6.0, scene_thresh=0.55)

# 4) Score with trained model and blend with preferences
# Column order matches feat_cols from the training cell.
Xf = np.array([[c.motion, c.audio, c.cap_overlap, c.kw_density] for c in chunks], dtype=np.float32)
proba = clf.predict_proba(Xf)[:, 1]
# The blended scores are written onto the chunks; the returned array is not needed here.
_ = blend_model_with_prefs(chunks, proba, style=style, focus=focus, blend_alpha=alpha)

# 5) Render the trailer
out_mp4 = os.path.join(WORKDIR, "out", f"{basename_noext(mp4_path)}_trailer.mp4")
render_trailer(mp4_path, chunks, out_mp4, target_len=target_len, min_seg=2.0)
print("\n✅ Done. Trailer saved at:", out_mp4)

Choose a source video:
  1) Enter a YouTube URL now
  2) Pick by index from your CSV list (data/video_ids.csv)


Enter 1 or 2 [default=2]:  1
Paste YouTube URL:  https://www.youtube.com/watch?v=KflMqooMzF8



Preferences (leave blank for defaults)


Style [action/emotional/funny/informative] (default: action):  informative
Focus [dialogue/visuals/both] (default: both):  both
Target trailer length in seconds (default: 20):  
Blend α (0=prefs only, 1=model only) (default: 0.7):  0.5


         player = https://www.youtube.com/s/player/5cf6312f/player_ias.vflset/en_US/base.js
         n = NGi04ZZmxNBw8aO ; player = https://www.youtube.com/s/player/5cf6312f/player_ias.vflset/en_US/base.js
         Please report this issue on  https://github.com/yt-dlp/yt-dlp/issues?q= , filling out the appropriate issue template. Confirm you are on the latest version using  yt-dlp -U


                                                                          

  from pkg_resources import resource_filename
  y, sr = librosa.load(mp4_path, sr=None, offset=max(0.0, start), duration=dur)
	Deprecated as of librosa version 0.10.0.
	It will be removed in librosa version 1.0.
  y, sr_native = __audioread_load(path, offset, duration, dtype)
  y, sr = librosa.load(mp4_path, sr=None, offset=max(0.0, start), duration=dur)
	Deprecated as of librosa version 0.10.0.
	It will be removed in librosa version 1.0.
  y, sr_native = __audioread_load(path, offset, duration, dtype)
Features KflMqooMzF8: 100%|███████████████████| 673/673 [05:06<00:00,  2.20it/s]
  ret = a @ b
  ret = a @ b
  ret = a @ b


🎬 Trailer saved: /Users/dhruvpatel/Desktop/trailer_ai_package/data/out/KflMqooMzF8_trailer.mp4

✅ Done. Trailer saved at: /Users/dhruvpatel/Desktop/trailer_ai_package/data/out/KflMqooMzF8_trailer.mp4


ffmpeg version 8.0 Copyright (c) 2000-2025 the FFmpeg developers
  built with Apple clang version 17.0.0 (clang-1700.3.19.1)
  configuration: --prefix=/opt/homebrew/Cellar/ffmpeg/8.0_1 --enable-shared --enable-pthreads --enable-version3 --cc=clang --host-cflags= --host-ldflags= --enable-ffplay --enable-gnutls --enable-gpl --enable-libaom --enable-libaribb24 --enable-libbluray --enable-libdav1d --enable-libharfbuzz --enable-libjxl --enable-libmp3lame --enable-libopus --enable-librav1e --enable-librist --enable-librubberband --enable-libsnappy --enable-libsrt --enable-libssh --enable-libsvtav1 --enable-libtesseract --enable-libtheora --enable-libvidstab --enable-libvmaf --enable-libvorbis --enable-libvpx --enable-libwebp --enable-libx264 --enable-libx265 --enable-libxml2 --enable-libxvid --enable-lzma --enable-libfontconfig --enable-libfreetype --enable-frei0r --enable-libass --enable-libopencore-amrnb --enable-libopencore-amrwb --enable-libopenjpeg --enable-libspeex --enable-libsoxr --e

  from pkg_resources import resource_filename
  y, sr = librosa.load(mp4_path, sr=None, offset=max(0.0, start), duration=dur)
	Deprecated as of librosa version 0.10.0.
	It will be removed in librosa version 1.0.
  y, sr_native = __audioread_load(path, offset, duration, dtype)
  y, sr = librosa.load(mp4_path, sr=None, offset=max(0.0, start), duration=dur)
	Deprecated as of librosa version 0.10.0.
	It will be removed in librosa version 1.0.
  y, sr_native = __audioread_load(path, offset, duration, dtype)
Features KflMqooMzF8: 100%|███████████████████| 673/673 [11:46<00:00,  1.05s/it]
  ret = a @ b
  ret = a @ b
  ret = a @ b


🎬 Trailer saved: /Users/dhruvpatel/Desktop/trailer_ai_package/data/out/KflMqooMzF8_trailer.mp4

✅ Done. Trailer saved at: /Users/dhruvpatel/Desktop/trailer_ai_package/data/out/KflMqooMzF8_trailer.mp4


ffmpeg version 8.0 Copyright (c) 2000-2025 the FFmpeg developers
  built with Apple clang version 17.0.0 (clang-1700.3.19.1)
  configuration: --prefix=/opt/homebrew/Cellar/ffmpeg/8.0_1 --enable-shared --enable-pthreads --enable-version3 --cc=clang --host-cflags= --host-ldflags= --enable-ffplay --enable-gnutls --enable-gpl --enable-libaom --enable-libaribb24 --enable-libbluray --enable-libdav1d --enable-libharfbuzz --enable-libjxl --enable-libmp3lame --enable-libopus --enable-librav1e --enable-librist --enable-librubberband --enable-libsnappy --enable-libsrt --enable-libssh --enable-libsvtav1 --enable-libtesseract --enable-libtheora --enable-libvidstab --enable-libvmaf --enable-libvorbis --enable-libvpx --enable-libwebp --enable-libx264 --enable-libx265 --enable-libxml2 --enable-libxvid --enable-lzma --enable-libfontconfig --enable-libfreetype --enable-frei0r --enable-libass --enable-libopencore-amrnb --enable-libopencore-amrwb --enable-libopenjpeg --enable-libspeex --enable-libsoxr --e