In [2]:
from dotenv import load_dotenv
import os
from app.backend.pipeline.transcribe import transcribe_video
from app.backend.pipeline.summarize import select_story_segments
from app.backend.pipeline.video_utils import concat_clips, cut_segments
from app.backend.utils import hash_file, save_pickle, load_pickle
from pathlib import Path

# Load variables from a .env file (if one is present) into the process environment.
load_dotenv()

# Sanity check: report only whether each credential is set (True/False),
# never the secret value itself.
print("OPENAI_API_KEY" in os.environ)
print("HUGGINGFACE_TOKEN" in os.environ)

# NOTE (Matt): leave ENABLE_CACHE on to skip the transcription and diarization
# steps when the video is already in the cache (looked up by the hash of the
# video file).
ENABLE_CACHE = True
CACHE_DIR = Path("cache")
# Create the cache directory up front; this is a no-op when it already exists.
CACHE_DIR.mkdir(parents=True, exist_ok=True)

True
True


In [19]:
# --- Define the pipeline ---
# Run the AutoCut pipeline with caching on the transcription (SRT) *content*.
# All downstream artifacts are stored under ./artifacts/<file_hash>/.
input_path = Path("rawfootage_mary(Interview Original).mp4")

# Fail fast with a clear message before hashing or creating any directories.
if not input_path.is_file():
    raise FileNotFoundError(f"Input video not found: {input_path}")

hf_token = os.getenv("HUGGINGFACE_TOKEN")
file_hash = hash_file(input_path)

# Per-video output directory, keyed by the hash of the input file.
base_dir = Path("artifacts") / file_hash
base_dir.mkdir(parents=True, exist_ok=True)

# Use the cache directory configured (and created) in the setup cell rather
# than a second hard-coded "cache" path that could drift out of sync.
cache_path = CACHE_DIR / f"{file_hash}.srt.pkl"

# Final location of the transcript, identical for the cached and fresh paths.
srt_path = base_dir / "transcription.srt"

print(f"üé• Processing: {input_path.name}")
print(f"üîë Hash: {file_hash}")
print(f"üìÅ Outputs will be saved to: {base_dir}\n")

# --- Stage 1: Transcription (cached) ---
if ENABLE_CACHE and cache_path.exists():
    print(f"‚úÖ Using cached SRT text: {cache_path}")
    # NOTE(review): the cache is a pickle file; only load caches written by
    # this pipeline, since unpickling untrusted files can execute code.
    srt_text = load_pickle(cache_path)
    # Materialize the cached text so downstream stages can read it from disk.
    srt_path.write_text(srt_text, encoding="utf-8")
else:
    print("‚öôÔ∏è Transcribing video with GPU (this may take a while)...")
    raw_srt_path = Path(transcribe_video(input_path, hf_token))

    # Read the file text and cache the content (written even when
    # ENABLE_CACHE is False, so a later cached run can pick it up).
    srt_text = raw_srt_path.read_text(encoding="utf-8")
    save_pickle(srt_text, cache_path)
    print(f"üíæ Cached SRT text at: {cache_path}")

    # Move it under artifacts/<hash>/ for consistency
    raw_srt_path.replace(srt_path)

üé• Processing: rawfootage_mary(Interview Original).mp4
üîë Hash: 6663679a4f42dfc5e059e497045ae07a8421bb707ac8490eeb3e34f417031146
üìÅ Outputs will be saved to: artifacts/6663679a4f42dfc5e059e497045ae07a8421bb707ac8490eeb3e34f417031146

‚úÖ Using cached SRT text: cache/6663679a4f42dfc5e059e497045ae07a8421bb707ac8490eeb3e34f417031146.srt.pkl


In [20]:
# --- Stages 2-4: select story segments here; cutting and concatenation follow in the next cell ---
print("\n‚úÇÔ∏è Selecting story segments...")
# max_duration is presumably the target length of the final cut in seconds — TODO confirm.
story_json = select_story_segments(srt_path, max_duration=120, model="gpt-5")

# Keep the segments file under a stable name inside the per-video artifacts
# directory; skip the move when the helper already wrote it there.
story_json_out = base_dir / "segments.json"
if Path(story_json) != story_json_out:
    Path(story_json).replace(story_json_out)
story_json = story_json_out


‚úÇÔ∏è Selecting story segments...
‚è≥ Calling LLM to select story segments...
‚úÖ Saved 16 story segments to artifacts/6663679a4f42dfc5e059e497045ae07a8421bb707ac8490eeb3e34f417031146/transcription.story_segments.json


In [21]:
# Destinations for this stage, both inside the per-video artifacts directory.
clips_target = base_dir / "clips"
final_target = base_dir / "final_video.mp4"

# Cut the selected segments out of the source video.
print("üéûÔ∏è Cutting segments...")
clips_dir = cut_segments(input_path, story_json, output_dir=clips_target)

# Join the clips into the final video.
print("üé¨ Concatenating clips...")
final_video = concat_clips(clips_dir, output_path=final_target)

print("\n‚úÖ Done!")

üéûÔ∏è Cutting segments...
‚úÖ clip_01_00-00-28.120_00-00-36.440.mp4
‚úÖ clip_02_00-01-08.560_00-01-18.080.mp4
‚úÖ clip_03_00-01-19.480_00-01-24.480.mp4
‚úÖ clip_04_00-01-25.480_00-01-30.480.mp4
‚úÖ clip_05_00-01-32.040_00-01-36.560.mp4
‚úÖ clip_06_00-01-38.240_00-01-47.560.mp4
‚úÖ clip_07_00-01-49.120_00-01-58.880.mp4
‚úÖ clip_08_00-01-59.840_00-02-12.800.mp4
‚úÖ clip_09_00-02-14.160_00-02-17.520.mp4
‚úÖ clip_10_00-02-28.320_00-02-38.560.mp4
‚úÖ clip_11_00-03-01.520_00-03-03.440.mp4
‚úÖ clip_12_00-03-11.920_00-03-19.600.mp4
‚úÖ clip_13_00-03-20.560_00-03-25.600.mp4
‚úÖ clip_14_00-03-29.040_00-03-36.400.mp4
‚úÖ clip_15_00-13-28.080_00-13-40.720.mp4
‚úÖ clip_16_00-17-05.520_00-17-18.720.mp4
üé¨ Concatenating clips...
‚úÖ Concatenated 16 clips into artifacts/6663679a4f42dfc5e059e497045ae07a8421bb707ac8490eeb3e34f417031146/final_video.mp4

‚úÖ Done!


In [22]:
# Collect the location of every artifact produced by the pipeline, as strings.
outputs = dict(
    srt=str(srt_path),
    segments_json=str(story_json),
    clips_dir=str(clips_dir),
    final_video=str(final_video),
)

# Print one indented "name: path" line per artifact, in insertion order.
for label, location in outputs.items():
    print(f"  {label}: {location}")


  srt: artifacts/6663679a4f42dfc5e059e497045ae07a8421bb707ac8490eeb3e34f417031146/transcription.srt
  segments_json: artifacts/6663679a4f42dfc5e059e497045ae07a8421bb707ac8490eeb3e34f417031146/segments.json
  clips_dir: artifacts/6663679a4f42dfc5e059e497045ae07a8421bb707ac8490eeb3e34f417031146/clips
  final_video: artifacts/6663679a4f42dfc5e059e497045ae07a8421bb707ac8490eeb3e34f417031146/final_video.mp4
