In [None]:
import whisper

# Source video to transcribe and the subtitle file this cell produces.
# Both names are reused further down in this cell when the SRT is written.
input_path = "rawfootage_mary(Interview Original).mp4"
output_srt = "output.srt"

# Load the Whisper checkpoint by name.
model = whisper.load_model("base")  # or "small", "medium", "large"

# Transcribe the file; verbose=True is passed through to Whisper.
# The result is indexed below as result["segments"], and each segment is read
# for its "start", "end" and "text" entries when the SRT is written.
result = model.transcribe(input_path, verbose=True)

# Write to SRT
def write_srt(segments, filename):
    """Serialize transcript segments to a UTF-8 SubRip (.srt) file.

    Args:
        segments: Iterable of mappings, each providing "start" and "end"
            (passed to ``format_timestamp``) and "text" (the caption, which
            is stripped of surrounding whitespace before writing).
        filename: Destination path; the file is created or overwritten.

    Each cue is written as its 1-based number, a ``start --> end`` line, the
    caption text and a blank separator line.
    """
    with open(filename, "w", encoding="utf-8") as srt_file:
        for cue_number, segment in enumerate(segments, start=1):
            cue_start = segment["start"]
            cue_end = segment["end"]
            caption = segment["text"].strip()
            srt_file.write(f"{cue_number}\n")
            time_range = f"{format_timestamp(cue_start)} --> {format_timestamp(cue_end)}\n"
            srt_file.write(time_range)
            srt_file.write(f"{caption}\n\n")

def format_timestamp(seconds: float) -> str:
    """Format a duration in seconds as an SRT timestamp ``HH:MM:SS,mmm``.

    The value is rounded to whole milliseconds *before* it is split into
    hours/minutes/seconds, so a value such as 59.9996 carries into the next
    minute ("00:01:00,000") instead of producing an invalid "60" seconds
    field. Negative inputs are clamped to zero.

    Args:
        seconds: Offset from the start of the media, in seconds.

    Returns:
        The timestamp string with a comma as the millisecond separator.
    """
    total_ms = max(0, int(round(float(seconds) * 1000)))
    hours, remainder_ms = divmod(total_ms, 3_600_000)
    minutes, remainder_ms = divmod(remainder_ms, 60_000)
    secs, millis = divmod(remainder_ms, 1000)
    return f"{hours:02d}:{minutes:02d}:{secs:02d},{millis:03d}"

# Persist the Whisper segments as an SRT file at the path configured above.
write_srt(result["segments"], output_srt)

# Confirmation message naming the file that was just written.
print(f"✅ SRT saved to {output_srt}")


In [2]:
from pathlib import Path
import re

def extract_text_from_srt(srt_path: str, keep_timestamps: bool = True) -> str:
    """Flatten an SRT file into a single space-joined string.

    Cue-number lines (digits only) and blank lines are always dropped.
    Timestamp lines (those containing ``-->``) are kept by default, which is
    what this function has always done in practice: the previous pattern used
    ``re.match``, which anchors at the start of the line, so its ``-->``
    alternative never matched a real timestamp line.

    Args:
        srt_path: Path to a UTF-8 encoded .srt file.
        keep_timestamps: When True (default) timestamp lines are retained so
            the result still carries timing information. Pass False to get
            the spoken text only.

    Returns:
        The retained lines, each stripped, joined with single spaces.
    """
    # Same "always skip" rules as before: cue numbers and blank lines.
    always_skip = re.compile(r"^\d+$|^\s*$")
    kept_lines = []
    with open(srt_path, "r", encoding="utf-8") as f:
        for line in f:
            if always_skip.match(line):
                continue
            # Substring test rather than an anchored match: the arrow sits in
            # the middle of a timestamp line, not at its start.
            if not keep_timestamps and "-->" in line:
                continue
            kept_lines.append(line.strip())
    return " ".join(kept_lines)

# Flatten the SRT written earlier into one string; srt_text is interpolated
# into the LLM prompt in a later cell.
# NOTE(review): the path is hard-coded here rather than reusing `output_srt`
# from the transcription cell - keep the two in sync if the name changes.
srt_text = extract_text_from_srt("output.srt")
print(srt_text[:500])  # sanity check: preview the first 500 characters


00:00:00,000 --> 00:00:03,080 Okay, ready to get started? 00:00:03,080 --> 00:00:03,920 Okay. 00:00:03,920 --> 00:00:06,600 So start out and just can you introduce yourself, 00:00:06,600 --> 00:00:10,080 so say your first name and how long you have been living 00:00:10,080 --> 00:00:12,560 at sunrise of Alexandria. 00:00:12,560 --> 00:00:16,120 Okay, my name is Mary Sikoski. 00:00:16,120 --> 00:00:19,200 I've been living here at sunrise almost five months. 00:00:19,200 --> 00:00:22,480 It'll be 


In [None]:
import os
import json
from openai import OpenAI

# Read the API key from the environment so it never appears in the notebook.
API_KEY = os.getenv("OPENAI_API_KEY")

# Fail early with a clear message instead of sending an unauthenticated request.
if not API_KEY:
    raise ValueError("Please set the OPENAI_API_KEY environment variable.")

client = OpenAI(
    api_key = API_KEY
)

# Target length (seconds) of the edited story; interpolated into the prompt below.
max_duration = 120
# Chat model used for the selection; the trailing "#-mini" is a commented-out suffix.
model_name = "gpt-4o"#-mini

# --- build prompt ---
# The system prompt is an f-string: {max_duration} is substituted, and the
# doubled braces {{ }} render as literal braces in the JSON example.
# NOTE(review): the JSON example shows "HH:MM:SS.xx" while the prompt also says
# to use timestamps exactly as in the transcript, whose timestamps look like
# "00:00:12,560" - the two instructions disagree; confirm which is intended.
system_prompt = f"""
You are a video editor creating a concise {max_duration}-second story from a full interview transcript in SRT format.
Your task is to down-select the most meaningful spoken moments that together form a cohesive story.

Return only the essential timestamped dialogue segments that preserve:
- chronological order
- emotional continuity and completeness of thought
- total combined runtime of about {max_duration} seconds (±10 seconds)
- use timestamps exactly as in the transcript
- do not fabricate or merge lines from different timestamps
- ignore visual or b-roll info
- preserve tone and authenticity

Output only valid JSON: a list of objects like
[
  {{"start": "HH:MM:SS.xx", "end": "HH:MM:SS.xx", "spoken_text": "verbatim text"}}
]
"""

# The user message carries the whole flattened transcript (srt_text from the
# previous cell).
user_prompt = f"Here is the full SRT transcript:\n\n{srt_text}\n\nReturn only the JSON list."

# Single chat-completion request: system instructions + transcript.
resp = client.chat.completions.create(
    model=model_name,
    messages=[
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt},
    ],
    temperature=0.2,
)

# Show the raw reply; it is parsed into JSON in the next cell.
print(resp.choices[0].message.content)

```json
[
  {"start": "00:00:12,560", "end": "00:00:16,120", "spoken_text": "Okay, my name is Mary Sikoski."},
  {"start": "00:00:16,120", "end": "00:00:19,200", "spoken_text": "I've been living here at sunrise almost five months."},
  {"start": "00:00:56,800", "end": "00:01:00,920", "spoken_text": "So can you talk a little bit about sunrise of Alexandria, what stood out to you, what you noticed about this community?"},
  {"start": "00:01:02,160", "end": "00:01:13,080", "spoken_text": "Well, as I've told almost anybody that I've met who's coming in here and I try to encourage them, I think this is a wonderful place for people to come to live."},
  {"start": "00:01:13,080", "end": "00:01:25,480", "spoken_text": "Sunrise embodies a lot of things that everyone wants, security, kindness, happiness, a happy face, good food, a good bed, and people who care about what's happening to you."},
  {"start": "00:01:25,480", "end": "00:01:32,040", "spoken_text": "So that's what I find as sunrise and

In [32]:
# The reply text may be None (no content returned); treat that as empty so the
# JSON error path below reports it instead of an AttributeError here.
raw = (resp.choices[0].message.content or "").strip()


def _strip_code_fences(text: str) -> str:
    """Return the payload of the first ``` fenced block, or ``text`` if unfenced.

    Handles a fence that is preceded by prose (the language tag is removed
    together with the opening fence) and an unterminated fence.
    """
    fenced = re.search(r"```[a-zA-Z0-9]*[ \t]*\n?(.*?)```", text, flags=re.DOTALL)
    if fenced:
        return fenced.group(1).strip()
    # No closing fence: drop any fence markers (with their language tag).
    return re.sub(r"```[a-zA-Z0-9]*", "", text).strip()


# --- clean fences if present ---
if "```" in raw:
    raw = _strip_code_fences(raw)

# --- parse JSON safely ---
try:
    segments = json.loads(raw)
except json.JSONDecodeError as exc:
    # Keep the unparseable text on disk for inspection.
    Path("story_segments_raw.txt").write_text(raw, encoding="utf-8")
    raise RuntimeError("Model output was not valid JSON; raw text saved to story_segments_raw.txt") from exc

# --- validate shape before saving ---
# The trimming cell reads seg["start"] and seg["end"] from every item, so
# reject anything else here where the cause is still obvious.
if not isinstance(segments, list) or not all(
    isinstance(seg, dict) and "start" in seg and "end" in seg for seg in segments
):
    Path("story_segments_raw.txt").write_text(raw, encoding="utf-8")
    raise RuntimeError(
        "Model output was valid JSON but not a list of objects with 'start' and 'end'; "
        "raw text saved to story_segments_raw.txt"
    )

# --- save clean JSON ---
Path("story_segments.json").write_text(json.dumps(segments, indent=2), encoding="utf-8")
print(f"✅ Saved {len(segments)} cleaned segments to story_segments.json")


def normalize_ts(ts: str) -> str:
    """Trim surrounding whitespace and turn every comma into a dot.

    Example: the SRT-style '00:00:12,560' becomes '00:00:12.560'.
    """
    trimmed = ts.strip()
    return trimmed.replace(",", ".")

✅ Saved 64 cleaned segments to story_segments.json


In [33]:
import json, subprocess
from pathlib import Path

# Source footage to cut from and the segment list saved by the parsing cell.
input_video = "rawfootage_mary(Interview Original).mp4"
segments_file = "story_segments.json"
# Folder that receives one clip per segment; created if missing, and existing
# contents are left in place (exist_ok=True).
output_dir = Path("trimmed_clips")
output_dir.mkdir(exist_ok=True)

# Reload the segments from disk rather than relying on the in-memory value.
# Each item is read below for its "start" and "end" entries.
segments = json.loads(Path(segments_file).read_text(encoding="utf-8"))

def ffmpeg_trim(input_path, start, end, output_path):
    """Cut the span ``start``..``end`` out of ``input_path`` into ``output_path``.

    The clip is re-encoded (H.264 video, AAC audio) rather than stream-copied,
    and ``-ss``/``-to`` are given before ``-i``. An existing output file is
    overwritten (``-y``).

    Args:
        input_path: Source media file (str or path-like).
        start: Start timestamp string passed to ffmpeg's ``-ss``.
        end: End timestamp string passed to ffmpeg's ``-to``.
        output_path: Destination clip (str or ``Path``).

    Returns:
        True when ffmpeg exits with status 0, False otherwise. On failure the
        captured stderr is printed so the cause is visible in the notebook.
    """
    # Accept plain strings as well as Path objects; `.name` is used below.
    output_path = Path(output_path)
    cmd = [
        "ffmpeg",
        "-hide_banner",
        "-loglevel", "error",      # quieter logs
        "-ss", start,
        "-to", end,
        "-i", str(input_path),
        "-c:v", "libx264",         # re-encode ensures playable output
        "-c:a", "aac",
        "-strict", "experimental",
        "-y",
        str(output_path)
    ]
    result = subprocess.run(cmd, capture_output=True, text=True)
    if result.returncode != 0:
        print(f"❌ ffmpeg failed for {output_path.name}")
        print(result.stderr)
        return False
    print(f"✅ {output_path.name}")
    return True

# Clip names embed the segment timestamps, so a re-run with a different
# selection would not overwrite the previous clips. The concatenation cell
# picks up every *.mp4 in this folder, so remove this notebook's earlier
# clips first to keep stale footage out of the final video.
for stale_clip in output_dir.glob("clip_*.mp4"):
    stale_clip.unlink()

# Zero-pad the index widely enough for the number of segments (at least two
# digits, as before) so that sorting the file names keeps playback order
# even with 100+ clips.
index_width = max(2, len(str(len(segments))))

for i, seg in enumerate(segments, start=1):
    # Timestamps are normalised to use a dot before being handed to ffmpeg.
    start, end = normalize_ts(seg["start"]), normalize_ts(seg["end"])
    # Colons are replaced because they are not valid in file names everywhere.
    clip_name = f"clip_{i:0{index_width}d}_{start.replace(':', '-')}_{end.replace(':', '-')}.mp4"
    out_path = output_dir / clip_name
    ffmpeg_trim(input_video, start, end, out_path)

print(f"\nAll done. Check clips in {output_dir.resolve()}")


✅ clip_01_00-00-12.560_00-00-16.120.mp4
✅ clip_02_00-00-16.120_00-00-19.200.mp4
✅ clip_03_00-00-56.800_00-01-00.920.mp4
✅ clip_04_00-01-02.160_00-01-13.080.mp4
✅ clip_05_00-01-13.080_00-01-25.480.mp4
✅ clip_06_00-01-25.480_00-01-32.040.mp4
✅ clip_07_00-01-38.240_00-01-52.000.mp4
✅ clip_08_00-01-52.000_00-01-58.880.mp4
✅ clip_09_00-01-59.840_00-02-09.920.mp4
✅ clip_10_00-02-14.160_00-02-28.320.mp4
✅ clip_11_00-02-28.320_00-02-37.120.mp4
✅ clip_12_00-03-01.520_00-03-05.280.mp4
✅ clip_13_00-03-09.440_00-03-20.560.mp4
✅ clip_14_00-03-46.320_00-03-54.960.mp4
✅ clip_15_00-04-00.320_00-04-02.160.mp4
✅ clip_16_00-04-02.800_00-04-08.800.mp4
✅ clip_17_00-04-11.840_00-04-26.880.mp4
✅ clip_18_00-04-27.520_00-04-31.600.mp4
✅ clip_19_00-05-01.440_00-05-06.160.mp4
✅ clip_20_00-05-09.920_00-05-11.600.mp4
✅ clip_21_00-05-11.600_00-05-16.160.mp4
✅ clip_22_00-05-31.040_00-05-34.160.mp4
✅ clip_23_00-05-36.080_00-05-46.160.mp4
✅ clip_24_00-05-52.400_00-05-55.120.mp4
✅ clip_25_00-06-01.600_00-06-03.120.mp4


In [34]:
import subprocess
from pathlib import Path

# --- config ---
clips_dir = Path("trimmed_clips")
output_video = "story_concat.mp4"


def _clip_sort_key(clip_path):
    """Sort key that orders 'clip_<N>_...' files by the number N.

    A plain string sort would put clip_100 before clip_11. Files that do not
    follow the naming scheme sort after the numbered ones, alphabetically.
    """
    name_parts = clip_path.name.split("_")
    index_text = name_parts[1] if len(name_parts) > 1 else ""
    if index_text.isdecimal():
        return (0, int(index_text), clip_path.name)
    return (1, 0, clip_path.name)


# --- collect clips in natural (numeric) order ---
clip_files = sorted(clips_dir.glob("*.mp4"), key=_clip_sort_key)
if not clip_files:
    raise FileNotFoundError(f"No .mp4 clips found in {clips_dir.resolve()}")

# --- create ffmpeg concat list file ---
concat_file = Path("concat_list.txt")
with concat_file.open("w", encoding="utf-8") as f:
    for clip in clip_files:
        # ffmpeg concat demuxer requires "file 'path'"; a single quote inside
        # the path has to be written as '\'' or the list file is unparseable.
        escaped_path = str(clip.resolve()).replace("'", "'\\''")
        f.write(f"file '{escaped_path}'\n")

# --- concat using ffmpeg demuxer ---
# "-safe 0" is needed because the list uses absolute paths; "-c copy" joins
# the clips without re-encoding.
cmd = [
    "ffmpeg",
    "-y",
    "-f", "concat",
    "-safe", "0",
    "-i", str(concat_file),
    "-c", "copy",
    output_video
]

concat_proc = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
# Only report success when ffmpeg actually succeeded.
if concat_proc.returncode != 0:
    print(f"❌ ffmpeg concat failed for {output_video}")
    print(concat_proc.stderr)
    raise RuntimeError(f"ffmpeg concat exited with status {concat_proc.returncode}")
print(f"✅ Concatenated {len(clip_files)} clips into {output_video}")


✅ Concatenated 64 clips into story_concat.mp4
