# Setup

In [1]:
import logging
logger = logging.getLogger(__name__)
logging.basicConfig(level=logging.INFO)

In [4]:
# take openai key as input secret
import getpass
openai_key = getpass.getpass("Enter your OpenAI API key: ")

Enter your OpenAI API key:  ········


In [5]:
import os
os.environ["OPENAI_API_KEY"] = openai_key

# Read Your Story

In [45]:
import os
from pathlib import Path
data_dir = Path(os.getcwd()) / "data" / "0"
parts = []
files = [f for f in os.listdir(data_dir) if f.endswith('.txt')]
logger.info(f"Files in data directory: {files}")
for f in files:
    with open(data_dir / f, 'r', encoding='utf8') as file:
        content = file.read()
        parts.append(content)
logger.info(f"Read {len(parts)} parts from the story files.")


INFO:__main__:Files in data directory: ['0.txt', '1.txt', '2.txt', '3.txt']
INFO:__main__:Read 4 parts from the story files.


In [46]:
parts[1]

'The Arrival\nIt was past 8:00 PM when Riya, Kabir, and Mehul entered the silent city of Bansipur.\nA faint mist curled along the cracked roads, the streetlamps flickering as if unsure they wanted to stay lit.\nThey had been driving for hours, lost after taking a wrong turn from the highway. Fuel gauge—dangerously low.\nBansipur looked deserted, except for one building at the end of the main road:\na bright, glowing sign that read “Mehta Super Mart – Always Open.”\nRiya gave a nervous laugh.\n“Creepy or not, we need snacks. And water. And… maybe a map.”\nThey parked, the sound of their car engine echoing too loudly in the empty street.\nThe sliding doors of the supermarket opened with a slow hiss, though no one stood behind the counter.\nInside, the air was too cold for summer.\nThe lights buzzed overhead, but the aisles were perfectly stocked—cereal boxes lined like soldiers, canned goods gleaming, fruits unnaturally shiny.\nMehul called out, “Hello? Anyone here?”\nOnly the sound of t

# Scenes Creation

In [8]:
prompt = """
You are a YouTube Shorts storyboard generator and animation planner.

I will give you a story. Split it into coherent scenes for a short vertical video (9:16). Ensure narration, visuals, and motion match exactly.

IMPORTANT: EVERY sentence from the story MUST appear in narration_text of a scene.
Do not skip, merge, or truncate sentences. Ignore duration or scene count if it conflicts with this.

## Style
- The `style` field is a **single descriptive string** capturing the cinematic style for the entire video.
- Include lens type, film format, lighting, mood, visual tone, and any director/genre references.
- This string will guide SDXL’s rendering across all scenes.

---

## Scene Rules
- Chunking: Default = one scene per 2-3 sentences. Also merge if sentence ≤10 words or incomplete. Split if multiple locations/actions.
- Continuity: Narration and visuals represent the same moment.
- Perspective: Specify exact camera POV.
- Sound: Suggest background music or ambient sound per scene.
- Consistency: Keep characters/props/environment consistent unless narration changes.
- Aspect Ratio: Vertical 9:16.
- Choose values according to **how similar the current scene is to the previous**.
- Do not truncate narration to satisfy strength.

---

## Visual Prompts
Each scene has visual_prompts (array of objects):
- stage_type: "base", "lighting", or "details"
- prompt: description text
Omit unused stages.

---

## Animation
Each scene must include:
- animation_type: "Ken Burns", "Parallax", "Cinemagraph", "Dolly Zoom", or "Static"
- ffmpeg_command: string showing how to apply effect

Example:
```json
{{
  "animation_type": "Ken Burns",
  "ffmpeg_command": "-vf zoompan=d=250:fps=25,scale=1080:1920"
}}
Story:
{story_text}
"""


In [9]:
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
template = ChatPromptTemplate.from_template(prompt)

In [10]:
from pydantic import BaseModel, Field
from typing import List, Optional, Literal


class GeographicLocation(BaseModel):
    country: Optional[str] = None
    specific_location: Optional[str] = None


class TimePeriod(BaseModel):
    era: Optional[str] = None
    time_of_day: Optional[str] = None


class Weather(BaseModel):
    condition: Optional[str] = None
    details: Optional[str] = None


class Character(BaseModel):
    name: str
    role: Literal["hero", "narrator", "bystander", "antagonist", "supporting"]
    visual_features: Optional[str] = None
    psychological_features: Optional[str] = None


class Profile(BaseModel):
    geographic_location: GeographicLocation
    time_period: TimePeriod
    weather: Weather
    ethnicity: Optional[str] = None
    mood: Optional[str] = None
    characters: List[Character] = []


class VisualPromptStage(BaseModel):
    stage_type: Literal["base", "lighting", "details"]
    prompt: str


class Style(BaseModel):
    style_description: str


class Animation(BaseModel):
    animation_type: Literal["Ken Burns", "Parallax", "Cinemagraph", "Dolly Zoom", "Static"]
    ffmpeg_command: str = Field(..., description="ffmpeg command to apply this animation")


class Subtitle(BaseModel):
    index: int = Field(..., description="Sequential subtitle index (starts at 1 for each scene)")
    start_time: str = Field(..., description="Start time in HH:MM:SS,mmm")
    end_time: str = Field(..., description="End time in HH:MM:SS,mmm")
    text: str = Field(..., description="Subtitle text, ≤42 chars per line")


class Scene(BaseModel):
    scene_index: int
    narration_text: str
    subtitle_text: str
    visual_prompts: List[VisualPromptStage]
    background_music: str
    animation: Animation
    duration_sec: int
    strength_from_previous: float = Field(..., ge=0.0, le=1.0, description="Scene transition blending strength")
    subtitles: List[Subtitle] = Field(..., description="Per-scene subtitles in SRT format")


class StoryBoard(BaseModel):
    profile: Profile
    style: Style
    scenes: List[Scene]


In [11]:
from langchain_openai import ChatOpenAI
llm = ChatOpenAI(
    model="gpt-4o",
    temperature=0.2,
    openai_api_key=openai_key,
)

llm = llm.with_structured_output(StoryBoard)  # Enable structured output for Scenes model

In [12]:
chain = template | llm

In [13]:
result = chain.invoke({
    "story_text": "\n\n".join(parts)
})


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


In [14]:
"\n\n".join(parts)

'Title: They are still there\n\nThe Arrival\nIt was past 8:00 PM when Riya, Kabir, and Mehul entered the silent city of Bansipur.\nA faint mist curled along the cracked roads, the streetlamps flickering as if unsure they wanted to stay lit.\nThey had been driving for hours, lost after taking a wrong turn from the highway. Fuel gauge—dangerously low.\nBansipur looked deserted, except for one building at the end of the main road:\na bright, glowing sign that read “Mehta Super Mart – Always Open.”\nRiya gave a nervous laugh.\n“Creepy or not, we need snacks. And water. And… maybe a map.”\nThey parked, the sound of their car engine echoing too loudly in the empty street.\nThe sliding doors of the supermarket opened with a slow hiss, though no one stood behind the counter.\nInside, the air was too cold for summer.\nThe lights buzzed overhead, but the aisles were perfectly stocked—cereal boxes lined like soldiers, canned goods gleaming, fruits unnaturally shiny.\nMehul called out, “Hello? Any

In [15]:
len(result.scenes)

20

In [16]:
result.profile

Profile(geographic_location=GeographicLocation(country='India', specific_location='Bansipur'), time_period=TimePeriod(era='Modern', time_of_day='Night'), weather=Weather(condition='Misty', details='Faint mist curling along roads'), ethnicity='South Asian', mood='Eerie, suspenseful', characters=[Character(name='Riya', role='hero', visual_features='Young woman, nervous expression', psychological_features='Anxious but determined'), Character(name='Kabir', role='supporting', visual_features='Young man, thoughtful demeanor', psychological_features='Cautious and observant'), Character(name='Mehul', role='supporting', visual_features='Young man, curious eyes', psychological_features='Inquisitive and brave')])

In [17]:
result.style

Style(style_description="Cinematic style with a handheld feel, shot on a digital camera with a 35mm lens. Lighting is dim and moody, with a cool color palette. The tone is suspenseful, reminiscent of Hitchcock's thrillers, with a touch of supernatural horror.")

In [18]:
result.scenes

[Scene(scene_index=1, narration_text='It was past 8:00 PM when Riya, Kabir, and Mehul entered the silent city of Bansipur. A faint mist curled along the cracked roads, the streetlamps flickering as if unsure they wanted to stay lit.', subtitle_text='The Arrival', visual_prompts=[VisualPromptStage(stage_type='base', prompt='A car driving into a misty, deserted city at night.'), VisualPromptStage(stage_type='lighting', prompt='Streetlamps flickering, casting eerie shadows.')], background_music='Low, suspenseful strings with a hint of wind howling.', animation=Animation(animation_type='Ken Burns', ffmpeg_command='-vf zoompan=d=250:fps=25,scale=1080:1920'), duration_sec=10, strength_from_previous=0.5, subtitles=[Subtitle(index=1, start_time='00:00:01,000', end_time='00:00:05,000', text='It was past 8:00 PM when Riya, Kabir, and Mehul entered the silent city of Bansipur.'), Subtitle(index=2, start_time='00:00:05,500', end_time='00:00:10,000', text='A faint mist curled along the cracked road

# Scene Profile (TBD)

In [105]:
from tabulate import tabulate

def to_profile_tab(profile: Profile) -> str:
    rows = [
        ["Geographic Location", f"{profile.geographic_location.country or ''}, {profile.geographic_location.specific_location or ''}"],
        ["Time Period", f"Era: {profile.time_period.era or ''}, Time: {profile.time_period.time_of_day or ''}"],
        ["Weather", f"{profile.weather.condition or ''} ({profile.weather.details or ''})"],
        ["Ethnicity", profile.ethnicity or ""],
        ["Mood", profile.mood or ""],
    ]
    
    # Add characters as a sub-table
    char_rows = []
    for c in profile.characters:
        char_rows.append([
            c.name,
            c.role,
            c.visual_features or "",
            c.psychological_features or ""
        ])
    
    table_str = tabulate(rows, headers=["Attribute", "Value"], tablefmt="grid")
    
    if char_rows:
        char_table = tabulate(
            char_rows, 
            headers=["Name", "Role", "Visual Features", "Psychological Features"], 
            tablefmt="grid"
        )
        table_str += "\n\nCharacters:\n" + char_table
    
    return table_str


In [106]:
profile_str = to_profile_tab(result.profile)

In [107]:
print(profile_str)


+---------------------+----------------------------------------+
| Attribute           | Value                                  |
| Geographic Location | India, Bansipur                        |
+---------------------+----------------------------------------+
| Time Period         | Era: modern, Time: night               |
+---------------------+----------------------------------------+
| Weather             | misty (faint mist curling along roads) |
+---------------------+----------------------------------------+
| Ethnicity           | South Asian                            |
+---------------------+----------------------------------------+
| Mood                | eerie, suspenseful                     |
+---------------------+----------------------------------------+

Characters:
+--------+------------+---------------------------------------+--------------------------+
| Name   | Role       | Visual Features                       | Psychological Features   |
| Riya   | hero       | y

# Stage Area

In [47]:
import os
from uuid import uuid4
from pathlib import Path

id = str(uuid4())

stage_dir = Path(os.getcwd()) / "stage" / id
stage_dir.mkdir(parents=True, exist_ok=True)

In [48]:
images_dir = stage_dir / "images"
raw_images_dir = images_dir / "raw"
raw_images_dir.mkdir(parents=True, exist_ok=True)

In [49]:
clean_images_dir = images_dir / "clean"
clean_images_dir.mkdir(parents=True, exist_ok=True)

In [50]:
audio_dir = stage_dir / "audios"
audio_dir.mkdir(parents=True, exist_ok=True)

In [51]:
video_dir = stage_dir / "videos"
video_dir.mkdir(parents=True, exist_ok=True)

# Scene Temporary Persistence

In [52]:
board_dict = result.model_dump()

In [53]:
with open(stage_dir / "board.json", "w", encoding="utf-8") as f:
    import json
    json.dump(board_dict, f, indent=2, ensure_ascii=False)


# Image Generation

In [54]:
current = result.scenes[0]
print(current)

scene_index=1 narration_text='It was past 8:00 PM when Riya, Kabir, and Mehul entered the silent city of Bansipur. A faint mist curled along the cracked roads, the streetlamps flickering as if unsure they wanted to stay lit.' subtitle_text='The Arrival' visual_prompts=[VisualPromptStage(stage_type='base', prompt='A car driving into a misty, deserted city at night.'), VisualPromptStage(stage_type='lighting', prompt='Streetlamps flickering, casting eerie shadows.')] background_music='Low, suspenseful strings with a hint of wind howling.' animation=Animation(animation_type='Ken Burns', ffmpeg_command='-vf zoompan=d=250:fps=25,scale=1080:1920') duration_sec=10 strength_from_previous=0.5 subtitles=[Subtitle(index=1, start_time='00:00:01,000', end_time='00:00:05,000', text='It was past 8:00 PM when Riya, Kabir, and Mehul entered the silent city of Bansipur.'), Subtitle(index=2, start_time='00:00:05,500', end_time='00:00:10,000', text='A faint mist curled along the cracked roads, the streetla

In [55]:
whole = '\n'.join([ prompt.prompt for prompt in current.visual_prompts])
whole = f"{whole}{result.style.style_description}"
print(whole)

A car driving into a misty, deserted city at night.
Streetlamps flickering, casting eerie shadows.Cinematic style with a handheld feel, shot on a digital camera with a 35mm lens. Lighting is dim and moody, with a cool color palette. The tone is suspenseful, reminiscent of Hitchcock's thrillers, with a touch of supernatural horror.


In [56]:
result.style.style_description


"Cinematic style with a handheld feel, shot on a digital camera with a 35mm lens. Lighting is dim and moody, with a cool color palette. The tone is suspenseful, reminiscent of Hitchcock's thrillers, with a touch of supernatural horror."

In [57]:
api = "http://3.143.254.8:8000"

In [58]:
import requests
import json
import base64

_NEGATIVE_PROMPT = """
    blurry, low quality, 
    low resolution, bad anatomy, bad hands, 
    missing fingers, extra digit, fewer digits, 
    cropped, worst quality, low quality, 
    normal quality, jpeg artifacts, signature, watermark, username, blurry,
    cartoon, anime, illustration, comic, flat shading
"""

_DEFAULT_PARAMS = {
    'guidance_scale': 7.5,
    'strength': 0.45,
    'orientation': 'portrait'
}

def call(text: str, image_path: str = None, params: dict = _DEFAULT_PARAMS):
    path = "/sm/txt2img"
    data = {
        'prompt': text,
        'negative_prompt': _NEGATIVE_PROMPT,
        **params
    }
    if image_path:
        path = "/sm/img2img"
        with open(image_path, "rb") as f:
            data["image"] = base64.b64encode(f.read()).decode("utf-8")
    endpoint = f"{api}{path}"

    response = requests.post(endpoint, 
                             data=json.dumps(data), 
                             headers={'Content-Type': 'application/json'},
                             timeout=3000)

    if response.status_code != 200:
        raise RuntimeError(f"Request failed: {response.status_code} {response.text}")
    else:
        img = json.loads(response.content.decode('utf-8'))
        image_bytes = base64.b64decode(img['image_base64'])
        return image_bytes

In [59]:
import base64

def generate_raw_scene(idx: int, dir: str):
    style = result.style
    scene = result.scenes[idx]
    images_dir = dir / str(idx)
    images_dir.mkdir(parents=True, exist_ok=True)
    style_prompt = VisualPromptStage(stage_type="details", prompt=style.style_description)
    for i, prompt in enumerate([*scene.visual_prompts, style_prompt]):
        logger.info(f"Generating image for scene {idx}, prompt {i}: {prompt.prompt}")
        if i == 0:
            img = call(text=prompt.prompt)
        else:
            prev_img_path = images_dir / f"{i-1}.png"
            img = call(text=prompt.prompt, image_path=prev_img_path)
        
        output_path = images_dir / f"{i}.png"
    
        with open(output_path, "wb") as f:
            f.write(img)
        
        if i == len(scene.visual_prompts) - 1:
            output_path = images_dir / f"final.png"
            with open(output_path, "wb") as f:
                f.write(img)

    logger.info(f"completed scene generation for scene - {idx}")
        
def generate_raw_combined_scene(idx: int, dir: str):
    style = result.style
    scene = result.scenes[idx]
    images_dir = dir / str(idx)
    images_dir.mkdir(parents=True, exist_ok=True)
    
    whole = '\n'.join([ prompt.prompt for prompt in scene.visual_prompts])
    whole = f"{whole}{style.style_description}"
    
    img = call(text=whole)
    output_path = images_dir / "final.png"
    
    with open(output_path, "wb") as f:
        f.write(img)
    
    logger.info(f"completed scene generation for scene - {idx}")

def generate_raw_combined_scene_with_past_reference(idx: int, dir: str):
    style = result.style
    scene = result.scenes[idx]
    images_dir = dir / str(idx)
    images_dir.mkdir(parents=True, exist_ok=True)

    past_image = None
    if idx > 0:
        past_image_path = dir / str(idx - 1) / "final.png"
        if past_image_path.exists():
            past_image = past_image_path
    
    whole = '\n'.join([ prompt.prompt for prompt in scene.visual_prompts])
    whole = f"{whole}{style.style_description}"

    if past_image:
        img = call(text=whole, image_path=str(past_image), params = { **_DEFAULT_PARAMS, 'strength': scene.strength_from_previous })
    else:
        # If no past image, generate from scratch
        img = call(text=whole)
    output_path = images_dir / "final.png"
    
    with open(output_path, "wb") as f:
        f.write(img)
    
    logger.info(f"completed scene generation for scene - {idx}")

In [60]:
import time
for idx, scene in enumerate(result.scenes):
    generate_raw_combined_scene(idx, raw_images_dir)
    time.sleep(2)

INFO:__main__:completed scene generation for scene - 0
INFO:__main__:completed scene generation for scene - 1
INFO:__main__:completed scene generation for scene - 2
INFO:__main__:completed scene generation for scene - 3
INFO:__main__:completed scene generation for scene - 4
INFO:__main__:completed scene generation for scene - 5
INFO:__main__:completed scene generation for scene - 6
INFO:__main__:completed scene generation for scene - 7
INFO:__main__:completed scene generation for scene - 8
INFO:__main__:completed scene generation for scene - 9
INFO:__main__:completed scene generation for scene - 10
INFO:__main__:completed scene generation for scene - 11
INFO:__main__:completed scene generation for scene - 12
INFO:__main__:completed scene generation for scene - 13
INFO:__main__:completed scene generation for scene - 14
INFO:__main__:completed scene generation for scene - 15
INFO:__main__:completed scene generation for scene - 16
INFO:__main__:completed scene generation for scene - 17
IN

# Image Upscale

In [132]:
import requests
import json
import base64


def upscale(image_path: str = None, params: dict = {}):
    path = "/upscale/latent/v2"
    data = {
        **params
    }
    with open(image_path, "rb") as f:
        data["image"] = base64.b64encode(f.read()).decode("utf-8")
    endpoint = f"{api}{path}"

    response = requests.post(endpoint, 
                             data=json.dumps(data), 
                             headers={'Content-Type': 'application/json'},
                             timeout=3000)

    if response.status_code != 200:
        raise RuntimeError(f"Request failed: {response.status_code} {response.text}")
    else:
        img = json.loads(response.content.decode('utf-8'))
        image_bytes = base64.b64decode(img['image_base64'])
        return image_bytes

In [133]:
import base64

def upscale_raw_scene(idx: int, input_dir: str, output_dir: str):
    images_dir = output_dir
    source_img_path = input_dir / f"{idx}" / "final.png"
    image_bytes = upscale(source_img_path)
    output_path = images_dir / f"{idx}.png"
    
    with open(output_path, "wb") as f:
        f.write(image_bytes)

In [134]:
import time
for idx, scene in enumerate(result.scenes):
    upscale_raw_scene(idx, raw_images_dir, clean_images_dir)
    time.sleep(2)

RuntimeError: Request failed: 500 <!doctype html>
<html lang=en>
<title>500 Internal Server Error</title>
<h1>Internal Server Error</h1>
<p>The server encountered an internal error and was unable to complete your request. Either the server is overloaded or there is an error in the application.</p>


# Audio Generation

In [61]:
from openai import OpenAI

client = OpenAI()

def generate_tts(text, filename):
    """Generate narration audio from text using OpenAI TTS."""
    speech = client.audio.speech.create(
        model="gpt-4o-mini-tts",
        voice="alloy",
        input=text
    )
    with open(filename, "wb") as f:
        f.write(speech.read())
    return filename

In [62]:
import time

for idx, scene in enumerate(result.scenes):
    output_path = audio_dir / f"{idx}.mp3"
    generate_tts(scene.narration_text, output_path)
    time.sleep(2)

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/audio/speech "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/audio/speech "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/audio/speech "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/audio/speech "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/audio/speech "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/audio/speech "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/audio/speech "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/audio/speech "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/audio/speech "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/audio/speech "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/audio/speech "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.o

# Generate Videos

In [63]:
import subprocess

def create_video_with_ffmpeg(image_path, audio_path, animation_str, output_path):
    if not os.path.exists(image_path):
        raise FileNotFoundError(f"Image file not found: {image_path}")
    if not os.path.exists(audio_path):
        raise FileNotFoundError(f"Audio file not found: {audio_path}")
    if animation_str:
        # If an animation string is provided, use it in the ffmpeg command
        command = [
            "ffmpeg",
            "-y",
            "-loop", "1",
            "-i", image_path,
            "-i", audio_path,
            animation_str,
            "-c:v", "libx264",
            "-tune", "stillimage",
            "-c:a", "aac",
            "-b:a", "192k",
            "-pix_fmt", "yuv420p",
            "-shortest",
            output_path
        ]
    else:
        # Default command without animation
        command = [
            "ffmpeg",
            "-y",
            "-loop", "1",
            "-i", image_path,
            "-i", audio_path,
            "-c:v", "libx264",
            "-tune", "stillimage",
            "-c:a", "aac",
            "-b:a", "192k",
            "-pix_fmt", "yuv420p",
            "-shortest",
            output_path
        ]
    subprocess.run(command, check=True)

In [64]:
for idx, scene in enumerate(result.scenes):
    image_path = images_dir / "raw" / str(idx) / "final.png"
    audio_path = audio_dir / f"{idx}.mp3"
    animation_str = scene.animation.ffmpeg_command if scene.animation else None
    output_path = video_dir / f"{idx}.mp4"
    create_video_with_ffmpeg(
        str(image_path),
        str(audio_path),
        None,
        str(output_path)
    )
    

# Merge Videos

In [65]:
videos = [(video_dir / file) for file in sorted(os.listdir(video_dir)
    , key=lambda x: int(x.split('.')[0]) if x.split('.')[0].isdigit() else 0)]

print(videos)

[WindowsPath('C:/Samriddha/opensource/examples/misc/projects/story-to-video/stage/37a7e017-e977-4eb0-8f97-9324c54049bc/videos/0.mp4'), WindowsPath('C:/Samriddha/opensource/examples/misc/projects/story-to-video/stage/37a7e017-e977-4eb0-8f97-9324c54049bc/videos/1.mp4'), WindowsPath('C:/Samriddha/opensource/examples/misc/projects/story-to-video/stage/37a7e017-e977-4eb0-8f97-9324c54049bc/videos/2.mp4'), WindowsPath('C:/Samriddha/opensource/examples/misc/projects/story-to-video/stage/37a7e017-e977-4eb0-8f97-9324c54049bc/videos/3.mp4'), WindowsPath('C:/Samriddha/opensource/examples/misc/projects/story-to-video/stage/37a7e017-e977-4eb0-8f97-9324c54049bc/videos/4.mp4'), WindowsPath('C:/Samriddha/opensource/examples/misc/projects/story-to-video/stage/37a7e017-e977-4eb0-8f97-9324c54049bc/videos/5.mp4'), WindowsPath('C:/Samriddha/opensource/examples/misc/projects/story-to-video/stage/37a7e017-e977-4eb0-8f97-9324c54049bc/videos/6.mp4'), WindowsPath('C:/Samriddha/opensource/examples/misc/projects/s

In [66]:
with open(stage_dir / "scenes.txt", "w") as f:
    for video in videos:
        # Convert path to forward slashes
        path_str = video.as_posix()  # works if video is a Path object
        f.write(f"file '{path_str}'\n")


In [67]:
import subprocess
try:
    subprocess.run([
        "ffmpeg",
        "-f", "concat",
        "-safe", "0",
        "-i", str(stage_dir / "scenes.txt"),
        "-c:v", "libx264",   # re-encode video
        "-crf", "23",        # quality
        "-preset", "fast",
        "-c:a", "aac",       # audio codec
        "-b:a", "192k",
        str(stage_dir / "story.mp4")
    ], check=True)
    print("Video concatenation successful!")
except subprocess.CalledProcessError as e:
    print("FFmpeg failed with exit code:", e.returncode)

Video concatenation successful!
