BradAI: AI Story Video Generator:-
This notebook contains the complete code for the BradAI video generation pipeline. It uses a multi-stage AI process to transform a user's short story into a complete, narrated video with dynamic scenes and selectable art styles.

How to Run:-
Add Your API Keys: On the left sidebar, click the key icon to open the Secrets Manager. You must add the following four secrets for the application to work:

GOOGLE_API_KEY

REPLICATE_API_TOKEN

AZURE_SPEECH_KEY

AZURE_SPEECH_REGION

Run the Code: Once your keys are set, you can run the code cell below. The script will:

Install all necessary libraries.

Define the core AI functions.

Run a test with a sample story.

Find Your Video: The final .mp4 video file will be generated and saved in the Colab file explorer, also on the left sidebar. You can download it from there.

Note: The full process can take several minutes to complete.

In [None]:
!pip install azure-cognitiveservices-speech
!apt-get install -y ffmpeg
!apt-get update && apt-get install -y imagemagick
print("Libraries installed!!")

In [None]:
# Edit ImageMagick's policy file in place: the entry for the "@*" path pattern
# is switched from rights="none" (no access) to rights="read|write".
# NOTE(review): presumably required so that the subtitle rendering (moviepy
# TextClip, which shells out to ImageMagick) may read its text from a file -
# confirm. The path assumes ImageMagick 6 is installed under /etc/ImageMagick-6.
!sed -i 's/<policy domain="path" rights="none" pattern="@\*"\/>/<policy domain="path" rights="read|write" pattern="@\*"\/>/g' /etc/ImageMagick-6/policy.xml


In [None]:
# Selectable art styles: each key is the short name a user picks, each value
# is the style phrase handed to the script generator as its visual style.
STYLE_PRESETS = {
    "anime": (
        "anime style, vibrant colors, "
        "Studio Ghibli aesthetic"
    ),
    "realistic": (
        "photorealistic, 4k, cinematic, sharp focus, "
        "detailed, professional photography"
    ),
    "fantasy_art": (
        "fantasy digital painting, epic, vibrant, "
        "detailed, concept art, artstation"
    ),
    "comic_book": (
        "graphic novel illustration, comic book art, "
        "bold lines, cel shading, ink drawing"
    ),
    "cyberpunk": (
        "cyberpunk art, neon-drenched, Blade Runner aesthetic, "
        "futuristic, high-tech"
    ),
    "watercolor": (
        "vibrant watercolor painting, soft wash, "
        "beautiful, detailed, paper texture"
    ),
    "vintage_film": (
        "1950s black and white film, vintage, film grain, "
        "noir style, cinematic lighting"
    ),
}

In [None]:
# Colab form fields: replace each placeholder value with your own credential
# before running this cell.
REPLICATE_API_TOKEN = "ENTER API" # @param
AZURE_SPEECH_KEY = "ENTER API" # @param
GOOGLE_API_KEY = "ENTER API" # @param
AZURE_SPEECH_REGION = "ENTER REGION (EX: eastus)" # @param

import os

# Export every credential as an environment variable in one step; the speech
# and Gemini setup further down read them back with os.environ.get().
os.environ.update(
    {
        "REPLICATE_API_TOKEN": REPLICATE_API_TOKEN,
        "AZURE_SPEECH_KEY": AZURE_SPEECH_KEY,
        "AZURE_SPEECH_REGION": AZURE_SPEECH_REGION,
        "GOOGLE_API_KEY": GOOGLE_API_KEY,
    }
)

print("APIs configured!")

In [None]:
!pip install -q replicate moviepy python-dotenv


In [None]:
from moviepy.editor import *
import azure.cognitiveservices.speech as speechsdk
import replicate
import urllib.request
import os

# Azure Speech setup. Key and region are taken from the environment variables
# AZURE_SPEECH_KEY / AZURE_SPEECH_REGION.
speech_config = speechsdk.SpeechConfig(
    subscription=os.getenv("AZURE_SPEECH_KEY"),
    region=os.getenv("AZURE_SPEECH_REGION"),
)

# Voice used for all narration.
speech_config.speech_synthesis_voice_name = "en-US-JennyNeural"

# No audio output device is configured (audio_config=None); the video function
# below takes the synthesized bytes from the result object instead.
speech_synthesizer = speechsdk.SpeechSynthesizer(speech_config=speech_config, audio_config=None)


def generate_video_from_script(visual_script, add_subtitles=False, aspect_ratio="1:1"):
    """Render a narrated video from a scene-by-scene script.

    For every scene an image is generated with SDXL through ``replicate.run``
    and the narration is synthesized with the module-level
    ``speech_synthesizer``. The per-scene assets are written to the current
    working directory as ``image_<i>.png`` and ``audio_<i>.mp3``; each image
    is then shown for the duration of its narration and all scenes are
    concatenated into one MP4.

    Args:
        visual_script: Sequence of scene dicts. Each scene must provide
            ``image_prompt`` and ``audio_text``; ``negative_prompt`` is
            optional and falls back to a generic quality-related default.
        add_subtitles: When true, the scene's ``audio_text`` is overlaid as a
            caption at the bottom of the frame.
        aspect_ratio: ``"9:16"`` or ``"16:9"``; any other value produces a
            square 1024x1024 frame.

    Returns:
        The file name of the rendered video
        (``final_video_<aspect ratio with ':' replaced by 'x'>.mp4``).

    Raises:
        ValueError: If ``visual_script`` is empty.
        RuntimeError: If speech synthesis fails for a scene.
    """
    # Fail early with a clear message instead of an obscure error from the
    # concatenation step when there is nothing to render.
    if not visual_script:
        raise ValueError("visual_script is empty: at least one scene is required to render a video.")

    print("Video Generation Started...")
    if add_subtitles:
        print("   - Subtitle option is ON")
    print(f"   - Aspect ratio is set to {aspect_ratio}")

    # Frame size per supported aspect ratio; unknown values use a square frame.
    frame_sizes = {"9:16": (896, 1536), "16:9": (1344, 768)}
    width, height = frame_sizes.get(aspect_ratio, (1024, 1024))

    # Used when the script generator did not emit a negative prompt for a scene.
    default_negative_prompt = "blurry, lowres, bad anatomy, worst quality"
    total_scenes = len(visual_script)

    #  Generate pics and audio
    for i, scene in enumerate(visual_script):
        image_prompt = scene['image_prompt']
        negative_prompt = scene.get('negative_prompt', default_negative_prompt)
        print(f"Generating image for scene {i+1}/{total_scenes}...")
        output = replicate.run(
            "stability-ai/sdxl:c221b2b8ef527988fb59bf24a8b97c4561f1c671f73bd389f866bfb27c061316",
            input={
                "prompt": image_prompt,
                "negative_prompt": negative_prompt,
                "num_inference_steps": 25,
                "width": width,
                "height": height
            }
        )
        # The first output item is converted to a string and downloaded as a URL.
        urllib.request.urlretrieve(str(output[0]), f"image_{i}.png")

        audio_text = scene['audio_text']
        print(f"Generating audio for scene {i+1}/{total_scenes}...")

        result = speech_synthesizer.speak_text_async(audio_text).get()
        if result.reason != speechsdk.ResultReason.SynthesizingAudioCompleted:
            cancellation_details = result.cancellation_details
            message = f"Error generating audio: {cancellation_details.reason}"
            print(message)
            if cancellation_details.reason == speechsdk.CancellationReason.Error:
                print(f"Error details: {cancellation_details.error_details}")
                message += f" ({cancellation_details.error_details})"
            # Stop here: continuing would either crash later on the missing
            # audio file or silently reuse a stale audio_<i>.mp3 from an
            # earlier run.
            raise RuntimeError(f"Scene {i+1}/{total_scenes}: {message}")

        # NOTE(review): the bytes are stored exactly as returned by the SDK; no
        # output format is configured in this function, so the payload may not
        # actually be MP3 despite the file extension - confirm.
        with open(f"audio_{i}.mp3", "wb") as f:
            f.write(result.audio_data)

    #Assemble video
    print("\nAll assets generated.rendering the final video(/)")
    clips = []
    audio_clips = []
    try:
        for i, scene in enumerate(visual_script):
            audio_clip = AudioFileClip(f"audio_{i}.mp3")
            audio_clips.append(audio_clip)
            # Each still image stays on screen exactly as long as its narration.
            image_clip = ImageClip(f"image_{i}.png").set_duration(audio_clip.duration)

            if add_subtitles:
                subtitle_text = scene['audio_text']
                text_clip = TextClip(
                    subtitle_text,
                    fontsize=45,
                    color='white',
                    font='Arial-Bold',
                    # Caption box spans 90% of the frame width; pixel sizes
                    # must be whole numbers, height is left automatic.
                    size=(int(image_clip.w * 0.9), None),
                    method='caption'
                )
                text_clip = text_clip.set_position(('center', 'bottom')).set_duration(image_clip.duration)
                video_clip = CompositeVideoClip([image_clip, text_clip])
            else:
                video_clip = image_clip

            video_clip = video_clip.set_audio(audio_clip)
            clips.append(video_clip)

        final_video = concatenate_videoclips(clips, method="compose")
        output_filename = f"final_video_{aspect_ratio.replace(':', 'x')}.mp4"
        final_video.write_videofile(output_filename, fps=24, codec="libx264", audio_codec="aac")
    finally:
        # Release the audio readers whether or not rendering succeeded.
        for audio_clip in audio_clips:
            audio_clip.close()

    print(f"Video generated! File saved as '{output_filename}'")
    return output_filename

In [None]:
import google.generativeai as genai
import json
import os
from google.colab import userdata

def generate_visual_script_v2(story, style="anime style, vibrant colors, Studio Ghibli aesthetic"):
    """Turn a story into a scene-by-scene visual script using Gemini.

    The story and a detailed instruction prompt are sent to the
    ``gemini-1.5-flash`` model. The outermost ``[...]`` span of the reply is
    extracted and parsed as JSON.

    Args:
        story: The story text to break into scenes.
        style: Style phrase that the model is told to put at the start of
            every ``image_prompt``.

    Returns:
        The parsed JSON list. The prompt asks for one object per scene with
        the keys ``audio_text``, ``image_prompt``, ``negative_prompt`` and
        ``sentence_ids``; the reply is not validated against that shape.

    Raises:
        ValueError: If the reply contains no ``[...]`` span.
        json.JSONDecodeError: If the extracted span is not valid JSON.
    """
    api_key = os.environ.get('GOOGLE_API_KEY')
    genai.configure(api_key=api_key)
    model = genai.GenerativeModel('gemini-1.5-flash')

    # The key list and the worked example include `negative_prompt`, so the
    # example agrees with rule II.6 and with the video step that consumes
    # this field.
    system_prompt = f"""You are a master visual director and scriptwriter for an AI video generation pipeline. Your task is to take a story and transform it into a highly detailed, consistent, scene-by-scene script.

    Your output must be a single, valid JSON list. Each object in the list must have four keys: `audio_text`, `image_prompt`, `negative_prompt` and `sentence_ids`.

    Here are the strict rules you must follow for every scene:

    #### **I. General Visual and Character Consistency**

    1.  **Core Aesthetic**: Every `image_prompt` must start with the core aesthetic: **"{style}."**
    2.  **Character Registry**: When a character is first introduced, create a detailed description for them based on the text. If the text provides no details, invent a plausible appearance. You must re-use this EXACT full description every time the character appears.
    3.  **Background Registry**: Follow the same rule for settings. Describe them in detail once and re-use that full description.
    4.  **Re-Use Full Descriptions**: For every prompt, every character and setting present MUST be described using their full registry details.

    #### **II. Story Breakdown and Scene Management**

    1.  **Split Complex Actions**: If a sentence has multiple distinct actions, break it into separate scenes.
    2.  **Combine Filler Sentences**: If a sentence is an emotional detail or a non-visual thought, combine its text with the previous visual scene's `audio_text`.
    3.  **Prioritize Focus and Action**: Place the main character and their primary action at the beginning of the `image_prompt`. For example: "Leo cautiously approaching Cora...". **Crucially, the action described in the `image_prompt` must ONLY be what is described in that same scene's `audio_text`. Do not include actions or reactions from future sentences.**
    4.  **Detail Background Characters**: Always include full descriptions for characters in the background.
    5.  **Quality Enhancers**: Always append the following to the very end of every `image_prompt`: ", highly detailed, cinematic lighting, masterpiece, sharp focus, intricate details."
    6.  **Dynamic Negative Prompt**: You must generate a `negative_prompt` for every scene. The default should be "blurry, lowres, bad anatomy, worst quality". **Crucially, if the scene contains only ONE person, you MUST add "duplicate, cloned, multiple people" to the negative_prompt.** If the scene explicitly describes more than one person, you must NOT add these words.
    7.  **Sentence Mapping Requirement**: Include a `sentence_ids` field listing the original sentence IDs used for the `audio_text`.

    #### **III. Example of Perfect Output**

    EXAMPLE 1:
    **Original Story**: "As the sun dipped below the cherry blossom trees, Yuki stood alone at the train station. She felt a profound sense of loneliness. Then, Haru appeared, soaked but smiling. He was a young man with a slender build."

    **Correct JSON Output**:
        [
      {{
        "audio_text": "As the sun dipped below the cherry blossom trees, Yuki stood alone at the train station. She felt a profound sense of loneliness.",
        "image_prompt": "{style}, Yuki, a young woman with a melancholic expression, standing alone at a bustling train station at sunset, with cherry blossom trees in the background and soft, warm lighting, highly detailed, cinematic lighting, masterpiece, sharp focus, intricate details.",
        "negative_prompt": "blurry, lowres, bad anatomy, worst quality, duplicate, cloned, multiple people",
        "sentence_ids": ["s1", "s2"]
      }},
      {{
        "audio_text": "Then, Haru appeared, soaked but smiling. He was a young man with a slender build.",
        "image_prompt": "{style}, Haru, a young man with a slender build, soaked from the rain but smiling warmly, appearing in the distance at a bustling train station at sunset, with cherry blossom trees in the background, Yuki, a young woman with a melancholic expression, standing in the background with a surprised look, highly detailed, cinematic lighting, masterpiece, sharp focus, intricate details.",
        "negative_prompt": "blurry, lowres, bad anatomy, worst quality",
        "sentence_ids": ["s3", "s4"]
      }}
    ]
    """

    response = model.generate_content(f"Story: {story}\n\n{system_prompt}")

    # Read the reply text once, outside the try block, so the error handler
    # below never has to touch `response.text` again.
    # NOTE(review): the accessor itself may raise for replies without text
    # (e.g. blocked content) - confirm against the SDK documentation.
    raw_text = response.text

    try:
        # The model may wrap the JSON in prose or code fences; keep only the
        # outermost bracketed span.
        json_start = raw_text.find('[')
        json_end = raw_text.rfind(']')
        if json_start == -1 or json_end <= json_start:
            raise ValueError("Could not find valid JSON content in API response.")
        return json.loads(raw_text[json_start : json_end + 1])
    except (json.JSONDecodeError, ValueError) as e:
        print(f"Error parsing JSON: {e}")
        print("Raw API response:")
        print(raw_text)
        raise





In [None]:
# End-to-end run: story text -> scene script -> rendered video.

# give your story here
story_text = """
Elara, a young woman with braided brown hair and a simple leather tunic, followed the ancient map. It led her to a hidden cave behind a waterfall. Inside, an old man with a long white beard sat by a fire. He looked up and pointed a gnarled finger towards a glowing crystal resting on a stone pedestal.
"""

# Pick a key from STYLE_PRESETS; an unknown key falls back to the "anime" preset.
style_choice = "anime"
full_style_prompt = STYLE_PRESETS.get(style_choice, STYLE_PRESETS["anime"])

# Step 1: have the language model break the story into scenes.
print(f"Calling creative director with style: '{style_choice}'")
visual_script = generate_visual_script_v2(story_text, style=full_style_prompt)

print("\nScript generated!!")

# Step 2: generate the per-scene assets and render the video.
print("\nSending script to production")
video_path = generate_video_from_script(visual_script, add_subtitles=True, aspect_ratio="16:9")

print(f"\nYour video is ready at: {video_path}")


In [None]:
# Download the final video through Colab's file helper.
# Requires `video_path` to have been set by the workflow cell, and only works
# when the notebook runs inside Google Colab (google.colab must be importable).
from google.colab import files
print("🚀 Preparing video for download...")
files.download(video_path)