<a href="https://colab.research.google.com/github/Ataerman/Auto-short-video-agent/blob/main/Video_gen_all_ai_agents.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [27]:
!pip -q install langgraph langchain-openai langchain-community langchain requests pydub ffmpeg

  Preparing metadata (setup.py) ... [?25l[?25hdone
  Building wheel for ffmpeg (setup.py) ... [?25l[?25hdone


In [40]:
from google.colab import userdata
import os
openai_api_key=userdata.get('openai')
langsmith_api_key=userdata.get('langsmith')
eleven_api_key = userdata.get("eleven1")
pexels_api_key = userdata.get("pexels")

os.environ["LANGSMITH_TRACING"] = "true"
os.environ["LANGSMITH_ENDPOINT"] = "https://api.smith.langchain.com"
os.environ["LANGSMITH_API_KEY"] = langsmith_api_key
os.environ["LANGSMITH_PROJECT"] = "video_gen_ai"

In [41]:
from typing import TypedDict, List
from langgraph.graph import StateGraph,END
from langchain_core.runnables import RunnableLambda
from langchain_openai import ChatOpenAI
from pydub.utils import mediainfo
from IPython.display import Audio, Video
import math
import requests
import subprocess


In [42]:
class GenerationState(TypedDict):
    topic: str
    script: str
    image_prompts: List[str]
    image_urls: List[str]
    audio_path: str
    video_path: str
    final_path: str
    last_topics: List[str]

In [43]:
llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0.5, openai_api_key=openai_api_key)

def topic_generator(state: GenerationState) -> dict:
    last_topics = state.get("last_topics", [])
    while True:
        prompt = (
            "Suggest a catchy and attention-grabbing title for a 30-second short educational video. "
            "Choose a topic from science, psychology, or interesting general knowledge. "
            "Keep the title short, clear, and engaging."
        )
        response = llm.invoke(prompt)
        topic = response.content.strip()
        if topic not in last_topics:
            last_topics.append(topic)
            return {"topic": topic, "last_topics": last_topics}

def script_writer(state: GenerationState) -> dict:
    topic = state["topic"]
    prompt = f"""
    Write a 30-second educational script based on the title: '{topic}'.
    The tone should be clear, informative, and scientific. Avoid storytelling.
    Focus on surprising or curiosity-triggering facts.Keep it around 4–5 sentences."""
    response = llm.invoke(prompt)
    return {"script": response.content}



In [44]:
# Make sure you define this somewhere securely (e.g., from environment or secret manager)
pexels_api_key = userdata.get("pexels")  # Replace with os.environ.get("PEXELS_API_KEY") if needed

def search_pexels_image(query: str, num_results: int = 1) -> List[str]:
    """
    Search for images on Pexels based on a given query.

    Args:
        query (str): The search keyword.
        num_results (int): Number of images to retrieve.

    Returns:
        List[str]: A list of image URLs (high quality).
    """
    url = f"https://api.pexels.com/v1/search?query={query}&per_page={num_results}"
    headers = {"Authorization": pexels_api_key}
    response = requests.get(url, headers=headers)

    if response.status_code == 200:
        data = response.json()
        return [photo["src"]["large"] for photo in data.get("photos", [])]
    else:
        return []

def generate_image_prompts(script: str) -> List[str]:
    """
    Generate 5 short visual tags from a given video script using an LLM.

    Args:
        script (str): The educational script content.

    Returns:
        List[str]: A list of short English keywords (max 2–3 words each).
    """
    prompt = f"""
Below is a script written for a short educational video.
Read the script carefully and suggest 5 short, effective English visual search tags
that are suitable for querying images on Pexels.

The tags should represent the core concepts in the script.
Each tag must be a maximum of 2–3 words. Just list the tags—no explanation.

Script:
\"\"\"
{script}
\"\"\"
"""
    response = llm.invoke(prompt)
    raw_output = response.content.strip()
    prompts = [line.strip("-• ").strip() for line in raw_output.splitlines() if line.strip()]
    return prompts

def image_generator(state: GenerationState) -> dict:
    """
    Main agent function that generates image tags and fetches image URLs.

    Args:
        state (dict): Must include a 'script' field.

    Returns:
        dict: Contains 'image_prompts' and 'image_urls'
    """
    script = state["script"]
    image_prompts = generate_image_prompts(script)

    image_urls = []
    for keyword in image_prompts:
        images = search_pexels_image(keyword)
        image_urls.append(images[0] if images else "https://dummyimage.com/600x400/000/fff&text=No+Image")

    return {
        "image_prompts": image_prompts,
        "image_urls": image_urls
    }


In [45]:
def generate_audio(state: GenerationState, speed: float = 1.2) -> dict:
    script = state["script"]
    voice_id = "ErXwobaYiN019PkySvjV"  # Turkish male voice ("Cem")
    url = f"https://api.elevenlabs.io/v1/text-to-speech/{voice_id}"

    headers = {
        "xi-api-key": eleven_api_key,  # Make sure this variable is defined securely
        "Content-Type": "application/json"
    }

    data = {
        "text": script,
        "model_id": "eleven_multilingual_v2",  # Recommended for multilingual support
        "voice_settings": {
            "stability": 0.3,
            "similarity_boost": 0.85
        }
    }

    response = requests.post(url, headers=headers, json=data)
    if response.status_code == 200:
        original_audio = "output_audio.mp3"
        with open(original_audio, "wb") as f:
            f.write(response.content)

        # Speed up the audio using FFmpeg
        fast_audio = "output_audio_fast.mp3"
        cmd = [
            "ffmpeg", "-y", "-i", original_audio,
            "-filter:a", f"atempo={speed}",
            fast_audio
        ]
        subprocess.run(cmd, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)

        return {"audio_path": fast_audio}
    else:
        raise RuntimeError(f"❌ ElevenLabs API error: {response.status_code} - {response.text}")


In [46]:
def generate_video(state: GenerationState) -> dict:
    image_urls = state["image_urls"]
    os.makedirs("images", exist_ok=True)

    # 📥 Download images
    image_paths = []
    headers = {"User-Agent": "Mozilla/5.0"}
    for i, url in enumerate(image_urls):
        filename = f"images/img{i:03}.jpg"
        r = requests.get(url, headers=headers)
        if r.status_code == 200:
            with open(filename, "wb") as f:
                f.write(r.content)
            image_paths.append(filename)

    # ⏱️ Set fixed duration per image (e.g., 5 seconds)
    per_image_duration = 5

    # 🎞️ Convert each image to short video clip
    clip_paths = []
    for i, path in enumerate(image_paths):
        out_path = f"clip_{i:03}.mp4"
        cmd = [
            "ffmpeg", "-y", "-loop", "1", "-t", str(per_image_duration), "-i", path,
            "-vf", "scale=trunc(iw/2)*2:trunc(ih/2)*2,fps=25",
            "-c:v", "libx264", "-pix_fmt", "yuv420p", "-r", "25", out_path
        ]
        subprocess.run(cmd, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
        clip_paths.append(out_path)

    # 📝 Create input.txt for concatenation
    with open("input.txt", "w") as f:
        for clip in clip_paths:
            f.write(f"file '{clip}'\n")

    # 🎬 Merge all clips into a single silent video
    silent_video = "silent_video.mp4"
    cmd_concat = [
        "ffmpeg", "-y", "-f", "concat", "-safe", "0", "-i", "input.txt",
        "-c", "copy", silent_video
    ]
    subprocess.run(cmd_concat, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)

    return {"video_path": silent_video}


In [47]:
def combine_audio_video(state: GenerationState) -> dict:
    video_path = state["video_path"]
    audio_path = state["audio_path"]
    final_path = "output_video.mp4"

    # 1️⃣ Get durations
    audio_duration = float(mediainfo(audio_path)["duration"])
    video_duration = float(mediainfo(video_path)["duration"])

    # 2️⃣ If video is shorter, loop it to match audio duration
    if audio_duration > video_duration:
        loop_count = math.ceil(audio_duration / video_duration)
        extended_video = "extended_video.mp4"
        with open("loop_input.txt", "w") as f:
            for _ in range(loop_count):
                f.write(f"file '{video_path}'\n")
        cmd_loop = [
            "ffmpeg", "-y", "-f", "concat", "-safe", "0", "-i", "loop_input.txt",
            "-c", "copy", extended_video
        ]
        subprocess.run(cmd_loop, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
        video_path = extended_video

    # 3️⃣ Combine audio and video (temporary)
    temp_path = "temp_combined.mp4"
    cmd_combine = [
        "ffmpeg", "-y", "-i", video_path, "-i", audio_path,
        "-c:v", "copy", "-c:a", "aac", "-shortest", temp_path
    ]
    subprocess.run(cmd_combine, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)

    # 4️⃣ Resize to vertical format (1080x1920, TikTok / Reels / Shorts compatible)
    tiktok_ready_path = "final_tiktok_ready.mp4"
    cmd_resize = [
        "ffmpeg", "-y", "-i", temp_path,
        "-vf", "scale=1080:1920:force_original_aspect_ratio=decrease,pad=1080:1920:(ow-iw)/2:(oh-ih)/2",
        "-c:a", "copy", tiktok_ready_path
    ]
    subprocess.run(cmd_resize, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)

    return {"final_path": tiktok_ready_path}


In [48]:
builder = StateGraph(GenerationState)
builder.add_node("generate_topic", RunnableLambda(topic_generator))
builder.add_node("write_script", RunnableLambda(script_writer))
builder.add_node("generate_images", RunnableLambda(image_generator))
builder.add_node("generate_audio", RunnableLambda(generate_audio))
builder.add_node("generate_video", RunnableLambda(generate_video))
builder.add_node("combine_audio_video", RunnableLambda(combine_audio_video))


builder.set_entry_point("generate_topic")
builder.add_edge("generate_topic", "write_script")
builder.add_edge("write_script", "generate_images")
builder.add_edge("generate_images", "generate_audio")
builder.add_edge("generate_audio", "generate_video")
builder.add_edge("generate_video", "combine_audio_video")
builder.add_edge('combine_audio_video',END)

graph = builder.compile()


In [52]:
output = graph.invoke({
    "topic": "",
    "script": "",
    "image_prompts": [],
    "image_urls": [],
    "audio_path": "",
    "video_path": "",
    "final_path": "",
    "last_topics": []
})

print("🎯 Title:", output["topic"])
print("📜 Script:", output["script"])
print("🖼️ Image Prompts:", output["image_prompts"])
print("🌐 Image URLs:", output["image_urls"])
print("🎧 Audio File:", output["audio_path"])
print("🎞️ Video Path:", output["video_path"])


🎯 Title: "The Power of Positive Thinking: Boost Your Brain in 30 Seconds"
📜 Script: Did you know that positive thinking can actually boost your brain in just 30 seconds? Studies have shown that optimistic thoughts can increase the production of neurotransmitters, improving cognitive function. By actively practicing positive thinking, you can enhance your memory, focus, and overall brain health. So next time you catch yourself in a negative mindset, try shifting your thoughts to something more uplifting for a quick brain boost. Remember, the power of positive thinking is not just a cliché – it's backed by science.
🖼️ Image Prompts: ['1. Positive Thinking', '2. Brain Boost', '3. Cognitive Function', '4. Neurotransmitters', '5. Memory Focus']
🌐 Image URLs: ['https://images.pexels.com/photos/10828123/pexels-photo-10828123.jpeg?auto=compress&cs=tinysrgb&h=650&w=940', 'https://images.pexels.com/photos/10132269/pexels-photo-10132269.jpeg?auto=compress&cs=tinysrgb&h=650&w=940', 'https://images

In [53]:
Audio("output_audio.mp3")

In [54]:
Video("final_tiktok_ready.mp4", embed=True, width=360)