In [15]:
from typing import Annotated, Sequence, TypedDict, List, Dict
from dotenv import load_dotenv  
from langchain_core.messages import BaseMessage, HumanMessage, AIMessage, ToolMessage, SystemMessage
from langchain_groq import ChatGroq
from langchain_core.tools import tool
from langgraph.graph.message import add_messages
from langgraph.graph import StateGraph, END, START
from langgraph.prebuilt import ToolNode
import os

In [16]:
# Load variables from a local .env file (if one exists) into the process environment.
load_dotenv()
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
# Fail early with an actionable message: assigning None into os.environ would
# otherwise raise an opaque "TypeError: str expected, not NoneType".
if not GROQ_API_KEY:
    raise RuntimeError(
        "GROQ_API_KEY is not set. Add it to your .env file or export it "
        "in the environment before running this notebook."
    )
# Re-export the key; ChatGroq is constructed later without an explicit key,
# so it presumably reads it from the environment.
os.environ["GROQ_API_KEY"] = GROQ_API_KEY

In [17]:
# Shared chat model instance; the node functions defined below call llm.invoke() on it.
llm = ChatGroq(model="llama-3.1-8b-instant")
# Manual smoke test (left disabled):
# llm.invoke("hello who are you").content

In [18]:
class AgentState(TypedDict):
    """Shared state handed from node to node in the slide-generation graph."""

    # Message history; `add_messages` is attached as the reducer for this key.
    messages: Annotated[Sequence[BaseMessage], add_messages]
    # Original user input prompt (the topic to cover).
    user_prompt: str
    # One entry per subtopic:
    #   {"topic": str, "results": [{"title": ..., "url": ..., "summary": ...}, ...]}
    scraped_data: List[Dict[str, object]]
    # Subtopic lines produced by the LLM (the prompt asks for 8-9 titles).
    subtopics: List[str]
    # One entry per subtopic: {"topic": str, "facts": List[str], "narration": str}
    full_script: List[Dict[str, object]]
    # One entry per slide; values mix int ("slide_no"), bool ("is_blank_slide")
    # and str (subtopic, display content, narration, asset paths/positions).
    slide_segments: List[Dict[str, object]]

In [None]:

def generate_subtopics(state: AgentState) -> AgentState:
    """
    Generate slide-worthy subtopics from the user's prompt.

    Asks the LLM to break the topic in ``state['user_prompt']`` into 8-9
    concise subtopic titles formatted as a numbered list, and stores the
    resulting lines in ``state['subtopics']``. Any duration mentioned in the
    prompt is not taken into account.

    Args:
        state (AgentState): The current agent state; ``user_prompt`` must be set.

    Returns:
        AgentState: The same state object with ``subtopics`` replaced by the
        list of subtopic lines. The numbering ("1. ", "2. ", ...) is kept
        exactly as the LLM returned it.
    """
    prompt = state['user_prompt']
    response = llm.invoke(f"Given the topic: '{prompt}', generate ONLY 8-9 concise subtopic titles "
        "as a numbered list (1. Title, 2. Title, ...). "
        "Do NOT include any explanations, definitions, or extra details.")

    lines = [s.strip() for s in response.content.split("\n") if s.strip()]
    # Keep only the numbered items so that a preamble or closing remark from
    # the model ("Here are the subtopics:") is not treated as a subtopic and
    # later sent out as a search query.
    numbered = [line for line in lines if line[0].isdigit()]
    # If the model ignored the numbering instruction entirely, fall back to
    # every non-empty line (the previous behaviour) rather than returning nothing.
    state['subtopics'] = numbered or lines
    return state

In [20]:
# Load variables from .env (this is also done in an earlier cell).
load_dotenv()
# API key for LangSearch; os.getenv returns None when the variable is not defined.
LANGSEARCH_API_KEY = os.getenv("LANGSEARCH_API_KEY")
# Web-search endpoint that fetch_resources sends its POST requests to.
LANGSEARCH_API_URL = "https://api.langsearch.com/v1/web-search"
import requests


def fetch_resources(state: AgentState) -> AgentState:
    """
    Fetch web resources for each subtopic using LangSearch API.

    One POST request is sent per subtopic. A failed request (network error,
    timeout, non-200 status, undecodable body) is reported with a warning and
    recorded as an empty result list for that subtopic, so one bad lookup does
    not abort the whole run.

    Args:
        state (AgentState): The current agent state with subtopics.

    Returns:
        AgentState: Updated state with scraped_data populated. Entries already
        present in ``scraped_data`` are kept and the new ones are added after
        them; each new entry is ``{"topic": str, "results": [...]}`` where
        every result has the keys ``title``, ``url`` and ``summary``.
    """
    # Without a timeout, requests waits indefinitely on an unresponsive server.
    request_timeout_s = 30

    headers = {
        "Authorization": f"Bearer {LANGSEARCH_API_KEY}",
        "Content-Type": "application/json"
    }

    # Work on a copy so the caller's list object is not mutated in place, and
    # tolerate a state that has no "scraped_data" key yet.
    scraped_data = list(state.get("scraped_data") or [])

    for topic in state["subtopics"]:
        payload = {
            "query": topic,
            "freshness": "noLimit",
            "summary": True,
            "count": 2
        }

        topic_results = []
        try:
            response = requests.post(
                LANGSEARCH_API_URL,
                headers=headers,
                json=payload,
                timeout=request_timeout_s,
            )
            if response.status_code == 200:
                data = response.json()
                if not isinstance(data, dict):
                    data = {}
                # `or {}` / `or []` also covers keys that are present but null.
                web_pages = ((data.get("data") or {}).get("webPages") or {}).get("value") or []

                for page in web_pages:
                    topic_results.append({
                        "title": page.get("name"),
                        "url": page.get("url"),
                        "summary": page.get("summary")
                    })
            else:
                print(f"Warning: LangSearch returned HTTP {response.status_code} for topic '{topic}'")
        except (requests.RequestException, ValueError) as exc:
            # ValueError covers JSON decoding failures raised by response.json().
            print(f"Warning: LangSearch request failed for topic '{topic}': {exc}")

        scraped_data.append({
            "topic": topic,
            "results": topic_results
        })

    state["scraped_data"] = scraped_data
    return state


In [21]:
import re

def _parse_script_output(output):
    """Split raw LLM output into (facts_list, narration); return None if the headings are missing."""
    # A single lazy group between the two headings keeps matching linear; the
    # optional asterisks tolerate markdown-bold headings such as "**Facts:**".
    match = re.search(
        r"Facts:\**\s*(.*?)\s*\**Narration Script:\**\s*(.*)",
        output,
        re.DOTALL,
    )
    if not match:
        return None

    facts_list = [fact.strip() for fact in match.group(1).split('\n') if fact.strip()]
    return facts_list, match.group(2).strip()


def create_script(state: AgentState) -> AgentState:
    """
    For each topic in scraped_data, generate:
    - Facts (numbered list)
    - Narration Script
    and store them in state["full_script"].

    Topics whose search results contain no summary text are skipped. When the
    LLM answer cannot be split on the 'Facts:' / 'Narration Script:' headings,
    a warning is printed and the topic is stored with empty facts and narration.

    Args:
        state (AgentState): The agent state with scraped_data.

    Returns:
        AgentState: Updated with structured full_script, one
        ``{"topic": str, "facts": list[str], "narration": str}`` entry per
        processed topic.
    """
    # Upper bound on the amount of search-summary text placed in the prompt.
    max_context_chars = 4000

    state["full_script"] = []

    for data in state["scraped_data"]:
        topic = data.get("topic", "")
        results = data.get("results", [])

        combined_summary = "\n".join(
            result.get("summary", "") for result in results if result.get("summary", "")
        )[:max_context_chars]

        if not combined_summary:
            continue

        llm_prompt = (
            f"Using the following information as context:\n'''{combined_summary}'''\n\n"
            f"Generate the following two outputs strictly in PLAIN TEXT (no HTML, no markdown, no special formatting). "
            f"Use EXACTLY the headings 'Facts:' and 'Narration Script:' with no extra symbols or decoration.\n\n"

            f"Facts:\n"
            f"List exactly 4 to 5 key facts about the topic '{topic}'. Format them as:\n"
            f"1. First fact\n"
            f"2. Second fact\n"
            f"3. Third fact\n"
            f"4. Fourth fact\n"
            f"5. Fifth fact (optional)\n\n"

            f"Narration Script:\n"
            f"Write a complete, engaging YouTube video narration script for the topic '{topic}'. "
            f"Make it natural, conversational, and friendly. "
            # Trailing space added: the next sentence used to be glued on ("cues.Ensure").
            f"No stage directions, sound effects, or visual cues. "
            f"Ensure to give the output in the following format:\n\n"
            f"Facts:\n"
            f"1. <fact one>\n"
            f"2. <fact two>\n"
            f"3. <fact three>\n"
            f"4. <fact four>\n"
            f"5. <fact five>\n\n"
            f"Narration Script:\n"
            f"<Full engaging YouTube-style narration based on the above facts in conversational tone>"
        )

        response = llm.invoke(llm_prompt)
        output = response.content.strip()

        parsed = _parse_script_output(output)
        if parsed is None:
            print(f"Warning: Unable to parse facts and narration for topic '{topic}'")
            facts_list, narration_part = [], ""
        else:
            facts_list, narration_part = parsed

        state["full_script"].append({
            "topic": topic,
            "facts": facts_list,
            "narration": narration_part
        })

    return state


In [None]:
import re
import os
import json

def create_slide_segments(state: AgentState) -> AgentState:
    """
    Generates structured slide segments for each subtopic based on narration and facts.

    For each slide:
    - content_to_display: Contains the specific facts relevant to that slide.
    - narration_script: Explains the facts shown in content_to_display.
    - is_blank_slide: True if the slide is meant to show stock video only.

    The LLM is asked for at most two slides per entry of state["full_script"].
    Chunks of its answer that contain neither a 'Display:' nor a 'Narration:'
    field (for example introductory text before 'Slide 1:') are ignored rather
    than stored as empty slides.

    Side effect: writes the subtopics and all slide segments to
    ../assets/scripts/slide_segments.json (the directory is created if needed).

    Args:
        state (AgentState): The agent state with full_script populated.

    Returns:
        AgentState: The same state with slide_segments rebuilt from scratch.
    """
    state["slide_segments"] = []
    slide_counter = 1

    for script_data in state["full_script"]:
        subtopic = script_data["topic"]
        narration = script_data["narration"]
        facts = script_data["facts"]

        # Show the facts one per line; interpolating the list directly would
        # put its Python repr (brackets and quotes) into the prompt.
        facts_text = facts if isinstance(facts, str) else "\n".join(str(fact) for fact in facts)

        llm_prompt = (
            f"For the subtopic '{subtopic}', you are provided with:\n\n"
            f"Facts:\n{facts_text}\n\n"
            f"Narration Script:\n{narration}\n\n"
            f"Your task is to divide this content into up to 2 slides if required. For each slide:\n"
            f"if this content is able to perfectly fit in one slide then dont divide it into two slides and just return one slide.\n"
            f"- In 'Display', ONLY show the relevant facts for that slide.\n"
            f"- In 'Narration', provide the portion of the narration that directly explains the displayed facts and explain the facts in great detail. consider this \n{narration}\n \n"
            f"- If there's a narration section that doesn't require any fact display (like general commentary), mark that slide's display as 'BLANK_SLIDE'.\n\n"
            f"- you can expand (increase the number of words) the narration if necessary make sure to cover the facts in a detail manner. \n"
            f"STRICT RULES:\n"
            # "\n" added: this rule used to run straight into the next one.
            f"- Format the facts into proper bullet points and dont add '[]' to it\n"
            f"- Maximum of 2 slides per subtopic.\n"
            f"- Only split into 2 if necessary.\n"
            f"- Always pair facts with their explanations.\n\n"
            f"Format your output strictly as:\n"
            f"Slide 1:\nDisplay: <Facts or BLANK_SLIDE>\nNarration: <Narration for this slide>\n\n"
            f"Slide 2:\nDisplay: <Facts or BLANK_SLIDE>\nNarration: <Narration for this slide>\n\n"
            f"Ensure clarity and direct fact-to-narration mapping."
        )

        response = llm.invoke(llm_prompt)
        slides_text = response.content.strip()

        # Parse LLM response into slides. The optional asterisks tolerate
        # markdown-bold labels such as "**Slide 1:**" or "**Narration:**".
        slide_matches = re.split(r'\**Slide \d+:\**', slides_text)
        for slide_data in slide_matches:
            if not slide_data.strip():
                continue

            display_match = re.search(r'Display:\**\s*(.*?)\n\**Narration:', slide_data, re.DOTALL)
            narration_match = re.search(r'Narration:\**\s*(.*)', slide_data, re.DOTALL)

            # Text that carries neither field is not a slide (typically a
            # preamble before "Slide 1:"); skip it instead of emitting an
            # empty slide and consuming a slide number.
            if display_match is None and narration_match is None:
                continue

            content_to_display = display_match.group(1).strip() if display_match else ""
            narration_text = narration_match.group(1).strip() if narration_match else ""

            is_blank_slide = content_to_display.strip().upper() == "BLANK_SLIDE"

            state["slide_segments"].append({
                "slide_no": slide_counter,
                "subtopic": subtopic,
                "content_to_display": content_to_display,
                "narration_script": narration_text,
                "is_blank_slide": is_blank_slide,
                # Placeholders; nothing in this function fills them in.
                "image_address" : "",
                "video_address" : "",
                "image_position"  :"",
                # NOTE(review): "test_position" may be a typo for "text_position";
                # left unchanged because readers of the JSON may rely on this key.
                "test_position": ""
            })

            slide_counter += 1

    # Save subtopics and slide_segments to JSON
    output_dir = "../assets/scripts"
    os.makedirs(output_dir, exist_ok=True)

    with open(os.path.join(output_dir, "slide_segments.json"), "w", encoding="utf-8") as f:
        json.dump({
            "subtopics": state.get("subtopics", []),
            "slide_segments": state["slide_segments"]
        }, f, ensure_ascii=False, indent=2)

    return state


In [27]:
# Build a state graph whose state schema is AgentState.
graph_builder = StateGraph(AgentState)

# Register each pipeline stage under the node name that the edge definitions refer to.
graph_builder.add_node("Generate_Subtopics", generate_subtopics)
graph_builder.add_node("Fetch_Resources", fetch_resources)
graph_builder.add_node("Create_Script", create_script)
graph_builder.add_node("Create_Slide_Segments", create_slide_segments)

<langgraph.graph.state.StateGraph at 0x222933b4350>

In [28]:
# The pipeline is strictly linear, so the edges are simply every pair of
# consecutive stages between START and END.
pipeline_order = (
    START,
    "Generate_Subtopics",
    "Fetch_Resources",
    "Create_Script",
    "Create_Slide_Segments",
    END,
)
for upstream, downstream in zip(pipeline_order, pipeline_order[1:]):
    graph_builder.add_edge(upstream, downstream)

# Compile the wired-up builder into the runnable graph.
graph = graph_builder.compile()


In [29]:
from pprint import pprint

# Ask for the topic interactively, then run the whole graph once on it.
user_prompt = input("Enter your topic prompt: ")

# Every collection starts empty; the nodes fill them in as the graph runs.
initial_state = {
    "messages": [],
    "user_prompt": user_prompt,
    "scraped_data": [],
    "subtopics": [],
    "full_script": [],
    "slide_segments": []
}

final_state = graph.invoke(initial_state)

# Pretty-print each produced section under its own banner.
for banner, state_key in (
    ("\n--- Subtopics ---", "subtopics"),
    ("\n--- Full Script ---", "full_script"),
    ("\n--- Slide Segments ---", "slide_segments"),
):
    print(banner)
    pprint(final_state[state_key])


--- Subtopics ---
['1. Choosing a Niche or Topic',
 '2. Setting Up a YouTube Account',
 '3. Creating High-Quality Content',
 '4. Optimizing Video Titles and Tags',
 '5. Building an Audience and Engagement',
 '6. Monetizing Your Channel',
 '7. Promoting Your YouTube Channel']

--- Full Script ---
[{'facts': ['1. Choosing a niche or topic is a crucial step in creating '
            'profitable digital products.',
            '2. Having one specific niche is essential for success, as it '
            'helps differentiate your products from others.',
            '3. To choose a profitable niche, you need to consider both '
            'forethought and research, including identifying problems people '
            'are experiencing and determining whether you can solve them.',
            '4. Researching your competition and analyzing their strengths and '
            'weaknesses is also vital in choosing a niche.',
            '5. A niche with low-quality content or a lack of competition c