In [1]:
from typing import Annotated, Sequence, TypedDict, List, Dict
from dotenv import load_dotenv  
from langchain_core.messages import BaseMessage, HumanMessage, AIMessage, ToolMessage, SystemMessage
from langchain_groq import ChatGroq
from langchain_core.tools import tool
from langgraph.graph.message import add_messages
from langgraph.graph import StateGraph, END, START
from langgraph.prebuilt import ToolNode
import os

In [2]:
# Load variables from a .env file into the process environment.
load_dotenv()
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
# Fail fast with a readable message: writing None into os.environ would
# otherwise raise an opaque "str expected, not NoneType" TypeError.
if not GROQ_API_KEY:
    raise RuntimeError(
        "GROQ_API_KEY is not set. Define it in the environment or in a .env file."
    )
# Re-export so libraries that read the key from os.environ can find it.
os.environ["GROQ_API_KEY"] = GROQ_API_KEY

In [3]:
# Shared Groq chat model; enrich_slide_with_media invokes it once to build the
# image search query and once to choose the slide layout.
llm = ChatGroq(model="llama-3.1-8b-instant")

In [6]:
class AgentState(TypedDict):
    """State dictionary passed to and returned by enrich_slide_with_media.

    The field names suggest each instance describes one slide — TODO confirm
    against the graph definition, which is not visible in this section.
    """
    # Message history; annotated with langgraph's add_messages.
    messages: Annotated[Sequence[BaseMessage], add_messages]
    # Presumably the slide's index within the deck — not read in this section.
    slide_no: int
    # Subtopic strings; joined with ", " when building LLM prompts.
    subtopic: List[str]
    # Slide body text; embedded in both LLM prompts.
    content_to_display: str
    # Presumably the spoken narration for the slide — not read in this section.
    narration_script: str
    # Presumably marks a slide with no content — not read in this section.
    is_blank_slide: bool
    # Image URL from fetch_relevant_image ("" when none was found).
    image_address: str
    # Currently always set to "" by enrich_slide_with_media.
    video_address: str
    # Image placement parsed from the LLM reply (prompted: left/right/top/bottom).
    image_position: str
    # Text placement parsed from the LLM reply (prompted: left/right/top/bottom).
    content_position: str

In [7]:
import requests

# Pexels API key, read from the process environment (which load_dotenv() in an
# earlier cell populates from .env) instead of being committed in the notebook.
# NOTE(review): the literal key previously stored here has been exposed and
# should be rotated. Falls back to "" so image lookups degrade to "no image".
PEXELS_API_KEY = os.getenv("PEXELS_API_KEY", "")

def fetch_relevant_image(query: str, timeout: float = 10.0) -> str:
    """Return the URL of the first Pexels photo matching ``query``.

    Sends a single search request to the Pexels API asking for one result and
    returns that photo's ``src.medium`` URL.

    Args:
        query: Free-text search terms.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The image URL, or "" when the query is blank, the request fails or
        times out, the response is not HTTP 200, the body is not the expected
        JSON, or no photo matches.
    """
    # A blank query cannot match anything useful; skip the network round trip.
    if not query or not query.strip():
        return ""

    headers = {"Authorization": PEXELS_API_KEY}
    params = {"query": query, "per_page": 1}

    try:
        # Without a timeout, requests can block indefinitely on a stalled connection.
        response = requests.get(
            'https://api.pexels.com/v1/search',
            headers=headers,
            params=params,
            timeout=timeout,
        )
    except requests.RequestException:
        return ""  # network failure is treated like "no image found"

    if response.status_code != 200:
        return ""

    try:
        photos = response.json().get("photos") or []
        return photos[0]["src"]["medium"] if photos else ""
    except (ValueError, AttributeError, KeyError, IndexError, TypeError):
        # Body was not JSON, or not shaped like a Pexels search result.
        return ""


In [8]:
# Positions the layout prompt allows the model to choose from.
_SLIDE_POSITIONS = ("left", "right", "top", "bottom")


def _normalize_position(raw_value: str) -> str:
    """Return the single allowed position named in ``raw_value``, else ""."""
    # Replace every non-letter with a space so "<Left>", "**right**" or
    # "top." reduce to plain lowercase words.
    letters_only = "".join(ch if ch.isalpha() else " " for ch in raw_value.lower())
    found = {word for word in letters_only.split() if word in _SLIDE_POSITIONS}
    # Zero or several distinct keywords (e.g. an echoed "<left/right/top/bottom>"
    # template) is ambiguous, so report "no decision".
    return found.pop() if len(found) == 1 else ""


def _parse_layout_reply(reply_text: str) -> tuple:
    """Extract ``(image_position, content_position)`` from the model's reply."""
    image_position = ""
    content_position = ""
    for raw_line in reply_text.splitlines():
        label, separator, value = raw_line.partition(":")
        if not separator:
            continue
        # Tolerate indentation, bullets and markdown emphasis around the label.
        label_key = " ".join(
            "".join(ch if ch.isalpha() else " " for ch in label.lower()).split()
        )
        position = _normalize_position(value)
        if not position:
            continue
        if label_key == "image position":
            image_position = position
        elif label_key == "content position":
            content_position = position
    return image_position, content_position


def enrich_slide_with_media(state: AgentState) -> AgentState:
    """Attach an image URL and layout positions to a slide's state.

    Asks the LLM for a concise image search query built from the slide's
    subtopic and content, looks that query up with ``fetch_relevant_image``,
    then asks the LLM where the image and the text should be placed.

    Args:
        state: Slide state; ``subtopic`` and ``content_to_display`` are read.

    Returns:
        The same ``state`` object, updated in place with ``image_address``,
        ``video_address`` (always ""), ``image_position`` and
        ``content_position``. Each position is one of left/right/top/bottom,
        or "" when the model's reply could not be interpreted.
    """
    subtopic_text = ", ".join(state["subtopic"]) if isinstance(state["subtopic"], list) else state["subtopic"]
    content_text = state["content_to_display"]

    # Step 1: Use LLM to generate a more accurate image search query
    query_prompt = (
        f"Given the subtopic: '{subtopic_text}' and content: '{content_text}', "
        f"generate a short and precise search query that would retrieve the most relevant image for this content from an image search API. "
        f"The query should include important keywords only, without any extra words, punctuation, or formatting. "
        f"Output only the query, no explanation."
    )

    query_response = llm.invoke(query_prompt)
    # Collapse whitespace/newlines and drop wrapping quotes the model may add.
    refined_query = " ".join(query_response.content.split()).strip("\"'`")

    # Step 2: Fetch image from Pexels using the refined query
    image_address = fetch_relevant_image(refined_query) if refined_query else ""

    # Step 3: Ask LLM for positioning of image and content
    position_prompt = (
        f"For the subtopic '{subtopic_text}', given the content:\n\n"
        f"Content: {content_text}\n\n"
        f"Decide where to place the image and content on the slide.\n"
        f"Respond strictly in this format:\n"
        f"Image Position: <left/right/top/bottom>\n"
        f"Content Position: <left/right/top/bottom>\n"
        f"Choose positions that enhance readability and visual appeal."
    )

    position_response = llm.invoke(position_prompt)
    image_position, content_position = _parse_layout_reply(
        position_response.content.strip()
    )

    # Step 4: Update state
    state.update({
        "image_address": image_address,
        "video_address": "",  # Optional: you can implement video retrieval similarly
        "image_position": image_position,
        "content_position": content_position
    })

    return state
