# Notebook Agente LangGraph + MongoDB

## Imports

In [None]:
import os
from dotenv import load_dotenv
# Call python-dotenv's loader before anything below reads environment variables
# (MONGODB_URI is read via os.getenv further down; presumably both keys live in
# a local .env file — confirm).
load_dotenv()

# NOTE(review): the result of this lookup is neither assigned nor printed, so
# the statement has no effect; presumably a leftover check that the key is set.
os.environ.get("OPENAI_API_KEY")

import json
from typing import TypedDict, Annotated
import operator
from langgraph.graph import StateGraph, END
from langgraph.checkpoint.mongodb import MongoDBSaver 
from langchain_core.messages import HumanMessage, SystemMessage, ToolMessage, AnyMessage
from langchain_openai import ChatOpenAI
from langchain.tools import tool
from pymongo import MongoClient


## Configuración Modelo LLM y MongoDB

In [None]:
# ==========================================
# 🔗 MongoDB Configuration (using pymongo[srv])
# ==========================================
# Connection string comes from the MONGODB_URI environment variable.
# NOTE(review): os.getenv returns None when the variable is unset; there is no
# explicit check here, so a missing variable is not reported at this point.
MONGO_URI = os.getenv("MONGODB_URI")
DB_NAME = "sample_mflix"
COLLECTION_NAME = "movies"

# One client is shared by the movie collection and by the checkpointer below.
client = MongoClient(MONGO_URI, serverSelectionTimeoutMS=5000)
# Collection queried by the mongo_search tool.
collection = client[DB_NAME][COLLECTION_NAME]
# Chat model handed to Agent, which binds the tools to it.
model = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0)
# Checkpointer passed to graph.compile() inside Agent; configured to use the
# "langgraph_memory" database and the "agent_checkpoints" collection.
memory = MongoDBSaver(client, db_name="langgraph_memory", collection_name="agent_checkpoints")
# Run configuration passed to graph.invoke() / graph.get_state() in the demo
# cells; every demo call uses this same thread_id.
config = {"configurable": {"thread_id": "user_456"}}

## PROMPT

In [None]:

# System prompt for the "CineScope" agent. It is passed to Agent(system=...) and
# prepended to the message list as a SystemMessage on every model call.
#
# The tool section must describe only tools that are actually bound to the
# model. The agent is created with tools=[mongo_search], and mongo_search takes
# a single JSON-string argument named `structured_query`, so the prompt
# documents exactly that (no other tool names, no keyword-style signature).
prompt = """You are "CineScope", a movie assistant integrated in a LangGraph workflow. 
Your goal is to help users discover, explore, and learn about movies by:
1. Extracting structured information from their queries.
2. Deciding which tool to use.
3. Providing a concise and natural-language answer.

---

### 🧩 Step 1 — Structured Information Extraction

From each user query, extract a structured representation in **valid JSON** with the following fields:

{
  "title": <title of the movie mentioned, otherwise null>,
  "director": <name of the director if mentioned, otherwise null>,
  "actors": <comma-separated list of actors if mentioned, otherwise null>,
  "genre": <genre mentioned, otherwise null>,
  "year": <specific year or range mentioned, otherwise null>,
  "keywords": <other relevant key terms, otherwise null>
}

Rules:
- Normalize "science fiction" → "sci-fi".
- Fill only what is explicitly or clearly implied by the query.
- Return only JSON, no explanation.

If the query implies similarity (contains phrases like “similar to”, “like”, “resembling”, “in the style of”, “movies that feel like”), you must always perform a two-step reasoning process:

Step 1 — Lookup:
Use the mongo_search tool with only the title of the referenced movie to retrieve factual data (such as plot, year, or genres).
Example:

{"title": "Gladiator"}


→ Tool returns: historical action drama about a Roman general turned gladiator.

Step 2 — Enrichment:
Using the returned information, build a refined structured query including the inferred "genre" and "keywords".
For example:

{"genre": "historical drama", "keywords": "Rome, gladiators, revenge, honor"}


or

{"genre": "sci-fi", "keywords": "space, time travel, survival"}


if the movie was Interstellar.

Step 3 — Second search:
Perform a second mongo_search call using the enriched JSON to find similar movies.

Only after completing both steps, generate the final natural-language answer.

🧩 Additional notes

You must always perform the two-step reasoning process for “movies like …” or “similar to …” queries, even if you already know the movie.

The goal of the first lookup is to extract objective context (genre, setting, tone) from MongoDB, not to rely on prior world knowledge.

Do not skip the first lookup. Even if you recognize the title, still call the tool once to confirm factual data before building the enriched query.

After both searches, summarize results concisely for the user.

💬 Example (Gladiator):

User: “Movies similar to Gladiator”
→ Step 1: mongo_search({"title": "Gladiator"})
→ Step 2: Builds: {"genre": "historical drama", "keywords": "Rome, gladiators, revenge, honor"}
→ Step 3: mongo_search again with enriched query
→ Final Answer: “If you liked Gladiator, you might enjoy Spartacus, Kingdom of Heaven, and Troy…”
---

### 🧩 Step 2 — Tool Usage Guidelines

You have access to the following tool:

- **mongo_search(structured_query)**  
  → `structured_query` is a JSON string containing any of the fields from Step 1 (title, director, actors, genre, year, keywords).  
  → Searches and returns a list of movies that match those fields.

Tool usage policy:
- For recommendation requests, call `mongo_search` with the relevant fields (genre, year, director, actors, keywords).
- For factual questions about a specific movie, call `mongo_search` with only its title, e.g. {"title": "Inception"}.
- If there’s not enough information to make a confident call, ask the user a short clarifying question.
- Do **not** hallucinate or make up data. If a lookup fails, say:  
  `"I could not find the information you requested."`

---

### 🧩 Step 3 — Response Formatting

After obtaining data (via tool or known facts), respond in concise, human-like natural language.

Examples:

**Factual answers**
User: "Who directed Inception?"  
Agent: "Christopher Nolan directed *Inception* (2010). The film follows a thief who enters dreams to steal secrets."

User: "What year was The Godfather released?"  
Agent: "*The Godfather* was released in 1972, directed by Francis Ford Coppola."

**Recommendations**
User: "Recommend me sci-fi movies from the last decade."  
Agent: "Here are some sci-fi movies from the past decade:  
1. *Dune* (2021) — Directed by Denis Villeneuve, a visually stunning adaptation of Frank Herbert’s novel.  
2. *Ex Machina* (2014) — Directed by Alex Garland, a tense exploration of artificial intelligence.  
3. *Interstellar* (2014) — Directed by Christopher Nolan, a breathtaking journey through space and time."

Formatting rules:
- Always include movie title in *italics*.
- Mention the year and director when available.
- For lists, use numbered bullets.
- Keep tone concise and natural, avoid repetition or filler.
- End with a helpful offer, e.g.,  
  “Would you like me to recommend similar movies?” (optional)

---

### 🧩 Step 4 — General Behavior

- You may make multiple tool calls in sequence if needed.
- Only use a tool when you’re sure of the parameters.
- Be truthful, concise, and stylistically consistent.
- If no relevant data is found: `"I could not find the information you requested."`
- Never output raw tool data directly to the user. Always summarize it cleanly.

---

System summary:
- Primary goal → extract → choose tool → answer.
- Focus domain → movies only.
- Style → concise, cinematic, factual.
- When unsure → ask or abstain.
"""

## Tool

In [None]:

# MongoDB Search Tool
def _split_terms(value):
    """Return *value* (comma-separated string, list, or None) as a list of clean strings."""
    if not value:
        return []
    if isinstance(value, str):
        value = value.split(",")
    elif not isinstance(value, (list, tuple)):
        value = [value]
    terms = (str(item).strip() for item in value)
    return [term for term in terms if term]


def _year_condition(year):
    """Translate the ``year`` field into the right-hand side of a ``$match`` on ``year``.

    Accepts a number (2010), a numeric string ("2010") or a "start-end" range
    ("2010-2020"). Raises ValueError when the text cannot be parsed.
    """
    # The model may send the year as a JSON number rather than a string.
    if isinstance(year, (int, float)) and not isinstance(year, bool):
        return int(year)
    text = str(year).strip().replace("–", "-")  # tolerate an en dash in ranges
    if "-" in text:
        start, end = (int(part) for part in text.split("-"))
        return {"$gte": start, "$lte": end}
    return int(text)


def _format_movie(movie):
    """Render one result document as a single summary line for the model."""
    line = (
        f"{movie.get('title', '?')} ({movie.get('year', '?')})"
        f" - Genres: {', '.join(movie.get('genres', []))}"
    )
    # directors/plot are projected by the pipeline, so surface them when present;
    # the prompt's lookup step relies on this factual context.
    directors = movie.get("directors")
    if directors:
        line += f" - Directors: {', '.join(directors)}"
    plot = movie.get("plot")
    if plot:
        line += f" - Plot: {plot}"
    return line


# NOTE: the docstring below is also what the @tool decorator exposes to the model
# as the tool description (LangChain behavior — see the @tool documentation), so
# it is written for the LLM as much as for maintainers.
@tool("mongo_search", return_direct=False)
def mongo_search(structured_query: str) -> str:
    """
    Search the movie collection. `structured_query` is a JSON object encoded as a
    string with any of these optional fields: title, director, actors
    (comma-separated), genre (comma-separated), keywords, year (a single year or
    a "start-end" range). Returns a text list of up to 20 matching movies with
    year, genres, directors and plot, or a message when nothing matches.
    """
    try:
        query_data = json.loads(structured_query)
        print("Received query:", query_data)

        if not isinstance(query_data, dict):
            return "Error running search: the query must be a JSON object."

        must_conditions = []
        should_conditions = []

        # Director → must
        if query_data.get("director"):
            must_conditions.append({
                "phrase": {"query": query_data["director"], "path": "directors"}
            })

        # Actors → should (soft relevance). Each actor is its own phrase so that
        # "A, B" matches movies with either name instead of the literal "A, B".
        actors = _split_terms(query_data.get("actors"))
        if actors:
            should_conditions.append({
                "phrase": {
                    "query": actors,
                    "path": "cast",
                    "score": {"boost": {"value": 3}}
                }
            })

        # Genre → one must per genre (strict); accepts a list or a
        # comma-separated string.
        for genre in _split_terms(query_data.get("genre")):
            must_conditions.append({
                "phrase": {"query": genre, "path": "genres", "score": {"boost": {"value": 5}}}
            })

        # Keywords → should (semantic flexibility)
        if query_data.get("keywords"):
            should_conditions.append({
                "text": {
                    "query": query_data["keywords"],
                    "path": ["plot", "title"],
                    "score": {"boost": {"value": 2}}
                }
            })

        # Title → used for similarity/lookup cases, only when no strict filter applies
        if query_data.get("title") and not must_conditions:
            should_conditions.append({
                "text": {
                    "query": query_data["title"],
                    "path": ["title", "plot"],
                    "score": {"boost": {"value": 1.5}}
                }
            })

        # Year filter (optional)
        year_stage = None
        if query_data.get("year"):
            year_stage = {"$match": {"year": _year_condition(query_data["year"])}}

        projection = {
            "_id": 0,
            "title": 1,
            "year": 1,
            "genres": 1,
            "directors": 1,
            "plot": 1,
        }

        if must_conditions or should_conditions:
            # Only emit the clauses that actually have conditions.
            compound_query = {}
            if must_conditions:
                compound_query["must"] = must_conditions
            if should_conditions:
                compound_query["should"] = should_conditions
                compound_query["minimumShouldMatch"] = 1

            pipeline = [{
                "$search": {
                    "index": "searchMovies",
                    "compound": compound_query
                }
            }]
            # Filter by year BEFORE limiting; otherwise only the top 20 hits are
            # checked and matching movies further down are silently dropped.
            if year_stage:
                pipeline.append(year_stage)
            pipeline.extend([
                {"$limit": 20},
                {"$project": {**projection, "score": {"$meta": "searchScore"}}},
                {"$sort": {"score": -1}},
            ])
        elif year_stage:
            # Year-only query: there is nothing to full-text search, so use a
            # plain $match instead of an empty $search compound.
            pipeline = [year_stage, {"$limit": 20}, {"$project": projection}]
        else:
            return (
                "No search criteria were provided. Include at least one of: "
                "title, director, actors, genre, keywords, year."
            )

        # Execute query
        results = list(collection.aggregate(pipeline))
        if not results:
            return f"No results found for '{structured_query}'."

        summary = "\n".join(_format_movie(movie) for movie in results)

        return f"Results for '{structured_query}':\n{summary}"

    except json.JSONDecodeError as e:
        return f"Error running search: invalid JSON query ({e})"
    except Exception as e:
        # Tool boundary: report the failure to the model as text instead of
        # raising, so the agent loop can react to it.
        return f"Error running search: {str(e)}"


## Agent State

In [None]:
# LangGraph State Agent
class AgentState(TypedDict):
    """State schema given to StateGraph: the conversation's message list."""

    # operator.add is attached as Annotated metadata. Presumably LangGraph uses
    # it as the reducer, so the lists returned by nodes are concatenated onto
    # the existing messages rather than replacing them — confirm in LangGraph docs.
    messages: Annotated[list[AnyMessage], operator.add]

## Agent

In [None]:
# LangGraph Agent
class Agent:
    """Tool-calling agent built as a two-node LangGraph loop.

    The "llm" node calls the chat model; when the reply contains tool calls the
    "action" node executes them and control returns to "llm". The loop ends
    when the model replies without tool calls.

    Args:
        model: Chat model exposing ``bind_tools``.
        tools: Tool objects with ``name`` and ``invoke``; they are bound to the
            model and dispatched by name.
        system: Optional system prompt prepended to every model call.
        checkpointer: Optional checkpointer for ``graph.compile``. When omitted,
            the module-level ``memory`` is used, as before.
    """

    def __init__(self, model, tools, system="", checkpointer=None):
        self.system = system
        self.tools = {t.name: t for t in tools}
        self.model = model.bind_tools(tools)

        graph = StateGraph(AgentState)
        graph.add_node("llm", self.call_openai)
        graph.add_node("action", self.take_action)
        graph.add_conditional_edges("llm", self.exists_action, {True: "action", False: END})
        graph.add_edge("action", "llm")
        graph.set_entry_point("llm")
        self.graph = graph.compile(
            checkpointer=memory if checkpointer is None else checkpointer
        )

    def exists_action(self, state: AgentState):
        """Return True when the latest message requests at least one tool call."""
        last_message = state['messages'][-1]
        # Must be a real bool: the conditional edge maps on the keys True/False.
        return bool(getattr(last_message, "tool_calls", None))

    def call_openai(self, state: AgentState):
        """Call the model on the conversation, with the system prompt first if set."""
        messages = state['messages']
        if self.system:
            # The system prompt is added per call, not stored in the state.
            messages = [SystemMessage(content=self.system)] + messages
        message = self.model.invoke(messages)
        return {'messages': [message]}

    def take_action(self, state: AgentState):
        """Run every tool call of the latest message and return one ToolMessage each."""
        tool_calls = state['messages'][-1].tool_calls
        results = []
        for call in tool_calls:
            print(f"Calling tool: {call}")
            selected = self.tools.get(call['name'])
            if selected is None:
                print("\n ....bad tool name....")
                result = "bad tool name, retry"
            else:
                try:
                    result = selected.invoke(call['args'])
                except Exception as exc:
                    # A raising tool (e.g. argument validation) would abort the
                    # run and leave this tool call unanswered in the saved
                    # thread; report the error to the model instead so it can
                    # retry or apologise.
                    result = f"Error running tool '{call['name']}': {exc}"
            results.append(ToolMessage(tool_call_id=call['id'], name=call['name'], content=str(result)))
        print("Back to the model!")
        return {'messages': results}


# Instantiate Agent
# mongo_search is the only tool bound to the model; `prompt` becomes the
# system message prepended on each model call.
agent = Agent(model=model, tools=[mongo_search], system=prompt)

## Probando el agente con diferentes consultas

In [None]:
# First turn on the thread identified by `config`.
# NOTE(review): "whit" looks like a typo for "with" — confirm whether the
# misspelling is intentional (e.g. to exercise the model's tolerance).
messages = [HumanMessage(content="Movies whit Leonardo DiCaprio?")]
result = agent.graph.invoke({"messages": messages}, config=config)

# Read the graph state for the same thread after the run.
state = agent.graph.get_state(config)

print("State:", state)

# The last message in the result is printed as the agent's answer.
print("Final Answer:")
print(result['messages'][-1].content)

In [None]:
# Follow-up turn sent with the same `config` (same thread_id); the question
# only makes sense with the previous turn as context, so this presumably
# exercises the checkpointed conversation history.
messages = [HumanMessage(content="Which ones are crime themed?")]
result = agent.graph.invoke({"messages": messages}, config=config)

# Read the graph state for the same thread after the run.
state = agent.graph.get_state(config)

print("State:", state)

# The last message in the result is printed as the agent's answer.
print("Final Answer:")
print(result['messages'][-1].content)

In [None]:
# Closing turn on the same thread; a message that should not need any tool call.
messages = [HumanMessage(content="Thank you!")]
result = agent.graph.invoke({"messages": messages}, config=config)

# Read the graph state for the same thread after the run.
state = agent.graph.get_state(config)

print("State:", state)

# The last message in the result is printed as the agent's answer.
print("Final Answer:")
print(result['messages'][-1].content)