# Assignment 1

In [1]:
import os
from dotenv import load_dotenv
from langchain.chat_models import init_chat_model

# Load variables from a local .env file (if one exists) into the environment.
load_dotenv()

# The key is stored under API_KEY and copied to OPENAI_API_KEY for the
# "openai" model provider configured below.
api_key = os.getenv("API_KEY")
if api_key:
    os.environ["OPENAI_API_KEY"] = api_key
elif not os.getenv("OPENAI_API_KEY"):
    # Assigning None into os.environ raises an opaque
    # "TypeError: str expected, not NoneType"; fail with an actionable message
    # instead. An OPENAI_API_KEY that is already exported is left untouched.
    raise RuntimeError(
        "No OpenAI credentials found: set API_KEY (for example in .env) "
        "or OPENAI_API_KEY before running this notebook."
    )

# Chat model shared by the tool-calling executor agent.
model = init_chat_model("gpt-4o-mini", model_provider="openai")

In [2]:
from langchain_core.tools import tool
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
import time

# Shared cache of scraped movies. movie_infos_search adds entries to it and
# movie_metadata_search looks entries up by their "link" value.
movie_infos = []

# Only this many of the top search results are inspected per query.
_MAX_SEARCH_RESULTS = 5

_IMDB_HOME_URL = "https://www.imdb.com/?ref_=nv_home"
_PLOT_XPATH = '//*[@id="__next"]/main/div/section[1]/section/div[3]/section/section/div[3]/div[2]/div[1]/section/p'
_CAST_NAMES_XPATH = '//*[@id="__next"]/main/div/section[1]/div/section/div/div[1]/section[4]/div[2]/div[2]'


def _clean_credit(text):
    """Remove the ellipsis and "(voice)" markers from a cast/character line."""
    return text.replace("…", "").replace("(voice)", "").strip()


def _format_search_result(movie_info):
    """Render one scraped movie as the text block handed back to the agent."""
    parts = [
        f"\nMovie Title: {movie_info['title']}\n"
        f"Link: {movie_info['link']}\n"
        f"Plot: {movie_info['plot']}\n"
        "Casts and Characters:\n"
    ]
    parts.extend(
        f"  {credit['cast']} playing {credit['character']}\n"
        for credit in movie_info["casts_and_characters"]
    )
    return "".join(parts)


@tool
def movie_infos_search(movie_name: str):
    """
    Input the movie name that the user is looking for and extract movies from IMDb search results and return their infos.
    The infos include the movie title, plot, cast and characters.
    """
    # The docstring above is kept verbatim: the @tool decorator exposes it to
    # the model as the tool description, so editing it changes agent behavior.
    #
    # Returns one text block per movie found by THIS query (title, link, plot,
    # cast), a short "no results" message, or the exception text if scraping
    # fails. Successfully scraped movies are also stored in `movie_infos`.

    chrome_options = Options()
    driver = webdriver.Chrome(options=chrome_options)

    try:
        driver.minimize_window()
        driver.get(_IMDB_HOME_URL)

        search_box = driver.find_element(By.ID, "suggestion-search")
        search_box.send_keys(movie_name)
        driver.find_element(By.ID, "suggestion-search-button").click()
        time.sleep(0.1)

        li_elements = driver.find_elements(By.CSS_SELECTOR, 'li.ipc-metadata-list-summary-item')

        # Results are collected locally so a failure half-way through cannot
        # leave partially filled entries in the shared cache.
        found = []
        # Slicing (rather than range(5)) tolerates fewer than five results.
        for li_element in li_elements[:_MAX_SEARCH_RESULTS]:
            metadatas = [
                element.text
                for element in li_element.find_elements(By.CLASS_NAME, 'ipc-metadata-list-summary-item__li')
            ]

            # Only feature films are of interest.
            if 'TV Series' in metadatas or 'Video' in metadatas:
                continue

            title_element = li_element.find_element(By.CSS_SELECTOR, 'a.ipc-metadata-list-summary-item__t')
            found.append({
                "title": title_element.text,
                "link": title_element.get_attribute("href"),
                "metadatas": metadatas,
            })

        for movie_info in found:
            driver.get(movie_info["link"])

            # The XPath is absolute, so a single lookup is enough; a missing
            # plot still raises and is reported by the outer handler.
            plot_text = driver.find_element(By.XPATH, _PLOT_XPATH).text
            movie_info["plot"] = plot_text
            if "Plot under wraps" in plot_text:
                movie_info["casts_and_characters"] = []
                continue

            try:
                names_list = driver.find_element(By.XPATH, _CAST_NAMES_XPATH).text.splitlines()
            except Exception:
                # Best effort: a page without the expected cast section simply
                # yields an empty cast list.
                names_list = []

            # Lines alternate cast name / character name. zip() drops a
            # dangling last line instead of raising IndexError.
            movie_info["casts_and_characters"] = [
                {"cast": _clean_credit(cast), "character": _clean_credit(character)}
                for cast, character in zip(names_list[0::2], names_list[1::2])
            ]

        if not found:
            return f"No movie results found on IMDb for {movie_name!r}."

        # Update the shared cache in place (no rebinding, so no `global`
        # needed), replacing any stale entry that has the same link.
        fresh_links = {movie_info["link"] for movie_info in found}
        movie_infos[:] = [
            movie_info for movie_info in movie_infos
            if movie_info["link"] not in fresh_links
        ]
        movie_infos.extend(found)

        # Report only this query's movies, not everything cached earlier.
        return "".join(_format_search_result(movie_info) for movie_info in found)
    except Exception as e:
        # Hand the error text back to the agent rather than raising.
        return str(e)
    finally:
        # Always release the browser, on success and on failure alike.
        driver.quit()

@tool
def movie_metadata_search(url: str):
    """
    Input the IMDb movie page URL and extract the movie title, plot, casts, characters, directors, writers, rating, popularity.
    """
    # The docstring above is kept verbatim: the @tool decorator exposes it to
    # the model as the tool description, so editing it changes agent behavior.
    #
    # Returns the formatted details of the cached movie whose "link" equals
    # `url`, a hint to run the search first when the URL is not cached, or the
    # exception text if scraping fails. The scraped fields are also stored on
    # the cached entry.

    # Only movies previously stored in `movie_infos` can be enriched.
    movie_info = next((info for info in movie_infos if info["link"] == url), None)
    if movie_info is None:
        # Returning None here would give the agent nothing to act on.
        return (
            f"No cached search result matches {url}. "
            "Call movie_infos_search first and pass one of the links it returns."
        )

    chrome_options = Options()
    driver = webdriver.Chrome(options=chrome_options)

    try:
        driver.maximize_window()
        driver.get(url)

        rating = driver.find_element(By.XPATH, '//*[@id="__next"]/main/div/section[1]/section/div[3]/section/section/div[2]/div[2]/div/div[1]/a/span/div/div[2]/div[1]/span[1]').text
        popularity = driver.find_element(By.XPATH, '//*[@id="__next"]/main/div/section[1]/section/div[3]/section/section/div[2]/div[2]/div/div[3]/a/span/div/div[2]/div[1]').text

        driver.minimize_window()

        # Director
        ul_element = driver.find_element(By.XPATH, '//*[@id="__next"]/main/div/section[1]/div/section/div/div[1]/section[4]/ul/li[1]/div/ul')
        directors = [li.text for li in ul_element.find_elements(By.TAG_NAME, 'li')]

        # Writer
        ul_element = driver.find_element(By.XPATH, '//*[@id="__next"]/main/div/section[1]/div/section/div/div[1]/section[4]/ul/li[2]/div/ul')
        writers = [li.text for li in ul_element.find_elements(By.TAG_NAME, 'li')]

        box_office_div = driver.find_element(By.CSS_SELECTOR, 'div[data-testid="title-boxoffice-section"]')
        # The last line of the section is discarded (presumably a trailing
        # link rather than a figure -- TODO confirm). The remaining lines
        # alternate label / value; zip() ignores an unpaired last line.
        lines = box_office_div.text.splitlines()[:-1]
        box_office = [
            label + ": " + value
            for label, value in zip(lines[0::2], lines[1::2])
        ]

        # Store everything at once so a failure above leaves the cached
        # entry unchanged.
        movie_info.update({
            "rating": rating,
            "popularity": popularity,
            "directors": directors,
            "writers": writers,
            "box_office": box_office,
        })

        # An entry may lack these keys if the earlier search was interrupted.
        plot = movie_info.get("plot", "")
        credits = movie_info.get("casts_and_characters", [])

        parts = [
            f"\nMovie Title: {movie_info['title']}\n"
            f"Link: {movie_info['link']}\n"
            f"Plot: {plot}\n"
            "Casts and Characters:\n"
        ]
        parts.extend(
            f"  {credit['cast']} playing {credit['character']}\n"
            for credit in credits
        )
        parts.append("Directors: ")
        parts.extend(f"{director}, " for director in directors)
        parts.append("\nWriters: ")
        parts.extend(f"{writer}, " for writer in writers)
        parts.append("\nBox Office:\n")
        parts.extend(f"  {box}\n" for box in box_office)
        parts.append(f"Rating: {rating}\nPopularity: {popularity}\n")
        return "".join(parts)
    except Exception as e:
        # Same convention as movie_infos_search: give the agent the error text.
        return str(e)
    finally:
        # Always release the browser, on success and on failure alike.
        driver.quit()

In [3]:
# movie_infos_text = movie_infos_search.invoke("The Matrix")
# print(movie_infos_text)

In [4]:
# movie_metadata_text = movie_metadata_search.invoke("https://www.imdb.com/title/tt0133093/?ref_=fn_all_ttl_1")
# print(movie_metadata_text)

In [5]:
from langgraph.checkpoint.memory import MemorySaver
from langgraph.prebuilt import create_react_agent

# Prompt handed to the tool-calling executor agent: it receives one plan step
# at a time and is asked to pick the matching tool.
prompt = """
    Based on the plan steps provided by planner, please select the appropriate tool to use and provide the response of the plan step.
"""

# NOTE(review): `memory` is created here but never passed to
# create_react_agent below, so it looks unused in this file -- confirm.
memory = MemorySaver()
# The two IMDb scraping tools the executor may call.
tools = [movie_infos_search, movie_metadata_search]
# ReAct-style agent built from the shared chat model, the tools and the prompt.
executor = create_react_agent(model, tools, prompt=prompt)

In [6]:
# from langchain_core.messages import HumanMessage

# # Terminator 2: I am looking for a movie that a robot pretending to be a human and trying to assassinate the main character. The movie is the second installment in a series.
# # The Avengers: I am looking for a movie that starring Robert Downey Jr., Chris Evans, and Scarlett Johansson.
# # The Matrix: I am looking for a movie starring Keanu Reeves and the movie is about virtual reality.
# movie_infos = []
# query = "I am looking for a movie that a robot pretending to be a human and trying to assassinate the main character. The mvoie is the second installment in a series."

# config = {"configurable": {"thread_id": "abc123"}}
# for step in executor.stream(
#     {"messages": [HumanMessage(content=query)]},
#     config,
#     stream_mode="values",
# ):
#     step["messages"][-1].pretty_print()

# Assignment 2

In [7]:
import operator
from typing import Annotated, List, Tuple
from typing_extensions import TypedDict
from langgraph.graph import MessagesState



# State carried through the plan-and-execute graph (planner -> agent -> replan).
class PlanExecute(MessagesState):
    # The user's request; read by plan_step, execute_step and the replanner prompt.
    input: str
    # Plan steps still to run; execute_step always executes plan[0].
    plan: List[str]
    # (task, result) tuples returned by execute_step. Annotated with
    # operator.add, so each node update is combined with the existing list
    # by addition.
    past_steps: Annotated[List[Tuple], operator.add]
    # Final answer for the user; a non-empty value makes should_end return END.
    response: str

In [8]:
from pydantic import BaseModel, Field
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI

# Structured-output schema for the planner (and one branch of Act.action).
# The docstring and Field description are part of the schema passed to
# with_structured_output, so they are left unchanged.
class Plan(BaseModel):
    """Plan to follow in future"""

    # Ordered list of step descriptions; plan_step and replan_step copy it
    # into the graph state's "plan".
    steps: List[str] = Field(
        description="different steps to follow, should be in sorted order"
    )

# System prompt for the planner. The example plan mirrors the tools registered
# for the executor: movie_infos_search (IMDb search) and movie_metadata_search
# (details for one IMDb URL). Tool names in the prompt must match those
# registered names, otherwise the plan points the executor at a non-existent tool.
planner_prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            """For the given objective, come up with a simple step by step plan.  
This plan should involve individual tasks, that if executed correctly will yield the correct answer.  
Do not add any superfluous steps.  
The result of the final step should be the final answer.  
Make sure that each step has all the information needed – do not skip steps.

You can reference the following plan as an example:
    1. Generate a probable movie name based on user input  
    Goal: Extract or infer the most likely movie title the user is referring to from their natural language query.  
    Method: Apply semantic understanding, contextual reasoning, named entity recognition (NER), or prompted inference to transform the raw query into a clear movie name.

    2. Search IMDb and determine the correct movie based on search results  
    Goal: Use the inferred movie name from the previous step to perform an IMDb search and select the most semantically relevant result.  
    Method:  
    Use the movie_infos_search(movie_name) tool to retrieve multiple search results  
    Compare result titles, release years, actors, and other metadata to identify the best match

    3. Retrieve detailed information about the selected movie from IMDb  
    Goal: Use the selected IMDb link to retrieve comprehensive movie metadata such as plot, cast, characters, directors, ratings, etc.  
    Method: Call the movie_metadata_search(imdb_url) tool

At each step, clearly output structured results such as:  
- Step 1 → movie_name: <string>  
- Step 2 → selected_movie_url: <IMDb URL>  
- Step 3 → metadata: <JSON object with fields such as title, plot, casts, etc.>      

If the provided movie is not what the user is looking for, start over and create a new plan by generating alternative possible movie names based on the user’s previous input and searching again.

Examples of mismatch conditions (triggering replan):  
- The plot returned is semantically inconsistent with the original user query.  
- None of the cast members match the ones mentioned by the user.  
- The title or year obviously contradicts contextual hints.
""",
        ),
        # Filled with the "messages" list supplied when the planner is invoked.
        ("placeholder", "{messages}"),
    ]
)
# Planner chain: prompt -> chat model constrained to return a Plan object.
planner = planner_prompt | ChatOpenAI(
    model="gpt-4o-mini", temperature=0
).with_structured_output(Plan)

In [9]:
# Smoke test: ask the planner for a plan for one sample query and print each
# step on its own line.
response = planner.invoke(
    {
        "messages": [
            ("user", "I am looking for a movie starring Keanu Reeves and the movie is about virtual reality.")
        ]
    }
)
# `response` is a Plan instance; `steps` is its ordered list of step strings.
for step in response.steps:
    print(step)

Generate a probable movie name based on user input
Search IMDb and determine the correct movie based on search results
Retrieve detailed information about the selected movie from IMDb


In [10]:
from typing import Union


# Structured-output schema for a final answer; one of the two alternatives of
# Act.action. The docstring is part of the schema and is left unchanged.
class Response(BaseModel):
    """Response to user."""

    # Text returned to the user; replan_step copies it into state["response"].
    response: str


# Structured-output schema for the replanner: either finish with a Response or
# continue with a new Plan. The docstring and Field description are part of
# the schema and are left unchanged.
class Act(BaseModel):
    """Action to perform."""

    # replan_step branches on the concrete type of this field.
    action: Union[Response, Plan] = Field(
        description="Action to perform. If you want to respond to user, use Response. "
        "If you need to further use tools to get the answer, use Plan."
    )


# Prompt for the replanner. The {input}, {plan} and {past_steps} placeholders
# are filled from the graph state that replan_step passes to the chain.
replanner_prompt = ChatPromptTemplate.from_template(
    """For the given objective, come up with a simple step by step plan. \
This plan should involve individual tasks, that if executed correctly will yield the correct answer. Do not add any superfluous steps. \
The result of the final step should be the final answer. Make sure that each step has all the information needed - do not skip steps.

Your objective was this:
{input}

Your original plan was this:
{plan}

You have currently done the follow steps:
{past_steps}

Update your plan accordingly. 
If no more steps are needed and you can return to the user, then respond with that. 
If you need more information from the user to complete the task, then respond with that.
Otherwise, fill out the plan. Only add steps to the plan that still NEED to be done. Do not return previously done steps as part of the plan.

Notice:
Do not respond to the user with the plan. You should only return the final conclusion to the user.
"""
)


# Replanner chain: prompt -> chat model constrained to return an Act object
# (either a Response or an updated Plan). Note it uses "gpt-4o", whereas the
# planner uses "gpt-4o-mini".
replanner = replanner_prompt | ChatOpenAI(
    model="gpt-4o", temperature=0
).with_structured_output(Act)

In [11]:
from typing import Literal
from langgraph.graph import END


async def execute_step(state: PlanExecute):
    """Run the first remaining plan step through the executor agent.

    Reads ``input`` and ``plan`` from the state, sends the user's request, the
    numbered plan and the current task to ``executor``, and returns a
    ``past_steps`` update holding one ``(task, final message content)`` tuple.
    """
    question = state["input"]
    steps = state["plan"]
    numbered_plan = "\n".join(
        f"{number}. {text}" for number, text in enumerate(steps, start=1)
    )
    # The head of the plan is always the step to run, which is why the
    # instruction below always says "step 1".
    current_task = steps[0]
    instruction = (
        f"User Input: {question}\n\n \n"
        "For the following plan:\n"
        f"{numbered_plan}\n\n"
        f"You are tasked with executing step 1, {current_task}."
    )
    # NOTE(review): a thread_id is supplied, but the executor is created
    # without a checkpointer in this file -- confirm whether it has any effect.
    run_config = {"configurable": {"thread_id": "executor123"}}
    result = await executor.ainvoke(
        {"messages": [("user", instruction)]},
        config=run_config,
    )
    final_message = result["messages"][-1]
    return {"past_steps": [(current_task, final_message.content)]}


async def plan_step(state: PlanExecute):
    """Ask the planner for a fresh plan for the current user input.

    Returns a state update with the new ``plan``. When the state already holds
    a non-empty ``response`` it is reset to ``""`` in the same update.
    """
    user_message = ("user", state["input"])
    new_plan = await planner.ainvoke({"messages": [user_message]})

    update = {"plan": new_plan.steps}
    has_previous_answer = "response" in state and state["response"]
    if has_previous_answer:
        # Clear the earlier answer so should_end does not stop immediately.
        update["response"] = ""
    return update


async def replan_step(state: PlanExecute):
    """Let the replanner decide between answering and continuing.

    Returns ``{"response": ...}`` when the replanner chose a ``Response``,
    otherwise ``{"plan": ...}`` with the steps of the updated plan.
    """
    decision = await replanner.ainvoke(state)
    action = decision.action
    if not isinstance(action, Response):
        # Anything that is not a Response is treated as an updated plan.
        return {"plan": action.steps}
    return {"response": action.response}


def should_end(state: PlanExecute):
    """Route after replanning: END once a non-empty response exists, else "agent"."""
    answered = "response" in state and state["response"]
    return END if answered else "agent"

In [12]:
from langgraph.graph import StateGraph, START, END, MessagesState
from langgraph.checkpoint.memory import MemorySaver

# Build the plan-and-execute graph over the PlanExecute state:
#   START -> planner -> agent -> replan -> (agent | END)
workflow = StateGraph(PlanExecute)

# Add the plan node
workflow.add_node("planner", plan_step)

# Add the execution step
workflow.add_node("agent", execute_step)

# Add a replan node
workflow.add_node("replan", replan_step)

workflow.add_edge(START, "planner")

# From plan we go to agent
workflow.add_edge("planner", "agent")

# From agent, we replan
workflow.add_edge("agent", "replan")

workflow.add_conditional_edges(
    "replan",
    # Next, we pass in the function that will determine which node is called next.
    should_end,
    # Possible destinations returned by should_end.
    ["agent", END],
)

# Finally, we compile it!
# This compiles it into a LangChain Runnable,
# meaning you can use it as you would any other runnable
# NOTE(review): `memeory` looks like a misspelling of "memory"; the name is
# kept because it is a module-level variable other cells might reference.
memeory = MemorySaver()
# The checkpointer is attached to the compiled graph here.
app = workflow.compile(checkpointer=memeory)

In [13]:
# from IPython.display import Image, display

# display(Image(app.get_graph(xray=True).draw_mermaid_png()))

In [14]:
config = {"recursion_limit": 50, "thread_id": "workflow123"}
inputs = {"input": "I am looking for a movie starring Keanu Reeves and the movie is about virtual reality."}
async for event in app.astream(inputs, config=config):
    for k, v in event.items():
        if k != "__end__":
            print(v)
            if "response" in v:
                print(v["response"])

{'plan': ['Generate a probable movie name based on user input', 'Search IMDb and determine the correct movie based on search results', 'Retrieve detailed information about the selected movie from IMDb']}
{'past_steps': [('Generate a probable movie name based on user input', 'Based on the user input regarding a movie starring Keanu Reeves about virtual reality, a probable movie name is **"The Matrix."**\n\nHere are some related titles from the franchise:\n\n1. [**The Matrix**](https://www.imdb.com/title/tt0133093/?ref_=fn_all_ttl_1)\n   - Plot: When a beautiful stranger leads computer hacker Neo to a forbidding underworld, he discovers the shocking truth--the life he knows is the elaborate deception of an evil cyber-intelligence.\n\n2. [**The Matrix Resurrections**](https://www.imdb.com/title/tt10838180/?ref_=fn_all_ttl_2)\n   - Plot: Return to a world of two realities: one, everyday life; the other, what lies behind it. To find out if his reality is a construct, to truly know himself, 

In [15]:
config = {"recursion_limit": 50, "thread_id": "workflow123"}
inputs = {"input": "Please provide more details of the movie, such as characters, directors, and ratings."}
async for event in app.astream(inputs, config=config):
    for k, v in event.items():
        if k != "__end__":
            print(v)
            if "response" in v:
                print(v["response"])

{'plan': ['1. Identify the movie title based on user request for details.', '2. Search IMDb for the identified movie title to find the correct movie.', '3. Retrieve detailed information about the selected movie from IMDb, including characters, directors, and ratings.'], 'response': ''}

{'past_steps': [('1. Identify the movie title based on user request for details.', 'To proceed with step 1, I need to identify the specific movie title that the user wants more details about. Since there is no movie title provided in the user input, I cannot complete this step without additional information. Please provide the name of the movie you would like details about.')]}
{'response': 'Since the probable movie name has been identified as "The Matrix," I will proceed with retrieving detailed information about this movie. Here are the details:\n\n### **The Matrix (1999)**\n\n- **Directors:**\n  - Lana Wachowski\n  - Lilly Wachowski\n\n- **Main Characters:**\n  - **Neo (Thomas A. Anderson):** Played 

In [16]:
# # I am looking for a movie starring Keanu Reeves and the movie is about virtual reality.
# # Please provide more details of the movie, such as characters, directors, and ratings.

# while True:
#     # Get user input
#     user_input = input("Please enter your query: ")
#     if user_input.lower() == "exit":
#         break
#     config = {"recursion_limit": 50, "thread_id": "workflow123"}
#     inputs = {"input": user_input}
#     async for event in app.astream(inputs, config=config):
#         for k, v in event.items():
#             if k != "__end__":
#                 print(v)
#                 if "response" in v:
#                     print(v["response"])