## Models initialization

In [79]:
import json
from os import stat_result

from dotenv import load_dotenv
import os
from langchain_openai import AzureOpenAIEmbeddings, ChatOpenAI
from langchain_community.document_loaders import TextLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
import numpy as np

# Load variables from a local .env file into the process environment.
load_dotenv()
# Name of the embeddings deployment; os.getenv yields None when the variable is unset.
AZURE_OPENAI_EMBEDDINGS_DEPLOYMENT = os.getenv("EMBEDDING_DEPLOYMENT_NAME")
# Fail fast when the OpenAI key is absent or empty.
assert os.getenv("OPENAI_API_KEY"), "OPENAI_API_KEY missing"

# Embedding client; it is passed to FAISS when the index is built and when it is reloaded.
embeddings = AzureOpenAIEmbeddings(
    deployment=AZURE_OPENAI_EMBEDDINGS_DEPLOYMENT,
    base_url=None,
    azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT"),
)
# Chat model shared by the RAG chain, the tool-calling cells and the agents below.
llm = ChatOpenAI(model="gpt-4.1", temperature=0.0)
print("LLM ready.")

LLM ready.


## RAG implementation

In [2]:
import kagglehub

# Download the latest dataset version; the returned value is the local dataset directory.
dataset_dir = kagglehub.dataset_download("jrobischon/wikipedia-movie-plots")
# os.path.join uses the platform separator instead of a hard-coded "/".
path = os.path.join(dataset_dir, "wiki_movie_plots_deduped.csv")
print("Path to dataset files:", path)

import numpy as np
import pandas as pd

# One row per movie; the "Plot" column is used as document text further down.
df = pd.read_csv(path)
from datetime import datetime, date
from typing import Optional, Iterable, List, Dict, Any
from langchain_core.documents import Document


def _to_python(val: Any) -> Any:
    """Make values JSON/metadata-friendly (convert numpy scalars, NaT, timestamps, etc.)."""
    if pd.isna(val):
        return None
    if isinstance(val, (np.generic,)):
        return val.item()
    if isinstance(val, (pd.Timestamp, datetime, date)):
        return val.isoformat()
    return val

def df_to_documents(
    df: pd.DataFrame,
    text_col: str = "plot",
    meta_cols: Optional[Iterable[str]] = None,
    drop_na_text: bool = True,
) -> List[Document]:
    """
    Convert a DataFrame to LangChain Documents.

    Args:
        df: Source frame; it is not modified.
        text_col: Column whose value becomes ``page_content``.
        meta_cols: Columns copied into ``metadata``. Defaults to every column
            except ``text_col``. Any iterable is accepted, including one-shot iterators.
        drop_na_text: When True, rows whose text is missing are skipped; when
            False they are kept with an empty ``page_content``.

    Returns:
        One Document per retained row, in row order.

    Raises:
        ValueError: If ``text_col`` or any entry of ``meta_cols`` is not a column of ``df``.
    """
    if text_col not in df.columns:
        raise ValueError(f"text_col '{text_col}' not found in DataFrame columns: {list(df.columns)}")

    # Decide which columns become metadata
    if meta_cols is None:
        meta_names = [c for c in df.columns if c != text_col]
    else:
        # Materialize once: the names are iterated here for validation and again
        # for every row, which would exhaust a generator after the first pass.
        meta_names = list(meta_cols)
        for c in meta_names:
            if c not in df.columns:
                raise ValueError(f"meta_col '{c}' not found in DataFrame")

    # Boolean indexing already yields a new frame, so no defensive copy is needed.
    work = df[df[text_col].notna()] if drop_na_text else df

    docs: List[Document] = []
    for _, row in work.iterrows():
        text = row[text_col]
        page_content = "" if pd.isna(text) else str(text)
        metadata: Dict[str, Any] = {col: _to_python(row[col]) for col in meta_names}
        docs.append(Document(page_content=page_content, metadata=metadata))
    return docs

# The "Plot" column becomes the document text; every other column is kept as metadata.
docs = df_to_documents(df, text_col="Plot")

Path to dataset files: C:\Users\oleksiy.hoyev\.cache\kagglehub\datasets\jrobischon\wikipedia-movie-plots\versions\1/wiki_movie_plots_deduped.csv


In [5]:
# Only the first 10000 documents are embedded and indexed.
vector_store = FAISS.from_documents(docs[:10000], embeddings)
print("FAISS vector store created from movie plots.")

FAISS vector store created from movie plots.


In [3]:
# Directory of the persisted FAISS index; the reload cell below reads from it.
PERSIST_DIR = "./faiss_index"
# One-time save step, kept commented out. Uncomment and run it once after building
# `vector_store` so that the reload cell has an index to load:
# os.makedirs(PERSIST_DIR, exist_ok=True)
# vector_store.save_local(PERSIST_DIR)
# print(f"Saved FAISS index to {PERSIST_DIR}")

In [61]:
# SECURITY: allow_dangerous_deserialization=True opts in to unpickling the stored index
# data; only load index directories that you created yourself.
vector_store = FAISS.load_local(PERSIST_DIR, embeddings, allow_dangerous_deserialization=True)
# Report the size of the store that was just loaded.
print("Reloaded store size:", len(vector_store.index_to_docstore_id))

Reloaded store size: 10000


In [62]:
import json
def format_docs(docs):
    """Render retrieved documents as one context string for the prompt.

    Each document becomes ``<metadata as JSON>: <page_content>`` and documents
    are separated by a blank line. An empty input yields an empty string.
    """
    # default=str keeps metadata values that are not JSON-native (dates, Decimals, ...)
    # from raising TypeError; ensure_ascii=False keeps non-ASCII text readable
    # instead of expanding it into \uXXXX escapes.
    return "\n\n".join(
        f"{json.dumps(d.metadata, ensure_ascii=False, default=str)}: {d.page_content}"
        for d in docs
    )

# Chat prompt: a fixed system instruction plus a human turn that carries the
# question and the retrieved context.
prompt = ChatPromptTemplate.from_messages([
    ("system", "You are a helpful assistant. Use the provided context to answer the user's question. If unsure, say you don't know."),
    ("human", "Question: {question}\n\nContext:\n{context}\n\nAnswer:")
])

# Similarity search returning 4 documents per query.
# Unused search kwarg kept for reference:
#  "score_threshold": 1.3
retriever = vector_store.as_retriever(search_type="similarity", search_kwargs={"k": 4})

# The chain input (the question) goes to the retriever, whose results format_docs
# turns into {context}; RunnablePassthrough supplies the same input as {question}.
rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

print("RAG chain constructed.")

RAG chain constructed.


## Tool Calling

In [63]:
from typing import Annotated
from langchain_core.tools import tool

In [64]:
from typing import Annotated
from langchain_core.tools import tool
from random import randint


@tool(description="Simple addition tool. Call it whenever user requests to add 2 numbers. "
                  "If more numbers are given, split into multiple calls.")
def add_numbers(
        a: Annotated[float, "First operand. Floating point value. Required."],
        b: Annotated[float, "Second operand. Floating point value. Required."],
) -> float:
    # Sum of exactly two operands; per the description above, longer additions
    # are expected to arrive as several separate calls.
    total = a + b
    return total


@tool(description="Fetch current weather for a given city. Call it when user asks for current weather.")
def fetch_current_weather(
        country: Annotated[str, "Name of the country where the city is located. "
                                "Required. Must be a country code (i.e. UK, UA, US, etc.)" ],
        city: Annotated[str, "Name of the city to fetch weather for. Optional."] = "") -> str:
    # Demo stub: no weather service is called; the temperature is a random
    # integer between -20 and 40 and the condition is always "Sunny".
    # `city` defaults to "" so that it is optional, as its description states.
    location = f"{country} {city}" if city else country
    return f"Weather in {location}:  Sunny, {randint(-20, 40)}C"

In [69]:
from langchain_core.messages import SystemMessage, HumanMessage, AIMessage, ToolMessage
from langchain_core.runnables.utils import Output
from langchain_core.runnables import Runnable

# Conversation seed: the system instruction plus one user request that needs both tools.
messages = [
    SystemMessage("You are a helpful assistant that can use tools to answer user questions."),
    HumanMessage("What is the weather in Lviv? Also, add 5.5 and 10.2.")
]

# Name -> tool lookup used to dispatch the tool calls returned by the model.
tools = {t.name: t for t in [add_numbers, fetch_current_weather]}

llm_with_tools: Runnable = llm.bind_tools([add_numbers, fetch_current_weather])
response: Output = llm_with_tools.invoke(messages)
# Keep the assistant turn in the history; the tool results appended later refer
# to its tool call ids.
messages.append(response)

In [70]:
from pprint import pprint
# In the recorded run the content is empty and the reply consists of tool calls only.
pprint(response.content)
pprint(response.tool_calls)

''
[{'args': {'city': 'Lviv', 'country': 'UA'},
  'id': 'call_MVD0CHDxpVOLZoCMWLhVDizv',
  'name': 'fetch_current_weather',
  'type': 'tool_call'},
 {'args': {'a': 5.5, 'b': 10.2},
  'id': 'call_VsnwFu3BoTDhGLcPif5rbutl',
  'name': 'add_numbers',
  'type': 'tool_call'}]


In [71]:
tool_results = []
# The loop variable is `tool_call`, not `tool`: at module level a variable named
# `tool` would rebind the imported `tool` decorator for every later cell.
for tool_call in response.tool_calls:
    res = tools[tool_call["name"]].invoke(dict(tool_call["args"]))
    print(f"Tool called: {tool_call['name']} with args {tool_call['args']} -> returned {res}")
    # tool_call_id ties the result to the request it answers.
    tool_results.append(ToolMessage(res, tool_call_id=tool_call["id"]))
messages += tool_results

Tool called: fetch_current_weather with args {'country': 'UA', 'city': 'Lviv'} -> returned Weather in UA Lviv:  Sunny, 29C
Tool called: add_numbers with args {'a': 5.5, 'b': 10.2} -> returned 15.7


In [73]:
# Second model call: the history now also holds the tool results.
response: Output = llm_with_tools.invoke(messages)

In [74]:
# Text of the reply produced after the tool results were supplied.
response.content

'The current weather in Lviv, Ukraine is sunny with a temperature of 29°C.\n\nAlso, the sum of 5.5 and 10.2 is 15.7.'

## Custom Conversational Agent with Tool Calling

In [80]:
class Conversation:
    """Multi-turn chat loop that executes the model's tool calls by hand.

    The instance keeps the full message history, so successive calls share context.
    """

    def __init__(self, llm, max_turns: int = 10):
        """
        Args:
            llm: Chat model exposing ``bind_tools``.
            max_turns: Upper bound on model invocations per user message; guards
                against a model that keeps requesting tools forever.
        """
        available_tools = [add_numbers, fetch_current_weather]
        self.messages = [
            SystemMessage("You are a helpful assistant that can use tools to answer user questions.")
        ]
        self.llm = llm.bind_tools(available_tools)
        # Name -> tool lookup used to dispatch tool calls.
        self.tools = {t.name: t for t in available_tools}
        self.max_turns = max_turns

    def execute_tools(self, message: AIMessage) -> List[ToolMessage]:
        """Run every tool call requested by ``message``.

        Returns one ToolMessage per tool call, in request order; an empty list
        when the message requests no tools.
        """
        tool_results = []
        for tool_call in message.tool_calls or []:
            name = tool_call["name"]
            selected = self.tools.get(name)
            if selected is None:
                # Report the unknown tool back to the model instead of aborting the
                # conversation with a KeyError.
                tool_results.append(
                    ToolMessage(f"Error: unknown tool '{name}'.", tool_call_id=tool_call["id"])
                )
                continue
            res = selected.invoke(dict(tool_call["args"]))
            print(f"Tool called: {name} with args {tool_call['args']} -> returned {res}")
            tool_results.append(ToolMessage(res, tool_call_id=tool_call["id"]))
        return tool_results

    def turn(self):
        """Invoke the model once and append its reply plus any tool results to the history."""
        response: Output = self.llm.invoke(self.messages)
        self.messages.append(response)
        self.messages += self.execute_tools(response)

    def __call__(self, user_input: str):
        """Send ``user_input`` and return the model's final message.

        The loop ends when the newest history entry is an AIMessage, i.e. the
        last model reply produced no tool results.

        Raises:
            RuntimeError: If no final answer is produced within ``max_turns`` model calls.
        """
        self.messages.append(HumanMessage(user_input))
        turns_taken = 0
        while not isinstance(self.messages[-1], AIMessage):
            if turns_taken >= self.max_turns:
                raise RuntimeError(f"No final answer after {self.max_turns} model calls.")
            self.turn()
            turns_taken += 1
        return self.messages[-1]


In [81]:
# Create a new conversation; its history starts with the system message only.
conv = Conversation(llm)

In [84]:
# NOTE(review): the recorded output lists weather for Brazil, Ukraine and New York;
# those questions come from earlier calls to `conv` that are not shown in this notebook.
message = "give me all weather statuses from conversation."
response = conv(message)
pprint(response.content)

('Here are all the weather statuses from our conversation:\n'
 '\n'
 '1. Brazil: Sunny, 13°C\n'
 '2. Ukraine: Sunny, 2°C\n'
 '3. New York, US: Sunny, -19°C\n'
 '\n'
 'Let me know if you need weather information for any other locations!')


## LangGraph Agent with RAG and Tool Calling

In [85]:
from typing import Annotated, Literal

from langchain_openai import ChatOpenAI
from langgraph.prebuilt import ToolNode
from langchain.agents import create_agent
from langgraph.graph import StateGraph, START, END
from langgraph.graph.message import MessagesState

In [97]:
# NOTE: `rag_tool` is defined in a cell that appears further down in this notebook;
# that cell has to be executed before this one.
tools = [add_numbers, fetch_current_weather, rag_tool]
# Node used for the "tools" step of the graph built below.
tool_node = ToolNode(tools)
# Node used for the "agent" step of the graph built below.
agent_node = create_agent(
    model=llm,
    tools=tools,
    state_schema=MessagesState
)

def should_continue(state: MessagesState) -> Literal["tools", END]:
    """Route after the agent node: "tools" while tool calls are pending, otherwise END."""
    history = state["messages"]
    last_message = history[-1] if history else None
    # Message types other than AIMessage have no `tool_calls` attribute; getattr
    # routes those (and an empty history) to END instead of raising.
    if getattr(last_message, "tool_calls", None):
        return "tools"
    return END

def build_graph():
    """Compile the two-node loop: START -> agent, then agent <-> tools until should_continue ends it."""
    builder = StateGraph(MessagesState)

    # Register the nodes in the same order as before: agent first, then tools.
    for node_name, node in (("agent", agent_node), ("tools", tool_node)):
        builder.add_node(node_name, node)

    builder.add_edge(START, "agent")
    # should_continue picks the next step after every agent run.
    builder.add_conditional_edges("agent", should_continue)
    # Tool results always flow back to the agent.
    builder.add_edge("tools", "agent")

    compiled_graph = builder.compile()
    return compiled_graph

In [98]:
# Compile the agent/tools graph defined above.
graph = build_graph()

In [99]:
# Run the graph on a fresh two-message history; `res["messages"]` holds the
# resulting conversation.
res = graph.invoke({
    "messages": [
        SystemMessage("You are a helpful assistant that can use tools to answer user questions."),
        HumanMessage("give me a move with sci-fy plot")
    ]
})

RAG tool called with question: Give me a movie with a sci-fi plot.


In [100]:
# Content of the last message in the returned history.
pprint(res["messages"][-1].content)

("Here's a movie with a sci-fi plot: Monster A Go-Go (1965).\n"
 '\n'
 'The story is about an astronaut named Frank Douglas who disappears after '
 'returning to Earth. Afterward, a large, radioactive, humanoid monster '
 'appears—suggesting the astronaut was either replaced by or turned into this '
 'creature. Scientists and the military try to capture it as chaos unfolds. In '
 "a twist ending, it's revealed Douglas was actually rescued elsewhere, "
 'hinting that the monster may have been an alien impostor.\n'
 '\n'
 'Let me know if you want a more popular or different kind of sci-fi movie!')


In [96]:
## Task during workshop: add rag as a tool in the LangGraph agent
from langchain.tools import tool
@tool(description="Use RAG to answer questions based on movie plots dataset.")
def rag_tool(
        question: Annotated[str, "Question to be answered based on movie plots. Required."],
) -> str:
    # Log each call so that the cell output shows when the agent chose this tool.
    print("RAG tool called with question:", question)
    # Delegate to the retrieval chain built earlier; its string output is the tool result.
    answer = rag_chain.invoke(question)
    return answer

## Bigger workflow with task planning, react agent, conversation summarization, etc.

In [101]:
## Let's create artificial tools, as in the file-upload example
from langchain_core.tools import tool

@tool(description="Upload file text to google drive. Call it when user wants to upload a file to cloud. "
                  "It is a main method to upload something. Always use it when there are some files to be saved.")
def upload_to_google_drive(file_name: Annotated[str, "Name of the file to be uploaded. Required."],
                           file_content: Annotated[str, "Content of the file to be uploaded. Required."]) -> str:
    # Artificial tool: nothing is uploaded and `file_content` is not used. It always
    # reports a failure, presumably to make the agent exercise the fallback upload tool.
    outcome = f"Failed to upload {file_name} to Google Drive."
    print(outcome)
    return outcome

@tool(description="Upload file text to dropbox. Call it when all other methods of upload to cloud have failed. "
                  "It is a fallback method to upload something. Use it only when upload to google drive fails.")
def upload_to_dropbox(file_name: Annotated[str, "Name of the file to be uploaded. Required."],
                      file_content: Annotated[str, "Content of the file to be uploaded. Required."]) -> str:
    # Artificial tool: nothing is uploaded and `file_content` is not used; it
    # always reports success.
    outcome = f"Successfully uploaded {file_name} to Dropbox."
    print(outcome)
    return outcome


In [112]:
from pydantic import Field
from langgraph.graph import add_messages
from langchain_core.messages import AnyMessage
from pydantic import BaseModel


class State(BaseModel):
    """State carried between the nodes of the planner/agent/summarization graph."""

    # Conversation history; `add_messages` is the reducer attached to this field.
    messages: Annotated[List[AnyMessage], add_messages] = Field(default_factory=list)
    # Written by conversation_summarization_node.
    conversation_summary: Optional[str] = None
    # Written by answer_saver.
    answer: Optional[str] = None
    # Not written by any node visible in this notebook.
    user_query: Optional[str] = None


# Tool set for the bigger workflow: the earlier tools plus the two artificial upload tools.
# This rebinds `tools`, `tool_node` and `agent_node` from the previous section.
tools = [add_numbers,
         fetch_current_weather,
         rag_tool,
         upload_to_google_drive,
         upload_to_dropbox]

tool_node = ToolNode(tools)
# Same construction as before, but with the pydantic State as state schema.
agent_node = create_agent(
    model=llm,
    tools=tools,
    state_schema=State
)

def should_continue(state: State) -> Literal["tools", "answer_saver"]:
    """Route after the agent node: "tools" while tool calls are pending, otherwise "answer_saver"."""
    last_message = state.messages[-1] if state.messages else None
    # Message types other than AIMessage have no `tool_calls` attribute; getattr
    # sends those (and an empty history) on to answer_saver instead of raising.
    if getattr(last_message, "tool_calls", None):
        print("Routing to tools")
        return "tools"
    print("Routing to next node")
    return "answer_saver"

def planner_node(state: State) -> State:
    """Ask the model for a plan of action and append that plan to the history.

    The planning instruction is sent in front of the existing messages for this
    call only; it is not stored in the state.
    """
    planning_instructions = SystemMessage(
        "You are a task planning assistant. Given the conversation so far, "
        "create a plan of action to answer the user's last question. "
        "If tools are needed, specify which ones to use and in what order. "
        "If no tools are needed, indicate that as well."
    )
    plan = llm.invoke([planning_instructions, *state.messages])
    print("Planner output:", plan.content)
    state.messages.append(plan)
    return state

def answer_saver(state: State) -> State:
    """Copy the content of the newest message into ``state.answer``."""
    print("Saving final answer to special field.")
    final_message = state.messages[-1]
    state.answer = final_message.content
    return state

def conversation_summarization_node(state: State) -> State:
    """Ask the model to summarize the conversation.

    The model's reply is appended to the history and its content is stored in
    ``state.conversation_summary``.
    """
    print("Summarizing conversation so far.")
    summary_instructions = SystemMessage(
        "You are a conversation summarization assistant. "
        "Given the conversation so far, provide a concise summary "
        "of the key points discussed."
    )
    summary = llm.invoke([summary_instructions, *state.messages])
    print("Summarization output:", summary.content)
    state.messages.append(summary)
    state.conversation_summary = summary.content
    return state

def build_graph(callbacks: Optional[list] = None):
    """Assemble and compile the planner -> agent -> answer_saver -> summarization workflow.

    Args:
        callbacks: Callback handlers to bind to the compiled graph. When omitted,
            the notebook-level ``langfuse_handler`` is used if it has been
            defined; otherwise the graph is returned without callbacks.

    Returns:
        The compiled graph, with callbacks bound through ``with_config`` when there are any.
    """
    workflow = StateGraph(State)

    workflow.add_node("planner", planner_node)
    workflow.add_node("agent", agent_node)
    workflow.add_node("tools", tool_node)
    workflow.add_node("summarization", conversation_summarization_node)
    workflow.add_node("answer_saver", answer_saver)

    workflow.add_edge(START, "planner")
    workflow.add_edge("planner", "agent")
    # should_continue sends the agent's output either to "tools" or to "answer_saver".
    workflow.add_conditional_edges(
        "agent",
        should_continue
    )
    workflow.add_edge("answer_saver", "summarization")
    workflow.add_edge("summarization", END)
    workflow.add_edge("tools", "agent")

    compiled = workflow.compile()

    if callbacks is None:
        # `langfuse_handler` is created in the observability cell at the end of the
        # notebook; the lazy lookup avoids a NameError when that cell has not run yet.
        handler = globals().get("langfuse_handler")
        callbacks = [handler] if handler is not None else []

    return compiled.with_config(callbacks=callbacks) if callbacks else compiled

In [113]:
# Rebinds `graph` to the bigger workflow.
graph = build_graph()

# The invoke result is unpacked into State so that its fields can be read as attributes.
res = State(**graph.invoke({
    "messages": [
        SystemMessage("You are a helpful assistant that can use tools to answer user questions."),
        HumanMessage("Upload a file report.txt with content 'This is a sample report.' to cloud and also tell me the weather in Tokyo, Japan.")
    ]
}))

Planner output: Plan of Action:

1. Create a file named report.txt with the content "This is a sample report."
2. Upload the created file to the cloud (using the appropriate file upload tool).
3. Use a weather tool to fetch and display the current weather in Tokyo, Japan.
4. Provide both the link/access to the uploaded file and the weather information to the user.

Tools Needed:

- File creation/upload tool
- Weather information tool

Order of Execution:

1. Create and upload the report.txt file to the cloud.
2. Fetch the current weather in Tokyo, Japan.
3. Respond with the upload details and weather information.
Failed to upload report.txt to Google Drive.
Successfully uploaded report.txt to Dropbox.
Routing to next node
Saving final answer to special field.
Summarizing conversation so far.
Summarization output: Here is the information you requested:

1. The file report.txt with the content "This is a sample report." has been successfully uploaded to Dropbox.
2. The current weather in

In [None]:
# Continue from the previous result: same history, summary and answer.
state = res
# append adds the message itself; `+=` on a list would instead extend it with
# the elements obtained by iterating over the right-hand side.
state.messages.append(HumanMessage("Now tell me a sci-fy movie plot."))
res = graph.invoke(state)

## Add observability with LangFuse

In [111]:
from langfuse import Langfuse
from langfuse.langchain import CallbackHandler

# SECURITY: credentials are read from the environment (for example the .env file
# loaded by load_dotenv) and must not be committed with the notebook.
# A missing key fails fast with a KeyError.
langfuse = Langfuse(
  secret_key=os.environ["LANGFUSE_SECRET_KEY"],
  public_key=os.environ["LANGFUSE_PUBLIC_KEY"],
  # Falls back to the local Langfuse instance when LANGFUSE_HOST is unset.
  host=os.getenv("LANGFUSE_HOST", "http://127.0.0.1:3000")
)

# Callback handler that the bigger workflow's build_graph binds to the compiled graph.
langfuse_handler = CallbackHandler()
# Usage: workflow.compile().with_config(callbacks=[langfuse_handler])