# Performance benchmarking while developing and experimenting with LLMs.

In [4]:
! pip install langsmith openai
! pip install -qU requests bs4 lxml chromadb langchain langchain-text-splitters langchain-openai
! pip install -qU duckduckgo-search langchain-community ddgs



In [5]:
import os

# Tracing is the point of this notebook, so it is always switched on.
os.environ["LANGSMITH_TRACING"] = "true"

# Everything else is filled in only when the variable is not set yet, so
# credentials already exported in the shell (or injected by a secrets manager)
# are no longer overwritten with blanks. Put real values in the environment
# rather than in this cell; the empty strings are placeholders only.
os.environ.setdefault("LANGSMITH_ENDPOINT", "https://api.smith.langchain.com")
os.environ.setdefault("LANGSMITH_API_KEY", "")
os.environ.setdefault("LANGSMITH_PROJECT", "")
os.environ.setdefault("OPENAI_API_KEY", "")

## Parsing website

In [8]:
# kb_en_to_chroma.py — minimal & direct
import os, re, time, requests
from urllib.parse import urljoin, urldefrag
from bs4 import BeautifulSoup

# Site root; clean_url() resolves non-absolute hrefs against it.
BASE = "https://www.kapitalbank.az"
# Crawl seed; clean_url() also discards every URL that does not begin with it.
START = f"{BASE}/en"
# HTTP headers sent with every request made by the crawl loop.
UA = {"User-Agent": "kb-minicrawl/0.2"}
# Passed to requests.get() as the per-request timeout.
TIMEOUT = 15
# The crawl loop stops once this many URLs have been marked as visited.
MAX_PAGES = 50

def clean_url(u):
    """Normalise a link and decide whether the crawler should follow it.

    Args:
        u: An href value; absolute, root-relative or relative to BASE.

    Returns:
        The absolute URL without its fragment, or None when the link is empty,
        is not on START's scheme/host, lies outside START's path, or points to
        a binary/document file.
    """
    from urllib.parse import urlsplit

    u = urldefrag(u)[0].strip()  # hrefs are sometimes padded with whitespace
    if not u:
        return None
    if not u.startswith("http"):
        u = urljoin(BASE, u)

    target, root = urlsplit(u), urlsplit(START)
    if (target.scheme, target.netloc) != (root.scheme, root.netloc):
        return None
    # Compare on a path-segment boundary: a bare prefix test would also accept
    # siblings such as "/entity" when START ends in "/en".
    root_path = root.path.rstrip("/")
    if target.path != root_path and not target.path.startswith(root_path + "/"):
        return None
    # Test the extension on the path only, so "file.pdf?download=1" is caught.
    if re.search(r"\.(pdf|jpe?g|png|gif|svg|mp4|zip|docx?|xlsx?)$", target.path, re.I):
        return None
    return u

def extract_text(html):
    """Return the readable text of an HTML document as a single line.

    Script/style/navigation-type elements are removed first. Text is then taken
    from the first of <main>, <article>, <body> that exists, falling back to
    the whole document, and all whitespace runs are collapsed to one space.
    """
    soup = BeautifulSoup(html, "lxml")
    for unwanted in soup(["script","style","noscript","svg","footer","nav","header"]):
        unwanted.decompose()

    container = soup.select_one("main")
    if not container:
        container = soup.select_one("article")
    if not container:
        container = soup.body
    if not container:
        container = soup

    raw_text = container.get_text(" ", strip=True)
    return " ".join(raw_text.split())

from collections import deque

# Breadth-first crawl from START. `pages` collects {"url", "text"} records for
# every HTML page with more than 200 characters of extracted text.
visited, queue, pages = set(), deque([START]), []
enqueued = {START}  # every URL ever queued, so each link is queued at most once
while queue and len(visited) < MAX_PAGES:
    url = queue.popleft()
    if url in visited:
        continue
    visited.add(url)
    try:
        r = requests.get(url, headers=UA, timeout=TIMEOUT)
    except requests.RequestException:
        # Best effort: an unreachable page is skipped and never retried.
        continue

    enqueued.add(r.url)  # do not fetch a redirect target a second time
    if r.ok and "text/html" in r.headers.get("Content-Type", ""):
        txt = extract_text(r.text)
        if len(txt) > 200:
            pages.append({"url": url, "text": txt})
        # Parse again: extract_text() strips nav/header/footer, and those
        # elements are where many of the links live.
        s = BeautifulSoup(r.text, "lxml")
        for a in s.find_all("a", href=True):
            # Resolve against the page that was actually served, so links
            # relative to the current path are not misread as relative to BASE.
            u = clean_url(urljoin(r.url, a["href"].strip()))
            if u and u not in enqueued:
                enqueued.add(u)
                queue.append(u)
    time.sleep(0.15)  # small pause between requests

In [9]:
import json

# Checkpoint the crawl result on disk as pretty-printed UTF-8 JSON.
pages_outfile = "kapitalbank_pages.json"
with open(pages_outfile, "w", encoding="utf-8") as f:
    f.write(json.dumps(pages, indent=2, ensure_ascii=False))
print(f"Saved {len(pages)} pages to {pages_outfile}")

# Read the checkpoint straight back so `pages` holds exactly what is on disk
# before it is chunked and indexed.
with open(pages_outfile, "r", encoding="utf-8") as f:
    pages = json.loads(f.read())
print(f"Loaded {len(pages)} pages from {pages_outfile}")

Saved 39 pages to kapitalbank_pages.json
Loaded 39 pages from kapitalbank_pages.json


## Constructing vector store

In [10]:
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import Chroma

# ---- LangChain chunking ----
splitter = RecursiveCharacterTextSplitter(chunk_size=800, chunk_overlap=120)
docs, metas, ids = [], [], []
for p in pages:
    for chunk_no, chunk in enumerate(splitter.split_text(p["text"])):
        docs.append(chunk)
        metas.append({"url": p["url"]})
        # Deterministic id per (page, chunk): re-running this cell rewrites the
        # same records instead of appending a second copy of every chunk.
        # Assumes each URL occurs once in `pages` (the crawl visits a URL once).
        ids.append(f"{p['url']}#chunk-{chunk_no}")

# ---- OpenAI embeddings -> Chroma ----
persist_dir = "chroma_kapitalbank"
emb = OpenAIEmbeddings(model="text-embedding-3-small")  # cheap & solid
vs = Chroma.from_texts(
    texts=docs,
    embedding=emb,
    persist_directory=persist_dir,
    collection_name="kapitalbank_en",
    metadatas=metas,
    ids=ids,
)
# No vs.persist(): with Chroma 0.4+ a store created with persist_directory is
# written to disk automatically, and the explicit call is deprecated.
print(f"Indexed pages={len(pages)} chunks={len(docs)} into {persist_dir}/ (collection 'kapitalbank_en')")

Indexed pages=39 chunks=161 into chroma_kapitalbank/ (collection 'kapitalbank_en')


  vs.persist()


In [11]:
from langchain_community.vectorstores import Chroma
from langchain_openai import OpenAIEmbeddings

# Location and name of the collection to re-open from disk.
persist_dir = "chroma_kapitalbank"
collection_name = "kapitalbank_en"
emb = OpenAIEmbeddings(model="text-embedding-3-small")

# Attach to the persisted Chroma collection; `emb` is the embedding function
# the store is given for incoming queries.
vs = Chroma(collection_name=collection_name, embedding_function=emb, persist_directory=persist_dir)

  vs = Chroma(


## Tracing LLM call only

In [12]:
from openai import OpenAI
from langsmith.wrappers import wrap_openai

# LangSmith's wrap_openai wraps the OpenAI client; rag() below sends its chat
# completion request through this wrapped client.
openai_client = wrap_openai(OpenAI())

# `vs` (the Chroma store created in the cells above) must already exist:
# retriever() reads it as a global.

def retriever(query: str, k: int = 3):
    """Return the text of the `k` chunks in the Chroma store closest to `query`.

    Only `page_content` of each hit is returned; the metadata is dropped.
    """
    hits = vs.similarity_search(query, k=k)
    contents = []
    for hit in hits:
        contents.append(hit.page_content)
    return contents

def rag(question: str) -> str:
    """Answer `question` from the retrieved knowledge-base chunks.

    The chunks returned by retriever() are joined into the system prompt and
    the question is sent as the user message.

    Returns:
        The model's reply, or "" when the reply carries no text content.
    """
    docs = retriever(question)
    system_message = (
        "Answer the user's question using only the provided information below:\n"
        + "\n".join(docs)
    )

    # The wrapped client traces this completion call; the retrieval step and
    # rag() itself are not part of the trace because neither is decorated.
    resp = openai_client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[
            {"role": "system", "content": system_message},
            {"role": "user", "content": question},
        ],
    )
    # `content` can be None; normalise so the declared `str` return type holds.
    return resp.choices[0].message.content or ""

In [13]:
# Smoke test of the undecorated pipeline: prints the model's answer.
print(rag("Where Kapitalbank is located?"))

Kapital Bank is located in Azerbaijan and has the largest service network in the country, with 119 branches and 52 departments spread throughout.


## Tracing whole pipeline

In [14]:
from openai import OpenAI
from langsmith.wrappers import wrap_openai
from langsmith import traceable

# Same wrapped OpenAI client as before; rag_traceble() below calls it.
openai_client = wrap_openai(OpenAI())

# Depends on the global `vs` (Chroma store) created in the cells above;
# retriever() queries it.

def retriever(query: str, k: int = 3):
    """Fetch the `k` best-matching chunks for `query` from the Chroma store.

    The result is a list of plain strings (each hit's `page_content`).
    """
    return [match.page_content for match in vs.similarity_search(query, k=k)]

@traceable
def rag_traceble(question: str) -> str:
    """Traced variant of the RAG pipeline: retrieve context, then ask the model.

    The function is decorated with @traceable and makes its completion call
    through the wrapped OpenAI client.

    Returns:
        The model's reply, or "" when the reply carries no text content.
    """
    docs = retriever(question)
    system_message = (
        "Answer the user's question using only the provided information below:\n"
        + "\n".join(docs)
    )

    # Issued from inside the @traceable function via the wrapped client.
    resp = openai_client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[
            {"role": "system", "content": system_message},
            {"role": "user", "content": question},
        ],
    )
    # `content` can be None; normalise so the declared `str` return type holds.
    return resp.choices[0].message.content or ""

In [15]:
# Same question as before, this time through the @traceable pipeline.
print(rag_traceble("Where Kapitalbank is located?"))

Kapital Bank has the largest service network in Azerbaijan, with 119 branches and 52 departments all over the country.


## Collecting feedback, adding metadata

In [16]:
import uuid
from langsmith import Client

ls_client = Client()

# Generate the run id up front and hand it to the traced call, so the feedback
# below can reference that exact run.
run_id = str(uuid.uuid4())
rag_traceble("Where Kapitalbank is located?", langsmith_extra={"run_id": run_id})

# Last expression of the cell: the created Feedback object is the cell output.
ls_client.create_feedback(run_id, key="user-score", score=0.5)

Feedback(id=UUID('7c39ae82-8850-4c21-b54a-68235a43919d'), created_at=datetime.datetime(2025, 11, 1, 11, 42, 11, 582984, tzinfo=datetime.timezone.utc), modified_at=datetime.datetime(2025, 11, 1, 11, 42, 11, 582988, tzinfo=datetime.timezone.utc), run_id=UUID('b10fbe4d-b952-4dc8-81aa-e659470915b2'), trace_id=None, key='user-score', score=0.5, value=None, comment=None, correction=None, feedback_source=FeedbackSourceBase(type='api', metadata={}, user_id=None, user_name=None), session_id=None, comparative_experiment_id=None, feedback_group_id=None, extra=None)

In [17]:
# A fresh run id plus a metadata dict are passed through `langsmith_extra`.
# The call is the last expression, so its return value is the cell output.
run_id = str(uuid.uuid4())
rag_traceble(
    "Where Kapitalbank is located?",
    langsmith_extra={"run_id": run_id, "metadata": {"user_id": "alex_user_id"}}
)

'Kapital Bank has the largest service network in Azerbaijan, with 119 branches and 52 departments all over the country.'

## Adding a tool and maximizing traceability

In [18]:
import uuid, json
from openai import OpenAI
from langsmith.wrappers import wrap_openai
from langsmith import traceable
from langchain_community.tools import DuckDuckGoSearchRun

# Wrapped OpenAI client used for every chat completion in this cell.
openai_client = wrap_openai(OpenAI())
# LangChain's DuckDuckGo search tool; duckduckgo_search_traced() calls ddg.run(query).
ddg = DuckDuckGoSearchRun()

# -------- traced retriever ----------
@traceable(name="retriever", run_type="retriever", tags=["rag"])
def retriever(query: str, k: int = 3):
    """Return the text of the `k` Chroma chunks most similar to `query`."""
    return [match.page_content for match in vs.similarity_search(query, k=k)]

# -------- traced tool wrapper ----------
@traceable(name="duckduckgo_search", run_type="tool", tags=["tool","ddg"])
def duckduckgo_search_traced(query: str, tool_call_id: str | None = None) -> str:
    """Run `query` through the DuckDuckGo tool and return its result.

    `tool_call_id` is accepted but not used by the body.
    NOTE(review): presumably it is there so the id appears among the traced
    inputs; confirm.
    """
    search_result = ddg.run(query)
    return search_result

# Tool schema passed as `tools=` to the chat completion call. It declares one
# function, "duckduckgo_search", taking a single required string argument
# `query`; the tool-call handling code dispatches on that same function name.
OPENAI_TOOLS = [{
    "type": "function",
    "function": {
        "name": "duckduckgo_search",
        "description": "Search the web via DuckDuckGo and return a brief textual summary of results.",
        "parameters": {
            "type": "object",
            "properties": {"query": {"type": "string"}},
            "required": ["query"],
            "additionalProperties": False
        }
    }
}]

def _run_tool_call(tc, question: str) -> str:
    """Execute one model-requested tool call and return its output as text.

    Problems (unknown tool, malformed arguments, search failure) are returned
    as text instead of raised, so every tool_call id still gets a `tool`
    message and the model can recover or answer without the tool.
    """
    fn_name = tc.function.name
    if fn_name != "duckduckgo_search":
        return f"Tool {fn_name} not implemented."

    try:
        args = json.loads(tc.function.arguments or "{}")
    except json.JSONDecodeError as exc:
        return f"Invalid JSON arguments for {fn_name}: {exc}"

    # Fall back to the user's question when the model supplied no usable query.
    q = (args.get("query") if isinstance(args, dict) else None) or question
    try:
        return duckduckgo_search_traced(q, tool_call_id=tc.id)
    except Exception as exc:
        # The traced wrapper has already recorded the error on its own run;
        # report it to the model rather than aborting the whole chain.
        return f"Search failed: {exc}"


@traceable(name="rag_traceable_search", run_type="chain", tags=["rag"])
def rag_traceable_search(question: str, *, max_tool_rounds: int = 5, **kwargs) -> str:
    """Answer `question` from retrieved context, with web search as a fallback.

    Args:
        question: The user's question.
        max_tool_rounds: Upper bound on model turns that may request tools.
            Once it is used up, one last completion is requested with
            tool_choice="none" so the loop always terminates with an answer.
        **kwargs: Accepted for call-site compatibility; not used by the body.

    Returns:
        The model's final text answer ("" when it has no text content).
    """
    docs = retriever(question)
    system_message = (
        "Use ONLY the provided context to answer. If insufficient, call duckduckgo_search.\n\n"
        "Context:\n" + "\n---\n".join(docs)
    )

    messages = [
        {"role": "system", "content": system_message},
        {"role": "user", "content": question},
    ]

    tool_rounds = max(0, max_tool_rounds)
    for round_no in range(tool_rounds + 1):
        tools_allowed = round_no < tool_rounds
        resp = openai_client.chat.completions.create(
            model="gpt-4o-mini",
            messages=messages,
            tools=OPENAI_TOOLS,
            tool_choice="auto" if tools_allowed else "none",
        )
        msg = resp.choices[0].message
        tool_calls = getattr(msg, "tool_calls", None)

        # --- no tool calls (or budget exhausted) -> final answer ---
        if not tool_calls or not tools_allowed:
            return msg.content or ""

        # 1) append the assistant message that *contains* the tool_calls
        messages.append({
            "role": "assistant",
            "content": msg.content or "",
            "tool_calls": [
                {
                    "id": tc.id,
                    "type": "function",
                    "function": {
                        "name": tc.function.name,
                        "arguments": tc.function.arguments,
                    },
                }
                for tc in tool_calls
            ],
        })

        # 2) execute each tool and append a 'tool' message per tool_call id
        for tc in tool_calls:
            messages.append({
                "role": "tool",
                "tool_call_id": tc.id,   # must match the assistant tool_call id above
                "content": _run_tool_call(tc, question),
            })
        # 3) next iteration lets the model use the tool results

    # Not reached: the last round above always returns.
    return ""

In [19]:
# Run the tool-enabled chain under a fresh run id, attaching user/search
# metadata through `langsmith_extra`, then print the final answer.
run_id = str(uuid.uuid4())
answer = rag_traceable_search(
    "What is revenue of Kapitalbank in Azerbaijan?",
    langsmith_extra={"run_id": run_id, "metadata": {"user_id": "alex_user_id", "search_enabled": True}},
)
print(answer)

As of October 1, 2025, Kapital Bank reported a net profit of 229 million AZN for the third quarter of 2025. For the year, predictions indicated a net profit range between 180 million AZN to 190 million AZN. Thus, the revenue specifics can be inferred from these profit figures, but exact revenue numbers were not provided in the available information.
