#. 🏦 Banking Service Agent 💵

In [1]:
%pip install -q openai qdrant-client docling fastembed jupyter-chat-widget

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m67.8/67.8 kB[0m [31m1.8 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m162.6/162.6 kB[0m [31m4.0 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.0/44.0 kB[0m [31m3.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m377.2/377.2 kB[0m [31m9.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m304.0/304.0 kB[0m [31m21.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m108.5/108.5 kB[0m [31m8.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m238.6/238.6 kB[0m [31m18.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m87.4/87.4 kB[0m [31m7.0 MB/s[0m e

In [3]:
from jupyter_chat_widget import ChatUI
from time import sleep

chat = ChatUI()

def answers(text):
  chat.rewrite("Thinking...")
  sleep(3)
  chat.rewrite("")
  chat.append("Hello "+ text)

chat.connect(answers)


Output()

Output()

Text(value='', description='user: ')

# Prep Functions

In [4]:
import openai
import os
from google.colab import userdata


def get_oai_client():
    """
        Creates the OpenAI client.
    """
    os.environ['OPENAI_API_KEY'] = userdata.get('OPENAI_API_KEY')
    client = openai.OpenAI()
    messages = [{"role": "system",
                 "content": """You are a helpful banking assistant,
                  you must paraphrase the policy undertand customer questions
                   and provide easy to understand, Never can provide the explicit policy. Output in HTML."""}]
    return client, messages


def query_llm(ui, client, messages):
    """
        Query the LLM and returns a stream handle to the response.
    """
    ui.rewrite("[Generating]")
    stream = client.chat.completions.create(
        model="gpt-5.2",  # Or another suitable model
        messages=messages, # Use the message history
        stream=True, # Enable streaming
    )
    return stream


def display_response(ui, stream):
    """
        Display the LLM's response in the UI one token at a time.
    """
    ui.rewrite("")
    complete_response = ""
    for chunk in stream:
        if (content := chunk.choices[0].delta.content) and content.strip():
            complete_response += content
            ui.append(content)
    return complete_response


# Import Policy

In [5]:
import os
import requests

urls_and_filenames = [
    (
        "https://raw.githubusercontent.com/JeisonRobles/Banking-Service-Agent/main/Bank_Policy_1",
        "Bank_Policy_1.md"
    ),
]

os.makedirs("documents", exist_ok=True)

for url, filename in urls_and_filenames:
    response = requests.get(url)
    response.raise_for_status()
    with open(os.path.join("documents", filename), "wb") as f:
        f.write(response.content)

# Documents processing

In [6]:
from docling.document_converter import DocumentConverter
from docling.chunking import HybridChunker
from docling.datamodel.base_models import InputFormat
from docling.document_converter import DocumentConverter
from uuid import uuid4
from qdrant_client import QdrantClient, models


def ingest_documents(ui, paths):
    print("Ingesting documents...")
    print(paths)
    # Prepare the vectordb and embedder
    vdb = QdrantClient(location=":memory:")
    dense_model = "sentence-transformers/all-MiniLM-L6-v2"
    vdb.set_model(dense_model)
    collection_name = "documents"
    vdb.create_collection(
        collection_name=collection_name,
        vectors_config=vdb.get_fastembed_vector_params(),
    )
    points = []
    for path in paths:
        print(path)

    print(f"processing path: {paths}")

    ui.rewrite(f"[Parsing {paths}...]")


    for path in paths:

        print(f"processing path: {path}")

        ui.rewrite(f"[Parsing {path}...]")

        # Parse the document with docling
        doc = DocumentConverter().convert(source=path).document

        # Chunk the document
        chunker = HybridChunker()
        chunk_iter = chunker.chunk(dl_doc=doc)

        # Enrich the chunks and build Qdrant points
        for chunk in chunk_iter:
            enriched_text = chunker.contextualize(chunk=chunk)
            meta = chunk.meta.export_json_dict()
            points.append(
                models.PointStruct(
                    id=uuid4().hex,
                    payload=meta | {"document": enriched_text},
                    vector={
                        # FastEmbed uses named vector fields derived from the model names
                        vdb.get_vector_field_name(): models.Document(text=enriched_text, model=dense_model),
                    },
                )
            )
    # Upload (embeddings happen internally because we used models.Document)
    vdb.upload_points(collection_name=collection_name, points=points, batch_size=64, wait=True)
    ui.rewrite(f"All documents were processed. I'm ready!")
    return vdb

def add_context(ui, vdb, query):
    """
        Queries the vector database for relevant context snippets
        and adds them to the LLM's context.
    """

    print("Adding context...")
    ui.rewrite("[Searching]")
    samples = vdb.query(
        collection_name="documents",
        query_text=query,
        limit=10,
    )
    ui.rewrite(f"[Found {len(samples)} relevant snippets]")
    sleep(1)
    return {
        "role": "user",
        "content": f"Relevant snippets from the document: {'\n\n'.join(s.document for s in samples)}"
    }



In [7]:
def basic_rag(paths):
    """
        Basic RAG on a set of documents.
    """
    ui = ChatUI()
    vdb = ingest_documents(ui, [paths])
    client, messages = get_oai_client()

    def _basic_rag(query):
        messages.append({"role": "user", "content": query})
        messages.append(add_context(ui, vdb, query))
        stream = query_llm(ui, client, messages)
        response = display_response(ui, stream)
        messages.append({"role": "assistant", "content": response})

    ui.connect(lambda query: _basic_rag(query))



In [8]:
basic_rag("/content/documents/Bank_Policy_1.md")

Output()

Output()

Text(value='', description='user: ')

Ingesting documents...
['/content/documents/Bank_Policy_1.md']


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Fetching 5 files:   0%|          | 0/5 [00:00<?, ?it/s]

config.json:   0%|          | 0.00/650 [00:00<?, ?B/s]

tokenizer_config.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/695 [00:00<?, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

model.onnx:   0%|          | 0.00/90.4M [00:00<?, ?B/s]

/content/documents/Bank_Policy_1.md
processing path: ['/content/documents/Bank_Policy_1.md']
processing path: /content/documents/Bank_Policy_1.md


tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

Adding context...




___
# Retrieval as a tool

In [9]:
import json

def _invoke_tool(ui, client, vdb, messages, tools, first_token, stream):
    # Collect the tool's data
    tool_call = first_token.choices[0].delta.tool_calls[0]
    tool_name = tool_call.function.name
    ui.rewrite(f"Calling tool {tool_name}")

    # Collect the parameters
    args_json = ""
    for token in stream:
        if token.choices[0].delta.tool_calls:
            args_json += token.choices[0].delta.tool_calls[0].function.arguments
    tool_args = json.loads(args_json)

    # Invoke the tool
    tool_implementation = tool_implementations[tool_name]
    output = tool_implementation(ui, vdb, **tool_args)

    # Add the output to the messages
    messages.append(output)

    # call the LLM again
    return query_llm_with_tools(ui, client, vdb, messages, tools)


def query_llm_with_tools(ui, client, vdb, messages, tools):
    """
        Query the LLM and returns a stream handle to the response.
        If the LLM is trying to invoke tools, it will be done here
        in a loop until the LLM decides to reply to the user.
    """
    ui.rewrite("[Generating]")
    stream = client.chat.completions.create(
        model="gpt-5.2",  # Or another suitable model
        messages=messages, # Use the message history
        stream=True, # Enable streaming
        tools=tools
    )
    first_token = next(stream)
    if first_token.choices[0].delta.tool_calls:
        return _invoke_tool(ui, client, vdb, messages, tools, first_token, stream)
    else:
        return stream

In [10]:
tool_implementations = {
    "search": add_context
}

search = {
    "type": "function",
    "function": {
        "name": "search",
        "description": "Use this tool when the user is asking you something you don't know. You should use this tool very often and you can call it many times in a row if necessary.",
        "parameters": {
            "type": "object",
            "properties": {
                "query": {
                    "type": "string",
                    "description":
                        "The query to retrieve the information from the document store using pure embedding similarity search",
                },
            },
            "required": ["query"],
        },
    }
}

def retrieval_as_tool(paths):
    ui = ChatUI()
    vdb = ingest_documents(ui, paths)
    client, messages = get_oai_client()

    def _retrieval_as_tool(query):
        messages.append({"role": "user", "content": query})
        stream = query_llm_with_tools(ui, client, vdb, messages, [search])
        response = display_response(ui, stream)
        messages.append({"role": "assistant", "content": response})

    ui.connect(lambda query: _retrieval_as_tool(query))

In [11]:
retrieval_as_tool(["/content/documents/Bank_Policy_1.md"])

Output()

Output()

Text(value='', description='user: ')

Ingesting documents...
['/content/documents/Bank_Policy_1.md']
/content/documents/Bank_Policy_1.md
processing path: ['/content/documents/Bank_Policy_1.md']
processing path: /content/documents/Bank_Policy_1.md
Adding context...
