## Setup
Configuring environment variables and paths

In [None]:
# Setup: Load environment and enable local imports
import sys, os
from dotenv import load_dotenv

# The notebook is expected to be launched from a folder one level below the
# repo root (e.g. /examples or /notebooks), so the root is the parent of the
# current working directory.
repo_root = os.path.abspath(os.path.join(os.getcwd(), os.pardir))

# The .env file is looked up at the root of the repo.
dotenv_path = os.path.join(repo_root, ".env")

# Put the repo root at the front of sys.path exactly once so project-local
# packages can be imported; re-running this cell does not add duplicates.
if sys.path.count(repo_root) == 0:
    sys.path.insert(0, repo_root)

load_dotenv(dotenv_path)

# Importing Project Module
from chat_utils.citing import generate_cited_response

## Sample Query
A sample query with hand-written chunks that simulate retrieval results. The chunks are passed to `generate_cited_response`, which performs augmented generation and returns a response with in-text citations.

In [17]:
result = await generate_cited_response(
    chunks=[
        {
            "text": (
                "George currently possesses a variety of fruits in his market stall, including three bunches of grapes and a crate of strawberries. "
                "The grapes are organic, grown locally, and usually sold by the bunch. He typically sells the grapes in quantities of one or more, depending on customer demand. "
                "The strawberries, on the other hand, are imported and priced differently based on weight rather than quantity."
            ),
            "uuid": "11111111-aaaa-bbbb-cccc-000000000001",
            "render_name": "market_inventory.txt",
            "source_data": {
                "url": "https://example.com/market_inventory",
                "filename": "market_inventory.txt",
                "file_type": "txt",
                "document_uuid": "doc-001"
            }
        },
        {
            "text": (
                "Natalie recently exchanged €5.00 into U.S. dollars and received exactly $5.45 USD. "
                "She has expressed interest in buying grapes and strawberries from George. "
                "However, she also mentioned she wants to keep $1.00 in reserve to buy bread from a nearby bakery later in the day."
            ),
            "uuid": "22222222-bbbb-cccc-dddd-000000000002",
            "render_name": "wallet_and_intentions.txt",
            "source_data": {
                "url": "https://example.com/wallet",
                "filename": "wallet_and_intentions.txt",
                "file_type": "txt",
                "document_uuid": "doc-002"
            }
        },
        {
            "text": (
                "George stated that he is willing to sell grapes for fifty cents per bunch, but added that if someone buys three bunches, he will offer them for a flat $1.25. "
                "Strawberries are sold for $3.00 per crate, and George is unwilling to negotiate on that price unless multiple crates are bought. "
                "All prices are quoted in USD and must be paid in cash."
            ),
            "uuid": "33333333-cccc-dddd-eeee-000000000003",
            "render_name": "pricing_policy.txt",
            "source_data": {
                "url": "https://example.com/pricing",
                "filename": "pricing_policy.txt",
                "file_type": "txt",
                "document_uuid": "doc-003"
            }
        },
        {
            "text": (
                "A bystander once noted that George used to raise llamas on a small patch of land adjacent to his current fruit farm. "
                "While this fact has no bearing on his current business dealings, it is often brought up in conversation due to the llama’s quirky behavior."
            ),
            "uuid": "44444444-dddd-eeee-ffff-000000000004",
            "render_name": "llama_memoirs.txt",
            "source_data": {
                "url": "https://example.com/llamas",
                "filename": "llama_memoirs.txt",
                "file_type": "txt",
                "document_uuid": "doc-004"
            }
        }
    ],
    system_prompt=(
        "You are a helpful assistant. Use the provided excerpts to fully analyze and answer the user's question. "
        "You must rely only on information from the excerpts, and cite every excerpt you used using the $REF: ID$ format. "
        "If any information is irrelevant, do not cite it or mention it. Think carefully and show clear reasoning."
        "provide your answer as a \"rationale: \" which includes in-text citations, and \"final answer: \" which is a concise answer to the question. "
        "the final answer should be separated by the rationale by a paragraph break. The rationale should not include newlines."
    ),
    query="Can Natalie afford to buy all of George's grapes and also buy strawberries?"
)

In [19]:
# Show the raw response string; citations appear inline as <InTextCitation> tags.
result

'Rationale: Natalie has $5.45 USD after exchanging her euros, but she wants to keep $1.00 in reserve for bread, leaving her with $4.45 to spend on fruits \n<InTextCitation chunkId="22222222-bbbb-cccc-dddd-000000000002" renderName="wallet_and_intentions.txt" url="https%3A//example.com/wallet" filename="wallet_and_intentions.txt" file_type="txt" document_uuid="doc-002"></InTextCitation>. George has three bunches of grapes and sells them for $0.50 each, or $1.25 for all three bunches \n<InTextCitation chunkId="33333333-cccc-dddd-eeee-000000000003" renderName="pricing_policy.txt" url="https%3A//example.com/pricing" filename="pricing_policy.txt" file_type="txt" document_uuid="doc-003"></InTextCitation>. Therefore, Natalie can buy all three bunches of grapes for $1.25. George sells strawberries for $3.00 per crate \n<InTextCitation chunkId="33333333-cccc-dddd-eeee-000000000003" renderName="pricing_policy.txt" url="https%3A//example.com/pricing" filename="pricing_policy.txt" file_type="txt" d

## Rendering the HTML
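In-text citations are injected into the response text as `<InTextCitation>` tags. The function below wraps the response in a styled HTML page, renders each citation tag as a small badge showing its `renderName`, and attaches a click handler that opens the citation's URL in a new tab.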

In [27]:
from IPython.display import display, HTML
import re

def render_citation_with_clickable_custom_tags(html: str) -> str:
    """Wrap a cited response in a styled HTML page with clickable citation tags.

    The input is embedded as-is (no escaping) inside a ``<div class="page">``
    so that any ``<InTextCitation>`` tags it contains are parsed as elements.
    The accompanying CSS draws each tag as a badge labelled with its
    ``renderName`` attribute, and the accompanying script opens the tag's
    URL-decoded ``url`` attribute in a new tab when the badge is clicked.

    If the text contains "final answer:" (case-insensitive), everything after
    the first occurrence is wrapped in ``<span class="final-answer">``.

    Args:
        html: Response text, optionally containing ``<InTextCitation>`` tags.

    Returns:
        An HTML fragment (style + page div + script) suitable for
        ``IPython.display.HTML``.
    """
    # Locate the first "final answer:" marker (case-insensitive) and wrap the
    # remainder of the text so it can be targeted by the .final-answer rule.
    match = re.search(r"(final answer:)", html, re.IGNORECASE)
    if match:
        start = match.end()
        html = html[:start] + '<span class="final-answer">' + html[start:] + '</span>'

    # Literal braces in the CSS/JS are doubled because this is an f-string;
    # only {html} is interpolated.
    styled_html = f"""
    <style>
        .page {{
            padding: 2rem;
            font-family: Arial, sans-serif;
            line-height: 1.6;
            background-color: #fefefe;
            border: 1px solid #ddd;
            border-radius: 8px;
            max-width: 800px;
            margin: 2rem auto;
            box-shadow: 0 2px 5px rgba(0,0,0,0.1);
            white-space: normal;
        }}

        intextcitation {{
            display: inline-block;
            background-color: #e0e0ff;
            color: #333;
            border: 1px solid #aaa;
            border-radius: 5px;
            padding: 2px 8px;
            margin: 0 3px;
            font-size: 0.85em;
            font-family: monospace;
            cursor: pointer;
        }}

        intextcitation:hover {{
            background-color: #d0d0f0;
        }}

        intextcitation::before {{
            content: attr(renderName);
        }}

        .final-answer {{
            color: black;
        }}
    </style>

    <div class="page">
        {html}
    </div>

    <script>
        setTimeout(() => {{
            document.querySelectorAll("intextcitation").forEach(el => {{
                // The selector spans the whole document, so tags rendered by
                // an earlier output are seen again here. Bind each tag only
                // once; otherwise one click would open several tabs.
                if (el.dataset.citationBound) {{
                    return;
                }}
                el.dataset.citationBound = "true";
                el.addEventListener("click", () => {{
                    const encodedUrl = el.getAttribute("url");
                    if (encodedUrl) {{
                        const decodedUrl = decodeURIComponent(encodedUrl);
                        window.open(decodedUrl, "_blank");
                    }}
                }});
            }});
        }}, 0);
    </script>
    """
    return styled_html

# Render the response inline in the notebook: citation tags become clickable
# badges, and the text after "final answer:" is wrapped in a .final-answer span.
display(HTML(render_citation_with_clickable_custom_tags(result)))


## Integrating with GroundX

**Downloading Files**

zsh:1: command not found: pip


In [28]:
import arxiv
import os

# Target download directory, relative to the notebook's working directory.
output_dir = os.path.join("temp", "arxiv_papers")
os.makedirs(output_dir, exist_ok=True)

# arXiv IDs mapped to the filename each PDF is saved under.
papers = {
    "1706.03762": "attention_is_all_you_need.pdf",   # Attention is All You Need
    "1901.02860": "transformer_xl.pdf",              # Transformer-XL
    "2104.09864": "rope_roformer.pdf",               # RoPE (RoFormer)
}

# A single client is reused for every lookup. Client.results(search) is the
# documented replacement for the deprecated Search.results().
arxiv_client = arxiv.Client()

# Download each paper, skipping files that are already on disk so that
# re-running the notebook does not fetch the same PDFs again.
for arxiv_id, filename in papers.items():
    target_path = os.path.join(output_dir, filename)
    if os.path.exists(target_path):
        print(f"Skipping {filename} (arXiv:{arxiv_id}) - already downloaded.")
        continue

    print(f"Downloading {filename} (arXiv:{arxiv_id})...")
    paper = next(arxiv_client.results(arxiv.Search(id_list=[arxiv_id])), None)
    if paper is None:
        # Fail with the offending ID instead of a bare StopIteration.
        raise LookupError(f"arXiv returned no result for id {arxiv_id}")
    paper.download_pdf(dirpath=output_dir, filename=filename)

print("✅ All downloads complete.")

ModuleNotFoundError: No module named 'arxiv'