In [None]:
!pip install "langchain[google-genai]" langchain-community faiss-cpu jsonlines

Collecting langchain-community
  Downloading langchain_community-0.3.27-py3-none-any.whl.metadata (2.9 kB)
Collecting faiss-cpu
  Downloading faiss_cpu-1.12.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (5.1 kB)
Collecting jsonlines
  Downloading jsonlines-4.0.0-py3-none-any.whl.metadata (1.6 kB)
Collecting langchain-google-genai (from langchain[google-genai])
  Downloading langchain_google_genai-2.1.9-py3-none-any.whl.metadata (7.2 kB)
Collecting dataclasses-json<0.7,>=0.5.7 (from langchain-community)
  Downloading dataclasses_json-0.6.7-py3-none-any.whl.metadata (25 kB)
Collecting marshmallow<4.0.0,>=3.18.0 (from dataclasses-json<0.7,>=0.5.7->langchain-community)
  Downloading marshmallow-3.26.1-py3-none-any.whl.metadata (7.3 kB)
Collecting typing-inspect<1,>=0.4.0 (from dataclasses-json<0.7,>=0.5.7->langchain-community)
  Downloading typing_inspect-0.9.0-py3-none-any.whl.metadata (1.5 kB)
Collecting filetype<2.0.0,>=1.2.0 (from langchain-google-genai->langch

In [None]:
import os
import requests
import json
from tqdm import tqdm
from datetime import datetime

# Base URL of the CourtListener REST API (v4).
API_BASE = "https://www.courtlistener.com/api/rest/v4"
# Read the API token from the environment so no secret lives in the notebook.
# When the variable is unset, TOKEN is None and requests are sent without auth.
TOKEN = os.getenv("COURTLISTENER_API_TOKEN")

# Authorization header sent with every request; empty when no token is configured.
headers = {"Authorization": f"Token {TOKEN}"} if TOKEN else {}

def ingest_courtlistener(max_records=1000):
    """Fetch up to ``max_records`` opinions from the CourtListener API.

    Starts at ``{API_BASE}/opinions/`` and follows each response's ``next``
    link until the limit is reached or there are no more pages. Every result
    item is mapped to a flat record with ``id``, ``source``, ``doc_type``,
    ``text`` and a ``metadata`` dict.

    Args:
        max_records: Maximum number of records to collect.

    Returns:
        A list of record dicts, at most ``max_records`` long.

    Raises:
        requests.HTTPError: If any page request returns an error status
            (raised by ``resp.raise_for_status()``).
    """
    results = []
    url = f"{API_BASE}/opinions/"

    while url and len(results) < max_records:
        resp = requests.get(url, headers=headers, timeout=60)
        resp.raise_for_status()
        data = resp.json()

        for item in data.get("results", []):
            if len(results) >= max_records:
                break

            # "court" may be absent, null, or a non-dict value; only a dict
            # can carry a "name", so anything else yields None instead of
            # raising AttributeError.
            court = item.get("court")
            court_name = court.get("name") if isinstance(court, dict) else None

            results.append({
                "id": f"cl:{item.get('id')}",
                "source": "courtlistener",
                "doc_type": "case",
                # Prefer plain text; fall back to the HTML rendering.
                "text": item.get("plain_text") or item.get("html"),
                "metadata": {
                    "case_name": item.get("caseName") or item.get("case_name"),
                    "court": court_name,
                    "date_filed": item.get("dateFiled"),
                    "citation": item.get("citation"),
                    "docket_number": item.get("docket_number"),
                    "url": item.get("absolute_url"),
                }
            })

        # A missing/None "next" link ends the pagination loop.
        url = data.get("next")

    return results

if __name__ == "__main__":
  # Download the opinions and persist them as JSON Lines (one record per line).
  court_listner_data = ingest_courtlistener()
  print(len(court_listner_data))
  # Guard the sample print: indexing an empty result list would raise IndexError.
  if court_listner_data:
      print(court_listner_data[0])
  output_file = "courtlistener_cases.jsonl"
  with open(output_file, "w", encoding="utf-8") as f:
      for case in court_listner_data:
          f.write(json.dumps(case) + "\n")

  print(f"Saved {len(court_listner_data)} cases to {output_file}")

1000
{'id': 'cl:11126067', 'source': 'courtlistener', 'doc_type': 'case', 'text': "          IN THE COURT OF CRIMINAL APPEALS\n                      OF TEXAS\n\n                            NO. PD-0522-21, 0523-24,\n                                0524-21, 0525-21\n\n\n           EX PARTE ROBBIE GAIL CHARETTE, Appellant\n\n\n On State’s Motion for Rehearing After Opinion on Appellant’s Petition\n    for Discretionary Review from the Fourteenth Court of Appeals\n                         Washington County\n\n            SCHENCK, P.J., delivered the opinion in which YEARY, KEEL,\n      FINLEY, and PARKER, JJ., joined. RICHARDSON, J., filed a concurring\n      opinion in which NEWELL, J., joined. NEWELL and MCCLURE, JJ.,\n      concurred. WALKER, J., dissented.\n\n                                   OPINION\n\n      This matter is before us on rehearing. In our original opinion, the Court agreed\n\nthat Appellant’s pretrial writ of habeas corpus was properly before us as her\n\nchallenge pre

In [None]:
import json
import os
from langchain.schema import Document

def load_cuad_json(cuad_json_path: str):
    """
    Load a CUAD JSON file and return a list of LangChain Documents.

    The file is expected to be a dict with a ``"data"`` list; each entry has a
    ``"title"`` and a ``"paragraphs"`` list, each paragraph a ``"qas"`` list,
    and each qa a ``"question"`` plus an ``"answers"`` list of dicts with a
    ``"text"`` field. One Document is emitted per non-empty answer text, with
    metadata ``contract_id`` (the entry title), ``clause_type`` (the question)
    and ``clause_text`` (the same text as ``page_content``).

    Malformed nested entries are skipped with a logged warning.

    Raises:
        TypeError: If the top-level structure is not a dict with a ``"data"`` list.
    """
    # Local import: this cell's import block does not bring in ``logging``,
    # so every warning path below would otherwise fail with NameError.
    import logging

    with open(cuad_json_path, "r", encoding="utf-8") as f:
        data = json.load(f)

    # The top level must be a dictionary with a 'data' key containing a list of documents
    if not isinstance(data, dict) or "data" not in data or not isinstance(data["data"], list):
        raise TypeError("Unexpected JSON structure. Expected a dictionary with a 'data' key containing a list.")

    documents = []

    for document_data in data["data"]:
        if not isinstance(document_data, dict):
            logging.warning(f"Expected document data to be a dictionary, but got {type(document_data)}. Skipping.")
            continue

        contract_id = document_data.get("title", "Unknown")
        paragraphs = document_data.get("paragraphs", [])

        if not isinstance(paragraphs, list):
            logging.warning(f"Expected 'paragraphs' to be a list in document {contract_id}, but got {type(paragraphs)}. Skipping.")
            continue

        for paragraph in paragraphs:
            if not isinstance(paragraph, dict):
                logging.warning(f"Expected paragraph to be a dictionary in document {contract_id}, but got {type(paragraph)}. Skipping.")
                continue

            qas = paragraph.get("qas", [])

            if not isinstance(qas, list):
                logging.warning(f"Expected 'qas' to be a list in document {contract_id}, paragraph, but got {type(qas)}. Skipping.")
                continue

            for qa in qas:
                if not isinstance(qa, dict):
                    logging.warning(f"Expected qa to be a dictionary in document {contract_id}, paragraph, but got {type(qa)}. Skipping.")
                    continue

                answers = qa.get("answers", [])
                clause_type = qa.get("question", "Unknown Clause Type")

                if not isinstance(answers, list):
                    logging.warning(f"Expected 'answers' to be a list in document {contract_id}, paragraph, qa, but got {type(answers)}. Skipping.")
                    continue

                for answer in answers:
                    if not isinstance(answer, dict):
                        logging.warning(f"Expected answer to be a dictionary in document {contract_id}, paragraph, qa, but got {type(answer)}. Skipping.")
                        continue

                    raw_text = answer.get("text", "")
                    # A null or non-string "text" cannot be stripped; skip it
                    # like the other malformed entries instead of crashing.
                    if not isinstance(raw_text, str):
                        logging.warning(f"Expected answer text to be a string in document {contract_id}, but got {type(raw_text)}. Skipping.")
                        continue

                    clause_text = raw_text.strip()
                    if not clause_text:  # only keep non-empty
                        continue

                    documents.append(
                        Document(
                            page_content=clause_text,
                            metadata={
                                "contract_id": contract_id,
                                "clause_type": clause_type,
                                "clause_text": clause_text,
                            }
                        )
                    )
    return documents


def save_as_jsonl(documents, output_path: str):
    """Write ``documents`` to ``output_path`` as JSON Lines.

    Each document becomes one line holding its ``page_content`` under
    ``"text"`` and its ``metadata`` under ``"metadata"``.
    """
    import jsonlines

    with jsonlines.open(output_path, mode="w") as sink:
        for entry in documents:
            record = {
                "text": entry.page_content,
                "metadata": entry.metadata
            }
            sink.write(record)


if __name__ == "__main__":
    CUAD_JSON_PATH = "/content/drive/MyDrive/Colab Notebooks/RAG_legal/CUAD_v1/CUAD_v1.json"
    OUTPUT_JSONL = "cuad_preprocessed.jsonl"

    # Fail early with a readable message when the dataset file is missing.
    if not os.path.exists(CUAD_JSON_PATH):
        raise FileNotFoundError(f"Could not find {CUAD_JSON_PATH}. Check your path!")

    docs = load_cuad_json(CUAD_JSON_PATH)
    print(f"Loaded {len(docs)} clauses from CUAD.")
    # Show the first clause as a sanity check, or say that nothing was loaded.
    print(f"Sample document: {docs[0]}" if docs else "No documents were loaded.")

    # Persist the clauses so later cells can reuse them without re-parsing.
    save_as_jsonl(docs, OUTPUT_JSONL)
    print(f"Saved preprocessed dataset to {OUTPUT_JSONL}.")

Loaded 13823 clauses from CUAD.
Sample document: page_content='DISTRIBUTOR AGREEMENT' metadata={'contract_id': 'LIMEENERGYCO_09_09_1999-EX-10-DISTRIBUTOR AGREEMENT', 'clause_type': 'Highlight the parts (if any) of this contract related to "Document Name" that should be reviewed by a lawyer. Details: The name of the contract', 'clause_text': 'DISTRIBUTOR AGREEMENT'}
Saved preprocessed dataset to cuad_preprocessed.jsonl.


U.S. Code data ingestion

In [None]:
import os
import glob
import json
import xml.etree.ElementTree as ET

# Directory that ingest_uscode() scans for "*.xml" files.
# NOTE(review): looks like a Colab-mounted Google Drive path — adjust for other environments.
USCODE_DIR = "/content/drive/MyDrive/Colab Notebooks/RAG_legal/US_Code"
# JSON Lines file the parsed sections are written to (one JSON object per line).
OUTPUT_FILE = "uscode_preprocessed.jsonl"

def parse_uscode_file(file_path):
    """Parse one U.S. Code XML file into a list of section dicts.

    Every ``section`` element that has a direct ``content`` child with
    non-blank text produces one dict with the keys ``title_num``,
    ``title_name``, ``section_num``, ``heading`` and ``content``. Sections
    without a direct ``content`` child are skipped.

    Title metadata is taken from the root element's ``num`` / ``name``
    attributes when present; otherwise from the first ``title`` element's
    ``num`` (its ``value`` attribute, else its text) and ``heading`` children.
    If neither source exists the values fall back to ``"Unknown"`` and
    ``"Title <num>"``.

    Args:
        file_path: Path to the XML file.

    Returns:
        A list of dicts, one per section with content.

    Raises:
        xml.etree.ElementTree.ParseError: If the file is not well-formed XML.
    """
    root = ET.parse(file_path).getroot()

    # ElementTree spells namespaced tags as "{uri}local". Keep the "{uri}"
    # prefix (or "" for un-namespaced documents) and prepend it to each tag.
    prefix = root.tag[: root.tag.index("}") + 1] if root.tag.startswith("{") else ""

    def q(path):
        """Qualify every tag step of a simple path with the document namespace."""
        return "/".join(
            step if step in ("", ".", "..") else f"{prefix}{step}"
            for step in path.split("/")
        )

    # --- title-level metadata ---
    title_el = root.find(q(".//title"))

    title_num = root.attrib.get("num")
    if not title_num and title_el is not None:
        num_el = title_el.find(q("num"))
        if num_el is not None:
            # Prefer the machine-readable "value" attribute over the display text.
            title_num = num_el.get("value") or "".join(num_el.itertext()).strip()
    title_num = title_num or "Unknown"

    title_name = root.attrib.get("name")
    if not title_name and title_el is not None:
        title_name = (title_el.findtext(q("heading"), default="") or "").strip()
    title_name = title_name or f"Title {title_num}"

    # --- sections ---
    docs = []
    for section in root.findall(q(".//section")):
        sec_num = section.findtext(q("num"), default="")
        sec_heading = section.findtext(q("heading"), default="")

        # Collect all text under the section's direct <content> child,
        # dropping whitespace-only fragments and joining with single spaces.
        content_el = section.find(q("content"))
        sec_text = ""
        if content_el is not None:
            sec_text = " ".join(t.strip() for t in content_el.itertext() if t.strip())

        if sec_text:
            docs.append({
                "title_num": title_num,
                "title_name": title_name,
                "section_num": sec_num,
                "heading": sec_heading,
                "content": sec_text
            })

    return docs


def ingest_uscode(data_dir, output_file):
    """Parse every ``*.xml`` file in ``data_dir`` and write the sections as JSON Lines.

    Files are processed in sorted path order so the output file and the
    returned list are reproducible between runs (``glob`` alone returns files
    in arbitrary order). A file that fails to parse is reported and skipped;
    the remaining files are still processed.

    Args:
        data_dir: Directory containing the XML files.
        output_file: Path of the JSONL file to (over)write.

    Returns:
        A list with the section dicts of all successfully parsed files.
    """
    all_docs = []
    xml_files = sorted(glob.glob(os.path.join(data_dir, "*.xml")))

    print(f"Found {len(xml_files)} XML files in {data_dir}")

    with open(output_file, "w", encoding="utf-8") as f:
        for xml_file in xml_files:
            try:
                # Parse fully before writing so a failing file contributes no lines.
                docs = parse_uscode_file(xml_file)
                for d in docs:
                    f.write(json.dumps(d, ensure_ascii=False) + "\n")
                all_docs.extend(docs)
                print(f"Processed {xml_file} -> {len(docs)} sections")
            except Exception as e:
                # Best-effort ingestion: report the problem and continue with the next file.
                print(f"Error parsing {xml_file}: {e}")

    print(f"\nTotal documents ingested: {len(all_docs)}")
    print(f"Saved to {output_file}")
    return all_docs


# Run the U.S. Code ingestion with the directory and output file configured above.
if __name__ == "__main__":
    ingest_uscode(USCODE_DIR, OUTPUT_FILE)


Found 10 XML files in /content/drive/MyDrive/Colab Notebooks/RAG_legal/US_Code
✅ Processed /content/drive/MyDrive/Colab Notebooks/RAG_legal/US_Code/usc01.xml -> 35 sections
✅ Processed /content/drive/MyDrive/Colab Notebooks/RAG_legal/US_Code/usc07.xml -> 873 sections
✅ Processed /content/drive/MyDrive/Colab Notebooks/RAG_legal/US_Code/usc05.xml -> 281 sections
✅ Processed /content/drive/MyDrive/Colab Notebooks/RAG_legal/US_Code/usc02.xml -> 442 sections
✅ Processed /content/drive/MyDrive/Colab Notebooks/RAG_legal/US_Code/usc15.xml -> 726 sections
✅ Processed /content/drive/MyDrive/Colab Notebooks/RAG_legal/US_Code/usc11.xml -> 64 sections
✅ Processed /content/drive/MyDrive/Colab Notebooks/RAG_legal/US_Code/usc17.xml -> 27 sections
✅ Processed /content/drive/MyDrive/Colab Notebooks/RAG_legal/US_Code/usc26.xml -> 330 sections
✅ Processed /content/drive/MyDrive/Colab Notebooks/RAG_legal/US_Code/usc29.xml -> 196 sections
✅ Processed /content/drive/MyDrive/Colab Notebooks/RAG_legal/US_Code/

Embedding, chunking, and FAISS indexing

In [None]:
import os, json, math
from typing import List, Dict
from langchain_community.vectorstores import FAISS
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langchain.schema import Document
from langchain.text_splitter import RecursiveCharacterTextSplitter
import numpy as np
import faiss
from langchain_community.docstore.in_memory import InMemoryDocstore

# --------- CONFIG ---------
# Read the API key from the environment instead of hard-coding it; an empty
# value makes main() stop with its "Set GOOGLE_API_KEY" error.
GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY", "")
EMBED_MODEL = "models/embedding-001"
# Chunking parameters passed to RecursiveCharacterTextSplitter (counted in characters).
CHUNK_SIZE = 1000
CHUNK_OVERLAP = 150

#  preprocessed files
COURTLISTENER_JSONL = "courtlistener_cases.jsonl"
CUAD_JSONL          = "cuad_preprocessed.jsonl"
USCODE_JSONL        = "uscode_preprocessed.jsonl"

# Root directory for the saved FAISS indices (one sub-directory per source).
OUT_DIR = "indices"
os.makedirs(OUT_DIR, exist_ok=True)

# --------- HELPERS ---------
def read_jsonl(path: str) -> List[Dict]:
    """Read a JSON Lines file and return its records as a list.

    Blank or whitespace-only lines (e.g. a trailing empty line) are skipped
    instead of being handed to ``json.loads``, which would raise on them.

    Raises:
        FileNotFoundError: If ``path`` does not exist.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    with open(path, "r", encoding="utf-8") as f:
        return [json.loads(line) for line in f if line.strip()]

def to_documents(rows: List[Dict], source_name: str) -> List[Document]:
    """Convert raw JSONL rows of one source into LangChain Documents.

    The row layout depends on ``source_name``:

    - ``"courtlistener"``: text from ``text``; metadata from ``metadata`` plus
      ``source`` and the row ``id``.
    - ``"cuad"``: text from ``text`` or ``page_content``; metadata from
      ``metadata`` plus ``source``.
    - ``"uscode"``: text from ``content`` or ``text``; metadata built from the
      title/section fields.

    Any other ``source_name`` yields no documents. Whitespace in the text is
    collapsed to single spaces and rows with empty text are dropped.

    The input rows are not modified: metadata dicts are copied before the
    ``source``/``id`` keys are added.
    """
    docs = []
    for r in rows:
        # Normalize common shapes from your saved files
        if source_name == "courtlistener":
            text = r.get("text") or ""
            # Copy so the caller's row is left untouched; ``or {}`` also covers
            # an explicit ``"metadata": null``.
            md = dict(r.get("metadata") or {})
            md.update({"source": "courtlistener", "id": r.get("id")})
        elif source_name == "cuad":
            text = r.get("text") or r.get("page_content") or ""
            md = dict(r.get("metadata") or {})
            md.update({"source": "cuad"})
        elif source_name == "uscode":
            text = r.get("content") or r.get("text") or ""
            md = {
                "source": "uscode",
                "title_num": r.get("title_num"),
                "title_name": r.get("title_name"),
                "section_num": r.get("section_num"),
                "heading": r.get("heading"),
            }
        else:
            text, md = "", {}

        # Collapse every run of whitespace (including newlines) to one space.
        text = " ".join(str(text).split())
        if not text:
            continue
        docs.append(Document(page_content=text, metadata=md))
    return docs

def chunk_documents(docs: List[Document]) -> List[Document]:
    """Split each document's text into overlapping chunks.

    Uses ``RecursiveCharacterTextSplitter`` with ``CHUNK_SIZE`` /
    ``CHUNK_OVERLAP`` and returns one Document per chunk. Every chunk gets its
    own copy of the parent's metadata, so editing one chunk's metadata later
    cannot silently change its siblings.
    """
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=CHUNK_SIZE, chunk_overlap=CHUNK_OVERLAP, separators=["\n\n", "\n", ". ", " "]
    )
    return [
        Document(page_content=chunk, metadata=dict(d.metadata))
        for d in docs
        for chunk in splitter.split_text(d.page_content)
    ]

def build_faiss_cosine(docs: List[Document], embedder: GoogleGenerativeAIEmbeddings) -> FAISS:
    """Embed ``docs`` and build a cosine-similarity FAISS vector store.

    Document vectors are L2-normalised and stored in an inner-product index,
    so the inner product of two stored vectors is their cosine similarity.
    The store is created with ``normalize_L2=True`` so query vectors are
    normalised the same way at search time.

    Args:
        docs: Documents (chunks) to index; position ``i`` gets docstore id ``str(i)``.
        embedder: Embedding model used for the documents and, later, for queries.

    Returns:
        A LangChain ``FAISS`` store wrapping the index and an in-memory docstore.

    Raises:
        ValueError: If ``docs`` is empty (there is no vector dimension to build from).
    """
    if not docs:
        raise ValueError("Cannot build a FAISS index from an empty document list.")

    # Embed
    texts = [d.page_content for d in docs]
    vecs = np.array(embedder.embed_documents(texts), dtype="float32")

    # L2 normalize (cosine); the epsilon guards against division by zero for all-zero vectors.
    norms = np.linalg.norm(vecs, axis=1, keepdims=True) + 1e-12
    vecs = vecs / norms

    # Build FAISS IP index
    index = faiss.IndexFlatIP(vecs.shape[1])
    index.add(vecs)

    # Row i of the index maps to docstore id str(i), which maps to docs[i].
    doc_ids = [str(i) for i in range(len(docs))]
    return FAISS(
        embedding_function=embedder,
        index=index,
        docstore=InMemoryDocstore(dict(zip(doc_ids, docs))),
        index_to_docstore_id=dict(enumerate(doc_ids)),
        normalize_L2=True,
    )

def save_store(store: FAISS, path: str):
    """Persist ``store`` under the directory ``path``.

    Creates ``path`` (and missing parents) if needed, then delegates to
    ``store.save_local(path)``.
    """
    os.makedirs(path, exist_ok=True)
    store.save_local(path)

def _build_source_index(label: str, source_name: str, jsonl_path: str, embedder) -> None:
    """Read, chunk, embed and save the index for one source, reporting the outcome on stdout."""
    try:
        rows = read_jsonl(jsonl_path)
        docs = to_documents(rows, source_name)
        chunks = chunk_documents(docs)
        store = build_faiss_cosine(chunks, embedder)
        save_store(store, os.path.join(OUT_DIR, source_name))
        print(f"{label} index built with {store.index.ntotal} chunks")
    except FileNotFoundError:
        print(f"{label} JSONL file not found: {jsonl_path}. Skipping.")
    except Exception as e:
        # Best-effort: a failure in one source must not stop the other sources.
        print(f"Error building {label} index: {e}")


def main():
    """Build and save one FAISS index per source (CourtListener, CUAD, U.S. Code).

    Each source is processed independently; a missing input file or any other
    error is printed and the next source is attempted.

    Raises:
        EnvironmentError: If ``GOOGLE_API_KEY`` is empty.
    """
    if not GOOGLE_API_KEY:
        raise EnvironmentError("Set GOOGLE_API_KEY environment variable.")

    embedder = GoogleGenerativeAIEmbeddings(model=EMBED_MODEL, google_api_key=GOOGLE_API_KEY)

    # (display label, source name / index sub-directory, input JSONL)
    sources = [
        ("CourtListener", "courtlistener", COURTLISTENER_JSONL),
        ("CUAD", "cuad", CUAD_JSONL),
        ("U.S. Code", "uscode", USCODE_JSONL),
    ]
    for label, source_name, jsonl_path in sources:
        _build_source_index(label, source_name, jsonl_path, embedder)


# Build all three indices when this cell is executed.
if __name__ == "__main__":
    main()

✅ CourtListener index built with 23203 chunks
✅ CUAD index built with 14204 chunks
✅ U.S. Code index built with 5209 chunks


Retrieval and Generation

In [None]:
import os
from typing import List, Tuple, Dict
from langchain_community.vectorstores import FAISS
from langchain_google_genai import ChatGoogleGenerativeAI, GoogleGenerativeAIEmbeddings
from langchain.schema import Document
from datetime import datetime

from datetime import timezone  # for a timezone-aware replacement of the deprecated datetime.utcnow()

# One log file per session, named after the UTC time at which this cell ran.
SESSION_ID = datetime.now(timezone.utc).strftime("%Y%m%dT%H%M%S")
LOG_FILE = f"conversation_{SESSION_ID}.jsonl"

os.makedirs("logs", exist_ok=True)
LOG_FILE = os.path.join("logs", LOG_FILE)

# Read the API key from the environment instead of hard-coding it; an empty
# value makes answer() stop with its "Set GOOGLE_API_KEY" error.
GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY", "")
EMBED_MODEL = "models/embedding-001"
CHAT_MODEL  = "gemini-2.5-flash"

INDEX_ROOT = "indices"
# Number of hits requested from each source index, and the cap on merged hits.
K_PER_SOURCE = 6
TOPK_GLOBAL  = 8

# ---------- Loaders ----------
def load_store(subdir: str, embedder: GoogleGenerativeAIEmbeddings) -> FAISS:
    """Load the FAISS store saved under ``INDEX_ROOT/subdir``.

    ``normalize_L2=True`` is forwarded to the store so query vectors are
    L2-normalised at search time, matching the normalised document vectors
    the indices were built from; this setting is passed again here rather
    than relied on from the saved files.

    SECURITY: ``allow_dangerous_deserialization=True`` lets the loader
    unpickle the saved docstore. Only load index directories that this
    notebook created itself; never point this at untrusted files.
    """
    path = os.path.join(INDEX_ROOT, subdir)
    return FAISS.load_local(
        path,
        embedder,
        allow_dangerous_deserialization=True,
        normalize_L2=True,
    )

from functools import lru_cache


@lru_cache(maxsize=1)
def load_all() -> Tuple[Dict[str, FAISS], GoogleGenerativeAIEmbeddings]:
    """Load the three source indices and the embedder they share.

    The result is cached after the first successful call so the indices are
    not re-read from disk on every query. Call ``load_all.cache_clear()``
    after rebuilding the indices or changing ``GOOGLE_API_KEY``.

    Returns:
        A ``(stores, embedder)`` tuple, where ``stores`` maps the source names
        ``"courtlistener"``, ``"cuad"`` and ``"uscode"`` to their FAISS stores.
    """
    embedder = GoogleGenerativeAIEmbeddings(model=EMBED_MODEL, google_api_key=GOOGLE_API_KEY)
    stores = {
        "courtlistener": load_store("courtlistener", embedder),
        "cuad": load_store("cuad", embedder),
        "uscode": load_store("uscode", embedder),
    }
    return stores, embedder

# ---------- Gemini-based Router ----------
def gemini_route(query: str, llm: ChatGoogleGenerativeAI) -> List[str]:
    """Use Gemini to classify query into sources. Returns list of target sources.

    The model is asked for a comma-separated list of source names. The reply
    is scanned for the three known names, so quotes, trailing punctuation or
    newline-separated answers are still understood. Names are returned in the
    order they first appear, without duplicates.

    Returns:
        A list drawn from ``"courtlistener"``, ``"uscode"`` and ``"cuad"``;
        empty when the reply is empty, contains "uncertain", or names no
        known source.
    """
    import re

    classification_prompt = f"""
    You are a router for a Legal Research Assistant.
    Decide which knowledge source(s) the question should query:
    - "courtlistener" → for precedents, case law, judicial opinions.
    - "uscode" → for statutes, laws, federal titles/sections.
    - "cuad" → for contract clauses, risks, obligations.

    The user query is:
    "{query}"

    Respond ONLY with a comma-separated list of sources (e.g., "courtlistener, uscode").
    If unsure, say "uncertain".
    """

    resp = llm.invoke(classification_prompt)
    content = resp.content
    # Coerce non-string content to text before parsing.
    text = (content if isinstance(content, str) else str(content)).strip().lower()

    if "uncertain" in text or not text:
        return []

    sources: List[str] = []
    for name in re.findall(r"courtlistener|uscode|cuad", text):
        if name not in sources:
            sources.append(name)
    return sources

# ---------- Keyword Fallback Router ----------
def keyword_route(query: str) -> List[str]:
    """Pick a source from keywords in the query.

    The rules are checked in order (statutes, then contracts, then case law)
    and the first rule with a keyword contained in the lower-cased query
    wins. When no keyword matches, all three sources are returned.
    """
    lowered = query.lower()
    rules = (
        ("uscode", ("statute", "u.s. code", "title ", "usc ", "section ", "§")),
        ("cuad", ("clause", "contract", "agreement", "termination", "indemnification", "confidentiality")),
        ("courtlistener", ("precedent", "case", "opinion", "appeals", "holding", "binding", "citation")),
    )
    for source, keywords in rules:
        for keyword in keywords:
            if keyword in lowered:
                return [source]
    return ["courtlistener", "cuad", "uscode"]

# ---------- Retrieval ----------
def normalize_scores(pairs: List[Tuple[Document, float]]) -> List[Tuple[Document, float]]:
    """Clamp every score into the range [0.0, 1.0]; a ``None`` score becomes 0.0."""
    return [
        (doc, float(max(0.0, min(1.0, score))) if score is not None else 0.0)
        for doc, score in pairs
    ]

def retrieve(stores: Dict[str, FAISS], query: str, targets: List[str]) -> List[Tuple[Document, float]]:
    """Search the target stores and merge their hits into one ranked list.

    Each store named in ``targets`` is queried for ``K_PER_SOURCE`` hits. The
    scores are clamped with ``normalize_scores``, all hits are sorted by score
    (highest first), duplicates are removed, and at most ``TOPK_GLOBAL`` hits
    are returned.

    Two hits count as duplicates when the first 200 characters of their text
    and their metadata are equal. Metadata values are compared through their
    ``repr`` so unhashable values (lists, dicts) cannot break the dedupe set.

    Raises:
        KeyError: If a name in ``targets`` is not a key of ``stores``.
    """
    hits: List[Tuple[Document, float]] = []
    for name in targets:
        pairs = stores[name].similarity_search_with_score(query, k=K_PER_SOURCE)
        hits.extend(normalize_scores(pairs))
    hits.sort(key=lambda pair: pair[1], reverse=True)

    seen, merged = set(), []
    for d, s in hits:
        md = d.metadata or {}
        key = (
            d.page_content[:200],
            tuple(sorted((str(k), repr(v)) for k, v in md.items())),
        )
        if key in seen:
            continue
        seen.add(key)
        merged.append((d, s))
        if len(merged) >= TOPK_GLOBAL:
            break
    return merged

# ---------- Prompt ----------
# Template filled with str.format() in answer():
#   {question} receives the conversation text (recent turns plus the new query),
#   {context}  receives the retrieved chunks rendered by make_context().
# The doubled braces ({{...}}) are format escapes: they reach the model as
# literal "{title_num}"-style placeholders illustrating the citation formats.
ANSWER_PROMPT = """You are a Legal Research & Contract Analysis assistant.
Use ONLY the provided context to answer. Cite each point with [source] using the metadata.
If something is unclear or missing, say so.

Conversation so far (last 3 turns only):
{question}

Relevant context:
{context}

Instructions:
- Summarize clearly and conservatively.
- Distinguish statutes vs. cases vs. contract clauses.
- Include citations like: (U.S. Code Title {{title_num}} §{{section_num}})
  or (Case: {{case_name}}, Court, Date)
  or (Clause: {{clause_type}}, Contract {{contract_id}}).
- Do NOT provide legal advice; only informational summaries with sources.
"""


def make_context(chunks: List[Tuple[Document, float]]) -> str:
    """Render retrieved chunks as the context section of the answer prompt.

    Each chunk becomes ``"<tag> (score=<s>)\\n<text>\\n"`` where the tag
    depends on the chunk's ``source`` metadata (U.S. Code section, case, or
    contract clause). Chunks are joined with ``"\\n---\\n"``.

    Missing, ``None`` or empty metadata values are shown as ``N/A`` (``#``
    for a case URL). A leading "§" and a trailing "." are removed from the
    stored section number so the tag does not render as "§§ 1224.".
    """
    def shown(value, default="N/A"):
        """Return ``default`` for missing/None/empty metadata values."""
        return default if value is None or value == "" else value

    lines = []
    for d, s in chunks:
        md = d.metadata or {}
        src = md.get("source", "?")
        tag = "[SOURCE]"  # Default tag for unknown sources

        if src == "uscode":
            title_num = shown(md.get('title_num'))
            # The tag template adds its own "§", so drop one carried by the data.
            raw_section = str(shown(md.get('section_num'), "")).strip()
            section_num = shown(raw_section.lstrip("§").strip().rstrip("."))
            heading = shown(md.get('heading'))
            tag = f"[USC T{title_num} §{section_num} – {heading}]"
        elif src == "courtlistener":
            case_name = shown(md.get('case_name'))
            court = shown(md.get('court'))
            date_filed = shown(md.get('date_filed'))
            url = shown(md.get('url'), '#')
            tag = f"[CASE {case_name} | {court} | {date_filed} | {url}]"
        elif src == "cuad":
            clause_type = shown(md.get('clause_type'))
            contract_id = shown(md.get('contract_id'))
            tag = f"[CLAUSE {clause_type} | contract {contract_id}]"

        lines.append(f"{tag} (score={s:.3f})\n{d.page_content}\n")
    return "\n---\n".join(lines)


def answer(query: str, history: list = None) -> str:
    """Answer a legal research question with retrieval-augmented generation.

    Steps: load the indices, route the query to one or more sources (Gemini
    first, keyword rules as fallback), retrieve and format the best chunks,
    build the prompt from the last three conversation turns plus the new
    query, call the chat model, and append a JSON log entry to ``LOG_FILE``.

    Args:
        query: The user's question.
        history: Optional list of ``(user_message, assistant_message)`` pairs;
            only the last three are included in the prompt.

    Returns:
        The model's answer text.

    Raises:
        EnvironmentError: If ``GOOGLE_API_KEY`` is empty.
    """
    # Local imports: this cell's import block provides neither ``json`` nor ``timezone``.
    import json
    from datetime import timezone

    if not GOOGLE_API_KEY:
        raise EnvironmentError("Set GOOGLE_API_KEY before running.")

    stores, _ = load_all()

    llm = ChatGoogleGenerativeAI(
        model=CHAT_MODEL,
        temperature=0.2,
        google_api_key=GOOGLE_API_KEY
    )

    # ---- Hybrid Routing ----
    targets = gemini_route(query, llm)
    if targets:
        print(f"[Router] Gemini classified → {targets}")
    else:
        targets = keyword_route(query)
        print(f"[Router] Gemini uncertain → fallback to keyword router → {targets}")

    # ---- Retrieve & Build Context ----
    chunks = retrieve(stores, query, targets)
    context = make_context(chunks)

    # ---- Build conversation context (cap last 3 turns) ----
    conversation = "".join(
        f"User: {user_msg}\nAssistant: {bot_msg}\n"
        for user_msg, bot_msg in (history or [])[-3:]
    )
    conversation += f"User: {query}\nAssistant:"

    # ---- Answer Generation ----
    prompt = ANSWER_PROMPT.format(question=conversation, context=context)
    resp = llm.invoke(prompt)

    # ---- Log full conversation ----
    log_entry = {
        # Timezone-aware UTC timestamp (datetime.utcnow() is deprecated).
        "timestamp": datetime.now(timezone.utc).isoformat(),
        "query": query,
        "response": resp.content,
        "history": history if history else [],
        "retrieved_sources": targets
    }
    with open(LOG_FILE, "a", encoding="utf-8") as f:
        f.write(json.dumps(log_entry) + "\n")

    return resp.content



if __name__ == "__main__":
    # Smoke-test the pipeline with one question per source type.
    qs = [
        "What are key federal statutes on non-compete agreements?",
        "Find precedents on arbitration clauses in employment agreements.",
        "Summarize risks in a termination clause of a contract."
    ]
    for q in qs:
        print("="*80, f"Q: {q}", "-"*80, sep="\n")
        print(answer(q))

  SESSION_ID = datetime.utcnow().strftime("%Y%m%dT%H%M%S")


Q: What are key federal statutes on non-compete agreements?
--------------------------------------------------------------------------------
[Router] Gemini classified → ['uscode']


  "timestamp": datetime.utcnow().isoformat(),


Based on the provided context, there are no specific federal statutes directly addressing or defining "non-compete agreements."

The context mentions:
*   Provisions related to antitrust laws, stating that certain chapters do not repeal, modify, or supersede them [USC TUnknown §§ 1224] and do not convey antitrust immunity or create defenses to antitrust actions [USC TUnknown §§ 12007]. These sections define "antitrust laws" as those set forth in section 12 of title 15 [USC TUnknown §§ 12007].
*   Clarification of remedies for federal employees, former federal employees, or applicants for federal employment, stating that nothing in a particular title shall prevent them from exercising any right otherwise available under U.S. laws [USC TUnknown §“SEC. 205. – CLARIFICATION OF REMEDIES.].

However, none of these snippets explicitly define, regulate, or list key federal statutes on non-compete agreements. The information regarding antitrust laws is a general statement about their applicabil

  "timestamp": datetime.utcnow().isoformat(),


Here are precedents on arbitration clauses, drawing solely from the provided context:

**General Principles of Arbitration Agreements:**
*   Arbitration agreements are governed by the Federal Arbitration Act (FAA) (U.S. Code Title 9 §§ 1–16) [CASE /opinion/10658506/riley-v-national-railroad-passenger-corporation/].
*   The FAA reflects a liberal federal policy favoring arbitration agreements, creating a body of federal substantive law of arbitrability (Case: Moses H. Cone Mem. Hosp. v. Mercury Constr. Corp., 460 U.S. 1, 24 (1983)) [CASE /opinion/10658506/riley-v-national-railroad-passenger-corporation/].
*   Congress's intent with the FAA was to move parties to an arbitrable dispute into arbitration quickly and easily (Case: Snap-on Tools Corp. v. Mason, 18 F.3d 1261, 1263 (5th Cir. 1994)) [CASE /opinion/10658506/riley-v-national-railroad-passenger-corporation/].
*   Under the FAA, arbitration agreements are "valid, irrevocable, and enforceable, save upon such grounds as exist at law o

  "timestamp": datetime.utcnow().isoformat(),


In [None]:
import gradio as gr

async def chat_fn(message, history):
    """Produce the assistant reply for ``message`` given the chat ``history``.

    ``history`` is forwarded to ``answer`` so earlier turns are included in
    the prompt. ``answer`` is blocking, so it is run in a worker thread to
    keep the event loop free while the model call is in flight.

    Returns:
        The answer text, or an error string if anything fails.
    """
    import asyncio

    try:
        response = await asyncio.to_thread(answer, message, history)
    except Exception as e:
        # Show the failure in the chat window instead of breaking the UI callback.
        response = f"⚠️ Error: {str(e)}"
    return response

# Build the chat UI: a header, the chat window, and a text box for questions.
with gr.Blocks() as demo:
    gr.Markdown("## ⚖️ RAG Legal Assistant Chatbot\nAsk questions about case law, statutes, or contracts.")

    chatbot = gr.Chatbot(height=500)
    msg = gr.Textbox(label="Enter your legal question")

    # Submit handler: gets the reply, appends the (user, assistant) pair to the
    # history, and returns "" for the text box (clearing it) plus the new history.
    async def respond(message, chat_history):
      # chat_fn is async, so await here
      bot_msg = await chat_fn(message, chat_history)   # pass history
      chat_history.append((message, bot_msg))
      return "", chat_history


    # Inputs: text box value and chat history; outputs: the same two components.
    msg.submit(respond, [msg, chatbot], [msg, chatbot])

# Start the Gradio app.
demo.launch()

  chatbot = gr.Chatbot(height=500)


It looks like you are running Gradio on a hosted Jupyter notebook, which requires `share=True`. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://44fb04e7d5db6d233c.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


