In [1]:
!pip install pypdf pandas openpyxl aixplain PyPDF2



# API Key

In [None]:
import os
from getpass import getpass

# Keep a key that is already present in the environment and never store the real
# key in the notebook: prompt for it only when it is missing.
# NOTE(review): this cell is placed before the first `aixplain` import, presumably
# because the SDK reads TEAM_API_KEY at import time -- keep that ordering.
if not os.environ.get("TEAM_API_KEY"):
    os.environ["TEAM_API_KEY"] = getpass("aiXplain TEAM_API_KEY: ")

# Web Scraper Tool

In [12]:
from aixplain.factories import ModelFactory, AgentFactory

# Fetch the scraper model by its asset ID, then wrap it as a tool the agent can call.
scraper = ModelFactory.get("66f423426eb563fa213a3531")

scraper_tool = AgentFactory.create_model_tool(
    description="Extracts and reads the content of a specified website. Input: URL text. Output: scraped page content.",
    model=scraper,
)

# Federal Register API utility tool

In [13]:
def federal_register_lookup(query: str, limit: int = 5) -> list:
    """
    Searches the Federal Register for documents related to a specific query.

    Parameters
    ----------
    query : str
        A natural language or keyword input (e.g., "Executive Order 14067").

    limit : int
        The number of top documents to return (default: 5).

    Returns
    -------
    list
        A list of matching documents with:
        - title: Document title
        - document_type: Type of publication (e.g., Executive Order, Notice)
        - publication_date: Date of publication
        - html_url: Link to the document
        - status: Document status if the API response includes one, else None

        If the request fails (network error, HTTP error status, timeout) or the
        response body is not valid JSON, the list holds a single
        {"error": "Request failed: ..."} entry instead.
    """
    # Imported inside the function so the function stays self-contained: the
    # function object itself is handed to ModelFactory.create_utility_model.
    import requests

    base_url = "https://www.federalregister.gov/api/v1/documents.json"
    params = {
        "per_page": limit,
        "order": "relevance",
        "conditions[term]": query
    }

    try:
        # A timeout keeps the tool from hanging forever on a stalled connection.
        response = requests.get(base_url, params=params, timeout=30)
        response.raise_for_status()
        # `or []` also covers an explicit `"results": null` in the payload.
        results = response.json().get("results") or []
    except (requests.RequestException, ValueError) as e:
        # ValueError covers a non-JSON body on requests versions whose JSON
        # decoding error is not a RequestException subclass.
        return [{"error": f"Request failed: {e}"}]

    return [
        {
            "title": doc.get("title"),
            # NOTE(review): the search endpoint appears to report the publication
            # type under "type"; "document_type" is still tried first so existing
            # behaviour is kept -- confirm against the API documentation.
            "document_type": doc.get("document_type") or doc.get("type"),
            "publication_date": doc.get("publication_date"),
            "html_url": doc.get("html_url"),
            # dict.get already yields None when the key is absent.
            "status": doc.get("status"),
        }
        for doc in results
    ]

In [20]:
# Turn the lookup function into a utility model ...
utility_model = ModelFactory.create_utility_model(
    code=federal_register_lookup,
    name="Federal Register tool",
)

# ... and expose that model (referenced by its ID) to the agent as a tool.
federal_register_tool = AgentFactory.create_model_tool(
    model=utility_model.id,
)

# Knowledge and indexing

In [10]:
from aixplain.factories import IndexFactory, AgentFactory
from aixplain.modules.model.record import Record
import PyPDF2
import pandas as pd
import os

# Index that receives the chunked document records via `knowledge_index.upsert`.
# Every execution of this cell issues a fresh IndexFactory.create call.
knowledge_index = IndexFactory.create(
    description="Stores content from PDF and CSV files",
    name="Documents Knowledge Base",
)

In [9]:
from aixplain.factories import IndexFactory

# To delete the index
# NOTE(review): this deletes the index with the hard-coded ID below, which is not
# necessarily `knowledge_index`. In file order the cell sits after the index
# creation cell although its execution count (9) is lower, so a top-to-bottom
# "Run All" executes it unconditionally -- presumably a leftover maintenance
# cell; confirm the ID (or skip the cell) before running.
index_delete = IndexFactory.get("68baf0c76e7528eb1aa54da4")

index_delete.delete()

In [15]:
# Expose the knowledge index to the agent as a tool; only the index ID is passed
# and no tool description is supplied here.
index_tool = AgentFactory.create_model_tool(knowledge_index.id)

In [16]:
import io
import hashlib

def process_uploaded_file(uploaded_file, file_type):
    """
    Extract text content from a PDF / CSV / XLSX file held in memory.

    Parameters
    ----------
    uploaded_file : file-like
        Binary object exposing ``read()`` and ``seek()``.
    file_type : str
        "pdf", "csv" or "xlsx". Case, surrounding whitespace and a leading dot
        (e.g. ".csv") are ignored.

    Returns
    -------
    tuple
        ``(text_content, raw_bytes)``: the extracted text and the file's raw
        bytes (the caller hashes the bytes to build the document ID).

    Raises
    ------
    ValueError
        If ``file_type`` is not one of the supported types.
    """
    kind = file_type.strip().lower().lstrip(".")

    # Rewind before reading: a stream that was already consumed would otherwise
    # return b"" and the file would be treated as empty.
    uploaded_file.seek(0)
    raw_bytes = uploaded_file.read()
    uploaded_file.seek(0)  # leave the stream rewound for the caller

    buffer = io.BytesIO(raw_bytes)

    if kind == "pdf":
        reader = PyPDF2.PdfReader(buffer)
        # extract_text() may give a falsy result for a page; substitute "" so
        # the join below always receives strings.
        text_content = "\n".join((page.extract_text() or "") for page in reader.pages)

    elif kind == "csv":
        text_content = pd.read_csv(buffer).to_csv(index=False)

    elif kind == "xlsx":
        # read_excel is called with its defaults; the resulting frame is
        # re-serialised as CSV text, same as for CSV input.
        text_content = pd.read_excel(buffer).to_csv(index=False)

    else:
        raise ValueError(f"Unsupported file_type: {kind}")

    return text_content, raw_bytes

def chunk_text(text, chunk_size=1000, chunk_overlap=200):
    """
    Split ``text`` into character windows of at most ``chunk_size`` characters.

    Consecutive windows start ``chunk_size - chunk_overlap`` characters apart
    (at least 1). Windows containing only whitespace are dropped. Splitting
    stops at the first window that reaches the end of the text, so no trailing
    chunk is emitted that is merely a suffix of the previous one.

    Parameters
    ----------
    text : str
        Text to split.
    chunk_size : int
        Maximum chunk length in characters; must be positive.
    chunk_overlap : int
        Characters shared by neighbouring chunks; must not be negative.

    Returns
    -------
    list
        The chunks, in order of appearance.

    Raises
    ------
    ValueError
        If ``chunk_size`` is not positive or ``chunk_overlap`` is negative.
    """
    if chunk_size <= 0:
        raise ValueError(f"chunk_size must be positive, got {chunk_size}")
    if chunk_overlap < 0:
        raise ValueError(f"chunk_overlap must not be negative, got {chunk_overlap}")

    # Clamp to 1 so an overlap >= chunk_size still advances through the text.
    step = max(1, chunk_size - chunk_overlap)
    total = len(text)

    chunks = []
    for start in range(0, total, step):
        window = text[start:start + chunk_size]
        if window.strip():
            chunks.append(window)
        # This window already covers the tail; any later window would only
        # repeat a suffix of it.
        if start + chunk_size >= total:
            break
    return chunks

def make_doc_id(filename: str, raw_bytes: bytes):
    """
    Build a document ID of the form ``<basename>:<digest>``.

    ``<digest>`` is the first 12 hex characters of the MD5 of ``raw_bytes``;
    ``<basename>`` is ``filename`` with any directory part removed.
    """
    digest_prefix = hashlib.md5(raw_bytes).hexdigest()[:12]
    base_name = os.path.basename(filename)
    return "{}:{}".format(base_name, digest_prefix)

def batched(iterable, n=200):
    """
    Yield consecutive slices of at most ``n`` items from ``iterable``.

    ``iterable`` must support ``len()`` and slicing (e.g. a list or a string).
    """
    offsets = range(0, len(iterable), n)
    yield from (iterable[lo:lo + n] for lo in offsets)

def index_uploaded_file(uploaded_file, file_type, chunk_size=1000, chunk_overlap=200, extra_meta=None):
    """
    Chunk a single file and upsert the chunks into the shared ``knowledge_index``.

    Parameters
    ----------
    uploaded_file : str, os.PathLike or file-like
        Either a filesystem path, or a binary object exposing ``read()``,
        ``seek()`` and a ``name`` attribute.
    file_type : str
        Passed to ``process_uploaded_file`` ("pdf", "csv" or "xlsx").
    chunk_size, chunk_overlap : int
        Passed to ``chunk_text``.
    extra_meta : dict, optional
        Extra attributes merged into every record's metadata; keys that clash
        with the built-in ones (doc_id, file_name, total_chunks) override them.

    Returns
    -------
    tuple
        ``(doc_id, number_of_records)``.

    Notes
    -----
    Record IDs are ``<doc_id>:<chunk_index>`` and ``doc_id`` embeds a hash of
    the file bytes, so indexing the same bytes again produces the same IDs.
    A file whose content changed gets a different ``doc_id``; this function
    does not remove the records written for the earlier content.
    """
    if isinstance(uploaded_file, (str, os.PathLike)):
        path = os.fspath(uploaded_file)
        # Read the file inside a `with` block so the OS handle is closed right
        # away, then continue with an in-memory stream carrying the file name.
        with open(path, "rb") as fh:
            uploaded_file = io.BytesIO(fh.read())
        uploaded_file.name = os.path.basename(path)

    content, raw_bytes = process_uploaded_file(uploaded_file, file_type)
    chunks = chunk_text(content, chunk_size, chunk_overlap)

    filename = uploaded_file.name
    doc_id = make_doc_id(filename, raw_bytes)

    base_meta = {
        "doc_id": doc_id,
        "file_name": filename,
        "total_chunks": len(chunks),
    }
    if extra_meta:
        base_meta.update(extra_meta)

    records = [
        Record(
            id=f"{doc_id}:{i}",
            value=chunk,
            attributes={**base_meta, "chunk_index": i},
        )
        for i, chunk in enumerate(chunks)
    ]

    # Send the records in groups of 200 rather than in one large request.
    for batch in batched(records, n=200):
        knowledge_index.upsert(batch)

    return doc_id, len(records)


# Index the initial knowledge documents for the agent

In [None]:
# Index the firearms-provisions CSV with a chunk size of 200 and an overlap of 50.
index_uploaded_file(
    "/content/Firearms Provisions in US States.csv",
    "csv",
    chunk_overlap=50,
    chunk_size=200,
)

In [None]:
# Index the electrical-installations PDF with a chunk size of 200 and an overlap of 50.
index_uploaded_file(
    "/content/Specifications for Electrical Installations.pdf",
    "pdf",
    chunk_overlap=50,
    chunk_size=200,
)

# Create the agent

In [18]:
# System prompt passed as `instructions` when the agent is created. It tells the
# agent which tool to use per request type (knowledge index, web scraper, Federal
# Register tool), to cite sources, to keep answers short, and to refuse questions
# that are not about government policies or regulations.
# The text is sent to the model as written, so any edit changes agent behaviour.
INSTRUCTION = """You are a helpful Government Regulations Agent Specialist that helps users query and analyze regulations.
    IMPORTANT: Always answer the questions based on your knowledge only, do not make up answers and don't add any extra information that doesn't exist in your knowledge.
    IF YOU DON'T HAVE THE KNOWLEDGE, SAY YOU DON'T KNOW IN A POLITE WAY.

    When users ask questions related to firearm regulations or policies:
    - Search the Document Knowledge for relevant information
    - Provide accurate answers based on what you find
    - Cite the source file when providing information
    - If you can't find relevant information, say so clearly
    When users provide a URL:
    - Scrape the content
    - Provide accurate answer based on what you find
    - Cite the source file when providing information
    - If you can't find relevant information, say so clearly
    When users asks a question about any federal policy:
    - Use the federal register tool
    - Provide accurate answer based on what you find
    - Cite the source file when providing information
    - If you can't find relevant information, say so clearly
    When users asks any question about any policy or law or regulations:
    - Use the knowledge index
    - Provide accurate answer based on what you find
    - Cite the source file when providing information
    - If you can't find relevant information, say so clearly
    When users asks about any question about Specifications for Electrical Installations:
    - Use the knowledge index
    - Provide accurate answer based on what you find
    - Cite the source file when providing information
    - If you can't find relevant information, say so clearly

    Always keep your answers short, concice and strait to the point.

    When users ask questions that are NOT about government policies or regulations:
    - Do NOT answer the question.
    - Politely state that you only handle government policy/regulation queries and invite the user to reframe their request to that scope.
    - Do NOT provide workarounds, general advice, or non-policy information.

    Examples (do not answer these):
    - "What’s the weather in Austin today?" → Refuse.
    - "Write a Python script to parse JSON." → Refuse.
    - "Recommend a workout plan and diet." → Refuse.
    - "Who won the game last night?" → Refuse.
    - "Fix my iPhone Bluetooth issue." → Refuse.
    - "How to make coffee?" → Refuse.
    - Any question related to sports → Refuse.

    Borderline example (redirect to policy angle):
    - User: "Can you help me buy a firearm?"
      Assistant: "I can’t assist with purchasing advice. If you’d like, I can explain applicable firearm regulations or compliance requirements in your jurisdiction."



    """

# Short summary of the agent, passed as `description` below.
DESCRIPTION = "A Government Regulations Agent Specialist that helps users query and analyze regulations, compliance policies, and public health guidelines with sourced, grounded answers."

# Assemble the agent from the three tools built earlier in the notebook.
agent = AgentFactory.create(
    name="Government Regulations Agent",
    description=DESCRIPTION,
    instructions=INSTRUCTION,
    llm="669a63646eb56306647e1091",
    tools=[scraper_tool, index_tool, federal_register_tool],
)



# Print the agent and index IDs for use in the UI

In [19]:
# Show the agent ID and the index ID together on one line.
ids_for_ui = ("Agent ID: ", agent.id, " Index ID: ", knowledge_index.id)
print(*ids_for_ui)

Agent ID:  68baf18ddef19d770c25f3f3  Index ID:  68baf93ad4e0b5e6e1fb7182
