In [9]:
import rich
import json
from itertools import count

from langchain.docstore.base import Docstore
from langchain.retrievers import ElasticSearchBM25Retriever

from guidance import Program
from guidance.llms import OpenAI
from datasets import load_dataset
from elasticsearch import Elasticsearch

In [10]:
from ipython_secrets import get_secret

# The OpenAI key is obtained via ipython_secrets' get_secret instead of being
# written into the notebook; it is later passed to the guidance OpenAI client.
OPENAI_API_KEY = get_secret("OPENAI_API_KEY")

In [11]:
# Load the zoning-code pages, take the train split as a DataFrame and drop the
# "embeddings" column. Written as one chain so `data` is only ever a DataFrame.
data = (
    load_dataset("xyzNLP/nza-ct-zoning-codes-text")["train"]
    .to_pandas()
    .drop(columns=["embeddings"])
)
data

Found cached dataset parquet (/Users/maxdumas/.cache/huggingface/datasets/xyzNLP___parquet/xyzNLP--nza-ct-zoning-codes-text-579051b4b18688cb/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
100%|██████████| 2/2 [00:00<00:00, 68.97it/s]


Unnamed: 0,Town,Page,Text
0,vernon,1,
1,vernon,2,"ZONING REGULATIONS\nTown of Vernon, Connecticu..."
2,vernon,3,SECTION 1 - GENERAL\n1.1\nThe following regula...
3,vernon,4,1.3\nThe boundaries of these zones are shown o...
4,vernon,5,1.3A\nProposed changes to the Zoning Map shall...
...,...,...,...
22083,newtown,266,APPENDIX E\nDESIGN ADVISORY BOARD ORDINANCE\nB...
22084,newtown,267,APPENDIX F\nList of Permitted Uses in Commerci...
22085,newtown,268,APPENDIX G\nList of Permitted Uses in Industri...
22086,newtown,269,APPENDIX H\nARTICLE VIII - SUPPLEMENTAL REGULA...


In [12]:
class ElasticSearchDocStore(Docstore):
    """Docstore that puts texts into an Elasticsearch index and searches them via BM25.

    Args:
        index_name: Name of the Elasticsearch index to create and fill.
        texts: Texts to add to the index.
        reset_index: When true, delete any index called ``index_name`` before
            the retriever is created.
        elasticsearch_url: URL of the Elasticsearch node, used both for the
            optional delete and for the retriever. Defaults to the local node
            the notebook originally hard-coded.
    """

    def __init__(
        self,
        index_name: str,
        texts: list[str],
        reset_index=True,
        elasticsearch_url: str = "http://localhost:9200",
    ):
        if reset_index:
            es = Elasticsearch(elasticsearch_url)
            # 400/404 responses are ignored, e.g. when the index does not exist yet.
            es.indices.delete(index=index_name, ignore=[400, 404])

        # NOTE(review): `create` presumably creates the index itself, so calling
        # this with reset_index=False against an existing index may fail — confirm.
        self.retriever = ElasticSearchBM25Retriever.create(
            elasticsearch_url=elasticsearch_url,
            index_name=index_name,
            k1=2.0,
            b=0.75,
        )
        self.retriever.add_texts(texts=texts)

    def search(self, query):
        """Return the retriever's relevant documents for ``query``."""
        return self.retriever.get_relevant_documents(query)

In [13]:
town = "ansonia"
# Derive the index name from `town` as well, so the index and the pages
# filtered into it always refer to the same town when `town` is changed.
docstore = ElasticSearchDocStore(town, data.query("Town == @town")["Text"].to_list())



In [14]:
# Manual retrieval check: run one keyword query and display the returned documents.
docstore.search("AAA Residence, minimum lot size")

[Document(page_content='SCHEDULE D - MAXIMUM DWELLING DENSITY\nEffective: April 25, 2003.\n52\nCELL (1, 1): \nZoning District\nCELL (1, 2): \nMinimum Lot Size (In\nSquare Feet)\nCELL (1, 3): \nMax. Residential Density (Dwelling Units\nPer Acre)\nCELL (2, 1): \nAAA\nCELL (2, 2): \n60,000\nCELL (2, 3): \n0.59\nCELL (3, 1): \nAA\nCELL (3, 2): \n30,000\nCELL (3, 3): \n1.00\nCELL (4, 1): \nA\nCELL (4, 2): \n12,500\nCELL (4, 3): \n2.40\nCELL (5, 1): \nB\nCELL (5, 2): \n7,500\nCELL (5, 3): \n4.00\n', metadata={}),
 Document(page_content='Section 720.18 - Age-Restricted Housing - Special Exception\n720.18.1 - General\nThe Commission may, of its own initiative or in response to an application hereunder,\ndesignate sites within "A" Residence zoning district as suitable for conversion to an age-\nrestricted residential community providing that:\na.\nThe site is no less than four and one-third (4.3) acres in size; and it is\nentirely location within an "A" Residence district;\nb.\nThe site is full

In [108]:
# Lists of step types and tool names.
# NOTE(review): neither list is referenced in the cells visible here, and
# 'Action' / 'Final Answer' do not match the "Continue" / "Stop" values that the
# template below uses for "next_step" — confirm whether these are stale.
valid_next_steps = ['Action', 'Final Answer']
valid_tools = ['Search']

# Guidance chat template for the agent, in order:
#   * system block: persona, the Search tool description and the JSON reply schema;
#   * hidden block: a worked few-shot exchange (search, search, answer);
#   * user block: the actual {{question}};
#   * geneach loop: generate a reply into this.response, then either await an
#     'observation' or break, depending on the continue_iteration callable that
#     is supplied when the program is run.
# NOTE(review): the examples show multi-line JSON, while gen is given
# stop='\\n' (which looks like stop-at-newline) — confirm that replies really
# arrive as one complete JSON object.
template = """
{{#system~}}
You are ZoningGPT, a powerful assistant who is an expert in reviewing portions of Zoning documents and answering questions about them.

You have access to the following tools:
* Search: Useful for when you need to answer questions about zoning documents. The input is a keyword search to retrieve pages containing relevant information.

To invoke one of your tools, response with something like:
{
    "next_step": "Continue",
    "rationale": "I need to find the list of all zoning districts in Ansonia, Connecticut.",
    "action": "Search",
    "action_input": "zoning district list, Ansonia, Connecticut"
}

When you are ready to answer a question, respond with something like:
{
    "next_step": "Stop",
    "rationale": "The table indicates that the minimum lot size for a single family home in the AAA Residence District is 60,000 square feet. So the answer is 60,000 square feet.",
    "action": "Answer",
    "action_input": "60000"
}

Pay close attention to your answers in the past and be sure to match their schema. You will be evaluated on the quality and correctness of your answers.
{{~/system}}

{{#block hidden=True}}
{{#user~}}
What is the minimum lot size, in square feet, for a single family home in the first listed residential district of the town of Ansonia, Connecticut?
{{~/user}}

{{#assistant~}}
{
    "next_step": "Continue",
    "rationale": "I need to find the list of all zoning districts in Ansonia, Connecticut.",
    "action": "Search",
    "action_input": "zoning district list, Ansonia, Connecticut"
}
{{~/assistant}}

{{#user~}}
'ARTICLE II - ESTALISHMENT OF DISTRICTS AND ZONING\nADMINISTRATION\nSection 205 Zoning Districts Established\nFor the purposes of these regulations, the City of Ansonia is hereby divided into the following\nclasses of districts:\nSection 210 Zoning Map\nThe boundaries of the districts specified in Section 205 are hereby established as shown on a\nmap entitled "Official Zoning Map, City of Ansonia", dated February 1990, including any\nspecial maps and boundary descriptions supplementary thereto, adopted and declared to be a part\nof this Ordinance. In addition, the boundaries of the Flood Plain District are as delineated in\nSection 220 - Flood Plain District.\nSection 215 - Determination of District Boundaries\nIn determining the boundaries of districts shown on the Official Zoning Map, the following rules\nshall apply\n215.1 Unless otherwise shown, the district boundaries shall be construed to coincide with\nthe center lines of streets, alleys, parkways, waterways and main tracks of railroads. 
-\n215.2\nWhere such boundaries are indicated as approximate following the property lines of\nparks or other publicly owned lands, such lines shall be construed to be such\n17\nCELL (1, 1): \nDistrict\nCELL (1, 2): \nMap Code\nCELL (2, 1): \nAAA Residence District\nCELL (2, 2): \nAAA\nCELL (3, 1): \nAA Residence District\nCELL (3, 2): \nAA\nCELL (4, 1): \nA Residence District\nCELL (4, 2): \nA\nCELL (5, 1): \nB Residence District\nCELL (5, 2): \nB\nCELL (6, 1): \nGA Multi-Family Residence District\nCELL (6, 2): \nGA\nCELL (7, 1): \nMM Multi-Family Residence District\nCELL (7, 2): \nMM\nCELL (8, 1): \nBB Multi-Family Residence District\nCELL (8, 2): \nBB\nCELL (9, 1): \nRR Multi-Family Residence-Retail District\nCELL (9, 2): \nRR\nCELL (10, 1): \nNR Neighborhood-Retail District\nCELL (10, 2): \nNR\nCELL (11, 1): \nC Central Commercial District\nCELL (11, 2): \nC\nCELL (12, 1): \nSC Special Commercial District\nCELL (12, 2): \nSC\nCELL (13, 1): \nLI Light Industrial District\nCELL (13, 2): \nLI\nCELL (14, 1): \nHI Heavy Industrial District\nCELL (14, 2): \nHI\nCELL (15, 1): \nFlood Plain District\nCELL (15, 2): \nCELL (16, 1): \nCP Commerce Park District\nCELL (16, 2): \nCP\nCELL (17, 1): \nCCZ City Center Overlay Zone\nCELL (17, 2): \nCCOZ\n'
{{~/user}}

{{#assistant~}}
{
    "next_step": "Continue",
    "rationale": "The first residential district is AAA Residence District. I need to find the minimum lot size for a single family home in this district.",
    "action": "Search",
    "action_input": "AAA Residence District, minimum lot size, single family home"
}
{{~/assistant}}

{{#user~}}
'SCHEDULE D - MAXIMUM DWELLING DENSITY\nEffective: April 25, 2003.\n52\nCELL (1, 1): \nZoning District\nCELL (1, 2): \nMinimum Lot Size (In\nSquare Feet)\nCELL (1, 3): \nMax. Residential Density (Dwelling Units\nPer Acre)\nCELL (2, 1): \nAAA\nCELL (2, 2): \n60,000\nCELL (2, 3): \n0.59\nCELL (3, 1): \nAA\nCELL (3, 2): \n30,000\nCELL (3, 3): \n1.00\nCELL (4, 1): \nA\nCELL (4, 2): \n12,500\nCELL (4, 3): \n2.40\nCELL (5, 1): \nB\nCELL (5, 2): \n7,500\nCELL (5, 3): \n4.00\n'
{{~/user}}

{{#assistant~}}
{
    "next_step": "Stop",
    "rationale": "The table indicates that the minimum lot size for a single family home in the AAA Residence District is 60,000 square feet. So the answer is 60,000 square feet.",
    "action": "Answer",
    "action_input": "60000"
}
{{~/assistant}}
{{/block}}

{{#user~}}
{{question}}
{{~/user}}

{{#geneach 'conversation' stop=False}}
{{#assistant}}{{gen 'this.response' stop='\\n'}}{{/assistant}}

{{#if (continue_iteration this.response @index)}}
{{#user~}}
{{set 'this.observation' (await 'observation')}}
{{~/user}}
{{else}}
{{break}}
{{/if}}
{{/geneach}}
"""

In [109]:
def _first_search_hit(query):
    """Return the text of the top-ranked page for ``query``.

    When the docstore returns no documents, a short message is returned instead
    of raising ``IndexError``, so the agent receives an observation it can
    react to (for example by trying a different query).
    """
    hits = docstore.search(query)
    if not hits:
        return f"No pages matched the search query {query!r}."
    return hits[0].page_content


# Tool name -> one-argument callable that produces the observation text.
tools_dict = {
    "Search": _first_search_hit
}


class CustomAgentGuidance:
    def __init__(self, tools, num_iter=3):
        self.tools = tools
        self.num_iter = num_iter
        self.program = Program(
            template,
            await_missing=True,
            # silent=True,
            stream=False,
            caching=False,
            log=True,
            llm=OpenAI("gpt-3.5-turbo", api_key=OPENAI_API_KEY),
        )

    def do_tool(self, tool_name, actInput):
        return self.tools[tool_name](actInput)

    def continue_iteration(self, response, index):
        try:
            obj = json.loads(response)
            return obj["next_step"] != "Stop" and index < self.num_iter
        except json.decoder.JSONDecodeError:
            return False

    def __call__(self, query):
        # Create initial prompt with question, examples, and first round of thought
        
        result = self.program(question=query, continue_iteration=self.continue_iteration)
        rich.print(result.variables())
        response = json.loads(result["conversation"][-1]["response"])

        # Until we reach a final answer or we reach the max number of
        # iterations, keep reacting
        for iter in count():
            match response["next_step"]:
                case "Continue":
                    result = result(
                        observation=self.tools[response["action"]](
                            response["action_input"]
                        ),
                        continue_iteration=self.continue_iteration,
                    )
                    response = json.loads(result["conversation"][-1]["response"])
                case "Stop":
                    return response["action_input"], response["rationale"], iter
                case _:
                    raise ValueError(
                        f"Invalid intermediate answer: {response['next_step']}"
                    )


# Agent wired to the tools in tools_dict; num_iter=3 is the iteration limit
# that continue_iteration compares the loop index against.
custom_agent = CustomAgentGuidance(tools_dict, num_iter=3)

In [110]:
# NOTE(review): this question is about Bethel, but `docstore` was built only
# from pages where Town == "ansonia", so the Search tool has no Bethel text to
# return — confirm which town is intended.
custom_agent('What is the minimum lot size, in square feet, for a building in Commercial district of the town of Bethel, Connecticut?')

JSONDecodeError: Expecting value: line 1 column 1 (char 0)