In [27]:
import os
import pprint
import random
from typing import Dict, List

from langchain.chains import RetrievalQA
from langchain.docstore.document import Document
from langchain_community.document_loaders import UnstructuredFileIOLoader
from langchain_community.vectorstores import Chroma
from langchain_community.vectorstores.utils import filter_complex_metadata
from langchain_openai import ChatOpenAI
from langchain_openai.embeddings import OpenAIEmbeddings
from unstructured.cleaners.core import clean_extra_whitespace

p = pprint.PrettyPrinter(indent=4)

In [28]:
# PDF to index, resolved relative to the notebook's working directory.
FILE_PATH: str = "pedpolicies-storm-drainage-policy.pdf"

# Credentials must never be committed with the notebook, so the key is read
# from the environment. The key that used to be hardcoded in this cell should
# be treated as leaked and revoked.
OPENAI_API_KEY: str = os.environ.get("OPENAI_API_KEY", "")
if not OPENAI_API_KEY:
    # Warn instead of raising so the cell itself still runs; the cells that
    # pass this key to OpenAI need a real value.
    print("WARNING: OPENAI_API_KEY is not set; export it before running the OpenAI cells.")

# Candidate questions about the storm drainage policy document.
questions: List[str] = [
    "What are the primary goals and objectives for stormwater and drainage management within the City of Hamilton, as outlined in the document?",
    "Can you describe the legislative framework that influences the stormwater management practices in the City of Hamilton?",
    "How does the document address the management of runoff quantity and what specific policies does it propose for flood management and erosion control?",
    "What are the guidelines mentioned for stormwater management in new developments versus existing developments?",
    "How does the City of Hamilton's Storm Drainage Policy approach the management of runoff quality?",
    "Can you explain the role and requirements of the Combined Sewer System as discussed in the document?",
    "What is the Cash-in-Lieu Policy mentioned in the document, and in what context is it applied?",
    "How does the document integrate the Planning and Design Process in stormwater management?",
    "What are the specific challenges and solutions proposed for stormwater management in the context of urban development in Hamilton?",
    "How does the document align with provincial and federal guidelines and objectives in the context of stormwater management?",
]

# One question is sampled per run. No seed is set, so a re-run may pick a
# different question than the one whose output is saved in the notebook.
QUESTION: str = random.choice(questions)

def process_llm_response(llm_response: Dict[str, "str | List[Document]"]) -> None:
    """Print a QA chain's answer followed by the passages it was based on.

    Args:
        llm_response: Mapping holding the answer text under ``"result"`` and
            the retrieved documents under ``"source_documents"``. Each
            document is read through its ``page_content`` and ``metadata``
            attributes.

    A missing ``"source_documents"`` entry is treated as an empty list, and a
    document whose metadata has no ``page_number`` is printed with page
    ``unknown`` instead of raising ``KeyError``.
    """
    print(llm_response['result'])
    print('\n\nSources:')
    for source in llm_response.get("source_documents", []):
        # Not every source is guaranteed to carry a page number.
        page = source.metadata.get('page_number', 'unknown')
        print(f"Text: {source.page_content}\n Page: {page}")

In [29]:
# Read the PDF through the Unstructured loader in "elements" mode, running the
# whitespace cleaner over the extracted text. Loading happens while the file
# handle is still open.
with open(FILE_PATH, "rb") as pdf_file:
    loader = UnstructuredFileIOLoader(
        file=pdf_file,
        mode="elements",
        post_processors=[clean_extra_whitespace],
    )
    docs: List[Document] = loader.load()

In [30]:
# Embed the loaded documents and index them in a Chroma store.
#
# `from_documents` is a classmethod, so it is called on the class and given
# `persist_directory` directly. Calling it on a separately constructed
# `Chroma(persist_directory=...)` instance does not carry that directory over
# to the store that is actually built.
# NOTE(review): with a persistent directory, re-running this cell may add the
# same documents again; clear ./chroma_db between runs if duplicates matter.
vectordb = Chroma.from_documents(
    # Strip metadata values the vector store cannot hold.
    documents=filter_complex_metadata(documents=docs),
    # "davinci" is a model name, not an API type, so `openai_api_type` is no
    # longer passed.
    embedding=OpenAIEmbeddings(api_key=OPENAI_API_KEY),
    persist_directory="./chroma_db",
)

In [31]:
# Chat model that writes the answer.
chat_model = ChatOpenAI(api_key=OPENAI_API_KEY)

# Retriever over the vector store: "mmr" search with k=6 results per query.
mmr_retriever = vectordb.as_retriever(search_type="mmr", search_kwargs={"k": 6})

# "stuff" chain; the retrieved source documents are returned with the answer
# so they can be shown next to it.
qa_chain = RetrievalQA.from_chain_type(
    llm=chat_model,
    chain_type="stuff",
    retriever=mmr_retriever,
    return_source_documents=True,
)


In [32]:
# Run the retrieval-QA chain on the sampled question. The response is read
# later through its 'result' and 'source_documents' entries.
llm_response = qa_chain.invoke(QUESTION)

In [33]:
# Print the generated answer, then the text and page number of each source.
process_llm_response(llm_response)

The document addresses the management of runoff quantity by discussing the "zero increase in peak runoff rate" policy that some municipal jurisdictions have implemented. This policy aims to control post-development runoff and prevent flooding. However, the document also acknowledges that this policy may have negative effects on watercourses, such as increased erosion.

Specific policies proposed for flood management and erosion control are not explicitly mentioned in the given context. Further information or sections of the document may provide more details on these specific policies.


Sources:
Text: GENERAL STORM DRAINAGE POLICIES....................................................................... 9 3.1 Management of Runoff Quantity............................................................................. 9 3.1.1 Flooding Management ................................................................................ 9 3.1.2 Watercourse Erosion Control...............................