In [2]:
!pip install -U pip
!pip install llama_index
!pip install -U transformers
!pip install panda
!pip install numpy
!pip install torch torchvision torchaudio
!pip install langchain>=0.0.152
!pip install python-dotenv



In [3]:
%reload_ext dotenv
%dotenv 

In [4]:
# Import necessary packages
import pandas as pd, numpy as np
import os, openai
from transformers import GPT2Tokenizer, GPT2LMHeadModel


os.environ['OPENAI_API_KEY'] = ""

#### Preparing the Dataset

In [5]:
df = pd.read_json("./regItems.json")
df = df.replace(to_replace="", value=np.nan).dropna(axis=0) # remove null values
df

Unnamed: 0,_id,chapter,article,title,paragraphText
0,{'$oid': '6462e3dcae035d1237b105d5'},Chapter 14. Water Permits,Article 4. Local Primacy Delegation,§ 64260. Workplans.,(a) Each local primacy agency shall develop an...
1,{'$oid': '6462e3ddae035d1237b105d6'},Chapter 14. Water Permits,Article 4. Local Primacy Delegation,§ 64259. Program Management.,(a) A local primacy agency shall establish and...
2,{'$oid': '6462e3ddae035d1237b105d7'},Chapter 14. Water Permits,Article 4. Local Primacy Delegation,§ 64258. Enforcement.,(a) A local primacy agency shall take enforcem...
7,{'$oid': '6462e3dfae035d1237b105dc'},Chapter 16. California Waterworks Standards,Article 7. Additives,"§ 64593. Use of Uncertified Chemicals, Materia...","(a) A water supplier may use a chemical, mater..."
8,{'$oid': '6462e3dfae035d1237b105dd'},Chapter 16. California Waterworks Standards,Article 7. Additives,§ 64590. Direct Additives.,"(a) Annual product testing,\n(b) Annual facili..."
...,...,...,...,...,...
725,{'$oid': '6462e4feae035d1237b108aa'},Chapter 16. California Waterworks Standards,Article 4. Materials and Installation of Water...,"§ 64576. Air-Release, Air Vacuum, and Combinat...",(a) Installed such that its vent opening is ab...
726,{'$oid': '6462e4feae035d1237b108ab'},Chapter 16. California Waterworks Standards,Article 4. Materials and Installation of Water...,§ 64575. Flushing.,(a) A flushing valve or blowoff shall be provi...
728,{'$oid': '6462e4ffae035d1237b108ad'},Chapter 16. California Waterworks Standards,Article 4. Materials and Installation of Water...,§ 64572. Water Main Separation.,(a) New water mains and new supply lines shall...
729,{'$oid': '6462e4ffae035d1237b108ae'},Chapter 16. California Waterworks Standards,Article 4. Materials and Installation of Water...,§ 64570. Materials and Installation.,(a) All newly installed water mains shall comp...


In [6]:
from langchain.llms.base import LLM
from llama_index import LLMPredictor, GPTVectorStoreIndex, PromptHelper, ServiceContext
from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
from IPython.display import Markdown
from typing import Optional, List, Mapping, Any

2023-05-19 18:14:44.170382: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-05-19 18:14:45.560943: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory
2023-05-19 18:14:45.561477: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory


In [7]:
# 2. Load the BigScience Bloomz model and tokenizer
model_name = "bigscience/bloom-560m" # "bigscience/bloomz"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name, config='T5Config')

In [8]:
class CustomLLM(LLM):
    # 3. Create the pipeline for question answering
    pipeline = pipeline(
        model=model,
        tokenizer=tokenizer,
        task="text-generation",
        # device=0, # GPU device number
        # max_length=525,
        do_sample=True,
        top_p=0.95,
        top_k=50,
        temperature=0.7
    )

    def _call(self, prompt: str, stop: Optional[List[str]] = None) -> str:
        prompt_length = len(prompt)
        response = self.pipeline(prompt, max_new_tokens=525)[0]["generated_text"]

        # only return newly generated tokens
        return response[prompt_length:]

    @property
    def _identifying_params(self) -> Mapping[str, Any]:
        return {"name_of_model": model_name}

    @property
    def _llm_type(self) -> str:
        return "custom"

Xformers is not installed correctly. If you want to use memorry_efficient_attention to accelerate training use the following command to install Xformers
pip install xformers.


In [9]:
# define prompt helper
# set maximum input size
max_input_size = 2048
# set number of output tokens
num_output = 525
# set maximum chunk overlap
max_chunk_overlap = 20
prompt_helper = PromptHelper(max_input_size, num_output, max_chunk_overlap)

In [10]:
#define our llm
llm_predictor = LLMPredictor(llm=CustomLLM())
service_context = ServiceContext.from_defaults(llm_predictor=llm_predictor, prompt_helper=prompt_helper)

#### Create Documents for LLama-index 

In [11]:
from llama_index import download_loader, Document, GPTListIndex, SimpleWebPageReader, StorageContext, load_index_from_storage
from pathlib import Path

In [12]:
# instead of using data laoders methods, we can create documents
# for each regulation article in each chapter in llama index manually.
from json import loads, dumps
parsed = loads(df.to_json(orient="records"))

documents = []
for item in parsed:
    document = Document(item['paragraphText'], 
                        item['_id']['$oid'], 
                        extra_info={"chapter": item['chapter'], 
                                    "article": item['article'], 
                                    "title": item['title']})
    documents.append(document)

In [13]:
documents[0]

Document(text='(a) Each local primacy agency shall develop and submit to the State Board a proposed annual program workplan for the upcoming fiscal year. The local primacy agency shall submit the proposed annual workplan to the State Board no later than May 1 of each year for the fiscal year commencing July 1 of that year; except for the initial proposed annual workplan submitted in accordance with section 64252.\n(b) The workplan developed pursuant to subsection (a) shall describe the activities proposed to be performed by the local primacy agency during the forthcoming fiscal year and shall include:\n(1) The anticipated number of new small water system permits to be issued and the proposed number of existing community or noncommunity permits (designated by category) to be updated or amended.\n(2) A description of how the small water system inventory specified in section 64255 (a) will be maintained.\n(3) A description of how the surveillance activities specified in section 64255 (b) 

In [None]:
# no need to run this again, if you already create and save the vector index 
# in the storage folder, as it might consume your openai token to create
# vector embeddings.

# you can skip this section and next one

# using llama index vector store index
index = GPTVectorStoreIndex.from_documents(documents, service_context=service_context)

In [14]:
# save the vector index in ./storage
index.storage_context.persist()

In [19]:
# rebuild storage context
storage_context = StorageContext.from_defaults(persist_dir="./storage")
# load index
index = load_index_from_storage(storage_context)

In [20]:
%%time
query_engine = index.as_query_engine()
query_str = "if treatment facility classification is T1, what is the minimum certification of chief operator?"
response = query_engine.query(query_str)
display(Markdown(f"<b>{response}</b><br />"))

<b>
T1</b><br />

CPU times: user 235 ms, sys: 5.61 ms, total: 241 ms
Wall time: 1.74 s


- https://gpt-index.readthedocs.io/en/latest/examples/evaluation/TestNYC-Evaluation-Query.html

In [27]:
from llama_index.evaluation import QueryResponseEvaluator
from llama_index import Response
# define evaluator
evaluator = QueryResponseEvaluator(service_context=service_context)

In [28]:
# define jupyter display function
# this method displays the first resource from the list of references that vector index found
def display_eval_df(query: str, response: Response, eval_result: str) -> None:
    eval_df = pd.DataFrame(
        {
            "Query": query,
            "Response": str(response), 
            "Source": response.source_nodes[0].source_text[:1000] + "...",
            "Evaluation Result": eval_result
        },
        index=[0]
    )
    eval_df = eval_df.style.set_properties(
        **{
            'inline-size': '600px',
            'overflow-wrap': 'break-word',
        }, 
        subset=["Response", "Source"]
    )
    display(eval_df)

In [20]:
# running evaluator.evaluate might take a few minutes
# you can ignore the warning
eval_result = evaluator.evaluate(query_str, response)
display_eval_df(query_str, response, eval_result)



Unnamed: 0,Query,Response,Source,Evaluation Result
0,"if treatment facility classification is T1, what is the minimum certification of chief operator?",T1,"chapter: Chapter 13. Operator Certification article: Article 2. Operator Certification Grades title: § 63765. Water Treatment Facility Staff Certification Requirements. (a) Except as provided in (c), chief and shift operators shall possess valid operator certificates pursuant to Table 63765-A. Table 63765-A -- Minimum Certification Requirements for Chief and Shift Operators Treatment Facility Classification,Minimum Certification of Chief Operator,Minimum Certification of Shift Operator T1,T1,T1 T2,T2,T1 T3,T3,T2 T4,T4,T3 T5,T5,T3 (b) Treatment operators not designated by the water supplier as chief or shift operator pursuant to section 64413.5 shall be certified but may hold certificates of any grade. (c) Until January 1, 2003, a shift and/or chief operator may continue to be employed in that capacity provided that the operator: (1) Is in compliance with the certification requirements that were in effect on December 31, 2000, and (2) Has been in continuous employment since December...",YES


In [29]:
%%time
query_engine = index.as_query_engine()
query_str = """
When should a raw water sample be collected for a groundwater source that is treated 
    with a primary or residual disinfectant on a continuous basis and is not monitored 
    pursuant to Section 64654.8(b)(1)(B)? Select one from the following list of options:
    A) Each calendar quarter, with samples collected during the same month
    B) Each month, with samples collected during the same week
    C) Each year, with samples collected during the same season
    D) Only if the raw water sample is total coliform-positive
"""
response = query_engine.query(query_str)
display(Markdown(f"<b>{response}</b><br />"))

eval_result = evaluator.evaluate(query_str, response)
display_eval_df(query_str, response, eval_result)

<b>A) Each calendar quarter, with samples collected during the same month</b><br />



Unnamed: 0,Query,Response,Source,Evaluation Result
0,"When should a raw water sample be collected for a groundwater source that is treated with a primary or residual disinfectant on a continuous basis and is not monitored pursuant to Section 64654.8(b)(1)(B)? Select one from the following list of options:  A) Each calendar quarter, with samples collected during the same month  B) Each month, with samples collected during the same week  C) Each year, with samples collected during the same season  D) Only if the raw water sample is total coliform-positive","A) Each calendar quarter, with samples collected during the same month","chapter: Chapter 15. Domestic Water Quality and Monitoring Regulations article: Article 3. Primary Standards--Bacteriological Quality title: § 64421. General Requirements. (a) The requirements of this Article apply to public water systems. (b) In addition to the bacteriological monitoring requirements in Sections 64423, 64424, 64425, and 64426.9, a public water system shall perform special purpose bacteriological monitoring as follows: (1) After any system pressure loss to less than five psi. Samples collected shall represent the water quality in the affected portions of the system; and (2) For a groundwater (not GWUDI) source that is treated with a primary or residual disinfectant on a continuous basis and is not monitored pursuant to Section 64654.8(b)(1)(B): (A) A raw water sample shall be collected each calendar quarter, with samples collected during the same month (first, second, or third) of each calendar quarter; and (B) If the raw water sample is total coliform-positive, a raw...",YES


CPU times: user 8min 39s, sys: 14.4 s, total: 8min 54s
Wall time: 5min 16s


In [30]:
%%time
query_engine = index.as_query_engine()
query_str = """
When should a raw water sample be collected for a groundwater source that is treated 
    with a primary or residual disinfectant on a continuous basis and is not monitored 
    pursuant to Section 64654.8(b)(1)(B)? 
    A) Each calendar quarter, with samples collected during the same month
    B) Each month, with samples collected during the same week
    C) Each year, with samples collected during the same season
    D) Only if the raw water sample is total coliform-positive
"""
response = query_engine.query(query_str)
display(Markdown(f"<b>{response}</b><br />"))

eval_result = evaluator.evaluate(query_str, response)
display_eval_df(query_str, response, eval_result)

<b>
D) Only if the raw water sample is total coliform-positive</b><br />



Unnamed: 0,Query,Response,Source,Evaluation Result
0,"When should a raw water sample be collected for a groundwater source that is treated with a primary or residual disinfectant on a continuous basis and is not monitored pursuant to Section 64654.8(b)(1)(B)? A) Each calendar quarter, with samples collected during the same month  B) Each month, with samples collected during the same week  C) Each year, with samples collected during the same season  D) Only if the raw water sample is total coliform-positive",D) Only if the raw water sample is total coliform-positive,"chapter: Chapter 15. Domestic Water Quality and Monitoring Regulations article: Article 3. Primary Standards--Bacteriological Quality title: § 64421. General Requirements. (a) The requirements of this Article apply to public water systems. (b) In addition to the bacteriological monitoring requirements in Sections 64423, 64424, 64425, and 64426.9, a public water system shall perform special purpose bacteriological monitoring as follows: (1) After any system pressure loss to less than five psi. Samples collected shall represent the water quality in the affected portions of the system; and (2) For a groundwater (not GWUDI) source that is treated with a primary or residual disinfectant on a continuous basis and is not monitored pursuant to Section 64654.8(b)(1)(B): (A) A raw water sample shall be collected each calendar quarter, with samples collected during the same month (first, second, or third) of each calendar quarter; and (B) If the raw water sample is total coliform-positive, a raw...",YES


CPU times: user 8min 30s, sys: 12.6 s, total: 8min 43s
Wall time: 5min 11s


In [21]:
from typing import List
# define jupyter display function
# this method displays all context that vector index extracted/referenced and tells us whether they are 
# related to the query or not
def display_eval_sources(query: str, response: Response, eval_result: List[str]) -> None:
    
    sources = [s.node.get_text() for s in response.source_nodes]
    eval_df = pd.DataFrame(
        {
            "Source": sources,
            "Eval Result": eval_result, 
        },
    )
    eval_df.style.set_caption(query)
    eval_df = eval_df.style.set_properties(
        **{
            'inline-size': '600px',
            'overflow-wrap': 'break-word',
        }, 
        subset=["Source"]
    )
    

    display(eval_df)

In [49]:
%%time
query_engine = index.as_query_engine()
query_str = """When is a public water system in violation of the E. coli MCL? 
a) When the system has a total coliform-positive routine sample followed by a E. coli-positive repeat sample
b) When the system fails to take all required repeat samples following an E. coli-positive routine sample
c) When the system fails to test for E. coli when any repeat sample tests positive for total coliform
d) All of the above
"""
response = query_engine.query(query_str)
display(Markdown(f"<b>{response}</b><br />"))

<b>
d) All of the above</b><br />

CPU times: user 367 ms, sys: 3.91 ms, total: 371 ms
Wall time: 4.18 s


In [50]:
eval_source_result = evaluator.evaluate_source_nodes(query_str, response)



In [51]:
display_eval_sources(query_str, response, eval_source_result)

Unnamed: 0,Source,Eval Result
0,"chapter: Chapter 15. Domestic Water Quality and Monitoring Regulations article: Article 3. Primary Standards--Bacteriological Quality title: § 64426.1. E. coli Maximum Contaminant Level (MCL). (a) A public water system shall determine compliance with the (b) A public water system is in violation of the (1) The system has an (2) The system has a total coliform-positive repeat sample following an (3) The system fails to take all required repeat samples following an (4) The system fails to test for (c) If a public water system is not in compliance with subsections (b)(1) through (4), during any month in which it supplies water to the public, the system shall notify the State Board by the end of the day on which this is determined. The system shall also notify the public pursuant to Sections 64463, 64463.1, and 64465. (d) A public water system in violation of the reporting requirement in subsection (c) to notify the State Board shall notify the public pursuant to Sections 64463, 64463.7, and 64465. (e) A public water system shall not be eligible for a variance or exemption from the",YES
1,"chapter: Chapter 15. Domestic Water Quality and Monitoring Regulations article: Article 3. Primary Standards--Bacteriological Quality title: § 64425. Sample Invalidation. (a) A public water system may request the State Board to invalidate a routine or repeat sample for which a total coliform-positive result has been reported if the system demonstrates: (1) All repeat sample(s) collected at the same tap as the original total coliform-positive sample also are total coliform- positive and all repeat samples collected within five service connections of the original tap are not total coliform-positive; or (2) The laboratory did not follow the prescribed analytical methods pursuant to Section 64415(a), based on a review of laboratory documentation by the State Board. The system shall submit to the State Board a written request for invalidation along with the laboratory documentation, the system's sample collection records and any observations noted during sample collection and delivery. The system shall require the laboratory to provide the system with documentation which shall include, but not be limited to: (A) A letter from the director of the laboratory having generated the data, confirming the invalidation request by reason of laboratory accident or error; (B) Complete sample identification, laboratory sample log number (if used), date and time of collection, date and time of receipt by the laboratory, date and time of analysis for the sample(s) in question; (C) Complete description of the accident or error alleged to have invalidated the result(s); (D) Copies of all analytical, operating, and quality assurance records pertaining to the incident in question; and (E) Any observations noted by laboratory personnel when receiving and analyzing the sample(s) in question. (b) Whenever any total coliform routine or repeat sample result indicative of the absence of total coliforms has been declared invalid by the laboratory due to interference problems as specified at 40 Code Federal Regulations, Section 141.853(c)(2) (78 Fed. Reg. 10270 (February 13, 2013)), which is incorporated by reference, the public water system shall collect a replacement sample from the same location as the original sample within 24 hours of being notified of the interference problem, and have it analyzed for the presence of total coliforms. The system shall continue to re-sample at the original site within 24 hours and have the samples analyzed until a valid result is obtained. If the system is unable to collect the sample within the 24-hour time period or deliver the sample to the laboratory within 24 hours after collection because of circumstances beyond its control, the system shall notify the State Board within 24 hours. The State Board will then determine how much time the system will have to collect the replacement sample. (c) A total coliform-positive sample invalidated under this section does not count towards meeting the minimum routine and repeat sample monitoring requirements of Sections 64423 and 64424, respectively. (d) A public water system in violation of the replacement sample monitoring requirements of subsection (b) shall notify the State Board within 10 days after it learns of the violation and notify the public pursuant to Sections 64463, 64463.7, and 64465. (e) A public water system in violation of the reporting requirement in subsection (d) to notify the State Board shall notify the public pursuant to Sections 64463, 64463.7, and 64465.",YES


In [55]:
%%time
query_engine = index.as_query_engine()
response = query_engine.query("""What is the requirement for choosing sample sites for bacteriological sampling?
a) The sample sites should be located in the same pressure zone
b) The sample sites should be located in the same water source
c) The sample sites should be representative of water throughout the distribution system
d) The sample sites should be located in the same distribution reservoir
""")
display(Markdown(f"<b>{response}</b><br />"))

<b>
C) The sample sites should be representative of water throughout the distribution system.</b><br />

CPU times: user 235 ms, sys: 0 ns, total: 235 ms
Wall time: 4.72 s


#### Testing

In [21]:
queries = ["from Table 64423-A, if monthly population served is 1000, what is the range of service connections and minimum number of samples per month?", 
          "what is the maximum contaminant level of aluminum that public water system shall comply?",
          "if monthly population served is 500, what is the range of service connections and minimum number of samples per month?",
          "if monthly population served is 4000, what is the minimum number of samples per month?",
          "what will the PWS need to do if there is a violation in lead concentration?",
          "How do we determine how many samples a PWS will need to take for lead?",
          "What is the difference between a level 1 and 2 assessments?",
          "What is a level 2 assessment?",
          "What is a level 1 assessment?",
          "What is DLR?",
          "What does DLR stand for?",
          "what is the exact meaning of the water treatment facility?",
          "What are some requirements an applicant should have before taking the T2 operator exam?",
          "what does the term State Board stands for?",
          "what does awwa abbreviation mean?",
          "what is AWWA?", "what is cross-connection?"]

In [22]:
for query in queries[0: 8]:
    print(f"query: {query}")
    # for name, index in indices.items():
    #     print(f"{name}'s token usage:")
    response = query_engine.query(query)
        # print(f"{name} response:")
    display(Markdown(f"<b>{response}</b><br />"))
    print("----------------------------------------")
    print("----------------------------------------")

query: from Table 64423-A, if monthly population served is 1000, what is the range of service connections and minimum number of samples per month?


<b>
The range of service connections for a population of 1000 served is 101 to 500, and the minimum number of samples per month is 10.</b><br />

----------------------------------------
----------------------------------------
query: what is the maximum contaminant level of aluminum that public water system shall comply?


<b>
The maximum contaminant level of aluminum that public water systems shall comply is 0.2 mg/L.</b><br />

----------------------------------------
----------------------------------------
query: if monthly population served is 500, what is the range of service connections and minimum number of samples per month?


<b>
The range of service connections is 101 to 500 and the minimum number of samples per month is 5.</b><br />

----------------------------------------
----------------------------------------
query: if monthly population served is 4000, what is the minimum number of samples per month?


<b>
4</b><br />

----------------------------------------
----------------------------------------
query: what will the PWS need to do if there is a violation in lead concentration?


<b>
If there is a violation in lead concentration, the PWS will need to collect a follow-up sample at each affected site for each WQP that did not meet the Department-specified values. The criteria in paragraphs (d)(1) and (2) shall be applied to the followup sample results to determine if another excursion has occurred. The system will also need to resume standard WQP tap monitoring pursuant to section 64680 (General WQP Monitoring Requirements).</b><br />

----------------------------------------
----------------------------------------
query: How do we determine how many samples a PWS will need to take for lead?


<b>
To determine how many samples a PWS will need to take for lead, we must refer to § 64680. General WQP Monitoring Requirements. This section outlines the standard and reduced tap sampling requirements based on the system size. The number of samples required for each applicable WQP is specified in Table 64680-A.</b><br />

----------------------------------------
----------------------------------------
query: What is the difference between a level 1 and 2 assessments?


<b>
The difference between a Level 1 and Level 2 assessment is that a Level 1 assessment is conducted by the system operator or owner as soon as practical after exceeding any trigger, while a Level 2 assessment is arranged with the State Board to be conducted as soon as practical after exceeding any trigger. Additionally, a Level 1 assessment must be submitted to the State Board within 30 days, while a Level 2 assessment must be submitted to the State Board within 30 days and must include the State Board assessment.</b><br />

----------------------------------------
----------------------------------------
query: What is a level 2 assessment?


<b>
A Level 2 assessment is an assessment conducted by a public water system to identify the possible presence of sanitary defects and defects in distribution system coliform monitoring practices. It includes a review and identification of the minimum elements in subsections (a)(2)(A) through (E) and must be conducted as soon as practical after exceeding any trigger. The assessment must also describe sanitary defects detected (and if applicable, may note no sanitary defects were detected), corrective actions completed, and a proposed timetable for any corrective actions not already completed.</b><br />

----------------------------------------
----------------------------------------


### Evaluation

In [22]:
# define evaluator
evaluator = QueryResponseEvaluator(service_context=service_context)

#### set of multiple choices question

In [23]:
evaluationSet = [
    {"query":
    '''
    When should a raw water sample be collected for a groundwater source that is treated 
    with a primary or residual disinfectant on a continuous basis and is not monitored 
    pursuant to Section 64654.8(b)(1)(B)? Select one from the following list of options:
    A) Each calendar quarter, with samples collected during the same month
    B) Each month, with samples collected during the same week
    C) Each year, with samples collected during the same season
    D) Only if the raw water sample is total coliform-positive
    ''', "answer": "A"}
    ,
    {"query":
    '''
    What is the requirement for choosing sample sites for bacteriological sampling?
    a) The sample sites should be located in the same pressure zone
    b) The sample sites should be located in the same water source
    c) The sample sites should be representative of water throughout the distribution system
    d) The sample sites should be located in the same distribution reservoir
    ''', "answer": "C"}
    , 
    {"query": 
    '''
    What is the minimum number of routine bacteriological water samples required for a transient-noncommunity water system using only groundwater and serving more than 1000 persons during any month?
    a) One sample per quarter
    b) One sample per month
    c) Based on the known population served
    d) Not required to collect routine samples
    ''', "answer": "C"},
    {"query": 
     '''
    What is the deadline for reporting analytical results of all required samples collected for a public water system in a calendar month?
    a) The first day of the following month
    b) The fifth day of the following month
    c) The tenth day of the following month
    d) The fifteenth day of the following month
    ''', "answer": "C"},
    {"query": 
     '''What is the consequence for a public water system in violation of the monitoring requirement of subsection (a) to test the same sample for E. coli following a total coliform-positive routine sample?
    a) A fine will be imposed on the public water system
    b) The public water system will be shut down
    c) The public water system must notify the State Board within 10 days and notify the public
    d) The public water system must notify the State Board within 24 hours and notify the public
    ''', "answer": "C"},
    {"query": 
     '''What is the minimum number of repeat samples required for each total coliform-positive sample?
    a) One
    b) Two
    c) Three
    d) Four
    ''', "answer": "C"},
    {"query": 
     '''When collecting the repeat sample set, where should a public water system collect at least one repeat sample from?
    a) Anywhere within the distribution system
    b) At least five service connections upstream or downstream of the original site
    c) At least one mile upstream or downstream of the original site
    d) At least one repeat sample should be from upstream and one from downstream unless there is no upstream and/or downstream service connection
     ''', "answer": "D"},
    {"query": 
     ''' What should a public water system do if one or more samples in the repeat sample set is total coliform-positive?
    a) Collect and have analyzed an additional set of repeat samples
    b) Notify the State Board by the end of the day on which this is determined
    c) Repeat the process until either no total coliforms are detected in one complete repeat sample set or the system determines that a coliform treatment technique trigger has been exceeded
    d) All of the above
    ''', "answer": "D"},
    {"query": 
     '''What should a public water system do if a total coliform routine or repeat sample result indicative of the absence of total coliforms has been declared invalid by the laboratory due to interference problems?
    A) Collect a replacement sample from a different location
    B) Wait for the laboratory to provide a valid result
    C) Collect a replacement sample from the same location as the original sample within 24 hours of being notified of the interference problem
    D) Notify the State Board within 10 days after it learns of the invalidation
    ''', "answer": "C"},
    {"query": 
     '''What happens to a total coliform-positive sample invalidated under section 64425?
    A) It counts towards meeting the minimum routine and repeat sample monitoring requirements
    B) It does not count towards meeting the minimum routine and repeat sample monitoring requirements
    C) It is automatically replaced with a valid sample
    D) It is reported to the public pursuant to Sections 64463, 64463.7, and 64465
    ''', "answer": "B"},
    {"query": 
     '''What is considered a possible significant rise in bacterial count in a public water system collecting at least 40 samples per month?
    a) A total coliform-positive routine sample followed by one total coliform-positive repeat sample
    b) A total coliform-positive routine sample followed by two total coliform-positive repeat samples
    c) A total coliform-positive routine sample followed by three total coliform-positive repeat samples
    d) A total coliform-positive routine sample followed by four total coliform-positive repeat samples
    ''', "answer": "B"},
    {"query": 
     '''What should a public water system do when a significant rise in bacterial count is detected?
    a) Conduct an investigation and submit a report to the State Board within 48 hours
    b) Contact the State Board within 24 hours and conduct an investigation within 48 hours
    c) Contact the State Board by the end of the day and conduct an investigation within 24 hours
    d) Contact the State Board within 48 hours and conduct an investigation within 72 hours
    ''', "answer": "C"},
    {"query": 
     '''When is a public water system in violation of the E. coli MCL?
    a) When the system has a total coliform-positive routine sample followed by a E. coli-positive repeat sample
    b) When the system fails to take all required repeat samples following an E. coli-positive routine sample
    c) When the system fails to test for E. coli when any repeat sample tests positive for total coliform
    d) All of the above
    ''', "answer": "D"},
    {"query": 
     '''What is the coliform treatment technique violation?
    a) When a public water system fails to complete a State Board-approved start-up procedure prior to serving water to the public.
    b) When a public water system exceeds a treatment technique trigger specified in Section 64426.7 and then fails to conduct the required assessment or corrective actions within the timeframe specified in Section 64426.8.
    c) When a public water system fails to notify the State Board by the end of the next business day on which the violation is determined.
    d) When a public water system fails to notify the public pursuant to Sections 64463, 64463.4, and 64465.
    ''', "answer": "B"},
    {"query": 
     '''What is the consequence of a public water system violating the coliform treatment technique?
    a) The system will be shut down permanently.
    b) The system will be fined by the State Board.
    c) The system will be required to notify the State Board by the end of the next business day on which the violation is determined.
    d) The system will be required to notify the public pursuant to Sections 64463, 64463.4, and 64465.
    ''', "answer": "D"},
    {"query": 
     '''What is a Level 1 treatment technique trigger for coliform in California drinking water regulations?
    a) For a system taking 40 or more samples per month, the system exceeds 5.0% total coliform-positive samples for the month
    b) For a system taking fewer than 40 samples per month, the system has two or more total coliform-positive samples in the same month
    c) The system fails to take every required repeat samples after any single total coliform-positive sample
    d) The system has an E. coli MCL violation
    ''', "answer": "A"},
    {"query": 
     '''What is a Level 2 treatment technique trigger for coliform in California drinking water regulations?
    a) For a system taking 40 or more samples per month, the system exceeds 5.0% total coliform-positive samples for the month
    b) For a system taking fewer than 40 samples per month, the system has two or more total coliform-positive samples in the same month
    c) The system has an E. coli MCL violation
    d) The system has a second Level 1 treatment technique trigger, within a rolling 12-month period
    ''', "answer": "C or D"},
    {"query": 
    '''What is the purpose of a Level 1 assessment in California drinking water regulations?
    a) To identify the possible presence of sanitary defects and defects in distribution system coliform monitoring practices
    b) To comply with State Board directives
    c) To arrange with the State Board to conduct a Level 2 assessment
    d) To notify the public of violations
    ''', "answer": "A"},
    {"query": 
     ''' What is the timeframe for a public water system to submit a completed assessment to the State Board after exceeding a trigger for a Level 2 treatment technique?
    a) Within 10 days
    b) Within 20 days
    c) Within 30 days
    d) Within 40 days
    ''', "answer": "C"},
    {"query": 
     '''What is required for a seasonal system to obtain written State Board approval to serve water to the public?
    a) Completion of a State Board-approved start-up procedure
    b) Certification of compliance with the State Board-approved start-up procedure
    c) Submission of results of bacteriological and disinfectant residual monitoring
    d) All of the above
    ''', "answer": "D"}
]

#### first evaluation

In [24]:
import time
for index in range(len(evaluationSet)):
    query = evaluationSet[index]["query"]
    answer = evaluationSet[index]["answer"]
    start = time.time()
    response = query_engine.query(query)
    stop = time.time()
    duration = stop-start
    # display the expected and actual response
    print(f"no. {index+1}: \n expected {answer}, \n actual {response}, \n duration {round(duration, 2)}s\n")
    
    # display reference resources
    # eval_source_result = evaluator.evaluate_source_nodes(query, response)    
    # display_eval_sources(query_str, response, eval_source_result)

no. 1: 
 expected A, 
 actual A) Each calendar quarter, with samples collected during the same month, 
 duration 2.18s

no. 2: 
 expected C, 
 actual C) The sample sites should be representative of water throughout the distribution system, 
 duration 3.34s

no. 3: 
 expected C, 
 actual C) Based on the known population served, 
 duration 1.93s

no. 4: 
 expected C, 
 actual C) The tenth day of the following month., 
 duration 2.76s

no. 5: 
 expected C, 
 actual C) The public water system must notify the State Board within 10 days and notify the public., 
 duration 2.94s

no. 6: 
 expected C, 
 actual C) Three, 
 duration 1.48s

no. 7: 
 expected D, 
 actual Answer: d) At least one repeat sample should be from upstream and one from downstream unless there is no upstream and/or downstream service connection, 
 duration 3.94s

no. 8: 
 expected D, 
 actual d) All of the above, 
 duration 1.99s

no. 9: 
 expected C, 
 actual C) Collect a replacement sample from the same location as the or

#### second evaluation

In [89]:
import time
for index in range(len(evaluationSet)):
    query = evaluationSet[index]["query"]
    answer = evaluationSet[index]["answer"]
    start = time.time()
    response = query_engine.query(query)
    stop = time.time()
    duration = stop-start
    print(f"no. {index+1}: \n expected {answer}, \n actual {response}, \n duration {round(duration, 2)}s\n")

no. 1: 
 expected A, 
 actual A) Each calendar quarter, with samples collected during the same month, 
 duration 5.44s

no. 2: 
 expected C, 
 actual C) The sample sites should be representative of water throughout the distribution system, 
 duration 2.14s

no. 3: 
 expected C, 
 actual C) Based on the known population served, 
 duration 2.99s

no. 4: 
 expected C, 
 actual C) The tenth day of the following month., 
 duration 1.99s

no. 5: 
 expected C, 
 actual C) The public water system must notify the State Board within 10 days and notify the public., 
 duration 2.51s

no. 6: 
 expected C, 
 actual C) Three, 
 duration 1.72s

no. 7: 
 expected D, 
 actual Answer: d) At least one repeat sample should be from upstream and one from downstream unless there is no upstream and/or downstream service connection, 
 duration 3.41s

no. 8: 
 expected D, 
 actual d) All of the above, 
 duration 1.96s

no. 9: 
 expected C, 
 actual C) Collect a replacement sample from the same location as the or

#### third evaluation

In [90]:
import time
for index in range(len(evaluationSet)):
    query = evaluationSet[index]["query"]
    answer = evaluationSet[index]["answer"]
    start = time.time()
    response = query_engine.query(query)
    stop = time.time()
    duration = stop-start
    print(f"no. {index+1}: \n expected {answer}, \n actual {response}, \n duration {round(duration, 2)}s\n")

no. 1: 
 expected A, 
 actual A) Each calendar quarter, with samples collected during the same month, 
 duration 4.25s

no. 2: 
 expected C, 
 actual C) The sample sites should be representative of water throughout the distribution system, 
 duration 2.76s

no. 3: 
 expected C, 
 actual C) Based on the known population served, 
 duration 2.24s

no. 4: 
 expected C, 
 actual C) The tenth day of the following month., 
 duration 3.15s

no. 5: 
 expected C, 
 actual C) The public water system must notify the State Board within 10 days and notify the public., 
 duration 2.66s

no. 6: 
 expected C, 
 actual C) Three, 
 duration 1.6s

no. 7: 
 expected D, 
 actual Answer: d) At least one repeat sample should be from upstream and one from downstream unless there is no upstream and/or downstream service connection, 
 duration 3.7s

no. 8: 
 expected D, 
 actual d) All of the above, 
 duration 3.08s

no. 9: 
 expected C, 
 actual C) Collect a replacement sample from the same location as the orig