# Project compliance

In [1]:
import sys
import os

# Get the absolute path to the src directory
src_path = os.path.abspath("..")

# Add the src directory to the sys.path
if src_path not in sys.path:
    sys.path.insert(0, src_path)

# Set the __package__ attribute to simulate running as a package
__package__ = "src"

In [88]:
import requests
import json

# Definimos el endpoint
baseline_url = "http://localhost:6000"

# Vamos a generar una query al endpoint de http://localhost:6000/questions-from-standard para obtener las preguntas de un estándar en particular
rag_endpoint = "/questions-from-standard-baseline-rag"
graph_rag_free_endpoint = "/questions-from-standard-graph-rag-free-form"
graph_rag_schema_endpoint = "/questions-from-standard-graph-rag-schema-guided"

In [6]:
response = requests.get(baseline_url + graph_rag_schema_endpoint)

In [7]:
questions = json.loads(response.text)
questions = questions["questions"]
questions[0:4]

['1. Have all relevant factors been considered to address potential adverse project impacts on human health and the environment?',
 '2. Has the project characterized and estimated sources of air pollution as part of the environmental and social assessment?',
 '3. What additional strategies and measures have been adopted to avoid or minimize negative effects in areas where the project constitutes a significant source of emissions?',
 '4. Are there technically and financially feasible and cost-effective options implemented to avoid or minimize project-related air emissions during the design, construction, and operation phases?']

In [8]:
questions

['1. Have all relevant factors been considered to address potential adverse project impacts on human health and the environment?',
 '2. Has the project characterized and estimated sources of air pollution as part of the environmental and social assessment?',
 '3. What additional strategies and measures have been adopted to avoid or minimize negative effects in areas where the project constitutes a significant source of emissions?',
 '4. Are there technically and financially feasible and cost-effective options implemented to avoid or minimize project-related air emissions during the design, construction, and operation phases?',
 '5. Has a health and safety risk assessment of existing pollution been undertaken?',
 '6. Is the project in compliance with existing requirements for the management of hazardous wastes, including national legislation and applicable international conventions?',
 '7. Has the site been remediated in accordance with national law and Good International Industry Pract

# Project knowledge base

In [80]:
import requests
import json


async def get_answers_from_project(query):
    # Definimos el endpoint
    url = "http://localhost:8540/query-project"

    json_data = {
        "query": query,
    }

    # Realizamos la petición
    response = requests.post(url, json=json_data)
    return json.loads(response.text)


# Definimos el endpoint

response_json = await get_answers_from_project("Jindal?")

In [81]:
# response_json = json.loads(response.text)
import pprint

print("Response: ")
pprint.pprint(response_json["response"]["response"])
print("Source nodes: ")
pprint.pprint(response_json["response"]["source_nodes"][0]["node"]["text"])
pprint.pprint(response_json["response"]["source_nodes"][1]["node"]["text"])

Response: 
('Jindal Iron Ore (Pty) Ltd is involved in a project referred to as the Jindal '
 'MIOP (Mining and Infrastructure Optimization Project). This project aims to '
 'contribute to economic development and inclusive growth through revenue and '
 'tax generation, as well as the creation of employment opportunities. The '
 'project also aligns with various strategic frameworks and policies, such as '
 'the National Development Plan (NDP) and the Provincial Growth Development '
 'Strategy (PGDS) of KwaZulu-Natal (KZN), by supporting local economic '
 'development, social investment, and the inclusion of vulnerable groups in '
 'economic activities.')
Source nodes: 
('Jindal Iron Ore (Pty) Ltd SLR Project No: 720.10023.00001 \n'
 'Jindal MIOP EIA & EMPr   July 2023 \n'
 ' \n'
 ' \n'
 ' \n'
 ' 74  \n'
 ' \n'
 'Jindal Iron Ore Mine ESIA and EMPr - 09072023 FINAL Importantly, the NDP '
 'notes that while minerals beneficiation is a good way to increase '
 'productivity and export \n'
 

In [82]:
import pandas as pd

df = pd.DataFrame()
for question in questions:
    response_json = await get_answers_from_project(question)
    print("Question: ", question)
    print("Response: ")
    pprint.pprint(response_json["response"]["response"])
    print("Source nodes: ")
    pprint.pprint(response_json["response"]["source_nodes"][0]["node"]["text"])
    pprint.pprint(response_json["response"]["source_nodes"][1]["node"]["text"])
    print("")

    # now I want to save this into an excel file
    import pandas as pd
    import numpy as np

    df_ = pd.DataFrame(
        {
            "Question": [question],
            "Response": [response_json["response"]["response"]],
            "Source node 1": [
                response_json["response"]["source_nodes"][0]["node"]["text"]
            ],
            "Source node 2": [
                response_json["response"]["source_nodes"][1]["node"]["text"]
            ],
        }
    )

    df = pd.concat([df, df_], axis=0, ignore_index=True)

    # Now we can save the dataframe into an excel
    df.to_excel("project-compliance-validation_test.xlsx", index=False)

    # Create a Pandas Excel writer using XlsxWriter as the engine.
    # writer = pd.ExcelWriter('output.xlsx', engine='openpyxl')

Question:  1. Have all relevant factors been considered to address potential adverse project impacts on human health and the environment?
Response: 
('Yes, the project has considered various relevant factors to address '
 'potential adverse impacts on human health and the environment. These include '
 'adherence to international conventions and guidelines, compliance with '
 'national norms and standards for waste management, and the development of a '
 'Health and Safety Plan. Additionally, the project includes measures for '
 'environmental health management, stakeholder engagement, and the '
 'implementation of a grievance procedure. The planning and design phase also '
 'involves pre-construction monitoring, notification of relevant authorities, '
 'and the drafting of subsidiary plans and policies. Furthermore, the project '
 'emphasizes the importance of ecological sustainable development, '
 'biodiversity conservation, and water resource management.')
Source nodes: 
('Jindal Iro

In [83]:
def pretty_print_response_and_source_nodes(df, number_of_question):
    print("Question: ")
    pprint.pprint(df.iloc[number_of_question, :]["Question"])
    print("Response: ")
    pprint.pprint(df.iloc[number_of_question, :]["Response"])
    print("Source nodes: ")
    pprint.pprint(df.iloc[number_of_question, :]["Source node 1"])
    pprint.pprint(df.iloc[number_of_question, :]["Source node 2"])

In [84]:
# pretty_print_response_and_source_nodes(df, 15)


Question: 
('16. Are measures in place to minimize the generation of waste and to reuse, '
 'recycle, and recover waste in a manner that is safe for human health and the '
 'environment?')
Response: 
('Yes, measures are in place to minimize the generation of waste and to '
 'promote reuse, recycling, and recovery in a manner that is safe for human '
 'health and the environment. These measures include promoting a culture of '
 '"conserve, reduce, reuse & recycle," providing adequate scavenger-proof '
 'rubbish bins and waste disposal facilities, and ensuring proper storage and '
 'handling of hazardous substances. Additionally, there are guidelines for the '
 'disposal of waste from chemical toilets and the use of oil spill kits to '
 'manage spills of hydrocarbon chemicals.')
Source nodes: 
('Jindal Iron Ore (Pty) Ltd \n'
 'Jindal MIOP EIA & EMPr  SLR Project No: 720.10023.00001 \n'
 'July 2023 \n'
 ' \n'
 ' \n'
 ' 358  \n'
 ' \n'
 'Jindal Iron Ore Mine ESIA and EMPr - 09072023 FINAL 

In [85]:
# Function to replace or remove problematic Unicode characters
def remove_unicode(text):
    if isinstance(text, str):
        return text.encode("ascii", "ignore").decode("ascii")
    return text


# Function to escape special LaTeX characters
def escape_latex_special_chars(text):
    if isinstance(text, str):
        return (
            text.replace("&", "\\&")
            .replace("%", "\\%")
            .replace("$", "\\$")
            .replace("#", "\\#")
            .replace("_", "\\_")
            .replace("{", "\\{")
            .replace("}", "\\}")
            .replace("~", "\\textasciitilde{}")
            .replace("^", "\\textasciicircum{}")
        )
    return text

In [86]:
sample_df

Unnamed: 0,Question,Response
18,19. What steps are being taken to reduce or el...,Steps being taken to reduce or eliminate the u...
13,14. Are GIIP alternatives adopted for the envi...,"Yes, alternatives that align with Good Interna..."
19,"20. Are the methods, timing, and frequency of ...",The provided information does not mention spec...
12,13. Are pesticides used in public health progr...,The provided information does not address the ...
3,4. Are there technically and financially feasi...,"Yes, there are technically and financially fea..."
10,11. What measures are in place to avoid the ge...,Measures to avoid the generation of hazardous ...
23,24. Is chain of custody documentation obtained...,The provided information does not specify whet...
27,28. Are reputable and legitimate contractors b...,The provided information does not specify whet...
16,17. Does pesticide use take into account the n...,The provided information does not mention pest...
11,12. Are pesticide formulations of products bei...,The provided information does not mention pest...


In [87]:
sample_df = df.sample(30)
sample_df.drop(columns=["Source node 1", "Source node 2"], inplace=True)

sample_df = sample_df.applymap(escape_latex_special_chars)
sample_df = sample_df.applymap(remove_unicode)

latex_code = sample_df.to_latex(index=False)
print(latex_code)

\begin{tabular}{ll}
\toprule
Question & Response \\
\midrule
1. Have all relevant factors been considered to address potential adverse project impacts on human health and the environment? & Yes, the project has considered various relevant factors to address potential adverse impacts on human health and the environment. These include adherence to international conventions and guidelines, compliance with national norms and standards for waste management, and the development of a Health and Safety Plan. Additionally, the project includes measures for environmental health management, stakeholder engagement, and the implementation of a grievance procedure. The planning and design phase also involves pre-construction monitoring, notification of relevant authorities, and the drafting of subsidiary plans and policies. Furthermore, the project emphasizes the importance of ecological sustainable development, biodiversity conservation, and water resource management. \\
14. Are GIIP alternatives a

  sample_df = sample_df.applymap(escape_latex_special_chars)
  sample_df = sample_df.applymap(remove_unicode)


In [79]:
for i in range(3):
    # print("Question: ")
    print(sample_df["Question"].iloc[i])
    print("&")
    # print("Response: ")
    print(sample_df["Response"].iloc[i])
    print("&")

    # print("Source nodes: ")
    print(r"\n Source Node 1")
    print(sample_df["Source node 1"].iloc[i])
    print(r"\n Source Node 2")
    print(sample_df["Source node 2"].iloc[i])
    print("\\\\")
    print("\\hline")

19. What steps are being taken to reduce or eliminate the use of toxic or hazardous raw materials?
&
Steps being taken to reduce or eliminate the use of toxic or hazardous raw materials include:

1. Implementing clean and dirty water management systems to prevent contaminated runoff from entering the natural environment.
2. Ensuring all dirty water containment facilities remain outside of 'no-go' areas.
3. Prohibiting the dumping of waste within 'no-go' areas.
4. Administering proper storage and handling of hazardous substances for all mining activities.
5. Utilizing drip trays at fuel dispensing areas and during refueling.
6. Storing potentially hazardous materials in appropriate containment structures.
7. Conducting washing and cleaning of equipment only in designated areas far from 'no-go' areas.
8. Using drip trays beneath standing machinery if left for extended periods.
9. Prohibiting refueling or servicing of vehicles within 'no-go' areas.
10. Cleaning up spillages of fuels, oils

KeyError: 'Source node 1'

In [64]:
sample_df

Unnamed: 0,Question,Response,Source node 1,Source node 2
27,28. Are reputable and legitimate contractors b...,The provided information does not specify whet...,Engineer/ RE/ EO/ \nEnvironmental Manager Sign...,Jindal Iron Ore (Pty) Ltd \nJindal MIOP EIA \&...
24,25. Are applicable international conventions b...,"Yes, applicable international conventions are ...",Jindal Iron Ore (Pty) Ltd SLR Project No: 720....,Jindal Iron Ore (Pty) Ltd SLR Project No: 720...
12,13. Are pesticides used in public health progr...,The provided information does not address the ...,"However, the species is \nonly known from 10 ...",Jindal Iron Ore (Pty) Ltd \nJindal MIOP EIA \&...
