### Create DataSet

In [None]:
from langsmith import Client
# LangSmith client used for the dataset calls in the cells below.
# NOTE(review): presumably picks up its credentials from the environment — confirm.
client = Client()

In [None]:
# Materialize the shared examples into a list: later cells iterate over
# `examples` several times and index into it (e.g. `examples[35]`), which a
# one-shot iterator would not support. Wrapping an actual list is harmless.
examples = list(client.list_shared_examples(share_token="206cb644-3dd4-4713-878b-cd1ae980c103"))

In [None]:
# Set of (first input message content, "supervisor") tuples; filled from the
# shared examples in a later cell.
new_examples = set()

In [None]:
import re

def find_sparql_queries(message: str) -> list[str]:
    """Return the body of every ```sparql fenced block found in *message*.

    Each returned string is the raw text between the opening ```sparql marker
    and the next closing ``` fence; it is not stripped, so leading/trailing
    whitespace and newlines are preserved. An empty list means no fenced
    SPARQL block was found.
    """
    # Non-greedy `.*?` so that every fenced block is matched on its own; a
    # greedy `.*` with DOTALL would swallow everything between the first
    # opening fence and the very last closing fence of the message.
    return re.findall(r"```sparql(.*?)```", message, re.DOTALL)

In [None]:
# Sample answer text containing one ```sparql fenced block, used to try out
# find_sparql_queries by hand.
msg = "The retention times and parent masses of LCMS features annotated as Terpenoids by CANOPUS in Tabernaemontana coffeoides have been retrieved. Here is the information:\n\n- **SPARQL Query**: \n  ```sparql\n  PREFIX ns1: <https://enpkg.commons-lab.org/kg/>\n  PREFIX ns2: <https://enpkg.commons-lab.org/module/>\n\n  SELECT ?retentionTime ?parentMass\n  WHERE {\n      ?rawMaterial ns1:has_wd_id <http://www.wikidata.org/entity/Q15376858> .\n      ?rawMaterial ns1:has_lab_process ?labExtract .\n      ?labExtract ns1:has_LCMS ?analysis .\n      ?analysis ns1:has_lcms_feature_list ?featureList .\n      ?featureList ns1:has_lcms_feature ?feature .\n      ?feature ns1:has_canopus_annotation ?annotation .\n      ?annotation ns1:has_canopus_npc_pathway <https://enpkg.commons-lab.org/kg/npc_Terpenoids> .\n      ?feature ns1:has_retention_time ?retentionTime .\n      ?feature ns1:has_parent_mass ?parentMass .\n  }\n  ```\n\n- **File Path**: The results are saved in the file located at `/var/folders/20/4kgcw5656h12ss_nj18mndwm0000gn/T/kgbot/610f86c362554edfa3e22989fd52acdf/tmpgt4cvneq.csv`.\n\nPlease check the file for detailed results."

# The returned list of extracted query bodies is shown as the cell output.
find_sparql_queries(msg)

In [None]:
from typing import List


# Number of examples in which at least one SPARQL query was found.
found_sparql_queries = 0

# One entry per example, in the order of `examples`:
# {"index": <example index>, "messages": <messages preceding the query> or "no_message"}.
node_inputs = []

for ex_index, ex in enumerate(examples):
    messages = ex.outputs["__end__"]["messages"]
    last_message_index_before_query = -1
    example_sparql_queries = 0
    for message_index, message in enumerate(messages):
        # Only string contents are scanned; other content types are skipped.
        if not isinstance(message["content"], str):
            continue
        sparql_queries = find_sparql_queries(message["content"])
        if sparql_queries:
            example_sparql_queries += len(sparql_queries)
            # Remember the latest message that carries a query.
            last_message_index_before_query = message_index

    if last_message_index_before_query != -1:
        found_sparql_queries += 1
        print(f"In example {ex_index} => {example_sparql_queries} have been found => last message index {last_message_index_before_query}")

        # The node input is everything *before* the message holding the query.
        node_inputs.append({"index": ex_index, "messages": messages[:last_message_index_before_query]})

    else:
        # Pulled into a local so the f-string does not nest its own quote
        # character, which is a SyntaxError before Python 3.12.
        question = messages[0]["content"]
        print(f"In question `{question}` no sparql query was found!")
        # Placeholder keeps the list aligned with the example indices.
        node_inputs.append({"index": ex_index, "messages": "no_message"})


print(f"We found {found_sparql_queries} queries overall")


In [None]:
# Display all collected node inputs for manual inspection.
node_inputs

In [None]:
# Inspect the entry for example 35 before it is overridden by hand in the next cell.
node_inputs[35]

In [None]:
# Manual override for example 35: replace whatever the extraction loop stored
# with the first four messages of that example's recorded conversation.
node_inputs[35]["messages"] = examples[35].outputs["__end__"]["messages"][:4]

In [None]:
from typing import List


# Number of examples in which at least one SPARQL query was found.
found_sparql_queries = 0

# One entry per example, in the order of `examples`:
# {"index": <example index>, "query": <query text> or "no_query"}.
query_outputs = []

for ex_index, ex in enumerate(examples):
    messages = ex.outputs["__end__"]["messages"]
    last_message_index_before_query = -1
    example_sparql_queries = 0
    found_query = ""
    for message_index, message in enumerate(messages):
        # Only string contents are scanned; other content types are skipped.
        if not isinstance(message["content"], str):
            continue
        sparql_queries = find_sparql_queries(message["content"])
        if sparql_queries:
            example_sparql_queries += len(sparql_queries)
            last_message_index_before_query = message_index
            # Keep the first query of the latest message that contains one.
            found_query = sparql_queries[0]

    if last_message_index_before_query != -1:
        found_sparql_queries += 1
        print(f"In example {ex_index} => {example_sparql_queries} have been found => last message index {last_message_index_before_query}")

        query_outputs.append({"index": ex_index, "query": found_query})

    else:
        # Pulled into a local so the f-string does not nest its own quote
        # character, which is a SyntaxError before Python 3.12.
        question = messages[0]["content"]
        print(f"In question `{question}` no sparql query was found!")
        # Placeholder keeps the list aligned with the example indices.
        query_outputs.append({"index": ex_index, "query": "no_query"})


print(f"We found {found_sparql_queries} queries overall")


In [None]:
# Inspect the entry for example 22 before it is overridden by hand in the next cell.
query_outputs[22]

In [None]:
# Manual override of the reference query for example 22.
# The line breaks are real newlines ("\n"): writing them as "\\n" would store
# a literal backslash followed by "n" inside the query text, which is not
# valid SPARQL.
query_outputs[22].update({
    "query": (
        "\n PREFIX ns1: <https://enpkg.commons-lab.org/kg/>\n"
        "PREFIX ns2: <https://enpkg.commons-lab.org/module/>\n"
        "\n"
        "SELECT ?InChIkey2D (COUNT(?InChIkey2D) AS ?frequency)\n"
        "WHERE {\n"
        "    ?rawMaterial ns1:has_wd_id <http://www.wikidata.org/entity/Q157756> .\n"
        "    ?rawMaterial ns1:has_lab_process ?labExtract .\n"
        "    ?labExtract ns1:has_LCMS ?analysis .\n"
        "    ?analysis ns1:has_lcms_feature_list ?feature_list .\n"
        "    ?feature_list ns1:has_lcms_feature ?feature .\n"
        "    ?feature ns1:has_sirius_annotation ?annotation .\n"
        "    ?annotation ns1:has_InChIkey2D ?InChIkey2D .\n"
        "}\n"
        "GROUP BY ?InChIkey2D\n"
        "ORDER BY DESC(?frequency)"
    ),
})

In [None]:
# Display the node inputs again for a final manual check.
node_inputs

In [None]:
# Record each example's first input message together with the fixed label
# "supervisor"; the set drops duplicate pairs.
for example in examples:
    question = example.inputs["messages"][0]["content"]
    new_examples.add((question, "supervisor"))


In [None]:
dataset_name = "KGBot Evaluation: SPARQL Generation Step"

# Rebuild from scratch: an existing dataset with this name is deleted first.
if client.has_dataset(dataset_name=dataset_name):
    client.delete_dataset(dataset_name=dataset_name)

dataset = client.create_dataset(dataset_name=dataset_name)

# Inputs and outputs are paired by position, so `node_inputs` and
# `query_outputs` must list the examples in the same order.
example_inputs = [{"messages": entry["messages"]} for entry in node_inputs]
example_outputs = [{"query": entry["query"]} for entry in query_outputs]
client.create_examples(
    inputs=example_inputs,
    outputs=example_outputs,
    dataset_id=dataset.id,
)

In [None]:
from rdflib.plugins.sparql.algebra import translateQuery
from rdflib.plugins.sparql.parser import parseQuery

def verify_query(query) -> bool:
    """Return True if *query* parses and translates as SPARQL, else False.

    The query is run through rdflib's ``parseQuery`` and ``translateQuery``;
    any exception raised along the way is treated as "not a valid query".
    """
    try:
        parsed = parseQuery(query)
        translateQuery(parsed)
        return True
    except Exception:
        return False

In [None]:
examples_dataset_v1 = client.list_examples(dataset_name="KGBot Evaluation: SPARQL Generation Step")

# Report every example whose reference query does not verify.
for ex in examples_dataset_v1:
    if verify_query(ex.outputs["query"]):
        continue
    messages = ex.inputs["messages"]
    if isinstance(messages, list) and messages:
        # Identify the example by its first input message.
        print(f"{messages[0]['content']}")
    else:
        # Examples without an extracted query were stored with a placeholder
        # string (or may have an empty message list); indexing those like a
        # message list would raise instead of reporting the example.
        print(f"<no input messages recorded: {messages!r}>")

# Prepare Evaluation

In [None]:
from langsmith import Client
# Fresh LangSmith client, so the evaluation section can be run on its own.
client = Client()

In [None]:
# Look the evaluation dataset up by name.
dataset = client.read_dataset(dataset_name="KGBot Evaluation: SPARQL Generation Step")

# Shown as the cell output: the dataset's `example_count`.
dataset.example_count

In [None]:
import os
import sys

# Put the directory two levels above the working directory on sys.path
# (presumably so the `app` package imported in a later cell can be found).
module_path = os.path.abspath(os.path.join('..', '..'))  # Adjust the dots as needed
print(module_path)
if module_path not in sys.path:
    sys.path.append(module_path)

# Create the log directory if it is missing. `exist_ok=True` replaces the
# separate existence check, so there is no window between check and creation.
# NOTE(review): presumably required by the app's logging setup — confirm.
os.makedirs("./app/config/logs", exist_ok=True)

In [None]:
from app.core.main_studio import app


# Target function for running the relevant step
async def run_validator_classifier(input: dict) -> dict:
    """Run the ``Sparql_query_runner`` node on *input* and return its query.

    Streams the node's events and keeps the query reported by the latest
    ``on_tool_end`` event of the ``SPARQL_QUERY_RUNNER`` tool.

    Args:
        input: Payload passed unchanged to the node's ``astream_events``.

    Returns:
        ``{"query": <query>}``; the query is ``""`` when no matching event
        carried one or when the run failed.
    """
    query = ""

    # `except Exception` rather than a bare `except`: the run stays
    # best-effort, but cancellation (asyncio.CancelledError) and
    # KeyboardInterrupt are no longer swallowed.
    try:
        async for event in app.nodes['Sparql_query_runner'].astream_events(input, version="v1"):
            if event["event"] != "on_tool_end" or event["name"] != "SPARQL_QUERY_RUNNER":
                continue
            try:
                query = event["data"]["output"]["result"]["query"]
            except Exception:
                # Tool output without the expected shape: keep the previous
                # value and continue with the remaining events.
                pass
    except Exception:
        # A failed run is reported through whatever query was captured so far.
        pass

    return {"query": query}

In [None]:
from rdflib.plugins.sparql.algebra import translateQuery
from rdflib.plugins.sparql.parser import parseQuery

def verify_query(query) -> bool:
    """Tell whether *query* is accepted by rdflib's SPARQL parser.

    Returns True when ``parseQuery`` followed by ``translateQuery`` completes
    without raising, and False on any exception.
    """
    try:
        translateQuery(parseQuery(query))
    except Exception:
        is_valid = False
    else:
        is_valid = True
    return is_valid

In [None]:
# Evaluator
def correct(outputs: dict, reference_outputs: dict) -> bool:
    """Evaluator: score a run by whether its generated query verifies.

    Only ``outputs["query"]`` is checked, via ``verify_query``;
    ``reference_outputs`` is accepted but not used.
    """
    generated_query = outputs["query"]
    return verify_query(generated_query)

In [None]:
# Run evaluation: `run_validator_classifier` is the target, run over the
# dataset selected by name, and each output is scored by the `correct`
# evaluator (max_concurrency=4).
experiment_results = await client.aevaluate(
    run_validator_classifier,
    data=dataset.name,
    evaluators=[correct],
    experiment_prefix="kgbot-single-sparql-hf-llama3.3:70b",
    max_concurrency=4,
)
# Shown as the cell output: the results converted to a pandas object.
experiment_results.to_pandas()

# Local Testing

In [None]:
examples_it = client.list_examples(dataset_name="KGBot Evaluation: SPARQL Generation Step")
# Materialize the examples so they can be indexed and sliced in later cells.
examples = list(examples_it)

In [None]:
# Inspect the inputs of example 42, which is used for the single test run in the next cell.
examples[42].inputs

In [None]:
# Single manual run of the target function on example 42 (top-level await, as used in a notebook cell).
result = await run_validator_classifier(examples[42].inputs)

In [None]:
count = 0
for ex in examples[:3]:
    try:
        result = await run_validator_classifier(ex.inputs)
        if ["query"] != "":
            count +=1
    except:
        pass

In [None]:
# Display the counter computed in the previous cell.
count