In [1]:
from pipeline import pipelines
from pipeline.simple_llm_query_generator import SimpleLLMQueryGenerator
from pipeline.query_engine_component import QueryExecutorStep
import os
import traceback


def run_pipelines(natural_language_question, expected_sparql):
    """Run the LLM query pipeline and the reference SPARQL query for one question.

    Builds a query generator and a query executor, wires them into two
    pipelines (generator + executor, and executor only), runs both and always
    closes the pipelines afterwards.

    The two runs are guarded separately: a failing LLM-generated query (for
    example one that uses an undefined prefix) no longer prevents the
    reference query from being executed. The evaluation skips questions
    without a reference result, so losing it would silently drop exactly the
    questions on which the LLM failed.

    Args:
        natural_language_question: Question passed to the LLM pipeline.
        expected_sparql: Reference SPARQL query, executed directly.

    Returns:
        A 3-tuple ``(result_llm_without_context, result_llm_with_context,
        expected_sparql_result)``. An entry is ``None`` when the corresponding
        run failed or was not performed. The with-context run is currently
        disabled, so its entry is always ``None``.
    """
    sparql_llm_pipeline = None
    sparql_execution_pipeline = None
    result_llm_without_context = None
    result_llm_with_context = None
    expected_sparql_result = None
    try:
        query_generator = SimpleLLMQueryGenerator(model_name="meta-llama/Llama-3.2-3B-Instruct")
        query_executor = QueryExecutorStep(
            engine_name="milleniumDB",
            graph_path="rdf_100_sphn.nt",
            verbose=True,
            query_format="sparql",
            construct_graph=False
        )

        # NOTE(review): both pipelines share the same query_executor instance,
        # so it is initialized and closed once per pipeline. The second close
        # presumably targets an already removed container (a 404 "No such
        # container" is reported when closing sparql_execution_pipeline) --
        # confirm against the pipeline implementation.
        sparql_llm_pipeline = pipelines.InitialPipeline([query_generator, query_executor])
        sparql_execution_pipeline = pipelines.InitialPipeline([query_executor])

        sparql_llm_pipeline.initialize()
        sparql_execution_pipeline.initialize()

        # A broken LLM query must not abort the reference run below.
        try:
            result_llm_without_context = sparql_llm_pipeline.run(
                natural_language_question=natural_language_question,
                sparql_is_path=False
            )
        except Exception:
            traceback.print_exc()

        # Here context needs to be filled in depending on the Natural Language Question
        # result_llm_with_context = sparql_llm_pipeline.run(natural_language_question=natural_language_question,
        #                                                   context="",
        #                                                   sparql_is_path=False)

        try:
            expected_sparql_result = sparql_execution_pipeline.run({"query": expected_sparql})
        except Exception:
            traceback.print_exc()

    except Exception:
        # Construction or initialization failed; whatever was gathered so far
        # (normally nothing) is still returned below.
        traceback.print_exc()

    finally:
        # Best-effort cleanup: a failing close() is reported but never raised.
        for label, pipeline in (
            ("sparql_llm_pipeline", sparql_llm_pipeline),
            ("sparql_execution_pipeline", sparql_execution_pipeline),
        ):
            if pipeline is None:
                continue
            try:
                pipeline.close()
            except Exception as e:
                print(f"Error while closing {label}: {e}")

    return result_llm_without_context, result_llm_with_context, expected_sparql_result




  from .autonotebook import tqdm as notebook_tqdm


4.52.3
4.52.3


In [2]:
import pandas as pd
import numpy as np

qq_pairs_path = "../sparql_queries/updated_qq_pairs/question_query_pairs_100.csv"
updated_q_sparql_pairs = pd.read_csv(qq_pairs_path)
# Draw a reproducible random sample of 10 question/query pairs (fixed seed)
# and renumber the sampled rows from 0.
shuffled_pairs = updated_q_sparql_pairs.sample(n=10, random_state=42)
sampled_pairs = shuffled_pairs.reset_index(drop=True)


In [3]:
queries = sampled_pairs["SPARQL Query"].values
questions = sampled_pairs["Question"].values

# One list per pipeline output. Entry i belongs to the i-th sampled question
# and is None when run_pipelines returned nothing for that output.
result_llm_without_context_list = []
result_llm_with_context_list = []
expected_sparql_result_list = []

for expected_sparql, question in zip(queries, questions):
    print(f"Run pipeline for question: {question}")
    result_llm_without_context, result_llm_with_context, expected_sparql_result = run_pipelines(
        natural_language_question=question,
        expected_sparql=expected_sparql,
    )

    # Keep only the "result" entry of each pipeline output, in a fixed order.
    outputs_and_targets = (
        (result_llm_without_context, result_llm_without_context_list),
        (result_llm_with_context, result_llm_with_context_list),
        (expected_sparql_result, expected_sparql_result_list),
    )
    for pipeline_output, target_list in outputs_and_targets:
        target_list.append(pipeline_output["result"] if pipeline_output else None)

Run pipeline for question: What are the codes for the substances used in the drug prescription 17205470?
Using device: cuda
Checking available images...
['mdb:latest']
['hello-world:latest']
['lyrasis/blazegraph:2.1.5']


file:///C:/Users/Johan/PycharmProjects/MAI_Project_PHKG/data\ontology.ttl does not look like a valid URI, trying to serialize this will break.
file:///C:/Users/Johan/PycharmProjects/MAI_Project_PHKG/data\ontology.ttl does not look like a valid URI, trying to serialize this will break.


▶ Running step: SimpleLLMQueryGenerator
Prompt: 
Task: Generate a SPARQL SELECT statement for querying a graph database.
For instance, to find all email addresses of John Doe, the following query in backticks would be suitable:
```
PREFIX foaf: <http://xmlns.com/foaf/0.1/>
SELECT ?email
WHERE {
    ?person foaf:name "John Doe" .
    ?person foaf:mbox ?email .
}
```
Keep in mind that you might need several classes in order to provide the correct answer. 

Instructions:
Use only the node types and properties provided in the ontology.
Do not use any node types and properties that are not explicitly provided.
Include all necessary prefixes and relations.

The ontology is:
@prefix owl: <http://www.w3.org/2002/07/owl#> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix sphn: <https://www.biomedit.ch/rdf/sphn-schema/sphn/> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .

sphn:hasActiveIngredient a owl:ObjectProperty ;
    rdfs:label "has Active Ingredient" ;
    rdfs:comme

Loading checkpoint shards: 100%|██████████| 2/2 [00:00<00:00, 75.15it/s]
Device set to use cuda:0
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


📥 Raw LLM Output:
 ``` 
PREFIX sphn: <https://www.biomedit.ch/rdf/sphn-schema/sphn/>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX owl: <http://www.w3.org/2002/07/owl#>
SELECT?code
WHERE {
   ?prescription sphn:hasDrug?drug.
   ?drug sphn:hasActiveIngredient?ingredient.
   ?ingredient sphn:hasActiveIngredient?substance.
   ?substance sphn:hasCode?code.
   ?prescription sphn:hasSubjectPseudoIdentifier?prescriptionId.
   ?prescription sphn:hasSubjectPseudoIdentifier?substanceId.
    FILTER (?prescriptionId = "17205470"^^xsd:string).
}
```

✅ Final SPARQL Query:
 PREFIX sphn: <https://www.biomedit.ch/rdf/sphn-schema/sphn/>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX owl: <http://www.w3.org/2002/07/owl#>
SELECT?code
WHERE {
   ?prescription sphn:hasDrug?drug.
   ?drug sphn:hasActiveIngredient?ingredient.
   ?ingredient sphn:hasActiveIngredient?substance.
   ?substance sphn:hasCode?code.
   ?prescription sphn:hasSubjectPseudoIdentifier?prescriptionId.
   ?presc

file:///C:/Users/Johan/PycharmProjects/MAI_Project_PHKG/data\ontology.ttl does not look like a valid URI, trying to serialize this will break.
file:///C:/Users/Johan/PycharmProjects/MAI_Project_PHKG/data\ontology.ttl does not look like a valid URI, trying to serialize this will break.


▶ Running step: SimpleLLMQueryGenerator
Prompt: 
Task: Generate a SPARQL SELECT statement for querying a graph database.
For instance, to find all email addresses of John Doe, the following query in backticks would be suitable:
```
PREFIX foaf: <http://xmlns.com/foaf/0.1/>
SELECT ?email
WHERE {
    ?person foaf:name "John Doe" .
    ?person foaf:mbox ?email .
}
```
Keep in mind that you might need several classes in order to provide the correct answer. 

Instructions:
Use only the node types and properties provided in the ontology.
Do not use any node types and properties that are not explicitly provided.
Include all necessary prefixes and relations.

The ontology is:
@prefix owl: <http://www.w3.org/2002/07/owl#> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix sphn: <https://www.biomedit.ch/rdf/sphn-schema/sphn/> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .

sphn:hasActiveIngredient a owl:ObjectProperty ;
    rdfs:label "has Active Ingredient" ;
    rdfs:comme

Loading checkpoint shards: 100%|██████████| 2/2 [00:00<00:00,  6.43it/s]
Device set to use cuda:0
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


📥 Raw LLM Output:
 ```
PREFIX sphn: <https://www.biomedit.ch/rdf/sphn-schema/sphn/>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX owl: <http://www.w3.org/2002/07/owl#>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>

SELECT?code
WHERE {
   ?drugPrescription sphn:hasDrug?drug.
   ?drug sphn:hasActiveIngredient?activeIngredient.
   ?activeIngredient sphn:hasActiveIngredient?substance.
   ?substance sphn:hasCodingSystemAndVersion?code.
   ?drugPrescription sphn:hasSubjectPseudoIdentifier?subject.
   ?subject sphn:hasSubjectPseudoIdentifier?code1.
    FILTER(?code =?code1)
}
```

✅ Final SPARQL Query:
 PREFIX sphn: <https://www.biomedit.ch/rdf/sphn-schema/sphn/>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX owl: <http://www.w3.org/2002/07/owl#>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>

SELECT?code
WHERE {
   ?drugPrescription sphn:hasDrug?drug.
   ?drug sphn:hasActiveIngredient?activeIngredient.
   ?activeIngredient sphn:hasActiveIngredient?substance.
   

file:///C:/Users/Johan/PycharmProjects/MAI_Project_PHKG/data\ontology.ttl does not look like a valid URI, trying to serialize this will break.
file:///C:/Users/Johan/PycharmProjects/MAI_Project_PHKG/data\ontology.ttl does not look like a valid URI, trying to serialize this will break.


▶ Running step: SimpleLLMQueryGenerator
Prompt: 
Task: Generate a SPARQL SELECT statement for querying a graph database.
For instance, to find all email addresses of John Doe, the following query in backticks would be suitable:
```
PREFIX foaf: <http://xmlns.com/foaf/0.1/>
SELECT ?email
WHERE {
    ?person foaf:name "John Doe" .
    ?person foaf:mbox ?email .
}
```
Keep in mind that you might need several classes in order to provide the correct answer. 

Instructions:
Use only the node types and properties provided in the ontology.
Do not use any node types and properties that are not explicitly provided.
Include all necessary prefixes and relations.

The ontology is:
@prefix owl: <http://www.w3.org/2002/07/owl#> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix sphn: <https://www.biomedit.ch/rdf/sphn-schema/sphn/> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .

sphn:hasActiveIngredient a owl:ObjectProperty ;
    rdfs:label "has Active Ingredient" ;
    rdfs:comme

Loading checkpoint shards: 100%|██████████| 2/2 [00:00<00:00,  9.40it/s]
Device set to use cuda:0
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


📥 Raw LLM Output:
 ```sparql
PREFIX foaf: <http://xmlns.com/foaf/0.1/>
PREFIX owl: <http://www.w3.org/2002/07/owl#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX sphn: <https://www.biomedit.ch/rdf/sphn-schema/sphn/>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>

SELECT?activeIngredient
WHERE {
   ?drugPrescription sphn:hasDrug?drug.
   ?drug sphn:hasActiveIngredient?activeIngredient.
   ?drugPrescription sphn:id "17068689".
}
```

✅ Final SPARQL Query:
 PREFIX foaf: <http://xmlns.com/foaf/0.1/>
PREFIX owl: <http://www.w3.org/2002/07/owl#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX sphn: <https://www.biomedit.ch/rdf/sphn-schema/sphn/>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>

SELECT?activeIngredient
WHERE {
   ?drugPrescription sphn:hasDrug?drug.
   ?drug sphn:hasActiveIngredient?activeIngredient.
   ?drugPrescription sphn:id "17068689".
}
✅ Step 'SimpleLLMQueryGenerator' completed in 8.14 seconds

▶ Running step: QueryExecutorStep
>>> Query being

file:///C:/Users/Johan/PycharmProjects/MAI_Project_PHKG/data\ontology.ttl does not look like a valid URI, trying to serialize this will break.
file:///C:/Users/Johan/PycharmProjects/MAI_Project_PHKG/data\ontology.ttl does not look like a valid URI, trying to serialize this will break.


▶ Running step: SimpleLLMQueryGenerator
Prompt: 
Task: Generate a SPARQL SELECT statement for querying a graph database.
For instance, to find all email addresses of John Doe, the following query in backticks would be suitable:
```
PREFIX foaf: <http://xmlns.com/foaf/0.1/>
SELECT ?email
WHERE {
    ?person foaf:name "John Doe" .
    ?person foaf:mbox ?email .
}
```
Keep in mind that you might need several classes in order to provide the correct answer. 

Instructions:
Use only the node types and properties provided in the ontology.
Do not use any node types and properties that are not explicitly provided.
Include all necessary prefixes and relations.

The ontology is:
@prefix owl: <http://www.w3.org/2002/07/owl#> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix sphn: <https://www.biomedit.ch/rdf/sphn-schema/sphn/> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .

sphn:hasActiveIngredient a owl:ObjectProperty ;
    rdfs:label "has Active Ingredient" ;
    rdfs:comme

Loading checkpoint shards: 100%|██████████| 2/2 [00:00<00:00,  9.40it/s]
Device set to use cuda:0
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


📥 Raw LLM Output:
 ``` 
PREFIX sphn: <https://www.biomedit.ch/rdf/sphn-schema/sphn/>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX owl: <http://www.w3.org/2002/07/owl#>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>

SELECT?code
WHERE {
   ?patient sphn:hasSubjectPseudoIdentifier?patientId.
    filter (?patientId = "19899327" ).
   ?test sphn:hasLabTest?labTest.
   ?labTest sphn:hasLabTest?test.
   ?test sphn:hasCodingSystemAndVersion?code.
   ?test sphn:hasSubjectPseudoIdentifier?patient.
   ?patient sphn:hasDrugPrescription?drugPrescription.
   ?drugPrescription sphn:hasCode?code.
}
```

✅ Final SPARQL Query:
 PREFIX sphn: <https://www.biomedit.ch/rdf/sphn-schema/sphn/>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX owl: <http://www.w3.org/2002/07/owl#>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>

SELECT?code
WHERE {
   ?patient sphn:hasSubjectPseudoIdentifier?patientId.
    filter (?patientId = "19899327" ).
   ?test sphn:hasLabTest?labTest.
   ?labTe

file:///C:/Users/Johan/PycharmProjects/MAI_Project_PHKG/data\ontology.ttl does not look like a valid URI, trying to serialize this will break.
file:///C:/Users/Johan/PycharmProjects/MAI_Project_PHKG/data\ontology.ttl does not look like a valid URI, trying to serialize this will break.


▶ Running step: SimpleLLMQueryGenerator
Prompt: 
Task: Generate a SPARQL SELECT statement for querying a graph database.
For instance, to find all email addresses of John Doe, the following query in backticks would be suitable:
```
PREFIX foaf: <http://xmlns.com/foaf/0.1/>
SELECT ?email
WHERE {
    ?person foaf:name "John Doe" .
    ?person foaf:mbox ?email .
}
```
Keep in mind that you might need several classes in order to provide the correct answer. 

Instructions:
Use only the node types and properties provided in the ontology.
Do not use any node types and properties that are not explicitly provided.
Include all necessary prefixes and relations.

The ontology is:
@prefix owl: <http://www.w3.org/2002/07/owl#> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix sphn: <https://www.biomedit.ch/rdf/sphn-schema/sphn/> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .

sphn:hasActiveIngredient a owl:ObjectProperty ;
    rdfs:label "has Active Ingredient" ;
    rdfs:comme

Loading checkpoint shards: 100%|██████████| 2/2 [00:00<00:00,  9.44it/s]
Device set to use cuda:0
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


📥 Raw LLM Output:
 ```
PREFIX sphn: <https://www.biomedit.ch/rdf/sphn-schema/sphn/>
PREFIX owl: <http://www.w3.org/2002/07/owl#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>

SELECT?ingredient
WHERE {
   ?drugPrescription sphn:hasActiveIngredient?ingredient.
   ?drugPrescription sphn:id "15065147".
}
```

✅ Final SPARQL Query:
 PREFIX sphn: <https://www.biomedit.ch/rdf/sphn-schema/sphn/>
PREFIX owl: <http://www.w3.org/2002/07/owl#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>

SELECT?ingredient
WHERE {
   ?drugPrescription sphn:hasActiveIngredient?ingredient.
   ?drugPrescription sphn:id "15065147".
}
✅ Step 'SimpleLLMQueryGenerator' completed in 6.67 seconds

▶ Running step: QueryExecutorStep
>>> Query being sent to MillenniumDB:
 PREFIX sphn: <https://www.biomedit.ch/rdf/sphn-schema/sphn/>
PREFIX owl: <http://www.w3.org/2002/07/owl#>
PREFIX rdfs: <http://www.w3.org/2000/01/

file:///C:/Users/Johan/PycharmProjects/MAI_Project_PHKG/data\ontology.ttl does not look like a valid URI, trying to serialize this will break.
file:///C:/Users/Johan/PycharmProjects/MAI_Project_PHKG/data\ontology.ttl does not look like a valid URI, trying to serialize this will break.


▶ Running step: SimpleLLMQueryGenerator
Prompt: 
Task: Generate a SPARQL SELECT statement for querying a graph database.
For instance, to find all email addresses of John Doe, the following query in backticks would be suitable:
```
PREFIX foaf: <http://xmlns.com/foaf/0.1/>
SELECT ?email
WHERE {
    ?person foaf:name "John Doe" .
    ?person foaf:mbox ?email .
}
```
Keep in mind that you might need several classes in order to provide the correct answer. 

Instructions:
Use only the node types and properties provided in the ontology.
Do not use any node types and properties that are not explicitly provided.
Include all necessary prefixes and relations.

The ontology is:
@prefix owl: <http://www.w3.org/2002/07/owl#> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix sphn: <https://www.biomedit.ch/rdf/sphn-schema/sphn/> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .

sphn:hasActiveIngredient a owl:ObjectProperty ;
    rdfs:label "has Active Ingredient" ;
    rdfs:comme

Loading checkpoint shards: 100%|██████████| 2/2 [00:00<00:00, 77.66it/s]
Device set to use cuda:0
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


📥 Raw LLM Output:
 ```sparql
PREFIX sphn: <https://www.biomedit.ch/rdf/sphn-schema/sphn/>
PREFIX foaf: <http://xmlns.com/foaf/0.1/>
PREFIX owl: <http://www.w3.org/2002/07/owl#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>

SELECT?code
WHERE {
   ?patient sphn:hasSubjectPseudoIdentifier?subjectId.
   ?patient sphn:hasSubjectPseudoIdentifier?subjectId ;
                    sphn:hasDrug?drug.
   ?drug sphn:hasCode?code ;
        sphn:hasLabTest?test.
   ?test sphn:hasLabTest?test ;
        sphn:hasSubjectPseudoIdentifier?subjectId.
   ?patient sfn:hasSubjectPseudoIdentifier "10764017".
}
```

✅ Final SPARQL Query:
 PREFIX sphn: <https://www.biomedit.ch/rdf/sphn-schema/sphn/>
PREFIX foaf: <http://xmlns.com/foaf/0.1/>
PREFIX owl: <http://www.w3.org/2002/07/owl#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>

SELECT?code
WHERE {
   ?patient sphn:hasSubjectPseudoIdentifier?subjectId.

millenniumdb_driver.millenniumdb_error.MillenniumDBError: Query Exception: Bad query semantic: `The prefix 'sfn' is not defined`.

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "C:\Users\Johan\AppData\Local\Temp\ipykernel_24388\473334183.py", line 27, in run_pipelines
    result_llm_without_context = sparql_llm_pipeline.run(
  File "C:\Users\Johan\PycharmProjects\MAI_Project_PHKG\src\pipeline\pipelines.py", line 24, in run
    data = step.run(data, **kwargs)
  File "C:\Users\Johan\PycharmProjects\MAI_Project_PHKG\src\pipeline\query_engine_component.py", line 45, in run
    data["result"]=self.query(query, path=sparql_is_path)
  File "C:\Users\Johan\PycharmProjects\MAI_Project_PHKG\src\pipeline\query_engine_component.py", line 94, in query
    return self.mdb_server_query(query)
  File "C:\Users\Johan\PycharmProjects\MAI_Project_PHKG\src\pipeline\query_engine_component.py", line 113, in mdb_server_query
    result = sessi

Error while closing sparql_execution_pipeline: 404 Client Error for http+docker://localnpipe/v1.47/containers/aa72af862154e0d00074c73948f51ff79432b75d617101c1860702eb887c6a59?v=True&link=False&force=True: Not Found ("No such container: aa72af862154e0d00074c73948f51ff79432b75d617101c1860702eb887c6a59")
Run pipeline for question: What are the codes for the substances used in the drug prescription 17727916?
Using device: cuda
Checking available images...
['mdb:latest']
['hello-world:latest']
['lyrasis/blazegraph:2.1.5']


file:///C:/Users/Johan/PycharmProjects/MAI_Project_PHKG/data\ontology.ttl does not look like a valid URI, trying to serialize this will break.
file:///C:/Users/Johan/PycharmProjects/MAI_Project_PHKG/data\ontology.ttl does not look like a valid URI, trying to serialize this will break.


▶ Running step: SimpleLLMQueryGenerator
Prompt: 
Task: Generate a SPARQL SELECT statement for querying a graph database.
For instance, to find all email addresses of John Doe, the following query in backticks would be suitable:
```
PREFIX foaf: <http://xmlns.com/foaf/0.1/>
SELECT ?email
WHERE {
    ?person foaf:name "John Doe" .
    ?person foaf:mbox ?email .
}
```
Keep in mind that you might need several classes in order to provide the correct answer. 

Instructions:
Use only the node types and properties provided in the ontology.
Do not use any node types and properties that are not explicitly provided.
Include all necessary prefixes and relations.

The ontology is:
@prefix owl: <http://www.w3.org/2002/07/owl#> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix sphn: <https://www.biomedit.ch/rdf/sphn-schema/sphn/> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .

sphn:hasActiveIngredient a owl:ObjectProperty ;
    rdfs:label "has Active Ingredient" ;
    rdfs:comme

Loading checkpoint shards: 100%|██████████| 2/2 [00:00<00:00,  9.52it/s]
Device set to use cuda:0
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


📥 Raw LLM Output:
 ```sparql
PREFIX foaf: <http://xmlns.com/foaf/0.1/>
PREFIX sphn: <https://www.biomedit.ch/rdf/sphn-schema/sphn/>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
PREFIX owl: <http://www.w3.org/2002/07/owl#>

SELECT?code
WHERE {
   ?prescription sphn:hasDrug?drug.
   ?drug sphn:hasActiveIngredient?substance.
   ?substance sphn:hasCode?code.
   ?prescription sphn:hasSubjectPseudoIdentifier?subject.
   ?subject sphn:hasSubjectPseudoIdentifier?code.
    FILTER(?subject = "17727916").
}
```

✅ Final SPARQL Query:
 PREFIX foaf: <http://xmlns.com/foaf/0.1/>
PREFIX sphn: <https://www.biomedit.ch/rdf/sphn-schema/sphn/>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
PREFIX owl: <http://www.w3.org/2002/07/owl#>

SELECT?code
WHERE {
   ?prescription sphn:hasDrug?drug.
   ?drug sphn:hasActiveIngredient?substance.
   ?substance sphn:hasCode?code.
   ?prescription sphn:hasSubje

file:///C:/Users/Johan/PycharmProjects/MAI_Project_PHKG/data\ontology.ttl does not look like a valid URI, trying to serialize this will break.
file:///C:/Users/Johan/PycharmProjects/MAI_Project_PHKG/data\ontology.ttl does not look like a valid URI, trying to serialize this will break.


▶ Running step: SimpleLLMQueryGenerator
Prompt: 
Task: Generate a SPARQL SELECT statement for querying a graph database.
For instance, to find all email addresses of John Doe, the following query in backticks would be suitable:
```
PREFIX foaf: <http://xmlns.com/foaf/0.1/>
SELECT ?email
WHERE {
    ?person foaf:name "John Doe" .
    ?person foaf:mbox ?email .
}
```
Keep in mind that you might need several classes in order to provide the correct answer. 

Instructions:
Use only the node types and properties provided in the ontology.
Do not use any node types and properties that are not explicitly provided.
Include all necessary prefixes and relations.

The ontology is:
@prefix owl: <http://www.w3.org/2002/07/owl#> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix sphn: <https://www.biomedit.ch/rdf/sphn-schema/sphn/> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .

sphn:hasActiveIngredient a owl:ObjectProperty ;
    rdfs:label "has Active Ingredient" ;
    rdfs:comme

Loading checkpoint shards: 100%|██████████| 2/2 [00:00<00:00,  9.09it/s]
Device set to use cuda:0
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


📥 Raw LLM Output:
 ```
PREFIX sphn: <https://www.biomedit.ch/rdf/sphn-schema/sphn/>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX owl: <http://www.w3.org/2002/07/owl#>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>

SELECT?code
WHERE {
 ?patient sphn:hasSubjectPseudoIdentifier?subjectPseudoIdentifier.
 ?patient sphn:hasSubjectPseudoIdentifier?test.
 ?test sphn:hasCodingSystemAndVersion?code.
 ?test sphn:hasLabTest?labTest.
 ?labTest sphn:hasDrugPrescription?drugPrescription.
 ?drugPrescription sphn:hasDrug?drug.
 ?drug spn:hasName "Drug Prescription".
}
```

✅ Final SPARQL Query:
 PREFIX sphn: <https://www.biomedit.ch/rdf/sphn-schema/sphn/>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX owl: <http://www.w3.org/2002/07/owl#>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>

SELECT?code
WHERE {
 ?patient sphn:hasSubjectPseudoIdentifier?subjectPseudoIdentifier.
 ?patient sphn:hasSubjectPseudoIdentifier?test.
 ?test sphn:hasCodingSystemAndVersion?code.
 ?test sph

millenniumdb_driver.millenniumdb_error.MillenniumDBError: Query Exception: Bad query semantic: `The prefix 'spn' is not defined`.

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "C:\Users\Johan\AppData\Local\Temp\ipykernel_24388\473334183.py", line 27, in run_pipelines
    result_llm_without_context = sparql_llm_pipeline.run(
  File "C:\Users\Johan\PycharmProjects\MAI_Project_PHKG\src\pipeline\pipelines.py", line 24, in run
    data = step.run(data, **kwargs)
  File "C:\Users\Johan\PycharmProjects\MAI_Project_PHKG\src\pipeline\query_engine_component.py", line 45, in run
    data["result"]=self.query(query, path=sparql_is_path)
  File "C:\Users\Johan\PycharmProjects\MAI_Project_PHKG\src\pipeline\query_engine_component.py", line 94, in query
    return self.mdb_server_query(query)
  File "C:\Users\Johan\PycharmProjects\MAI_Project_PHKG\src\pipeline\query_engine_component.py", line 113, in mdb_server_query
    result = sessi

Error while closing sparql_execution_pipeline: 404 Client Error for http+docker://localnpipe/v1.47/containers/f56413dda7069b130d03b338b74b5aed760f32b1c83842bf49b9071870d67324?v=True&link=False&force=True: Not Found ("No such container: f56413dda7069b130d03b338b74b5aed760f32b1c83842bf49b9071870d67324")
Run pipeline for question: What are the drugs in drug prescription 16882589?
Using device: cuda
Checking available images...
['mdb:latest']
['hello-world:latest']
['lyrasis/blazegraph:2.1.5']


file:///C:/Users/Johan/PycharmProjects/MAI_Project_PHKG/data\ontology.ttl does not look like a valid URI, trying to serialize this will break.
file:///C:/Users/Johan/PycharmProjects/MAI_Project_PHKG/data\ontology.ttl does not look like a valid URI, trying to serialize this will break.


▶ Running step: SimpleLLMQueryGenerator
Prompt: 
Task: Generate a SPARQL SELECT statement for querying a graph database.
For instance, to find all email addresses of John Doe, the following query in backticks would be suitable:
```
PREFIX foaf: <http://xmlns.com/foaf/0.1/>
SELECT ?email
WHERE {
    ?person foaf:name "John Doe" .
    ?person foaf:mbox ?email .
}
```
Keep in mind that you might need several classes in order to provide the correct answer. 

Instructions:
Use only the node types and properties provided in the ontology.
Do not use any node types and properties that are not explicitly provided.
Include all necessary prefixes and relations.

The ontology is:
@prefix owl: <http://www.w3.org/2002/07/owl#> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix sphn: <https://www.biomedit.ch/rdf/sphn-schema/sphn/> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .

sphn:hasActiveIngredient a owl:ObjectProperty ;
    rdfs:label "has Active Ingredient" ;
    rdfs:comme

Loading checkpoint shards: 100%|██████████| 2/2 [00:00<00:00, 22.35it/s]
Device set to use cuda:0
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


📥 Raw LLM Output:
 ```sparql
PREFIX sphn: <https://www.biomedit.ch/rdf/sphn-schema/sphn/>
PREFIX owl: <http://www.w3.org/2002/07/owl#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>

SELECT?drug
WHERE {
   ?drugPrescription sphn:hasDrug?drug.
   ?drugPrescription sphn:drugPrescriptionID "16882589".
}
```

✅ Final SPARQL Query:
 PREFIX sphn: <https://www.biomedit.ch/rdf/sphn-schema/sphn/>
PREFIX owl: <http://www.w3.org/2002/07/owl#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>

SELECT?drug
WHERE {
   ?drugPrescription sphn:hasDrug?drug.
   ?drugPrescription sphn:drugPrescriptionID "16882589".
}
✅ Step 'SimpleLLMQueryGenerator' completed in 6.72 seconds

▶ Running step: QueryExecutorStep
>>> Query being sent to MillenniumDB:
 PREFIX sphn: <https://www.biomedit.ch/rdf/sphn-schema/sphn/>
PREFIX owl: <http://www.w3.org/2002/07/owl#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema

file:///C:/Users/Johan/PycharmProjects/MAI_Project_PHKG/data\ontology.ttl does not look like a valid URI, trying to serialize this will break.
file:///C:/Users/Johan/PycharmProjects/MAI_Project_PHKG/data\ontology.ttl does not look like a valid URI, trying to serialize this will break.


▶ Running step: SimpleLLMQueryGenerator
Prompt: 
Task: Generate a SPARQL SELECT statement for querying a graph database.
For instance, to find all email addresses of John Doe, the following query in backticks would be suitable:
```
PREFIX foaf: <http://xmlns.com/foaf/0.1/>
SELECT ?email
WHERE {
    ?person foaf:name "John Doe" .
    ?person foaf:mbox ?email .
}
```
Keep in mind that you might need several classes in order to provide the correct answer. 

Instructions:
Use only the node types and properties provided in the ontology.
Do not use any node types and properties that are not explicitly provided.
Include all necessary prefixes and relations.

The ontology is:
@prefix owl: <http://www.w3.org/2002/07/owl#> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix sphn: <https://www.biomedit.ch/rdf/sphn-schema/sphn/> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .

sphn:hasActiveIngredient a owl:ObjectProperty ;
    rdfs:label "has Active Ingredient" ;
    rdfs:comme

Loading checkpoint shards: 100%|██████████| 2/2 [00:00<00:00, 33.18it/s]
Device set to use cuda:0
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


📥 Raw LLM Output:
 ```sparql
PREFIX sphn: <https://www.biomedit.ch/rdf/sphn-schema/sphn/>
PREFIX owl: <http://www.w3.org/2002/07/owl#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>

SELECT?drug
WHERE {
   ?drugPrescription sphn:hasDrug?drug.
   ?drugPrescription sphn:hasSubjectPseudoIdentifier "19653006".
}
```

✅ Final SPARQL Query:
 PREFIX sphn: <https://www.biomedit.ch/rdf/sphn-schema/sphn/>
PREFIX owl: <http://www.w3.org/2002/07/owl#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>

SELECT?drug
WHERE {
   ?drugPrescription sphn:hasDrug?drug.
   ?drugPrescription sphn:hasSubjectPseudoIdentifier "19653006".
}
✅ Step 'SimpleLLMQueryGenerator' completed in 6.50 seconds

▶ Running step: QueryExecutorStep
>>> Query being sent to MillenniumDB:
 PREFIX sphn: <https://www.biomedit.ch/rdf/sphn-schema/sphn/>
PREFIX owl: <http://www.w3.org/2002/07/owl#>
PREFIX rdfs: <http://www.w3.org/20

In [8]:
def _first_values(result):
    """Return the set of ``v[0]`` for every value ``v`` of ``result``.

    A falsy ``result`` (``None`` or empty) yields an empty set.
    """
    # NOTE(review): only element 0 of each value is compared -- confirm this
    # matches the shape of the results returned by the query executor.
    if not result:
        return set()
    return {value[0] for value in result.values()}


# Counters summed over all evaluated questions, one triple per setting.
tp_with_context = fp_with_context = fn_with_context = 0
tp_without_context = fp_without_context = fn_without_context = 0

for result_llm_without_context, result_llm_with_context, result_exact in zip(
        result_llm_without_context_list,
        result_llm_with_context_list,
        expected_sparql_result_list):

    # Without a reference result there is nothing to compare against.
    if not result_exact:
        continue

    values_exact = _first_values(result_exact)

    # Without context: a missing LLM result counts as an empty answer.
    values_llm_without = _first_values(result_llm_without_context)
    tp_without_context += len(values_llm_without & values_exact)
    fp_without_context += len(values_llm_without - values_exact)
    fn_without_context += len(values_exact - values_llm_without)

    # With context: same comparison against the same reference values.
    values_llm_with = _first_values(result_llm_with_context)
    tp_with_context += len(values_llm_with & values_exact)
    fp_with_context += len(values_llm_with - values_exact)
    fn_with_context += len(values_exact - values_llm_with)

# Metric functions
def precision(tp, fp):
    """Return ``tp / (tp + fp)``, or 0 when ``tp + fp`` is zero."""
    predicted = tp + fp
    if not predicted:
        return 0
    return tp / predicted

def recall(tp, fn):
    """Return ``tp / (tp + fn)``, or 0 when ``tp + fn`` is zero."""
    relevant = tp + fn
    if not relevant:
        return 0
    return tp / relevant

def accuracy(tp, fp, fn):
    """Return ``tp / (tp + fp + fn)``, or 0 when that sum is zero.

    True negatives are not part of this ratio.
    """
    total = tp + fp + fn
    if not total:
        return 0
    return tp / total

# Output
# Print the same three metrics for each setting, "without context" first.
for heading, (tp_total, fp_total, fn_total) in (
    ("\n== Without Context ==", (tp_without_context, fp_without_context, fn_without_context)),
    ("\n== With Context ==", (tp_with_context, fp_with_context, fn_with_context)),
):
    print(heading)
    print(f"Precision: {precision(tp_total, fp_total):.2f}")
    print(f"Recall:    {recall(tp_total, fn_total):.2f}")
    print(f"Accuracy:  {accuracy(tp_total, fp_total, fn_total):.2f}")



== Without Context ==
Precision: 0.00
Recall:    0.00
Accuracy:  0.00

== With Context ==
Precision: 0.00
Recall:    0.00
Accuracy:  0.00
