In [76]:
# Candidate values for each template placeholder, keyed by the upper-cased
# placeholder name. A list supplies several alternative values; a bare string
# supplies a single fixed value.
placeholder_to_entity_dict = dict(
    DIAGNOSIS=["I10", "I708", "I25110"],
    THRESHOLD=["1.0", "2.0", "7.0"],
    DRUG="M01AE01",
    AGE=["16", "42", "66"],
    SEVERITY="Serious",
)

In [77]:
from itertools import product
import re
from typing import Dict, List, Tuple


def replace_placeholders(
    question_template: str,
    query_template: str,
    placeholder_to_entity_dict: Dict[str, List[str] | str],
) -> List[Tuple[str, str]]:

    # Define the regex pattern for the chosen delimiter
    question_pattern = r'\{(.*?)\}'
    query_pattern = r'§(.*?)§'

    # Extract placeholders used in question and query
    placeholders_query = set(re.findall(query_pattern, query_template))
    placeholders_question = set(re.findall(question_pattern,question_template))


    placeholders = placeholders_query  # or placeholders_question, since they are equal
    # Build replacement lists for each placeholder
    replacements = []
    keys = []
    for key in placeholders:
        value = placeholder_to_entity_dict.get(key.upper())
        if value is None:
            continue  # Skip unknown placeholders
        if isinstance(value, list):
            replacements.append(value)
        else:
            replacements.append([value])
        keys.append(key)

    # Generate all combinations
    all_combinations = list(product(*replacements))

    results = []
    for combo in all_combinations:
        question = question_template
        query = query_template
        for key, val in zip(keys, combo):
                question = re.sub(f'{{{key}}}', val,question)
                query = re.sub(f'§{key}§', val, query)
        results.append((question, query))

    return results

In [78]:
import os

import pandas as pd

# Location of the expanded templates CSV. The default is the original
# machine-specific absolute path; set the EXPANDED_TEMPLATES_CSV environment
# variable to run the notebook on another machine without editing this cell.
EXPANDED_TEMPLATES_CSV = os.environ.get(
    'EXPANDED_TEMPLATES_CSV',
    '/home/mathiasyap/Code/university/phkg/MAI_Project_PHKG/tests/expanded_templates.csv',
)

expanded_templates_df = pd.read_csv(EXPANDED_TEMPLATES_CSV)

In [79]:
# .size is the total number of cells (rows x columns), not the number of rows.
expanded_templates_df.size

240

In [80]:
further_df = expanded_templates_df

# Every template row fans out into one record per placeholder combination;
# each record also keeps the templates it was generated from.
expanded_rows = [
    {
        'question': question,
        'sparql': sparql,
        'question_template': template_row['question_template'],
        'sparql_template': template_row['sparql_template'],
    }
    for _, template_row in further_df.iterrows()
    for question, sparql in replace_placeholders(
        template_row['question_template'],
        template_row['sparql_template'],
        placeholder_to_entity_dict,
    )
]

# Build the expanded frame in one go from the collected records.
results_df = pd.DataFrame(expanded_rows)
results_df

Unnamed: 0,question,sparql,question_template,sparql_template
0,Which patients have received a diagnosis of I10?,PREFIX sphn: <https://www.biomedit.ch/rdf/sphn...,Which patients have received a diagnosis of {D...,PREFIX sphn: <https://www.biomedit.ch/rdf/sphn...
1,Which patients have received a diagnosis of I708?,PREFIX sphn: <https://www.biomedit.ch/rdf/sphn...,Which patients have received a diagnosis of {D...,PREFIX sphn: <https://www.biomedit.ch/rdf/sphn...
2,Which patients have received a diagnosis of I2...,PREFIX sphn: <https://www.biomedit.ch/rdf/sphn...,Which patients have received a diagnosis of {D...,PREFIX sphn: <https://www.biomedit.ch/rdf/sphn...
3,Which patients have been given a diagnosis of ...,PREFIX sphn: <https://www.biomedit.ch/rdf/sphn...,Which patients have been given a diagnosis of ...,PREFIX sphn: <https://www.biomedit.ch/rdf/sphn...
4,Which patients have been given a diagnosis of ...,PREFIX sphn: <https://www.biomedit.ch/rdf/sphn...,Which patients have been given a diagnosis of ...,PREFIX sphn: <https://www.biomedit.ch/rdf/sphn...
...,...,...,...,...
265,Which patients have encountered an adverse eve...,PREFIX sphn: <https://www.biomedit.ch/rdf/sphn...,Which patients have encountered an adverse eve...,PREFIX sphn: <https://www.biomedit.ch/rdf/sphn...
266,Which patients have experienced an adverse eve...,PREFIX sphn: <https://www.biomedit.ch/rdf/sphn...,Which patients have experienced an adverse eve...,PREFIX sphn: <https://www.biomedit.ch/rdf/sphn...
267,Which patients have undergone an adverse event...,PREFIX sphn: <https://www.biomedit.ch/rdf/sphn...,Which patients have undergone an adverse event...,PREFIX sphn: <https://www.biomedit.ch/rdf/sphn...
268,Which patients have undergone an adverse event...,PREFIX sphn: <https://www.biomedit.ch/rdf/sphn...,Which patients have undergone an adverse event...,PREFIX sphn: <https://www.biomedit.ch/rdf/sphn...


In [81]:
# .size counts cells (rows x columns); use .shape or len() for the row count.
results_df.size

1080

In [82]:
# Spot-check one generated query; [90] is a lookup by index label on the 'sparql' column.
results_df['sparql'][90]

'PREFIX sphn:  <https://www.biomedit.ch/rdf/sphn-schema/sphn/>\nSELECT ?patient WHERE {\n  ?presc sphn:hasSubjectPseudoIdentifier ?patient .\n  ?presc sphn:hasDrug ?drug .\n  ?drug sphn:hasCode ?code .\n  ?code sphn:hasValue "M01AE01"\n}\n'

In [83]:
# Create the executor that later cells use to run the generated SPARQL queries.
# NOTE(review): construct_graph=False presumably reuses a graph that was already
# built from graph_path - confirm against QueryExecutorStep.
from pipeline.query_engine_component import QueryExecutorStep
query_executor = QueryExecutorStep(engine_name = "milleniumDB", graph_path="rdf_400_sphn_augmented_hybrid.nt"
, construct_graph=False)

['mdb:latest']
['docker-aseprite:latest']
['ghcr.io/avantlab/avantgraph:openaire-transport']
['ghcr.io/avantlab/avantgraph:openaire-energy']
['ghcr.io/avantlab/avantgraph:openaire-neuro']
['ghcr.io/avantlab/avantgraph:openaire-cancer']
['hello-world:latest']
['ghcr.io/avantlab/avantgraph:ckg']
['ghcr.io/avantlab/avantgraph:release-2024-01-31']


In [84]:
# (rows, columns) of the expanded question/query frame.
results_df.shape

(270, 4)

In [85]:
success_count = 0
fail_count = 0
# (query, error) pairs for every query that raised, so failures can be
# diagnosed afterwards instead of being silently discarded.
query_errors = []

def safe_query(q):
    """Run one SPARQL query and return its result, or None if it raises.

    Updates the module-level ``success_count`` / ``fail_count`` counters and,
    on failure, appends ``(q, repr(error))`` to ``query_errors``. Exceptions
    are deliberately not re-raised so that one bad query does not abort the
    whole ``apply`` pass.
    """
    global success_count, fail_count
    try:
        result = query_executor.query(q, path=False)
    except Exception as exc:
        fail_count += 1
        query_errors.append((q, repr(exc)))
        return None
    success_count += 1
    return result

# A None in 'expected_result' marks a query that failed to execute.
results_df['expected_result'] = results_df['sparql'].apply(safe_query)
print(f"Succeeded: {success_count}, Failed: {fail_count}")

Succeeded: 269, Failed: 1


In [86]:
# Write the rows whose expected_result is missing to failed_queries.csv
# (index column omitted).
no_result_mask = results_df['expected_result'].isna()
failed_queries = results_df.loc[no_result_mask]
failed_queries.to_csv("failed_queries.csv", index=False)

In [87]:
# Distinct SPARQL templates that have at least one row without an expected result.
unique_templates_with_no_result = results_df.loc[
    results_df['expected_result'].isna(), 'sparql_template'
].unique()
unique_templates_with_no_result

array(['PREFIX sphn:  <https://www.biomedit.ch/rdf/sphn-schema/sphn/>\nSELECT ?patient WHERE {\n  ?presc sphn:hasSubjectPseudoIdentifier ?patient .\n  ?presc sphn:hasDrug ?drug .\n  ?drug sphn:hasCode ?code .\n  ?code sphn:hasValue "§Drug§"\n'],
      dtype=object)

In [88]:

# Distinct SPARQL templates that have at least one row with an expected result.
unique_templates_with_result = results_df.loc[
    results_df['expected_result'].notna(), 'sparql_template'
].unique()
unique_templates_with_result

array(['PREFIX sphn: <https://www.biomedit.ch/rdf/sphn-schema/sphn/>\nPREFIX icd: <https://www.biomedit.ch/rdf/sphn-schema/sphn/icd#>\n\nSELECT ?patient WHERE {\n  ?patient sphn:hasDiagnosis ?diag .\n  ?diag sphn:hasCode ?code .\n  ?code sphn:hasCodeValue icd:§Diagnosis§ .\n}\n',
       'PREFIX sphn:  <https://www.biomedit.ch/rdf/sphn-schema/sphn/>\nSELECT ?patient WHERE {\n  ?event sphn:hasSubjectPseudoIdentifier ?patient .\n  ?event sphn:hasLabResult ?res .\n  ?res sphn:hasQuantityValue ?val .\n  FILTER(?val > §Threshold§)\n}\n',
       'PREFIX sphn:  <https://www.biomedit.ch/rdf/sphn-schema/sphn/>\nSELECT ?patient WHERE {\n  ?presc sphn:hasSubjectPseudoIdentifier ?patient .\n  ?presc sphn:hasDrug ?drug .\n  ?drug sphn:hasCode ?code .\n  ?code sphn:hasValue "§Drug§"\n}\n',
       'PREFIX sphn: <https://www.biomedit.ch/rdf/sphn-schema/sphn/>\nPREFIX icd: <https://www.biomedit.ch/rdf/sphn-schema/sphn/icd#>\n\nSELECT (COUNT(*) AS ?numPatients) WHERE {\n  {\n    SELECT DISTINCT ?patient 

In [92]:
# Select the rows generated from the "Which patients have administered ..." question templates.
administered_mask = results_df['question_template'].str.contains(
    r'Which patients have administered', regex=True
)
what = results_df[administered_mask]

In [96]:
# 91 is an index label carried over from results_df (boolean filtering keeps
# the original labels); print() renders the query's "\n" escapes as real line breaks.
print(what['sparql'][91])

PREFIX sphn:  <https://www.biomedit.ch/rdf/sphn-schema/sphn/>
SELECT ?patient WHERE {
  ?presc sphn:hasSubjectPseudoIdentifier ?patient .
  ?presc sphn:hasDrug ?drug .
  ?drug sphn:hasCode ?code .
  ?code sphn:hasValue "M01AE01"
}



In [98]:
# Display the frame again, now including the 'expected_result' column.
results_df

Unnamed: 0,question,sparql,question_template,sparql_template,expected_result
0,Which patients have received a diagnosis of I10?,PREFIX sphn: <https://www.biomedit.ch/rdf/sphn...,Which patients have received a diagnosis of {D...,PREFIX sphn: <https://www.biomedit.ch/rdf/sphn...,[{'patient': https://www.biomedit.ch/rdf/sphn-...
1,Which patients have received a diagnosis of I708?,PREFIX sphn: <https://www.biomedit.ch/rdf/sphn...,Which patients have received a diagnosis of {D...,PREFIX sphn: <https://www.biomedit.ch/rdf/sphn...,[{'patient': https://www.biomedit.ch/rdf/sphn-...
2,Which patients have received a diagnosis of I2...,PREFIX sphn: <https://www.biomedit.ch/rdf/sphn...,Which patients have received a diagnosis of {D...,PREFIX sphn: <https://www.biomedit.ch/rdf/sphn...,[{'patient': https://www.biomedit.ch/rdf/sphn-...
3,Which patients have been given a diagnosis of ...,PREFIX sphn: <https://www.biomedit.ch/rdf/sphn...,Which patients have been given a diagnosis of ...,PREFIX sphn: <https://www.biomedit.ch/rdf/sphn...,[{'patient': https://www.biomedit.ch/rdf/sphn-...
4,Which patients have been given a diagnosis of ...,PREFIX sphn: <https://www.biomedit.ch/rdf/sphn...,Which patients have been given a diagnosis of ...,PREFIX sphn: <https://www.biomedit.ch/rdf/sphn...,[{'patient': https://www.biomedit.ch/rdf/sphn-...
...,...,...,...,...,...
265,Which patients have encountered an adverse eve...,PREFIX sphn: <https://www.biomedit.ch/rdf/sphn...,Which patients have encountered an adverse eve...,PREFIX sphn: <https://www.biomedit.ch/rdf/sphn...,[{'patient': http://kg-representation-ehr.org/...
266,Which patients have experienced an adverse eve...,PREFIX sphn: <https://www.biomedit.ch/rdf/sphn...,Which patients have experienced an adverse eve...,PREFIX sphn: <https://www.biomedit.ch/rdf/sphn...,[{'patient': http://kg-representation-ehr.org/...
267,Which patients have undergone an adverse event...,PREFIX sphn: <https://www.biomedit.ch/rdf/sphn...,Which patients have undergone an adverse event...,PREFIX sphn: <https://www.biomedit.ch/rdf/sphn...,[{'patient': http://kg-representation-ehr.org/...
268,Which patients have undergone an adverse event...,PREFIX sphn: <https://www.biomedit.ch/rdf/sphn...,Which patients have undergone an adverse event...,PREFIX sphn: <https://www.biomedit.ch/rdf/sphn...,[{'patient': http://kg-representation-ehr.org/...


In [97]:
# Keep only the rows that have an expected result.
has_expected_result = results_df['expected_result'].notna()
filtered_results_df = results_df.loc[has_expected_result]
filtered_results_df

Unnamed: 0,question,sparql,question_template,sparql_template,expected_result
0,Which patients have received a diagnosis of I10?,PREFIX sphn: <https://www.biomedit.ch/rdf/sphn...,Which patients have received a diagnosis of {D...,PREFIX sphn: <https://www.biomedit.ch/rdf/sphn...,[{'patient': https://www.biomedit.ch/rdf/sphn-...
1,Which patients have received a diagnosis of I708?,PREFIX sphn: <https://www.biomedit.ch/rdf/sphn...,Which patients have received a diagnosis of {D...,PREFIX sphn: <https://www.biomedit.ch/rdf/sphn...,[{'patient': https://www.biomedit.ch/rdf/sphn-...
2,Which patients have received a diagnosis of I2...,PREFIX sphn: <https://www.biomedit.ch/rdf/sphn...,Which patients have received a diagnosis of {D...,PREFIX sphn: <https://www.biomedit.ch/rdf/sphn...,[{'patient': https://www.biomedit.ch/rdf/sphn-...
3,Which patients have been given a diagnosis of ...,PREFIX sphn: <https://www.biomedit.ch/rdf/sphn...,Which patients have been given a diagnosis of ...,PREFIX sphn: <https://www.biomedit.ch/rdf/sphn...,[{'patient': https://www.biomedit.ch/rdf/sphn-...
4,Which patients have been given a diagnosis of ...,PREFIX sphn: <https://www.biomedit.ch/rdf/sphn...,Which patients have been given a diagnosis of ...,PREFIX sphn: <https://www.biomedit.ch/rdf/sphn...,[{'patient': https://www.biomedit.ch/rdf/sphn-...
...,...,...,...,...,...
265,Which patients have encountered an adverse eve...,PREFIX sphn: <https://www.biomedit.ch/rdf/sphn...,Which patients have encountered an adverse eve...,PREFIX sphn: <https://www.biomedit.ch/rdf/sphn...,[{'patient': http://kg-representation-ehr.org/...
266,Which patients have experienced an adverse eve...,PREFIX sphn: <https://www.biomedit.ch/rdf/sphn...,Which patients have experienced an adverse eve...,PREFIX sphn: <https://www.biomedit.ch/rdf/sphn...,[{'patient': http://kg-representation-ehr.org/...
267,Which patients have undergone an adverse event...,PREFIX sphn: <https://www.biomedit.ch/rdf/sphn...,Which patients have undergone an adverse event...,PREFIX sphn: <https://www.biomedit.ch/rdf/sphn...,[{'patient': http://kg-representation-ehr.org/...
268,Which patients have undergone an adverse event...,PREFIX sphn: <https://www.biomedit.ch/rdf/sphn...,Which patients have undergone an adverse event...,PREFIX sphn: <https://www.biomedit.ch/rdf/sphn...,[{'patient': http://kg-representation-ehr.org/...


In [99]:
# Save the rows that have an expected result as the test set.
# NOTE(review): the index is written as the first CSV column here, unlike
# failed_queries.csv, which is written with index=False - confirm this is intended.
filtered_results_df.to_csv('test_set.csv')