# ONTOLOGY EXPLORATION

In [None]:
import pathlib
import sys

# Current working directory at the time this cell runs; the cells below use it
# as the anchor for the ontology input path and the CSV output path.
NOTEBOOK_DIR = pathlib.Path.cwd()
# Prepend that directory to sys.path only once: the flag is stored in the
# module globals, so re-running this cell does not insert a duplicate entry.
if '_NB_SYS_PATH_ADJUSTED' not in globals():
    sys.path.insert(0, str(NOTEBOOK_DIR))
    _NB_SYS_PATH_ADJUSTED = True

import pandas as pd 
import rdflib

In [None]:
import modules.rdf_utils as rdf_utils

def load_graph(input_ttl: pathlib.Path) -> rdflib.Graph:
    """Load the ontology file at *input_ttl* through ``rdf_utils.read_graph``.

    Args:
        input_ttl: Path of the ontology file; it is handed to the reader as a
            plain string.

    Returns:
        Whatever ``rdf_utils.read_graph`` returns for that path (annotated
        here as an ``rdflib.Graph``).
    """
    ttl_location = str(input_ttl)
    return rdf_utils.read_graph(ttl_location)


In [None]:
# Locate the input ontology file ("ontologies" directory beside the notebook
# directory) and load it into the module-level graph `g`.
INPUT_TTL = NOTEBOOK_DIR.parent.joinpath("ontologies", "ontology_euclid_book1.ttl")
g = load_graph(INPUT_TTL)


In [None]:
import datetime

# Default base path for CSV exports (used as the default `output_path` of main).
OUTPUT_PATH = NOTEBOOK_DIR / "output"
# strftime pattern used by df_to_csv for its timestamp, e.g. "20240131-154502".
TIMESTAMP_FORMAT = "%Y%m%d-%H%M%S"

# SPARQL prefix declarations that run_query prepends to every query string, so
# individual queries can use these prefixes without declaring them.
PREFIXES = """
    PREFIX core: <https://www.foom.com/core#>
    PREFIX owl: <http://www.w3.org/2002/07/owl#>
    PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
    PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
    PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
    PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
"""

def run_query(g: rdflib.Graph, sparql_query: str) -> pd.DataFrame:
    """Run a SPARQL query against *g* and return the bindings as a DataFrame.

    The shared ``PREFIXES`` declarations are prepended to *sparql_query*
    before it is executed, so the query itself does not need to declare them.

    Args:
        g: Graph to query.
        sparql_query: SPARQL query text, without prefix declarations.

    Returns:
        A DataFrame built from ``results.bindings`` (one row per binding, one
        column per variable) in which every RDF term has been converted to a
        plain Python value.
    """
    results = g.query(PREFIXES + sparql_query)

    def _to_python(term):
        # pandas fills variables missing from a binding with NaN; keep those
        # as missing values instead of turning them into the string "nan".
        # NOTE(review): assumes unbound (e.g. OPTIONAL) variables are simply
        # absent from the binding dicts - confirm against rdflib.
        if term is None or (isinstance(term, float) and pd.isna(term)):
            return None
        return term.toPython() if hasattr(term, "toPython") else str(term)

    df = pd.DataFrame(results.bindings)
    # Convert each column exactly once. Re-running the conversion over values
    # that are already plain Python objects would push them through the
    # ``str()`` fallback and silently stringify ints, floats, dates, etc.
    for col in df.columns:
        df[col] = df[col].map(_to_python)
    print(f"Query returned {len(df)} results.")
    return df

def df_to_csv(df: pd.DataFrame, output_path: pathlib.Path) -> None:
    """Write *df* to a timestamped CSV file derived from *output_path*.

    The target file is ``<output_path>_<timestamp>.csv``, where the timestamp
    is the current local time formatted with ``TIMESTAMP_FORMAT``. The row
    index is not written.

    Args:
        df: Data to export.
        output_path: Base path (without timestamp or extension) of the file
            to create.
    """
    timestamp = datetime.datetime.now().strftime(TIMESTAMP_FORMAT)
    # Build the name from the caller-supplied path (not the module default)
    # and write to that same file, so the reported location is the real one.
    output_file = pathlib.Path(f"{output_path}_{timestamp}.csv")
    # Create missing parent directories so a custom base path does not fail.
    output_file.parent.mkdir(parents=True, exist_ok=True)
    df.to_csv(output_file, index=False)
    print(f"Saved results to {output_file}")

def main(g: rdflib.Graph,
         sparql_query: str,
         write_to_csv: bool = False,
         output_path: pathlib.Path = OUTPUT_PATH) -> pd.DataFrame:
    """Run *sparql_query* against *g* and optionally export the result.

    Args:
        g: Graph to query.
        sparql_query: SPARQL query text, passed to ``run_query``.
        write_to_csv: When true, the result is also passed to ``df_to_csv``.
        output_path: Path forwarded to ``df_to_csv``; defaults to
            ``OUTPUT_PATH``.

    Returns:
        The DataFrame produced by ``run_query``.
    """
    df = run_query(g, sparql_query)
    if write_to_csv:
        df_to_csv(df, output_path)
    return df