In [4]:
import logging
import os
import sys
from collections import defaultdict
import yaml
import json
import os
import re
import logging
import pandas as pd
import numpy as np
from dotenv import load_dotenv
from functools import partial
from rdflib import Graph, Literal, URIRef
from rdflib.namespace import RDF, RDFS
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
import dycomutils as common_utils
from typing import List, Dict, Any, Optional, Set, Tuple, DefaultDict

# Make the project's `src` package importable for the imports below.
# Guarded so that re-running this cell does not keep appending duplicate
# entries to sys.path.
if "/home/desild/work/research/chatbs/v2" not in sys.path:
    sys.path.append("/home/desild/work/research/chatbs/v2")

from src.utils.helpers import setup_logger
from src.utils.parser import graph_query_to_sexpr, is_inv_rel, get_inv_rel, graph_query_to_sparql
from src.utils.kg import get_readable_relation, get_readable_class, get_non_literals, get_nodes_by_class, \
    get_reverse_relation, get_reverse_readable_relation, prune_graph_query, legal_class, legal_relation
from src.utils.arguments import Arguments
from src.utils.sparql import SPARQLUtil, get_freebase_label, get_freebase_literals_by_cls_rel, \
    get_freebase_entid_lbl_by_cls
from src.utils.maps import literal_map

from transformers import set_seed
from tqdm import tqdm

# Configure the root logger once for the whole notebook: INFO level, with
# timestamp, level and logger name in front of each message.
logging.basicConfig(
    level=logging.INFO,
    datefmt='%m/%d/%Y %H:%M:%S',
    format='%(asctime)s - %(levelname)s - %(name)s -   %(message)s',
)
# Module-level logger named after the current module.
logger = logging.getLogger(__name__)

from src.explorer_updates import Explorer, ExecutableProgram, GraphManager, regex_add_strings

In [5]:
# --- 1. Setup & Configuration ---
# Absolute project root, and the v2 sub-directory from which later cells
# resolve the .env file, the YAML config and the workflow data.
ROOT_DIR = os.path.abspath("/home/desild/work/research/chatbs")
V2_DIR = os.path.join(ROOT_DIR, "v2")

# Root logging is already configured by the logging.basicConfig(...) call in
# the imports cell. basicConfig() does nothing once the root logger has
# handlers, so a second call with a different format would be silently
# ignored; only the named logger used by the following cells is created here.
log = logging.getLogger(__name__)

In [6]:
# Load .env file from the specified path
# R: load_dot_env("../ChatBS-NexGen/.env")
env_path = os.path.join(V2_DIR, ".env")
log.info(f"Loading .env file from: {env_path}")
# load_dotenv() does not raise when the file is missing or empty; it only
# returns False. Surface that case instead of continuing silently.
if not load_dotenv(env_path):
    log.warning(f"No environment variables were loaded from: {env_path}")

# Load YAML config
# R: config <- yaml::read_yaml(...)
config_path = os.path.join(V2_DIR, "prov.config.yaml")
log.info(f"Loading config: {config_path}")
# Explicit UTF-8 so decoding does not depend on the machine's locale default.
with open(config_path, 'r', encoding="utf-8") as f:
    config = yaml.safe_load(f)

# Load JSON metadata
# R: ttl_metadata <- readLines("QGraph_metadata.json")
metadata_path = os.path.join(V2_DIR, "data/workflow/chatbs_sample_metadata.json")
log.info(f"Loading metadata: {metadata_path}")
# Explicit UTF-8 so decoding does not depend on the machine's locale default.
with open(metadata_path, 'r', encoding="utf-8") as f:
    ttl_metadata = json.load(f)

11/09/2025 17:58:34 - INFO - __main__ -   Loading .env file from: /home/desild/work/research/chatbs/v2/.env
11/09/2025 17:58:34 - INFO - __main__ -   Loading config: /home/desild/work/research/chatbs/v2/prov.config.yaml
11/09/2025 17:58:34 - INFO - __main__ -   Loading metadata: /home/desild/work/research/chatbs/v2/data/workflow/chatbs_sample_metadata.json


In [7]:
# Build the graph manager from the parsed config and the sample workflow
# .ttl file under the v2 data directory.
graph_manager = GraphManager(
    config,
    os.path.join(V2_DIR, "data/workflow/chatbs_sample.ttl"),
)

11/09/2025 17:58:34 - INFO - src.explorer_updates -   Initializing GraphManager...
11/09/2025 17:58:34 - INFO - src.explorer_updates -   Graph loaded with 1053 triples.


In [None]:


# R: sparql_get_questions <- "..."
# SPARQL query selecting every distinct (?obj, ?value) pair linked by
# provone:hasOutPort. Only the provone: prefix is used by the query body;
# the other PREFIX declarations are unused here.
sparql_get_questions = """
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX owl: <http://www.w3.org/2002/07/owl#>
PREFIX ep: <http://linkedu.eu/dedalo/explanationPattern.owl#>
PREFIX eo: <https://purl.org/heals/eo#>
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
PREFIX dc: <http://purl.org/dc/elements/1.1/>
PREFIX food: <http://purl.org/heals/food/>
PREFIX prov: <http://www.w3.org/ns/prov#>
PREFIX provone: <http://purl.org/provone#>
PREFIX sio:<http://semanticscience.org/resource/>

SELECT distinct ?obj ?value where {\n  ?obj provone:hasOutPort ?value  .\n}
"""

# R: question_df <- query_func(graph_func$graph, sparql_get_questions,)
question_df = graph_manager.query(sparql_get_questions)
log.info(f"Loaded {len(question_df)} questions from the graph.")
# An empty result is easy to miss in the INFO line above, so flag it
# explicitly. NOTE(review): a mismatch between the provone: namespace declared
# in the query and the one used in the graph is one possible cause - confirm.
if len(question_df) == 0:
    log.warning("Query returned no rows; check that the graph uses the provone: namespace and hasOutPort predicate declared in the query.")
question_df

11/09/2025 18:08:49 - INFO - __main__ -   Loaded 0 questions from the graph.


Unnamed: 0,obj,value
