# Experimental Study Notebook

This notebook mirrors `dev.ipynb` but exercises the refactored modules under `modules`.
Use it to benchmark the updated pipeline without touching the legacy code.

**Default selections:** history = top 10% of activation scores; co-occurrence = top 5% of pair activations.

**Default history weights:** direct = 6/9, hierarchical = 1/9, mereological = 2/9.


In [None]:
from __future__ import annotations

import pathlib
import sys

# Directory the kernel was started in.
# NOTE(review): assumed to be the folder containing this notebook and the
# `modules` package -- confirm when launching Jupyter from elsewhere.
NOTEBOOK_DIR = pathlib.Path.cwd()
# Put that directory on the import path (once) so the `from modules import ...`
# statements below can resolve.
if str(NOTEBOOK_DIR) not in sys.path:
    sys.path.append(str(NOTEBOOK_DIR))

import pandas as pd
import rdflib

from modules import potential as exp_potential
from modules import rdf_utils
from modules.query_runner import QueryRunner

# Show which Python interpreter build is running this notebook.
print(sys.version)


In [None]:
# Default ontology file, located in the `ontologies` folder next to the
# notebook directory.
INPUT_TTL = NOTEBOOK_DIR.parent.joinpath("ontologies", "ontology_euclid_book1.ttl")

def load_graph(input_ttl: pathlib.Path = INPUT_TTL) -> rdflib.Graph:
    """Load the ontology file at *input_ttl* into an RDF graph.

    Args:
        input_ttl: Path of the ontology file to read; defaults to ``INPUT_TTL``.

    Returns:
        Whatever ``rdf_utils.read_graph`` produces for the stringified path.
    """
    return rdf_utils.read_graph(str(input_ttl))


In [None]:
from modules.surprise_score import SelectionCriteria, SelectionMode

# Defaults described in the notebook header: history keeps the top 10% of
# activation scores, co-occurrence keeps the top 5% of pair activations.
DEFAULT_HISTORY_SELECTION = SelectionCriteria(SelectionMode.TOP_FRACTION, 1 / 10)
DEFAULT_COOCCURRENCE_SELECTION = SelectionCriteria(SelectionMode.TOP_FRACTION, 1 / 20)
# History weights in the order (direct, hierarchical, mereological).
DEFAULT_HISTORY_WEIGHTS = (6 / 9, 1 / 9, 2 / 9)

def analyse(
    input_ttl: pathlib.Path = INPUT_TTL,
    upper_proposition_number: int = 10,
    history_weights: tuple[float, float, float] = DEFAULT_HISTORY_WEIGHTS,
    history_selection: SelectionCriteria = DEFAULT_HISTORY_SELECTION,
    cooccurrence_selection: SelectionCriteria = DEFAULT_COOCCURRENCE_SELECTION,
    *,
    verbose: bool = True,
) -> pd.DataFrame:
    """Run the refactored pipeline on propositions ``1..upper_proposition_number``.

    Each proposition number is passed to ``exp_potential.main`` together with
    the loaded graph and the selection/weight settings; the two collections it
    returns are sorted and joined with ``" ; "`` into one string each.

    Args:
        input_ttl: Ontology file handed to ``load_graph``.
        upper_proposition_number: Last proposition number to analyse
            (inclusive). Values below 1 yield an empty result.
        history_weights: Three weights forwarded as ``history_weights``.
        history_selection: Criteria forwarded as ``history_selection``.
        cooccurrence_selection: Criteria forwarded as
            ``cooccurrence_selection``.
        verbose: When true (the default), print a progress line before each
            proposition is analysed; pass ``False`` to suppress the output.

    Returns:
        A DataFrame with one row per proposition and the columns
        ``proposition``, ``background_concepts`` and ``surprising_concepts``.
    """
    graph = load_graph(input_ttl)
    # NOTE(review): `runner` is never used below. It is kept because the
    # QueryRunner constructor is not visible here and may have side effects on
    # `graph` -- confirm and remove if it has none.
    runner = QueryRunner(graph)

    columns = ["proposition", "background_concepts", "surprising_concepts"]
    analyses = []
    for proposition in range(1, upper_proposition_number + 1):
        if verbose:
            print(f"Analysing proposition {proposition}")
        background_concepts, surprising = exp_potential.main(
            graph,
            proposition,
            history_weights=history_weights,
            history_selection=history_selection,
            cooccurrence_selection=cooccurrence_selection,
        )

        analyses.append({
            "proposition": proposition,
            "background_concepts": " ; ".join(sorted(background_concepts)),
            "surprising_concepts": " ; ".join(sorted(surprising)),
        })
    # Pin the column names so an empty range still yields the expected schema.
    return pd.DataFrame(analyses, columns=columns)


In [None]:
# Driver cell: run the analysis over propositions 1-49 and display the table.
#
# The three defaults below repeat the values assigned in the cell that defines
# `analyse`; re-assigning them here rebinds the notebook-level names.
DEFAULT_HISTORY_SELECTION = SelectionCriteria(SelectionMode.TOP_FRACTION, 1 / 10)
DEFAULT_COOCCURRENCE_SELECTION = SelectionCriteria(SelectionMode.TOP_FRACTION, 1 / 20)
DEFAULT_HISTORY_WEIGHTS = (6 / 9, 1 / 9, 2 / 9)

# Settings actually used for this run; edit these to try other configurations.
history_selection = DEFAULT_HISTORY_SELECTION
cooccurrence_selection = DEFAULT_COOCCURRENCE_SELECTION
history_weights = DEFAULT_HISTORY_WEIGHTS

# NOTE(review): `verbose=False` requires `analyse` to accept a `verbose`
# keyword; confirm its definition declares one, otherwise this call raises
# TypeError.
results_df = analyse(
    upper_proposition_number=49,
    history_selection=history_selection,
    cooccurrence_selection=cooccurrence_selection,
    history_weights=history_weights,
    verbose=False,
)
# Bare expression so the notebook renders the resulting DataFrame.
results_df
