# Typical activation potential

This notebook implements the *typical activation potential* from `main.tex`.

Notation (from `main.tex`):
- Phi_T(r, C) is the typical activation potential for resource r in context C.
- Phi_h(r, C) is the historical component of activation potential.
- Phi_Tc(r, C) is the typical co-occurrence component (Phi_{T_c}).
- delta (DELTA in code) is the weight in Phi_T = delta * Phi_h + (1 - delta) * Phi_Tc, with delta in [0, 1].

Assumptions (matching the existing SPARQL queries):
- Context C for proposition n is the prefix of definitions/postulates/common notions plus propositions/proofs 1..n-1.
- "Together" for co-occurrence means resources that co-occur within the same definition/postulate/common notion or within the same proposition/proof.
- Phi_h is computed from history queries; Phi_Tc is a resource's Hebbian degree (the sum of the weights of the co-occurrence pairs it appears in) divided by the total degree over all resources in those pairs; Phi_T is the weighted sum above.
- Empty denominators yield 0 for the corresponding potential.
- When TYPE_SELECTION is true, co-occurrence within propositions/proofs is type-based (relation/operation types); when false, it is not.


In [None]:
from __future__ import annotations

import datetime as dt
from pathlib import Path

import pandas as pd

from modules import rdf_utils, file_utils
from modules.calculate_activation_potential import history as history_potential
from modules.calculate_activation_potential import hebb as hebb_potential
from modules.query_runner import QueryRunner


In [None]:
# Run parameters.
# DELTA is delta in Phi_T = delta * Phi_h + (1 - delta) * Phi_Tc.
DELTA = 0.5

# Phi_h weights, in order: direct, hierarchical, mereological, concept-membership.
HISTORY_WEIGHTS = (6 / 9,) + (1 / 9,) * 3

# When True, co-occurrence in propositions/proofs is type-based.
TYPE_SELECTION = False

# First and last proposition to process (both inclusive).
START_PROPOSITION, END_PROPOSITION = 1, 48

# Folder that receives the CSV output; created up front if it is missing.
OUTPUT_DIR = Path('output')
OUTPUT_DIR.mkdir(exist_ok=True, parents=True)


In [None]:
# Locate the latest 'ontology_*.ttl' file under ./ontologies, load it into a
# graph, and create the single QueryRunner that the cells below reuse.
INPUT_TTL = file_utils.latest_file(
    extension='ttl',
    filename_fragment='ontology_',
    folder=Path('ontologies'),
)
graph = rdf_utils.load_graph(INPUT_TTL)
runner = QueryRunner(graph)


In [None]:
def compute_phi_tc(hebb_df: pd.DataFrame) -> pd.DataFrame:
    """Return Phi_Tc (typical co-occurrence component, Phi_{T_c}) per resource.

    A resource's Hebbian degree is the sum of the ``activation_potential``
    weights of every pair it appears in (as ``o1`` or as ``o2``). Phi_Tc is
    that degree divided by the sum of all degrees.

    Args:
        hebb_df: One row per co-occurrence pair, with columns ``o1``, ``o2``
            and ``activation_potential``.

    Returns:
        A frame with columns ``o`` (resource key as ``str``, in order of first
        appearance) and ``phi_tc``. Empty input yields an empty frame with
        those two columns; a zero total degree yields ``phi_tc == 0.0`` for
        every resource.
    """
    if hebb_df.empty:
        return pd.DataFrame(columns=['o', 'phi_tc'])

    # Walk the columns instead of using ``iterrows()``: iterrows builds a
    # Series per row and coerces the whole row to one dtype, so an integer id
    # next to a float weight would be stringified as '1.0' rather than '1'.
    degrees: dict[str, float] = {}
    pairs = zip(
        hebb_df['o1'].tolist(),
        hebb_df['o2'].tolist(),
        hebb_df['activation_potential'].tolist(),
    )
    for first, second, raw_weight in pairs:
        weight = float(raw_weight)
        # Each pair contributes its weight to both endpoints.
        for resource in (str(first), str(second)):
            degrees[resource] = degrees.get(resource, 0.0) + weight

    total_degree = sum(degrees.values())
    if total_degree == 0:
        # Empty denominator: every potential is defined as 0.
        shares = [0.0] * len(degrees)
    else:
        shares = [degree / total_degree for degree in degrees.values()]

    return pd.DataFrame({'o': list(degrees), 'phi_tc': shares})


def compute_phi_t_for_proposition(proposition_number: int) -> pd.DataFrame:
    """Compute Phi_h, Phi_Tc, and Phi_T for the proposition's context C.

    Phi_h is taken from ``history_potential`` (weighted by
    ``HISTORY_WEIGHTS``), Phi_Tc from ``compute_phi_tc`` applied to the
    ``hebb_potential`` pairs, and
    Phi_T = DELTA * Phi_h + (1 - DELTA) * Phi_Tc.

    Args:
        proposition_number: Proposition whose context C is evaluated.

    Returns:
        One row per resource in the union of both components, sorted by
        resource key, with columns ``proposition``, ``concept``, ``phi_h``,
        ``phi_tc`` and ``phi_t``. A resource absent from one component gets
        0.0 for that component.
    """
    # Phi_h from historical use (direct/hierarchical/mereological + concept-membership).
    history_df = history_potential(
        graph,
        proposition_number,
        weights=HISTORY_WEIGHTS,
        runner=runner,
    )

    # Hebbian co-occurrence pairs for Phi_Tc.
    # TYPE_SELECTION=True uses relation/operation types for proposition/proof co-occurrence.
    hebb_df = hebb_potential(graph, proposition_number, runner=runner, type_selection=TYPE_SELECTION)
    phi_tc_df = compute_phi_tc(hebb_df)

    # Each series maps resource key -> component value; empty inputs give empty series.
    if history_df.empty:
        phi_h = pd.Series(dtype=float)
    else:
        phi_h = history_df.set_index('o')['activation_potential']
        # compute_phi_tc keys resources by str(); key Phi_h the same way so a
        # resource is matched with its Phi_Tc value instead of occupying a
        # second row of the union.
        # NOTE(review): only matters if history_potential returns non-str keys
        # (e.g. RDF term objects) - confirm against that module.
        phi_h.index = phi_h.index.map(str)

    if phi_tc_df.empty:
        phi_tc = pd.Series(dtype=float)
    else:
        phi_tc = phi_tc_df.set_index('o')['phi_tc']

    # Union of resources seen historically or co-occurring in context C.
    # Sorting makes the output stable across runs for identical inputs.
    universe = sorted(set(phi_h.index) | set(phi_tc.index))

    # Put both components on the same universe; missing values become 0.
    phi_h = phi_h.reindex(universe, fill_value=0.0)
    phi_tc = phi_tc.reindex(universe, fill_value=0.0)

    # Phi_T is the convex combination from main.tex.
    phi_t = DELTA * phi_h + (1 - DELTA) * phi_tc

    return pd.DataFrame({
        'proposition': proposition_number,
        'concept': universe,
        'phi_h': phi_h.values,
        'phi_tc': phi_tc.values,
        'phi_t': phi_t.values,
    })


In [None]:
# Evaluate Phi_T(r, C) for every proposition in the configured range and stack
# the per-proposition tables into one long table.
results = []
for proposition in range(START_PROPOSITION, END_PROPOSITION + 1):
    results.append(compute_phi_t_for_proposition(proposition))

results_df = pd.concat(results, ignore_index=True)
results_df

# Global extremes of Phi_T, together with every concept and proposition
# (proof) at which each extreme is attained.
phi_t_column = results_df['phi_t']
min_phi_t, max_phi_t = phi_t_column.min(), phi_t_column.max()

rows_at_min = results_df[phi_t_column == min_phi_t]
rows_at_max = results_df[phi_t_column == max_phi_t]

min_concepts = rows_at_min['concept'].unique()
min_props = rows_at_min['proposition'].unique()
max_concepts = rows_at_max['concept'].unique()
max_props = rows_at_max['proposition'].unique()

print(
    f"Min phi_t: {min_phi_t}"
    f" | concepts: {', '.join(str(c) for c in min_concepts)}"
    f" | propositions: {', '.join(str(p) for p in sorted(min_props))}"
)
print(
    f"Max phi_t: {max_phi_t}"
    f" | concepts: {', '.join(str(c) for c in max_concepts)}"
    f" | propositions: {', '.join(str(p) for p in sorted(max_props))}"
)


In [None]:
# Save the long table as CSV under a filename that records the run parameters.
# Each weight is rendered with 4 decimals, then trailing zeros and a dangling
# dot are stripped to keep the name compact.
weights_token = '-'.join(
    format(w, '.4f').rstrip('0').rstrip('.') for w in HISTORY_WEIGHTS
)
# Second-resolution timestamp so successive outputs are distinct and sortable.
timestamp = format(dt.datetime.now(), '%Y%m%d_%H%M%S')

output_name = (
    f'typical_phiT_delta-{DELTA:.3f}'
    f'_weights-{weights_token}'
    f'_p{START_PROPOSITION}-p{END_PROPOSITION}'
    f'_{timestamp}.csv'
)
output_path = OUTPUT_DIR / output_name
results_df.to_csv(output_path, index=False)  # no index column in the CSV
output_path


In [None]:
# Tiny sanity check: for the first three computed propositions, print the
# top and bottom five resources by Phi_T.
# The range starts at START_PROPOSITION rather than a hard-coded 1, so the
# check still finds rows when the run begins at a later proposition. With
# START_PROPOSITION = 1 this covers propositions 1-3.
shown_columns = ['concept', 'phi_t']
last_checked = min(START_PROPOSITION + 2, END_PROPOSITION)
for prop in range(START_PROPOSITION, last_checked + 1):
    # Rank this proposition's resources by typical activation potential.
    subset = results_df[results_df['proposition'] == prop].sort_values('phi_t', ascending=False)
    print(f'Proposition {prop}: top 5')
    print(subset.head(5)[shown_columns].to_string(index=False))
    print(f'Proposition {prop}: bottom 5')
    print(subset.tail(5)[shown_columns].to_string(index=False))
    print()
