In [None]:
# Report the interpreter version of the runtime (IPython shell escape).
!python --version
print("---")

# install and import modules
# NOTE(review): rdflib is installed without a version pin; consider pinning it for reproducibility.
%pip install rdflib

import google
import pandas as pd
import rdflib
import os
import typing

# mount drive here to read files from the folder "My Drive > Colab_Notebooks > Formal_Ontology_of_Mathematics > creativity"
# NOTE(review): only the top-level `google` package is imported above; presumably the Colab
# runtime has already loaded `google.colab.drive` — confirm, or import it explicitly.
google.colab.drive.mount('/content/drive')

# Make the project folder the working directory, so the relative "input" and "output"
# paths used by the functions in this notebook resolve against it.
os.chdir("/content/drive/My Drive/Colab_Notebooks/Formal_Ontology_of_Mathematics/creativity")

print("---")
# Show the current working directory.
!pwd

print("---")
# List the contents of the working directory.
!ls

# ACTIVATION POTENTIAL

In [None]:
# parameters
# File name that access_graph joins onto its input folder.
file_name = "ontology_output_v2.ttl"


# Each set below names three SPARQL query files (definitions, postulates,
# common notions); get_preface_history passes each set to get_table_with_links.
direct_history_preface = {
    "direct_history_definitions.sparql",
    "direct_history_postulates.sparql",
    "direct_history_common_notions.sparql"
}

# NOTE(review): the variable name is spelled "hierarhical" (missing "c"); it is
# referenced under that spelling elsewhere in the notebook, so it is kept as is.
hierarhical_history_preface = {
    "hierarchical_history_definitions.sparql",
    "hierarchical_history_postulates.sparql",
    "hierarchical_history_common_notions.sparql"
}

# NOTE(review): the query files of this set use the "horizontal_history_part1"
# prefix although the variable is named "indirect_mereological" — presumably
# the same notion under two names; confirm.
indirect_mereological_history_medium_importance_preface = {
    "horizontal_history_part1_definitions.sparql",
    "horizontal_history_part1_postulates.sparql",
    "horizontal_history_part1_common_notions.sparql"
}

In [None]:
# general functions

def access_graph(file_name: str,
                 folder_name: str = "input") -> rdflib.Graph:
    """Parse an RDF file into a fresh graph.

    The path ``folder_name/file_name`` is printed before it is parsed.

    Args:
        file_name: Name of the file holding the RDF data (e.g., "ontology_output.ttl").
        folder_name: Folder that contains the file. Defaults to "input".

    Returns:
        The value returned by ``rdflib.Graph().parse`` for that path.
    """
    graph_path = os.path.join(folder_name, file_name)
    print(graph_path)
    graph = rdflib.Graph()
    return graph.parse(graph_path)


def run_sparql_query(knowledge_graph: rdflib.Graph,
                     sparql_query_name: str,
                     folder_name: str = os.path.join("input", "sparql_queries")
                     ) -> rdflib.query.Result:
    """Execute a SPARQL query stored in a file against a knowledge graph.

    The path of the query file is printed before the file is read.

    Args:
        knowledge_graph: The rdflib.Graph to query.
        sparql_query_name: File name of the SPARQL query (e.g., "query_6.sparql").
        folder_name: Folder holding the query file. Defaults to "input/sparql_queries".

    Returns:
        Whatever ``knowledge_graph.query`` returns for the query text.
    """
    query_path = os.path.join(folder_name, sparql_query_name)
    print(query_path)

    # read the complete query text; the file is closed before the query runs
    with open(query_path, "r") as handle:
        query_text = handle.read()

    query_result = knowledge_graph.query(query_text)
    return query_result


def get_table_with_links(knowledge_graph: rdflib.Graph,
                         sparql_queries: set = direct_history_preface
                         ) -> pd.DataFrame:
    """Collect conceptual items and their use numbers from several SPARQL queries.

    Every query file named in ``sparql_queries`` is run through
    ``run_sparql_query``; each result row contributes its ``o`` binding
    (converted with ``toPython`` when the term offers it) and its ``links``
    binding converted to ``int``.

    Args:
        knowledge_graph: The rdflib.Graph to query.
        sparql_queries: Names of the SPARQL query files to run.
            Defaults to ``direct_history_preface``.

    Returns:
        A pandas DataFrame with the columns "conceptual_item" and "use_number",
        sorted by "use_number" in descending order.
    """
    # one [conceptual_item, use_number] row per query result, in query order
    rows = [
        [getattr(row.o, "toPython", lambda: row.o)(), int(row.links)]
        for query_name in sparql_queries
        for row in run_sparql_query(knowledge_graph, query_name)
    ]

    table = pd.DataFrame(rows, columns=["conceptual_item", "use_number"])

    # most used conceptual items first
    return table.sort_values(by=["use_number"], ascending=[False])

## HISTORICAL ACTIVATION POTENTIAL

### DIRECT USE

In [None]:
"""
History of direct use of concepts up to the given proposition.
"""


# def get_direct_history(kg: rdflib.Graph,
#                        sparql_queries_folder: str = os.path.join("input", "sparql_queries")):
#     pass
    # get_direct_history_definitions()
    # get_direct_history_postulates()
    # get_direct_history_common_notions()

    # return direct_history

def get_preface_history(kg: rdflib.Graph,
                        direct_history_preface: set = direct_history_preface,
                        hierarhical_history_preface: set = hierarhical_history_preface,
                        indirect_mereological_history_medium_importance_preface: set = indirect_mereological_history_medium_importance_preface,
                        sparql_queries_folder: str = os.path.join("input", "sparql_queries")):
    """Build the history tables of the preface (definitions, postulates, common notions).

    Args:
        kg: The knowledge graph to query.
        direct_history_preface: Query files for the direct history.
        hierarhical_history_preface: Query files for the indirect hierarchical history.
        indirect_mereological_history_medium_importance_preface: Query files for
            the indirect mereological history.
        sparql_queries_folder: Accepted but not used by this function.

    Returns:
        A list of three DataFrames produced by ``get_table_with_links``, in the
        order: direct, hierarchical, indirect mereological.
    """
    query_sets_in_order = (
        direct_history_preface,
        hierarhical_history_preface,
        indirect_mereological_history_medium_importance_preface,
    )
    return [get_table_with_links(kg, query_set) for query_set in query_sets_in_order]

def historical_activation_computation(historical_activation_potential: dict,
                                      specific_history: pd.DataFrame,
                                      weight: float):
    """Add one history's weighted, normalised use numbers to the running potentials.

    Each row contributes ``weight * use_number / total`` to the entry of its
    conceptual item, where ``total`` is the sum of the "use_number" column.

    Args:
        historical_activation_potential: Mapping from conceptual item to its
            accumulated potential. It is updated in place; items that are not
            yet present start from 0.
        specific_history: DataFrame with the columns "conceptual_item" and
            "use_number".
        weight: Share of the overall potential assigned to this history.

    Returns:
        The same dictionary object that was passed in, after the update.
    """
    total = specific_history["use_number"].sum()

    # Iterate over the column values rather than index labels, so the
    # computation does not depend on the index being unique.
    rows = zip(specific_history["conceptual_item"], specific_history["use_number"])
    for conceptual_item, use_number in rows:
        # A history whose use numbers sum to zero contributes nothing
        # (instead of dividing by zero and storing NaN).
        share = (weight * use_number) / total if total else 0.0
        historical_activation_potential[conceptual_item] = (
            historical_activation_potential.get(conceptual_item, 0) + share
        )
    return historical_activation_potential

def get_historical_activation_potential(history: list,
                                        weight_direct: float = 6/9,
                                        weight_hierarchical: float = 1/9,
                                        weight_mereological: float = 2/9):
    """Combine the history tables into one activation potential per conceptual item.

    Args:
        history: History DataFrames in the order direct, hierarchical,
            mereological (the order returned by ``get_preface_history``). Each
            needs the columns "conceptual_item" and "use_number".
        weight_direct: Weight applied to the first (direct) history.
        weight_hierarchical: Weight applied to the second (hierarchical) history.
        weight_mereological: Weight applied to the third (mereological) history.

    Returns:
        A pandas DataFrame with the columns "conceptual_item" and
        "activation_potential".

    Raises:
        ValueError: If ``history`` holds more tables than there are weights.
    """
    # One weight per history, matched by position. Previously every history
    # was weighted with weight_direct and the other two weights were ignored.
    weights = (weight_direct, weight_hierarchical, weight_mereological)
    if len(history) > len(weights):
        raise ValueError(
            f"Expected at most {len(weights)} histories, got {len(history)}."
        )

    # initialize the dictionary to compute the historical activation potential
    historical_activation_potential = {
        conceptual_item: 0
        for specific_history in history
        for conceptual_item in specific_history["conceptual_item"]
    }

    # add the weighted use numbers of each history (the dictionary is updated in place)
    for specific_history, weight in zip(history, weights):
        historical_activation_computation(
            historical_activation_potential, specific_history, weight)

    # convert the dictionary to a dataframe
    return pd.DataFrame(
        list(historical_activation_potential.items()),
        columns=['conceptual_item', 'activation_potential']
    )



def get_history(kg: rdflib.Graph,
                sparql_queries_folder: str = os.path.join("input", "sparql_queries"),
                up_to_proposition: int = 0,
                base: typing.Optional[dict] = None):
    """Return the history of concept use up to a given proposition.

    Only the preface history (``up_to_proposition == 0`` and no ``base``) is
    implemented so far.

    Args:
        kg: The knowledge graph to query.
        sparql_queries_folder: Accepted but not used by this function.
        up_to_proposition: Number of the last proposition to include; 0 means
            the preface only (definitions, postulates, common notions).
        base: A previously computed history to extend. Not supported yet.

    Returns:
        The list of DataFrames returned by ``get_preface_history``.

    Raises:
        ValueError: If ``up_to_proposition`` is negative.
        NotImplementedError: If ``base`` is given or ``up_to_proposition`` is
            greater than 0.
    """
    if base:
        # TODO: find the highest proposition P in base, then
        #   - P < up_to_proposition: add the history of the propositions
        #     between P (excluded) and up_to_proposition (included);
        #   - P > up_to_proposition: remove the history of the propositions
        #     above up_to_proposition.
        # NOTE(review): the original note for P == up_to_proposition repeated
        # the P < up_to_proposition case; presumably nothing has to be added
        # then — confirm.
        raise NotImplementedError("Extending a base history is not implemented yet.")

    if up_to_proposition < 0:
        raise ValueError(
            f"up_to_proposition must be 0 or greater, got {up_to_proposition}."
        )

    if up_to_proposition == 0:
        # preface only: direct history of definitions, postulates, and common notions
        print(0)
        return get_preface_history(kg)

    # Proposition 1 and above: failing loudly instead of returning None,
    # which the callers would not be able to process.
    raise NotImplementedError(
        "History up to a proposition greater than 0 is not implemented yet."
    )

In [None]:

def main(file_name: str):
    """Load the graph, gather its history, and derive the activation potentials.

    Args:
        file_name: Name of the RDF file handed to ``access_graph``.

    Returns:
        A tuple ``(history, historical_activation_potential)``: the value
        returned by ``get_history`` and the table computed from it by
        ``get_historical_activation_potential``.
    """
    knowledge_graph = access_graph(file_name)
    collected_history = get_history(knowledge_graph)
    potential_table = get_historical_activation_potential(collected_history)
    return collected_history, potential_table


history, historical_activation_potential = main(file_name)

In [None]:
# Print each conceptual item next to its activation potential, one pair per line.
for row_label in historical_activation_potential.index:
    print(
        historical_activation_potential.loc[row_label, "conceptual_item"],
        historical_activation_potential.loc[row_label, "activation_potential"],
    )

In [None]:
direct = history[0]
# Print the conceptual items of the direct history that do not contain "https".
for conceptual_item in direct["conceptual_item"]:
    if "https" not in conceptual_item:
        print(conceptual_item)

In [None]:
hierarchical = history[1]

# Print the conceptual items of the hierarchical history that do not contain "https".
for conceptual_item in hierarchical["conceptual_item"]:
    if "https" not in conceptual_item:
        print(conceptual_item)

In [None]:
mereological = history[2]
# Print the conceptual items of the mereological history that do not contain "https".
for conceptual_item in mereological["conceptual_item"]:
    if "https" not in conceptual_item:
        print(conceptual_item)

### INDIRECT USE

In [None]:
# Preview the first 50 rows of the third history table.
horizontal = history[2]
horizontal.iloc[:50]

In [None]:
# parameters
# NOTE: this rebinds `file_name`, replacing the value assigned in the first parameters cell.
file_name = "ontology_output.ttl"
# Query files that get_tables_of_textual_units_and_concepts runs for the direct links table.
sparql_queries_withouth_hierarchical_imports: list = [
    "query_6.sparql",
    "query_7.sparql",
    "query_8.sparql",
    "query_12.sparql",
    "query_14.sparql"
]

# Query files that get_tables_of_textual_units_and_concepts runs for the indirect links table.
sparql_queries_with_hierarchical_imports: list = [
    "query_9.sparql",
    "query_10.sparql",
    "query_11.sparql",
    "query_13.sparql",
    "query_15.sparql"
]

def access_graph(file_name: str) -> rdflib.Graph:
    """Parse an RDF file from the "input" folder into a fresh graph.

    Note: this definition replaces any ``access_graph`` defined earlier in the
    notebook; it takes no folder argument and always reads from "input".

    Args:
        file_name: Name of the file holding the RDF data (e.g., "ontology_output.ttl").

    Returns:
        The value returned by ``rdflib.Graph().parse`` for that path.
    """
    graph_path = os.path.join("input", file_name)
    graph = rdflib.Graph()
    return graph.parse(graph_path)

def run_sparql_query(knowledge_graph: rdflib.Graph,
                     sparql_query_name: str,
                     folder_name: str = os.path.join("input", "sparql_queries")
                     ) -> rdflib.query.Result:
    """Execute a SPARQL query stored in a file against a knowledge graph.

    Args:
        knowledge_graph: The rdflib.Graph to query.
        sparql_query_name: File name of the SPARQL query (e.g., "query_6.sparql").
        folder_name: Folder holding the query file. Defaults to "input/sparql_queries".

    Returns:
        Whatever ``knowledge_graph.query`` returns for the query text.
    """
    query_path = os.path.join(folder_name, sparql_query_name)

    # read the complete query text; the file is closed before the query runs
    with open(query_path, "r") as handle:
        query_text = handle.read()

    query_result = knowledge_graph.query(query_text)
    return query_result

def get_table_with_links(knowledge_graph: rdflib.Graph,
                         sparql_queries: set = sparql_queries_withouth_hierarchical_imports
                         ) -> pd.DataFrame:
    """Collect links between textual units and conceptual items from several queries.

    Every query file named in ``sparql_queries`` is run through
    ``run_sparql_query``; each result row contributes its ``s`` and ``o``
    bindings (converted with ``toPython`` when the term offers it) and its
    ``links`` binding converted to ``int``.

    Args:
        knowledge_graph: The rdflib.Graph to query.
        sparql_queries: Names of the SPARQL query files to run. Defaults to
            ``sparql_queries_withouth_hierarchical_imports``.

    Returns:
        A pandas DataFrame with the columns "textual_unit", "conceptual_item",
        and "use_number", in the order the rows were produced.
    """
    # one [textual_unit, conceptual_item, use_number] row per query result
    rows = [
        [
            getattr(row.s, "toPython", lambda: row.s)(),
            getattr(row.o, "toPython", lambda: row.o)(),
            int(row.links),
        ]
        for query_name in sparql_queries
        for row in run_sparql_query(knowledge_graph, query_name)
    ]

    return pd.DataFrame(rows,
                        columns=["textual_unit", "conceptual_item", "use_number"])


def dataframe_to_csv(df: pd.DataFrame,
                     filename: str = "output.csv",
                     output_folder: str = "output"):
    """Save a pandas DataFrame as a CSV file, without the index column.

    Args:
        df: The pandas DataFrame to save.
        filename: Name of the CSV file, with or without the ".csv" extension
            (default: "output.csv").
        output_folder: Folder to write into (default: "output"). It is created
            when it does not exist yet.
    """
    # Accept the name with or without its extension, so that the default
    # "output.csv" is not written as "output.csv.csv".
    extension = ".csv"
    stem = filename[:-len(extension)] if filename.endswith(extension) else filename

    # to_csv does not create missing folders
    if output_folder:
        os.makedirs(output_folder, exist_ok=True)

    # Construct the full file path
    filepath = os.path.join(output_folder, f"{stem}{extension}")

    # Save the DataFrame to the CSV file
    df.to_csv(filepath, index=False)
    print(f"DataFrame saved to: {filepath}")

def get_tables_of_textual_units_and_concepts(file_name: str):
    """Build the direct and indirect link tables for one RDF file.

    The file is loaded once with ``access_graph``; the resulting graph is then
    queried twice with ``get_table_with_links``, first with
    ``sparql_queries_withouth_hierarchical_imports`` and then with
    ``sparql_queries_with_hierarchical_imports``.

    Args:
        file_name: Name of the file holding the RDF data (e.g., "ontology_output.ttl").

    Returns:
        A tuple ``(direct_links, indirect_links)`` of pandas DataFrames, each
        with the columns "textual_unit", "conceptual_item", and "use_number".
    """
    knowledge_graph = access_graph(file_name)

    # direct first, then indirect — the same order as the returned tuple
    direct_table = get_table_with_links(
        knowledge_graph, sparql_queries_withouth_hierarchical_imports)
    indirect_table = get_table_with_links(
        knowledge_graph, sparql_queries_with_hierarchical_imports)

    return direct_table, indirect_table

def main_get_tables_of_textual_units_and_concepts(file_name: str):
    """Build the direct and indirect link tables and save both as CSV files.

    The tables come from ``get_tables_of_textual_units_and_concepts``; each is
    passed to ``dataframe_to_csv`` under the names "direct_links" and
    "indirect_links".

    Args:
        file_name: Name of the file holding the RDF data (e.g., "ontology_output.ttl").

    Returns:
        A tuple ``(direct_links, indirect_links)`` of pandas DataFrames, each
        with the columns "textual_unit", "conceptual_item", and "use_number".
    """
    direct_table, indirect_table = get_tables_of_textual_units_and_concepts(file_name)

    tables_with_names = (
        (direct_table, "direct_links"),
        (indirect_table, "indirect_links"),
    )
    for table, csv_name in tables_with_names:
        dataframe_to_csv(table, filename=csv_name)

    return direct_table, indirect_table

direct_links, indirect_links = main_get_tables_of_textual_units_and_concepts(file_name)
direct_links

In [None]:
# NOTE(review): matplotlib is imported inside this cell rather than with the other imports.
from matplotlib import pyplot as plt
# Line plot of the "use_number" column of direct_links.
direct_links['use_number'].plot(kind='line', figsize=(8, 4), title='use_number')
# Hide the top and right spines of the current axes.
plt.gca().spines[['top', 'right']].set_visible(False)

In [None]:
# NOTE(review): matplotlib is imported inside this cell rather than with the other imports.
from matplotlib import pyplot as plt
# Histogram (20 bins) of the "use_number" column of direct_links.
direct_links['use_number'].plot(kind='hist', bins=20, title='use_number')
# Hide the top and right spines of the current axes.
plt.gca().spines[['top', 'right',]].set_visible(False)

In [None]:
# Display the indirect links table as this cell's output.
indirect_links

In [None]:
# NOTE(review): matplotlib is imported inside this cell rather than with the other imports.
from matplotlib import pyplot as plt
# Line plot of the "use_number" column of indirect_links.
indirect_links['use_number'].plot(kind='line', figsize=(8, 4), title='use_number')
# Hide the top and right spines of the current axes.
plt.gca().spines[['top', 'right']].set_visible(False)

In [None]:
# NOTE(review): matplotlib is imported inside this cell rather than with the other imports.
from matplotlib import pyplot as plt
# Histogram (20 bins) of the "use_number" column of indirect_links.
indirect_links['use_number'].plot(kind='hist', bins=20, title='use_number')
# Hide the top and right spines of the current axes.
plt.gca().spines[['top', 'right',]].set_visible(False)

In [None]:
# parameters
# File name that access_graph joins onto the "input" folder; this assignment
# rebinds the notebook-level `file_name`.
file_name = "ontology_output.ttl"

def access_graph(file_name: str):
    """Parse the RDF file ``input/<file_name>`` into a fresh rdflib graph.

    Note: this definition replaces any ``access_graph`` defined earlier in the
    notebook.

    Args:
        file_name: Name of the file holding the RDF data.

    Returns:
        The value returned by ``rdflib.Graph().parse`` for that path.
    """
    graph_path = os.path.join("input", file_name)
    graph = rdflib.Graph()
    return graph.parse(graph_path)

def run_sparql_query(knowledge_graph: rdflib.Graph,
                     sparql_query_name: str,
                     folder_name: str = os.path.join("input", "sparql_queries")
                     ):
    """Look up a SPARQL query file by key and run it on the knowledge graph.

    Args:
        knowledge_graph: The rdflib.Graph to query.
        sparql_query_name: Key into the module-level ``sparql_queries`` mapping;
            the mapped value is the query's file name.
        folder_name: Folder holding the query file. Defaults to "input/sparql_queries".

    Returns:
        Whatever ``knowledge_graph.query`` returns for the query text.
    """
    # NOTE(review): `sparql_queries` is not defined in this cell; it has to
    # exist at module level by the time this function is called.
    query_file_name = sparql_queries[sparql_query_name]
    query_path = os.path.join(folder_name, query_file_name)

    # read the complete query text; the file is closed before the query runs
    with open(query_path, "r") as handle:
        query_text = handle.read()

    return knowledge_graph.query(query_text)

def get_initial_activation_potential(knowledge_graph: rdflib.Graph,
                                     sparql_queries_direct_link: set = {
                                         "query_6.sparql",
                                         "query_7.sparql",
                                         "query_8.sparql"
                                     },
                                     sparql_queries_hierachical_link: set = {
                                         "query_9.sparql",
                                         "query_10.sparql",
                                         "query_11.sparql"
                                     }
                                     ):
    """Placeholder for computing the initial activation potential.

    The body is not implemented yet: none of the arguments is used and the
    function returns None.

    Args:
        knowledge_graph: The knowledge graph (currently unused).
        sparql_queries_direct_link: Query file names for direct links
            (currently unused).
        sparql_queries_hierachical_link: Query file names for hierarchical
            links (currently unused).

    Returns:
        None.
    """
    # The last string of each default set was missing its closing quote,
    # which made the whole cell fail with a SyntaxError.
    return


def prepare_initial_hebbian_connections():
    """Placeholder that is not implemented yet; it takes no arguments and returns None."""
    return None


def main_routine_starting_state(file_name: str):
    """Compute the initial state of the conceptual items for one RDF file.

    Args:
        file_name: Name of the RDF file handed to ``access_graph``.

    Returns:
        The second value returned by ``get_initial_state_euclid`` (the initial
        state); the first value (the intermediate results) is discarded.
    """
    # access turtle file
    # and populate a Graph object
    kg = access_graph(file_name)

    # run sparql queries;
    # organize the query results;
    # return several analytical results,
    # including the initial state concerning the activation potential
    # of the conceptual items in the graph
    #
    # This call was commented out, so `initial_state` was never assigned
    # inside the function and the return statement depended on a global of
    # the same name.
    _, initial_state = get_initial_state_euclid(kg)

    return initial_state


In [None]:
# file_name = "creativity_graph.nt"
file_name = "ontology_output.ttl"
# Maps a short query key to the name of the SPARQL file that
# run_sparql_query opens for that key.
sparql_queries: dict = {
    "definitions": "query_3.sparql",
    "postulates": "query_1.sparql",
    "common_notions": "query_2.sparql",
    "propositions": "query_4.sparql",
    "proofs": "query_5.sparql",
}


def access_graph(file_name: str):
    """Parse the RDF file ``input/<file_name>`` into a fresh rdflib graph.

    Note: this definition replaces any ``access_graph`` defined earlier in the
    notebook.

    Args:
        file_name: Name of the file holding the RDF data.

    Returns:
        The value returned by ``rdflib.Graph().parse`` for that path.
    """
    graph_path = os.path.join("input", file_name)
    graph = rdflib.Graph()
    return graph.parse(graph_path)

def run_sparql_query(knowledge_graph: rdflib.Graph,
                     sparql_query_name: str,
                     folder_name: str = "input"):
    """Look up a SPARQL query file by key and run it on the knowledge graph.

    Args:
        knowledge_graph: The rdflib.Graph to query.
        sparql_query_name: Key into the module-level ``sparql_queries`` mapping
            (e.g., "definitions"); the mapped value is the query's file name.
        folder_name: Folder holding the query file. Defaults to "input".

    Returns:
        Whatever ``knowledge_graph.query`` returns for the query text.
    """
    query_file_name = sparql_queries[sparql_query_name]
    query_path = os.path.join(folder_name, query_file_name)

    # read the complete query text; the file is closed before the query runs
    with open(query_path, "r") as handle:
        query_text = handle.read()

    return knowledge_graph.query(query_text)

def get_connection_weights(knowledge_graph: rdflib.Graph,
                           sparql_queries_initial_state: set = {"definitions", "postulates", "common_notions"}):
    """Gather subject/object pairs and their link counts from several queries.

    Args:
        knowledge_graph: The rdflib.Graph to query.
        sparql_queries_initial_state: Query keys passed one by one to
            ``run_sparql_query``.

    Returns:
        A dict with the keys "subject", "object", and "connection_weight",
        each holding a list with one entry per query result row (the row's
        ``s`` binding, its ``o`` binding, and its ``links`` binding as ``int``).
    """
    subjects, objects, weights = [], [], []

    for query_key in sparql_queries_initial_state:
        for row in run_sparql_query(knowledge_graph, query_key):
            subjects.append(row.s)
            objects.append(row.o)
            weights.append(int(row.links))

    return {
        "subject": subjects,
        "object": objects,
        "connection_weight": weights,
    }

def join_results(list_of_results: list):
    """Sum the connection weights of identical subject/object pairs.

    Args:
        list_of_results: Dicts with the parallel lists "subject", "object",
            and "connection_weight".

    Returns:
        A dict mapping the string "<subject>||<object>" to the sum of the
        weights of all rows with that pair, in order of first appearance.
    """
    totals: dict = {}

    for table in list_of_results:
        rows = zip(table["subject"], table["object"], table["connection_weight"])
        for subject_term, object_term, weight in rows:
            pair_key: str = f"{subject_term}||{object_term}"
            # a pair seen for the first time starts from 0
            totals[pair_key] = totals.setdefault(pair_key, 0) + weight

    return totals

def prepare_intermediate_results_dataframe(joined_results: dict):
    """Turn the joined "<subject>||<object>" totals into a DataFrame.

    Args:
        joined_results: Dict mapping "<subject>||<object>" strings to a number,
            in the format produced by ``join_results``.

    Returns:
        A pandas DataFrame with the columns "subject", "object", and
        "use_iterations", one row per key, in the dict's order.

    Raises:
        ValueError: If a key does not contain the "||" separator.
    """
    initial_state_intermediate_results: dict = {
        "subject": [],
        "object": [],
        "use_iterations": []
    }
    for key, value in joined_results.items():
        # Split only at the first separator: an object text that itself
        # contains "||" would otherwise yield more than two parts and make
        # the unpacking fail.
        subject_triple, object_triple = key.split("||", 1)
        initial_state_intermediate_results["subject"].append(subject_triple)
        initial_state_intermediate_results["object"].append(object_triple)
        initial_state_intermediate_results["use_iterations"].append(value)
    return pd.DataFrame(initial_state_intermediate_results)

def prepare_initial_state_dataframe(initial_state_intermediate_result: pd.DataFrame):
    """Aggregate the intermediate results into one use weight per object.

    Args:
        initial_state_intermediate_result: DataFrame with at least the columns
            "object" and "use_iterations".

    Returns:
        A pandas DataFrame with the columns "conceptual_item" (the former
        "object") and "use_weight" (the summed "use_iterations"), sorted by
        "use_weight" in descending order, with a fresh 0..n-1 index.
    """
    return (
        initial_state_intermediate_result
        # total use per object
        .groupby('object', as_index=False)['use_iterations']
        .sum()
        .rename(columns={'object': 'conceptual_item', 'use_iterations': 'use_weight'})
        # heaviest items first
        .sort_values(by='use_weight', ascending=False)
        .reset_index(drop=True)
    )


def get_initial_state_euclid(knowledge_graph: rdflib.Graph):
    """Compute the intermediate and final initial-state tables for a graph.

    The connection weights from ``get_connection_weights`` (called with its
    default query keys) are joined by ``join_results``, tabulated by
    ``prepare_intermediate_results_dataframe``, and aggregated by
    ``prepare_initial_state_dataframe``.

    Args:
        knowledge_graph: The rdflib.Graph to query.

    Returns:
        A tuple ``(intermediate_results, initial_state)`` of pandas DataFrames.
    """
    # connections from definitions, postulates, and common notions
    connection_weights = get_connection_weights(knowledge_graph)
    joined = join_results([connection_weights])

    intermediate_results = prepare_intermediate_results_dataframe(joined)
    initial_state_table = prepare_initial_state_dataframe(intermediate_results)

    return intermediate_results, initial_state_table

def main_routine_starting_state(file_name: str):
    """Load an RDF file and return the initial state of its conceptual items.

    Args:
        file_name: Name of the RDF file handed to ``access_graph``.

    Returns:
        The second value returned by ``get_initial_state_euclid``; the first
        value (the intermediate results) is discarded.
    """
    knowledge_graph = access_graph(file_name)
    _, starting_state = get_initial_state_euclid(knowledge_graph)
    return starting_state

initial_state: pd.DataFrame = main_routine_starting_state(file_name)
initial_state

In [None]:
# access_graph requires the file name; calling it without arguments raised a TypeError.
get_initial_state_euclid(access_graph(file_name))