In [None]:
!python --version
print("---")

# install and import modules
%pip install rdflib

import google
import pandas as pd
import rdflib
import os
import typing

# mount drive here to read files from the folder "My Drive > Colab_Notebooks > Formal_Ontology_of_Mathematics > creativity"
google.colab.drive.mount('/content/drive')

os.chdir("/content/drive/My Drive/Colab_Notebooks/Formal_Ontology_of_Mathematics/creativity")

print("---")
!pwd

print("---")
!ls

In [None]:
# parameters
# Name of the RDF file; `access_graph` looks it up inside the "input" folder.
file_name = "ontology_output.ttl"
# Query files used to build the "direct" links table
# (see `get_tables_of_textual_units_and_concepts`).
# `run_sparql_query` reads them from "input/sparql_queries" by default.
sparql_queries_withouth_hierarchical_imports: list = [
    "query_6.sparql",
    "query_7.sparql",
    "query_8.sparql",
    "query_12.sparql",
    "query_14.sparql"
]

# Query files used to build the "indirect" links table
# (see `get_tables_of_textual_units_and_concepts`).
sparql_queries_with_hierarchical_imports: list = [
    "query_9.sparql",
    "query_10.sparql",
    "query_11.sparql",
    "query_13.sparql",
    "query_15.sparql"
]

def access_graph(file_name: str):
    """Parse the file `input/<file_name>` into a fresh rdflib graph.

    Args:
        file_name: Name of the RDF file located in the "input" folder.

    Returns:
        Whatever `rdflib.Graph.parse` returns for that file.
    """
    graph = rdflib.Graph()
    return graph.parse(os.path.join("input", file_name))

def run_sparql_query(knowledge_graph: rdflib.Graph,
                     sparql_query_name: str,
                     folder_name: str = os.path.join("input", "sparql_queries")
                     ):
    """Read a SPARQL query from disk and execute it on the given graph.

    Args:
        knowledge_graph: Graph the query is run against.
        sparql_query_name: File name of the query inside `folder_name`.
        folder_name: Folder holding the query files.

    Returns:
        The result of `knowledge_graph.query(...)` for the query text.
    """
    # Load the full query text before handing it to the graph.
    with open(os.path.join(folder_name, sparql_query_name), "r") as handle:
        query_text = handle.read()

    return knowledge_graph.query(query_text)

def get_table_with_links(knowledge_graph: rdflib.Graph,
                         sparql_queries: list = sparql_queries_withouth_hierarchical_imports):
    """Run several SPARQL queries and collect their rows in one DataFrame.

    Each result row is read through its `s`, `o` and `links` attributes, so
    every query is expected to expose those three names.

    Args:
        knowledge_graph: Graph the queries are run against.
        sparql_queries: Query file names, processed in iteration order. The
            rows of the returned table follow that order, which is why an
            ordered collection (list/tuple) should be passed; the parameter
            used to be annotated as `set` although the default and all callers
            in this notebook pass lists.

    Returns:
        DataFrame with the columns "textual_unit", "conceptual_item" and
        "use_number" (the latter converted with `int`).
    """
    rows = []

    for query_name in sparql_queries:
        for row in run_sparql_query(knowledge_graph, query_name):
            rows.append([
                # Convert RDF terms to plain Python values when they support it.
                row.s.toPython() if hasattr(row.s, "toPython") else row.s,
                row.o.toPython() if hasattr(row.o, "toPython") else row.o,
                int(row.links)])

    # Build the frame once from the accumulated rows.
    return pd.DataFrame(rows,
                        columns=["textual_unit", "conceptual_item", "use_number"])

def dataframe_to_csv(df: pd.DataFrame,
                     filename: str = "output.csv",
                     output_folder: str = "output"):
    """Saves a pandas DataFrame to a CSV file inside `output_folder`.

    Args:
        df: The pandas DataFrame to save.
        filename: The name of the CSV file, with or without the ".csv"
            extension (default: "output.csv").
        output_folder: Folder the file is written to (default: "output").
            It is created when it does not exist yet.
    """
    # Only add the extension when it is missing; otherwise the default
    # "output.csv" would end up as "output.csv.csv".
    if not filename.lower().endswith(".csv"):
        filename = f"{filename}.csv"

    # Make sure the destination exists so that `to_csv` does not fail on a
    # fresh checkout. An empty folder name means "current directory".
    if output_folder:
        os.makedirs(output_folder, exist_ok=True)

    # Construct the full file path
    filepath = os.path.join(output_folder, filename)

    # Save the DataFrame to the CSV file
    df.to_csv(filepath, index=False)
    print(f"DataFrame saved to: {filepath}")

def get_tables_of_textual_units_and_concepts(file_name: str):
    """Load the graph and build the direct and indirect link tables.

    Args:
        file_name: Name of the RDF file passed to `access_graph`.

    Returns:
        Tuple `(direct_links, indirect_links)`: the tables produced by
        `get_table_with_links` for the query list without, respectively with,
        hierarchical imports.
    """
    # Parse the turtle file once and reuse the graph for both tables.
    graph = access_graph(file_name)

    query_groups = (
        sparql_queries_withouth_hierarchical_imports,  # direct concepts
        sparql_queries_with_hierarchical_imports,      # indirect concepts
    )
    direct_links, indirect_links = (
        get_table_with_links(graph, group) for group in query_groups
    )

    return direct_links, indirect_links

def main_get_tables_of_textual_units_and_concepts(file_name: str):
    """Build both link tables, save each one as CSV, and return them.

    Args:
        file_name: Name of the RDF file passed on to
            `get_tables_of_textual_units_and_concepts`.

    Returns:
        Tuple `(direct_links, indirect_links)`.
    """
    direct_links, indirect_links = get_tables_of_textual_units_and_concepts(file_name)

    # Export in the same order the tables are returned.
    exports = {"direct_links": direct_links, "indirect_links": indirect_links}
    for csv_name, table in exports.items():
        dataframe_to_csv(table, csv_name)

    return direct_links, indirect_links

# Build both link tables from the ontology file, save them as CSV,
# and display the direct-link table as the cell output.
direct_links, indirect_links = main_get_tables_of_textual_units_and_concepts(file_name)
direct_links

In [None]:
# Display the table built from `sparql_queries_with_hierarchical_imports`.
indirect_links

In [None]:
# parameters
# Name of the RDF file; `access_graph` looks it up inside the "input" folder.
file_name = "ontology_output.ttl"

def access_graph(file_name: str):
    """Parse the file `input/<file_name>` into a fresh rdflib graph.

    Args:
        file_name: Name of the RDF file located in the "input" folder.

    Returns:
        Whatever `rdflib.Graph.parse` returns for that file.
    """
    graph = rdflib.Graph()
    return graph.parse(os.path.join("input", file_name))

def run_sparql_query(knowledge_graph: rdflib.Graph,
                     sparql_query_name: str,
                     folder_name: str = os.path.join("input", "sparql_queries")
                     ):
    """Read a SPARQL query from disk and execute it on the given graph.

    The query name is used directly as the file name. The previous version
    looked it up in a module-level `sparql_queries` dict, which is not defined
    at this point of the notebook (NameError on a fresh kernel) and would not
    contain the "query_N.sparql" names used in this cell anyway.

    Args:
        knowledge_graph: Graph the query is run against.
        sparql_query_name: File name of the query inside `folder_name`.
        folder_name: Folder holding the query files.

    Returns:
        The result of `knowledge_graph.query(...)` for the query text.
    """
    # create path to sparql query
    query_path = os.path.join(folder_name, sparql_query_name)
    # access the sparql query and run it on the knowledge graph
    with open(query_path, "r") as query_file:
        sparql_query = query_file.read()
    return knowledge_graph.query(sparql_query)

def get_initial_activation_potential(knowledge_graph: rdflib.Graph,
                                     sparql_queries_direct_link: set = {
                                         "query_6.sparql",
                                         "query_7.sparql",
                                         "query_8.sparql"
                                     },
                                     sparql_queries_hierachical_link: set = {
                                         "query_9.sparql",
                                         "query_10.sparql",
                                         "query_11.sparql"
                                     }
                                     ):
    """Placeholder for computing the initial activation potential.

    Not implemented yet: the arguments are accepted but unused and the
    function returns None.

    Args:
        knowledge_graph: Graph the queries are meant to be run against.
        sparql_queries_direct_link: Names of the direct-link query files.
        sparql_queries_hierachical_link: Names of the hierarchical-link
            query files.

    Note:
        The default sets are created once at definition time and shared
        between calls; do not mutate them inside the function.
    """
    # The closing quotes of "query_8.sparql" and "query_11.sparql" were
    # missing, which made the whole cell fail with a SyntaxError.
    return


def prepare_initial_hebbian_connections():
    """Placeholder with no implementation yet; always returns None."""
    return None


def main_routine_starting_state(file_name: str):
    """Load the graph and return the initial-state table.

    The assignment that produces `initial_state` was commented out, so the
    function returned a name it never defined (NameError, or a stale global
    left over from another cell). The call is restored here.

    Args:
        file_name: Name of the RDF file passed to `access_graph`.

    Returns:
        The second element returned by `get_initial_state_euclid`.
    """
    # access turtle file
    # and populate a Graph object
    kg = access_graph(file_name)

    # run sparql queries;
    # organize the query results;
    # return several analytical results,
    # including the initial state concerning the activation potential
    # of the conceptual items in the graph
    # NOTE: `get_initial_state_euclid` is defined in a later cell of this
    # notebook; the name is resolved when this function is called.
    _intermediate_results, initial_state = get_initial_state_euclid(kg)

    return initial_state


In [None]:
# Alternative input file, currently disabled:
# file_name = "creativity_graph.nt"
# Name of the RDF file; `access_graph` looks it up inside the "input" folder.
file_name = "ontology_output.ttl"
# Maps a short query name to the query file that `run_sparql_query` opens.
sparql_queries: dict = {
    "definitions": "query_3.sparql",
    "postulates": "query_1.sparql",
    "common_notions": "query_2.sparql",
    "propositions": "query_4.sparql",
    "proofs": "query_5.sparql",
}


def access_graph(file_name: str):
    """Parse the file `input/<file_name>` into a fresh rdflib graph.

    Args:
        file_name: Name of the RDF file located in the "input" folder.

    Returns:
        Whatever `rdflib.Graph.parse` returns for that file.
    """
    graph = rdflib.Graph()
    return graph.parse(os.path.join("input", file_name))

def run_sparql_query(knowledge_graph: rdflib.Graph,
                     sparql_query_name: str,
                     folder_name: str = "input"):
    """Run the named SPARQL query on the given graph.

    Args:
        knowledge_graph: Graph the query is run against.
        sparql_query_name: Key of the module-level `sparql_queries` dict; the
            mapped value is the query file name inside `folder_name`.
        folder_name: Folder holding the query files.

    Returns:
        The result of `knowledge_graph.query(...)` for the query text.
    """
    # Translate the short name into its file name, then read the whole query.
    query_file_name = sparql_queries[sparql_query_name]
    with open(os.path.join(folder_name, query_file_name), "r") as handle:
        query_text = handle.read()

    return knowledge_graph.query(query_text)

def get_connection_weights(knowledge_graph: rdflib.Graph,
                           sparql_queries_initial_state: set = {"definitions", "postulates", "common_notions"}):
    """Run the selected queries and collect their rows column by column.

    Each result row is read through its `s`, `o` and `links` attributes.

    Args:
        knowledge_graph: Graph the queries are run against.
        sparql_queries_initial_state: Query names understood by
            `run_sparql_query`. When a set (or frozenset) is given, the names
            are processed in sorted order: the iteration order of a set of
            strings can change between interpreter runs, which made the row
            order of the results differ from run to run. Ordered collections
            are processed in the order given.

    Returns:
        Dict with the keys "subject", "object" and "connection_weight", each
        holding a list with one entry per result row (weights converted with
        `int`).
    """
    # Fix the processing order so that repeated runs give identical output.
    if isinstance(sparql_queries_initial_state, (set, frozenset)):
        query_names = sorted(sparql_queries_initial_state)
    else:
        query_names = list(sparql_queries_initial_state)

    # initialize dictionary to store the query results
    results: dict = {
        "subject": [],
        "object": [],
        "connection_weight": []
        }
    for sparql_query_name in query_names:
        for result in run_sparql_query(knowledge_graph, sparql_query_name):
            results["subject"].append(result.s)
            results["object"].append(result.o)
            results["connection_weight"].append(int(result.links))

    return results

def join_results(list_of_results: list):
    """Sum the connection weights of identical (subject, object) pairs.

    Args:
        list_of_results: Dicts with parallel lists under the keys "subject",
            "object" and "connection_weight".

    Returns:
        Dict mapping "<subject>||<object>" to the summed weight of that pair
        across all given results.
    """
    totals: dict = {}

    for partial in list_of_results:
        rows = zip(partial["subject"], partial["object"], partial["connection_weight"])
        for source, target, weight in rows:
            pair_key: str = f"{source}||{target}"
            # Start unseen pairs at zero, then accumulate.
            totals.setdefault(pair_key, 0)
            totals[pair_key] = totals[pair_key] + weight

    return totals

def prepare_intermediate_results_dataframe(joined_results: dict):
    """Turn the joined "<subject>||<object>" totals into a DataFrame.

    Args:
        joined_results: Dict whose keys have the form "<subject>||<object>"
            and whose values are the totals for that pair.

    Returns:
        DataFrame with the columns "subject", "object" and "use_iterations",
        one row per key, in the dict's iteration order.
    """
    subjects, objects, totals = [], [], []

    for pair_key, total in joined_results.items():
        # Each key must split into exactly two parts.
        source, target = pair_key.split("||")
        subjects.append(source)
        objects.append(target)
        totals.append(total)

    return pd.DataFrame({
        "subject": subjects,
        "object": objects,
        "use_iterations": totals,
    })

def prepare_initial_state_dataframe(initial_state_intermediate_result: pd.DataFrame):
    """Aggregate the intermediate table into one row per object.

    Args:
        initial_state_intermediate_result: DataFrame with at least the columns
            "object" and "use_iterations".

    Returns:
        DataFrame with the columns "conceptual_item" and "use_weight" (the sum
        of "use_iterations" per object), sorted by "use_weight" in descending
        order with a fresh 0-based index.
    """
    # Total "use_iterations" for every distinct object.
    per_item: pd.DataFrame = (
        initial_state_intermediate_result
        .groupby('object', as_index=False)['use_iterations']
        .sum()
    )
    per_item.columns = ['conceptual_item', 'use_weight']

    # Highest weight first; drop the old index left over from sorting.
    ranked = per_item.sort_values(by='use_weight', ascending=False)
    return ranked.reset_index(drop=True)


def get_initial_state_euclid(knowledge_graph: rdflib.Graph):
    """Compute the intermediate and final initial-state tables for a graph.

    Args:
        knowledge_graph: Graph passed to `get_connection_weights`.

    Returns:
        Tuple `(intermediate_table, initial_state_table)`: the output of
        `prepare_intermediate_results_dataframe` and the table that
        `prepare_initial_state_dataframe` derives from it.
    """
    # Connections from the default query set of `get_connection_weights`
    # (definitions, postulates, and common notions).
    collected = [get_connection_weights(knowledge_graph)]

    # Merge identical (subject, object) pairs before tabulating.
    merged = join_results(collected)

    intermediate_table = prepare_intermediate_results_dataframe(merged)
    initial_state_table = prepare_initial_state_dataframe(intermediate_table)

    return intermediate_table, initial_state_table

def main_routine_starting_state(file_name: str):
    """Load the graph from `file_name` and return its initial-state table.

    Args:
        file_name: Name of the RDF file passed to `access_graph`.

    Returns:
        The second element returned by `get_initial_state_euclid`.
    """
    graph = access_graph(file_name)
    # Only the final table is needed here; the intermediate one is dropped.
    _intermediate, starting_state = get_initial_state_euclid(graph)

    return starting_state

# Compute the initial-state table for the ontology file and display it as the cell output.
initial_state: pd.DataFrame = main_routine_starting_state(file_name)
initial_state

In [None]:
# `access_graph` requires the file name; calling it without arguments raised a TypeError.
get_initial_state_euclid(access_graph(file_name))