In [None]:
# report the Python version of the current runtime
!python --version
print("---")

# install and import modules
# NOTE(review): rdflib is installed without a version pin — consider pinning it for reproducibility
%pip install rdflib

import google
import IPython
import json
import math
import os
import pandas as pd
import rdflib
import tabulate
import datetime
import typing

# mount drive here to read files from the folder "My Drive > Colab_Notebooks > Formal_Ontology_of_Mathematics > creativity"
google.colab.drive.mount('/content/drive')

# make the project folder the working directory, so the relative "input" and "output"
# paths used further down resolve against it
os.chdir("/content/drive/My Drive/Colab_Notebooks/Formal_Ontology_of_Mathematics/creativity")

# show the resulting working directory
print("---")
!pwd

# list the content of the working directory
print("---")
!ls

In [None]:
import modules.queries as queries

In [None]:
# parameters
# name of the Turtle file to analyse; used as the default `file_name` of main(),
# which hands it to access_graph (looked up in the "input" folder by default)
file_name = "ontology_output_v3.ttl"

# general functions
def access_graph(file_name: str,
                 folder_name: str = "input") -> rdflib.Graph:
    """Parse an RDF file into a fresh rdflib graph.

    Args:
        file_name: Name of the file to parse.
        folder_name: Folder containing the file (relative to the working directory).

    Returns:
        Whatever ``rdflib.Graph().parse`` returns for the joined path.
    """
    graph = rdflib.Graph()
    source_path = os.path.join(folder_name, file_name)
    return graph.parse(source_path)

def sparql_to_df(kg: rdflib.Graph,
                 sparql_query: str):
    """Run a SPARQL query and return its rows as a DataFrame of strings.

    Every result variable becomes one column (named after the variable) and
    every bound value is converted with ``str``. When a ``links`` column is
    present it is cast to ``int`` so it can be summed by the callers.

    Args:
        kg: Graph to query.
        sparql_query: SPARQL query text.

    Returns:
        A DataFrame with one row per result row. The columns are always
        present, even when the query returns no rows, so that a later
        ``groupby``/column selection does not fail with ``KeyError``.
    """
    raw = kg.query(sparql_query)
    # `vars` may be missing for non-SELECT results; fall back to no columns.
    column_names = [str(variable) for variable in (raw.vars or [])]
    records = [dict(zip(column_names, (str(item) for item in row))) for row in raw]
    # Passing the column names explicitly keeps the schema for empty results.
    records_df = pd.DataFrame(records, columns=column_names)
    if "links" in records_df.columns:
        records_df["links"] = records_df["links"].astype(int)
    return records_df

def sparql_to_concat_df(kg: rdflib.Graph,
                        sparql_queries: list,
                        hebb: bool = False):
    """Run several queries and sum their ``links`` per grouping key.

    Args:
        kg: Graph to query.
        sparql_queries: SPARQL query texts; each result is expected to carry a
            ``links`` column plus the grouping column(s).
        hebb: When True the rows are grouped by the pair (``o1``, ``o2``),
            otherwise by ``o``.

    Returns:
        A DataFrame with the grouping column(s) and the summed ``links``.
    """
    group_columns = ["o1", "o2"] if hebb else ["o"]
    frames = [sparql_to_df(kg, sparql_query) for sparql_query in sparql_queries]
    stacked = pd.concat(frames, ignore_index = True)
    return stacked.groupby(by=group_columns)["links"].sum().reset_index()

In [None]:
def create_iris_for_values(proposition_number: int):
    """Build a space-separated string of proof/proposition IRI pairs.

    One ``<...proof_i> <...proposition_i>`` pair is produced for every ``i``
    from 1 up to, but not including, ``proposition_number``.

    Args:
        proposition_number: Exclusive upper bound of the indices.

    Returns:
        The pairs joined with single spaces; an empty string when
        ``proposition_number`` is 1 or lower.
    """
    pairs = []
    for index in range(1, proposition_number):
        pairs.append(f"<https://www.foom.com/core#proof_{index}> <https://www.foom.com/core#proposition_{index}>")
    return " ".join(pairs)


def history(kg: rdflib.Graph,
            proposition_number: int = 0,
            base_sparql_queries: list = [
                [queries.direct_definitions(), queries.direct_postulates(), queries.direct_common_notions()],
                [queries.hierarchical_definitions(), queries.hierarchical_postulates(), queries.hierarchical_common_notions()],
                [queries.mereological_definitions(), queries.mereological_postulates(), queries.mereological_common_notions()] ],
            weights: list = [6/9, 1/9, 2/9]):
    """Compute the weighted history-based activation potential per ``o`` value.

    Each group of queries (by default: direct, hierarchical, mereological) is
    run and its ``links`` are summed per ``o``. Within a group the links are
    divided by the group's total and multiplied by the group's weight; the
    contributions of all groups are then summed per ``o``.

    Args:
        kg: Graph to query.
        proposition_number: When 2 or higher, one extra template query per
            group is added, built from the IRIs of ``create_iris_for_values``.
        base_sparql_queries: One list of SPARQL queries per group. It is not
            modified by this function.
        weights: One weight per group, paired with the groups in order.

    Returns:
        A DataFrame with the columns ``o`` and ``activation_potential``. Rows
        are ordered by ``o`` (the groupby default), not by potential.
    """
    # Copy the inner lists as well: `list.copy()` on the outer list is shallow,
    # so appending to its inner lists would grow the default argument on every
    # call and make later calls count the template queries several times.
    query_lists = [list(query_list) for query_list in base_sparql_queries]
    if proposition_number >= 2:
        # Generate the iris strings
        iris = create_iris_for_values(proposition_number)

        # Append the new queries to the per-call copies
        query_lists[0].append(queries.direct_template_propositions_proofs(iris))
        query_lists[1].append(queries.hierarchical_template_propositions_proofs(iris))
        query_lists[2].append(queries.mereological_template_propositions_proofs(iris))

    # Generate the histories
    histories = [sparql_to_concat_df(kg, query_list) for query_list in query_lists]

    activation_dfs = []
    # calculation of activation potentials
    for history_df, weight in zip(histories, weights):
        total_use = history_df["links"].sum()
        actions_df = history_df.assign(
            activation_potential = (history_df["links"] * weight) / total_use
        )[["o", "activation_potential"]]
        activation_dfs.append(actions_df)

    # combine dataframes
    combined_df = pd.concat(activation_dfs, ignore_index=True)
    return combined_df.groupby("o")["activation_potential"].sum().reset_index()

def hebb(kg: rdflib.Graph,
         proposition_number: int = 0,
         sparql_queries: list = [queries.hebb_definitions(), queries.hebb_postulates(), queries.hebb_common_notions()]):
    """Compute the co-occurrence (Hebbian) activation potential per pair.

    The queries are run, their ``links`` are summed per (``o1``, ``o2``) pair
    and each sum is divided by the overall total.

    Args:
        kg: Graph to query.
        proposition_number: When 2 or higher, one extra template query is
            added, built from the IRIs of ``create_iris_for_values``.
        sparql_queries: SPARQL queries to run. It is not modified by this
            function.

    Returns:
        A DataFrame with the columns ``o1``, ``o2`` and
        ``activation_potential``, sorted by descending potential with a fresh
        index.
    """
    # Work on a copy: appending to `sparql_queries` directly would grow the
    # default list on every call, so later calls would run (and count) the
    # template queries of all earlier calls again.
    query_list = list(sparql_queries)
    if proposition_number >= 2:
        # Generate the iris strings
        iris = create_iris_for_values(proposition_number)
        # Append the new query to the per-call copy
        query_list.append(queries.hebb_template_propositions_proofs(iris))
    df = sparql_to_concat_df(kg, query_list, hebb=True)
    total_use = df["links"].sum()
    df["activation_potential"] = df["links"] / total_use
    df = df.drop(columns=["links"])
    df = df.sort_values(by="activation_potential", ascending=False)
    df = df.reset_index(drop=True)
    return df

def calculate_activation_potential(kg: rdflib.Graph,
                                   proposition_number: int = 0):
    """Compute both activation-potential tables for a proposition number.

    Args:
        kg: Graph to query.
        proposition_number: Passed unchanged to ``history`` and ``hebb``.

    Returns:
        A tuple ``(history table, hebb table)``; the history table is
        computed first.
    """
    return history(kg, proposition_number), hebb(kg, proposition_number)

def direct_last_item(kg: rdflib.Graph,
                     last_proposition_iri: rdflib.URIRef):
    """Collect the ``o`` values of the direct template query for one item.

    Args:
        kg: Graph to query.
        last_proposition_iri: Value inserted into the query template.
            NOTE(review): proof_analysis passes an angle-bracketed string
            here rather than an ``rdflib.URIRef`` — confirm the annotation.

    Returns:
        The set of ``str(row.o)`` over all result rows.
    """
    sparql_query = queries.direct_template_propositions_proofs(last_proposition_iri)
    return set(str(row.o) for row in kg.query(sparql_query))

def mereological_last_item(kg: rdflib.Graph,
                           last_proposition_iri: rdflib.URIRef):
    """Collect the ``o`` values of the last-item template query for one item.

    Args:
        kg: Graph to query.
        last_proposition_iri: Value inserted into the query template.

    Returns:
        The set of ``str(row.o)`` over all result rows.
    """
    # NOTE(review): this function is named "mereological" but the template it
    # uses is `queries.direct_template_last_item` — confirm that this is the
    # intended query and not a copy/paste slip.
    sparql_query = queries.direct_template_last_item(last_proposition_iri)
    return set(str(row.o) for row in kg.query(sparql_query))

def direct_and_mereological_last_item(kg: rdflib.Graph,
                                      last_proposition_iri: rdflib.URIRef):
    """Return both concept sets of one item.

    Args:
        kg: Graph to query.
        last_proposition_iri: Passed unchanged to both lookups.

    Returns:
        A tuple ``(direct_last_item result, mereological_last_item result)``.
    """
    direct_concepts = direct_last_item(kg, last_proposition_iri)
    mereological_concepts = mereological_last_item(kg, last_proposition_iri)
    return direct_concepts, mereological_concepts

def highest_potential(df: pd.DataFrame,
                      upper_part: float = 1/4):
    """Return the share of rows with the highest activation potential.

    Args:
        df: Table to trim. When it has an ``activation_potential`` column the
            rows are ranked by it (highest first) before trimming; otherwise
            the existing row order is used.
        upper_part: Fraction of the rows to keep; the row count is rounded up.

    Returns:
        A copy of the kept rows (original index labels are preserved). The
        input DataFrame is not modified.
    """
    keep_count = math.ceil(len(df) * upper_part)
    print("keep ", keep_count)
    if "activation_potential" in df.columns:
        # Rank explicitly instead of relying on the caller's row order: the
        # table returned by history() is ordered by "o", so slicing it as-is
        # would keep an alphabetical prefix rather than the top potentials.
        # A stable sort leaves already-sorted input (e.g. from hebb()) unchanged.
        df = df.sort_values(by="activation_potential", ascending=False, kind="mergesort")
    return df.iloc[:keep_count].copy()

def get_background_concepts(materials: dict, upper_part: float=1/4):
    """Collect the background concepts for the surprise check.

    The result is the union of: the ``o`` values kept from
    ``materials["history"]``, the ``o1`` and ``o2`` values kept from
    ``materials["cooccurrence"]`` (both trimmed with ``highest_potential``),
    and the two concept sets of the last proposition.

    Args:
        materials: Dict with the keys ``history``, ``cooccurrence``,
            ``direct_last_proposition`` and ``mereological_last_proposition``.
        upper_part: Fraction passed to ``highest_potential``.

    Returns:
        A set with all collected concepts.
    """
    top_history = highest_potential(materials["history"], upper_part)
    concepts = set(top_history["o"])
    top_cooccurrence = highest_potential(materials["cooccurrence"], upper_part)
    concepts.update(top_cooccurrence["o1"])
    concepts.update(top_cooccurrence["o2"])
    concepts |= materials["direct_last_proposition"] | materials["mereological_last_proposition"]
    return concepts

def check_surprise_score(materials: dict, upper_part: float=1/4):
    """Find the proof concepts that are not part of the background concepts.

    Args:
        materials: Dict read by ``get_background_concepts``; additionally its
            ``direct_last_proof`` entry (a set) is used here.
        upper_part: Fraction passed on to ``get_background_concepts``.

    Returns:
        A tuple ``(background_concepts, diff)`` where ``diff`` is the set of
        proof concepts missing from the background concepts.
    """
    # The co-occurrence table is trimmed inside get_background_concepts; the
    # former extra highest_potential call here only discarded its result and
    # printed a duplicate "keep" line.
    background_concepts = get_background_concepts(materials, upper_part)
    proof_concepts = materials["direct_last_proof"]
    print("background", len(background_concepts), background_concepts)
    diff = proof_concepts - background_concepts
    print("diff ", len(diff), diff)
    return background_concepts, diff

In [None]:
"""
1. capture of last proposition: direct and mereological [DONE]
2. capture of history and hebbian of previous propositions and proofs [DONE]
3. include history and hebbian of previous propositions and proofs in table [DONE]
4. prepare table of activation potential [DONE]
5. capture last proof: direct [DONE]
6. check last proof against lowest 3/4 of activation potential tables with last proposition removed
    - extract lowest 3/4 of activation potential tables
    - collect low activation concepts and concepts from the last proposition
    - compare concepts above with concepts from proof. [DONE]
"""

"""
CHECK UPPER_PART VALUE: it seems to do the opposite of what it should do
"""

def proof_analysis(kg: rdflib.Graph,
                   proposition_number: int = 1,
                   upper_part: float = 1/4):
    """Compare the concepts of one proof with its background concepts.

    Args:
        kg: Graph to query.
        proposition_number: Number used for the activation tables and to
            build the proposition and proof IRIs.
        upper_part: Fraction passed on to ``check_surprise_score``.

    Returns:
        The tuple ``(background_concepts, diff)`` from ``check_surprise_score``.
    """
    # activation potential tables
    history_df, cooccurrence_df = calculate_activation_potential(kg, proposition_number)

    # IRIs of the proposition and of its proof
    last_proposition_iri = f"<https://www.foom.com/core#proposition_{proposition_number}>"
    last_proof_iri = f"<https://www.foom.com/core#proof_{proposition_number}>"

    # concepts of the proposition (direct and mereological)
    proposition_direct, proposition_mereological = direct_and_mereological_last_item(kg, last_proposition_iri)
    # concepts of the proof; only the direct ones are used below
    proof_direct, _unused_proof_mereological = direct_and_mereological_last_item(kg, last_proof_iri)

    materials = {
        "direct_last_proposition": proposition_direct,
        "mereological_last_proposition": proposition_mereological,
        "direct_last_proof": proof_direct,
        "history": history_df,
        "cooccurrence": cooccurrence_df
    }
    return check_surprise_score(materials, upper_part)

def output_df(analyses: list, filename: str="output/analyses"):
    """Write the collected analyses to a timestamped CSV file.

    Args:
        analyses: Rows of three values each: proof number, background
            concepts, diff.
        filename: Path prefix of the output file; ``_<YYYYmmdd_HHMMSS>.csv``
            is appended to it.

    Returns:
        The DataFrame that was written, with the columns ``proof_number``,
        ``background_concepts`` and ``diff``.
    """
    analyses_df = pd.DataFrame(analyses, columns=["proof_number", "background_concepts", "diff"])
    t = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
    filename = f"{filename}_{t}.csv"
    # Create the target folder first; to_csv does not create missing folders
    # and would otherwise fail after the whole analysis has already run.
    output_folder = os.path.dirname(filename)
    if output_folder:
        os.makedirs(output_folder, exist_ok=True)
    analyses_df.to_csv(filename, index=False)
    print(f"Output: {filename}")
    return analyses_df

def main(file_name: str = file_name,
         upper_proposition_number: int = 1,
         given_upper_part: float = 1/4):
    """Analyse the proofs 1 .. upper_proposition_number - 1 and save the result.

    Args:
        file_name: Turtle file handed to ``access_graph``.
        upper_proposition_number: Exclusive upper bound of the proposition
            numbers to analyse.
        given_upper_part: Fraction passed to ``proof_analysis`` as ``upper_part``.

    Returns:
        The DataFrame returned by ``output_df``.
    """
    # load the turtle file into an rdflib.Graph
    kg = access_graph(file_name)
    separator = " ; "
    # one row per analysed proof
    analyses = []
    for proposition_number in range(1, upper_proposition_number):
        print(proposition_number)
        background_concepts, diff = proof_analysis(kg,
                                                   proposition_number=proposition_number,
                                                   upper_part=given_upper_part)
        print("----")
        row = [proposition_number,
               separator.join(sorted(background_concepts)),
               separator.join(sorted(diff))]
        analyses.append(row)
        print(analyses)
    # write the rows to disk and hand back the table
    return output_df(analyses)



In [None]:
# analyse propositions 1 to 48 (the loop in main stops before upper_proposition_number);
# given_upper_part=1 keeps every row of both activation tables
analyses_df = main(upper_proposition_number=49, given_upper_part=1)

In [None]:
analyses_df

In [None]:
# Rebuild the intermediate results for one proposition so the cells below can
# display them. main() takes neither `proposition_number` nor `upper_part` and
# returns a single DataFrame, so the values are computed with the helpers directly.
inspected_proposition = 1
inspection_kg = access_graph(file_name)
history_df, cooccurrence_df = calculate_activation_potential(inspection_kg, inspected_proposition)
direct_last_proposition, mereological_last_proposition = direct_and_mereological_last_item(
    inspection_kg, f"<https://www.foom.com/core#proposition_{inspected_proposition}>")
direct_last_proof = direct_last_item(
    inspection_kg, f"<https://www.foom.com/core#proof_{inspected_proposition}>")

In [None]:
history_df

In [None]:
cooccurrence_df

In [None]:
direct_last_proposition

In [None]:
mereological_last_proposition

In [None]:
direct_last_proof

In [None]:

def analysis_to_df(proposition_number: int=1, background_concepts:set=set(), diff:set=set()):
    """Turn one analysis into a single-row DataFrame.

    Args:
        proposition_number: Stored as-is in ``proposition_number``.
        background_concepts: Concepts that are sorted and joined with " ; ".
        diff: Concepts that are sorted and joined with " ; ".

    Returns:
        A DataFrame with one row (index 0) and the columns
        ``proposition_number``, ``background_concepts`` and ``diff``.
    """
    separator = " ; "
    row = {
        "proposition_number": proposition_number,
        "background_concepts": separator.join(sorted(background_concepts)),
        "diff": separator.join(sorted(diff)),
    }
    return pd.DataFrame(row, index=[0])

def dfs_to_excel(dfs: list,
                 output_file_name: str="output/analysis"):
    """Write each DataFrame to its own sheet of a timestamped Excel file.

    Args:
        dfs: DataFrames to write; the i-th one (counting from 1) goes to the
            sheet ``proof_<i>``.
        output_file_name: Path prefix of the output file;
            ``_<YYYYmmdd_HHMMSS>.xlsx`` is appended to it.
    """
    t = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
    output_file_name = f"{output_file_name}_{t}.xlsx"
    # Create the target folder first so the writer does not fail on a missing folder.
    output_folder = os.path.dirname(output_file_name)
    if output_folder:
        os.makedirs(output_folder, exist_ok=True)
    with pd.ExcelWriter(output_file_name, engine="openpyxl") as writer:
        for sheet_number, df in enumerate(dfs, start=1):
            df.to_excel(writer, sheet_name=f"proof_{sheet_number}", index=False)
    print(f"Output: {output_file_name}")