In [None]:
import pandas as pd
from rdflib import Graph

In [None]:
def getOntologyData(path_to_ont):
    """Load a Turtle ontology and return its labelled subclass relations.

    The file is parsed with rdflib and queried for every
    ``?Subclass rdfs:subClassOf ?Class`` triple in which both ends carry an
    ``rdfs:label`` and the superclass is not a blank node.

    Parameters
    ----------
    path_to_ont : str or path-like
        Location of the ontology, parsed with ``format="ttl"``.

    Returns
    -------
    pandas.DataFrame
        Two columns: ``clsLabel`` (label of the superclass) and ``sbLabel``
        (label of the subclass). The frame is empty, but still has both
        columns, when the query matches nothing.
        NOTE(review): DISTINCT is applied over four variables while only the
        two label columns are kept, so repeated label pairs look possible.
    """
    g = Graph()
    g.parse(path_to_ont, format="ttl")

    # Plain (non-f) string: nothing is interpolated, so braces need no escaping.
    query = """
    PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
    PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
    SELECT DISTINCT ?Class ?Subclass ?sbLabel ?clsLabel
    WHERE {
        ?Subclass rdfs:subClassOf ?Class .
            ?Class rdfs:label ?clsLabel .
            ?Subclass rdfs:label ?sbLabel .
            FILTER (!isBlank(?Class)) 
    }

    """

    qres = g.query(query)

    label_columns = ['clsLabel', 'sbLabel']
    bindings = qres.bindings
    if not bindings:
        # Without any bindings the frame would have no columns at all and the
        # column clean-up / selection below would raise; return an empty frame
        # with the expected schema instead.
        return pd.DataFrame(columns=label_columns)

    df = pd.DataFrame(bindings)
    df.columns = df.columns.str.replace(' ', '')
    df = df[label_columns].copy()

    return df

In [None]:
# label = 0, meaning for data (A,B) A is NOT_DIRECTLY_RELATED to B.
def create_negativeDF(data_df, superclass_df, subclass_df):
    """Build the label-0 (not directly related) pairs.

    Candidate pairs are produced by reversing the order of the ``clsLabel``
    column so that each subclass label is paired with a different row's class
    label. Any candidate whose ``(classA, classB)`` pair also occurs in
    ``superclass_df`` or ``subclass_df`` is dropped, so a pair that is in fact
    directly related never receives label 0.

    Parameters
    ----------
    data_df : pandas.DataFrame
        Frame with ``clsLabel`` and ``sbLabel`` columns. It is not modified.
    superclass_df, subclass_df : pandas.DataFrame
        Frames with ``classA`` and ``classB`` columns holding the known
        related pairs.

    Returns
    -------
    pandas.DataFrame
        Columns ``classA``, ``classB`` and ``label`` (always 0). Surviving
        rows keep their index labels from ``data_df``.
    """
    negative_df = data_df.copy()
    # Reversing one column re-pairs every row with the class of its mirror row.
    negative_df['clsLabel'] = negative_df['clsLabel'].values[::-1]
    negative_df = negative_df.rename(columns={'clsLabel' : 'classA', 'sbLabel' : 'classB'})

    # Compare whole (classA, classB) pairs. DataFrame.isin(DataFrame) aligns on
    # index and column labels, so it cannot express "this row exists in that
    # frame"; an explicit set of pairs does.
    related_pairs = set()
    for related_df in (superclass_df, subclass_df):
        related_pairs.update(zip(related_df['classA'], related_df['classB']))

    is_related = pd.Series(
        [pair in related_pairs for pair in zip(negative_df['classA'], negative_df['classB'])],
        index=negative_df.index,
        dtype=bool,
    )
    negative_df = negative_df.loc[~is_related].copy()

    negative_df["label"] = 0
    return negative_df

In [None]:
# label = 1, meaning for data (A,B) A is SUPERCLASS of B.
def create_superclassDF(data_df):
    """Return the label-1 pairs, where classA is the superclass of classB.

    ``clsLabel`` becomes ``classA`` and ``sbLabel`` becomes ``classB``; a
    constant ``label`` column set to 1 is appended. ``data_df`` is left
    untouched because both steps produce new frames.
    """
    column_map = {'clsLabel' : 'classA', 'sbLabel' : 'classB'}
    renamed = data_df.rename(columns=column_map)
    return renamed.assign(label=1)

In [None]:
# label = 2, meaning for data (A,B) A is SUBCLASS of B.
def create_subclassDF(data_df):
    """Return the label-2 pairs, where classA is the subclass of classB.

    The roles are swapped relative to the input: ``sbLabel`` becomes
    ``classA`` and ``clsLabel`` becomes ``classB``. A constant ``label``
    column set to 2 is appended. ``data_df`` is left untouched because both
    steps produce new frames.
    """
    column_map = {'clsLabel' : 'classB', 'sbLabel' : 'classA'}
    renamed = data_df.rename(columns=column_map)
    return renamed.assign(label=2)

In [None]:
def create_processedDF(data_df, random_state=None):
    """Assemble the shuffled three-class dataset from the raw label pairs.

    The negative (label 0), superclass (label 1) and subclass (label 2)
    frames are built from ``data_df``, stacked row-wise, shuffled, and
    re-indexed from 0.

    Parameters
    ----------
    data_df : pandas.DataFrame
        Frame with ``clsLabel`` and ``sbLabel`` columns.
    random_state : optional
        Forwarded to ``DataFrame.sample`` to make the shuffle reproducible.
        The default ``None`` keeps the shuffle unseeded.

    Returns
    -------
    pandas.DataFrame
        The concatenated rows in shuffled order with a fresh index.
    """
    subclass_df = create_subclassDF(data_df)
    superclass_df = create_superclassDF(data_df)
    negative_df = create_negativeDF(data_df, superclass_df, subclass_df)
    # concat aligns on column names, so the differing column order of
    # subclass_df does not mix up classA and classB.
    processed_df = pd.concat([negative_df, superclass_df, subclass_df], axis=0)

    # frac=1 draws every row exactly once, i.e. a full shuffle.
    processed_df = processed_df.sample(frac=1, random_state=random_state).reset_index(drop=True)
    return processed_df