In [52]:
from rdflib import Graph
import pandas as pd
from dotenv import load_dotenv
import os
from transformers import AutoModelForSequenceClassification, BertTokenizer, pipeline
from itertools import permutations

In [68]:
def loginHuggingFace():
    load_dotenv()
    HUGGINGFACE_APIKEY = os.getenv('HUGGINGFACE_APIKEY')
    !huggingface-cli login --token {HUGGINGFACE_APIKEY}

In [None]:
def getOntoClasses(path_to_onto):
    """Return the names of the classes declared in an ontology file.

    The ontology is loaded with ``rdflib.Graph.parse`` and queried with
    SPARQL. A resource counts as a class when it is typed as either
    ``rdfs:Class`` or ``owl:Class``. For each class the ``rdfs:label`` is
    used as its name; classes without any label fall back to the local part
    of their IRI (everything after the last ``/`` or ``#``).

    Args:
        path_to_onto: location of the ontology, passed unchanged to
            ``Graph.parse``.

    Returns:
        pandas.DataFrame with a single ``className`` column holding the
        distinct names returned by the query (as rdflib terms). The frame is
        empty, but still has the ``className`` column, when no class is found.
    """
    g = Graph()
    g.parse(path_to_onto)

    # Both UNION branches accept rdfs:Class and owl:Class so that labelled
    # OWL classes and unlabelled RDFS classes are not silently dropped.
    # Each branch repeats the type pattern so it is self-contained.
    query = """
    PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
    PREFIX owl: <http://www.w3.org/2002/07/owl#>

    SELECT DISTINCT ?className
    WHERE {
            {
                ?class a ?classType .
                FILTER(?classType IN (rdfs:Class, owl:Class))
                ?class rdfs:label ?classLabel .
                BIND(?classLabel AS ?className)  # Use label if available
            }
        UNION
            {
                ?class a ?classType .
                FILTER(?classType IN (rdfs:Class, owl:Class))
                FILTER NOT EXISTS { ?class rdfs:label ?anyLabel }  # Ensure no label exists
                BIND(REPLACE(STR(?class), "^.*[/#]", "") AS ?className)  # Extract local name
            }
    }
    """
    bindings = g.query(query).bindings
    if not bindings:
        # Keep the expected schema so callers indexing the first column
        # still work on an ontology without classes.
        return pd.DataFrame(columns=['className'])

    df = pd.DataFrame(bindings)
    # Column keys come from the query variables; normalise them to plain
    # strings without spaces.
    df.columns = [str(column).replace(' ', '') for column in df.columns]

    return df

In [None]:
def getClassifier(name_tokenizer, name_model, name_pipeline):
    """Build a Hugging Face pipeline from a tokenizer name and a model name.

    Args:
        name_tokenizer: identifier passed to ``BertTokenizer.from_pretrained``.
        name_model: identifier passed to
            ``AutoModelForSequenceClassification.from_pretrained``.
        name_pipeline: first positional argument of ``pipeline``.

    Returns:
        The object returned by ``pipeline`` for the loaded model and tokenizer.
    """
    # Keyword arguments are evaluated left to right, so the tokenizer is
    # still loaded before the model.
    return pipeline(
        name_pipeline,
        tokenizer=BertTokenizer.from_pretrained(name_tokenizer),
        model=AutoModelForSequenceClassification.from_pretrained(name_model),
    )

In [None]:
def getPredictions(classifier, df):
    """Classify every ordered pair of values from the first column of ``df``.

    For each ordered pair ``(classA, classB)`` of distinct positions in the
    first column, the classifier is called with the text
    ``"<classA> [SEP] <classB>"``. The first element of its result is
    extended with ``classA`` and ``classB`` keys and collected.

    Args:
        classifier: callable taking one string and returning a sequence whose
            first element is a dict.
        df: DataFrame whose first column holds the values to pair up.

    Returns:
        pandas.DataFrame with one row per ordered pair, in the order produced
        by ``itertools.permutations``.
    """
    class_names = df.iloc[:, 0]
    rows = []
    for first, second in permutations(class_names, 2):
        top_hit = classifier(f"{first} [SEP] {second}")[0]
        # The classifier's own dict is extended in place, as before.
        top_hit.update({'classA': first, 'classB': second})
        rows.append(top_hit)

    return pd.DataFrame(rows)

In [None]:
def sortPredictions(df_pred, top_n=1, exclude_label="LABEL_0"):
    """Keep the best-scoring predictions per ``(classA, label)`` group.

    Rows whose ``label`` equals ``exclude_label`` are discarded. The rest are
    ranked by ``score`` (highest first), the ``top_n`` best rows of every
    ``(classA, label)`` group are kept, and the result is ordered by
    ``classA``. Both sorts are stable, so rows of the same ``classA`` stay in
    descending score order and ties keep their input order.

    Args:
        df_pred: DataFrame with at least ``label``, ``score`` and ``classA``
            columns. It is not modified.
        top_n: number of rows to keep per ``(classA, label)`` group.
            Defaults to 1.
        exclude_label: label value to drop before ranking. Defaults to
            ``"LABEL_0"``.

    Returns:
        pandas.DataFrame containing the selected rows with their original
        index. An empty input yields an empty copy of the input.

    Raises:
        ValueError: if ``top_n`` is smaller than 1.
    """
    if top_n < 1:
        raise ValueError(f"top_n must be >= 1, got {top_n}")

    # A prediction frame built from fewer than two classes has no rows (and
    # possibly no columns), so there is nothing to filter or rank.
    if df_pred.empty:
        return df_pred.copy()

    ranked = (
        df_pred[df_pred["label"] != exclude_label]
        .sort_values(by="score", ascending=False, kind="stable")
    )
    # head() keeps the first top_n rows of each group in ranked order.
    df_top = (
        ranked.groupby(["classA", "label"])
        .head(top_n)
        .sort_values(by="classA", kind="stable")
    )

    return df_top