In [2]:
# Define the SPARQL endpoint here (i.e. the URL where the Blazegraph instance is running).
# The value is read from the local `config.yml` file rather than hard-coded in the notebook.
import yaml
with open('config.yml', 'r') as file:
    config = yaml.safe_load(file)
# The config is expected to hold a top-level `blazegraph` mapping with an `endpoint` entry.
endpoint = config['blazegraph']['endpoint']
# Bare expression: echoes the configured endpoint as the cell output.
endpoint

'http://193.171.177.138:9999/blazegraph/namespace/ito/sparql'

In [3]:
# %%
#import some modules
import os
import json
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.ticker import ScalarFormatter
import seaborn as sns
from SPARQLWrapper import SPARQLWrapper, N3, JSON
from rdflib import Graph
from scipy import stats

# Shared SPARQL PREFIX declarations.
# NOTE(review): no code visible in this notebook section references `prefixes`; the query
# functions below embed their own PREFIX blocks, and those declare `ito:` as
# <https://identifiers.org/ito:> while this string uses <https://identifiers.org/ito#>.
# Confirm which IRI is correct before reusing this constant.
prefixes = """
PREFIX edam: <http://edamontology.org/>
PREFIX obo:  <http://purl.obolibrary.org/obo/>
PREFIX ito:  <https://identifiers.org/ito#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX foaf: <http://xmlns.com/foaf/0.1/>
"""


#Use this function to escape some desired_benchmark names that might contain special chars causing crashes.
def escape(s):
    """
    Return a copy of `s` with quote, backslash and line-break characters
    backslash-escaped.

    Each of ', ", \\, carriage return and newline is replaced by its
    two-character escaped form; every other character is passed through
    unchanged.
    """
    replacements = {
        "'": r"\'",
        '"': r'\"',
        "\\": r"\\",
        "\r": r"\r",
        "\n": r"\n",
    }
    translation_table = str.maketrans(replacements)
    return s.translate(translation_table)


def query(service, query, numeric_cols = [], date_cols = []):
    """
    Run a SPARQL SELECT query and return its bindings as a pandas DataFrame.

    Parameters:
        service: URL of the SPARQL endpoint handed to SPARQLWrapper.
        query: the SPARQL query text.
        numeric_cols: names of result columns to convert with pd.to_numeric.
        date_cols: names of result columns to convert with pd.to_datetime.

    Returns:
        A DataFrame with one column per variable listed in the response
        head and one row per binding. A variable that is missing from a
        binding yields None in that cell.
    """
    client = SPARQLWrapper(service)
    client.setQuery(query)
    client.setReturnFormat(JSON)

    # Parse the raw HTTP response body as SPARQL JSON results.
    payload = json.load(client.query().response)
    variables = payload['head']['vars']

    # Bindings omit unbound variables, hence the chained .get() calls.
    records = [
        [binding.get(var, {}).get('value') for var in variables]
        for binding in payload['results']['bindings']
    ]

    frame = pd.DataFrame(records, columns=variables)
    for name in numeric_cols:
        frame[name] = pd.to_numeric(frame[name])
    for name in date_cols:
        frame[name] = pd.to_datetime(frame[name])

    return frame

def ito_to_df(endpoint, ito):
    """
    Fetch all reported results below one ITO class as a DataFrame.

    Parameters:
        endpoint: URL of the SPARQL endpoint, passed on to `query`.
        ito: local identifier of the top-level class (e.g. "ITO_00101").
            It is concatenated into the query text as `ito:<ito>`, so it
            must be a trusted identifier, not free-form user input.

    Returns:
        A DataFrame ordered by date with the columns date, top_level_class,
        top_level_class_label, task_label, dataset, dataset_label, paper,
        paper_label, model, model_label, metric, metric_label and result.
        `result` is numeric and `date` is a datetime column.
    """
    q = """
        PREFIX edam: <http://edamontology.org/>
        PREFIX obo:  <http://purl.obolibrary.org/obo/>
        PREFIX ito:  <https://identifiers.org/ito:>
        PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
        PREFIX foaf: <http://xmlns.com/foaf/0.1/>
        PREFIX oboInOwl: <http://www.geneontology.org/formats/oboInOwl#>

        SELECT DISTINCT *
        WHERE {
                ?paper a edam:data_0971 . 
                ?paper rdfs:label ?paper_label. 
                ?paper oboInOwl:date ?date. 
                ?model  rdfs:seeAlso ?paper ;
                        rdfs:label ?model_label ;
                        a ?dataset . # this will create a place holder for the rdfs:type results that contains the information about the individual
                
                BIND(ito:""" + ito + """ AS ?top_level_class)
                ?top_level_class rdfs:label ?top_level_class_label .

                ?dataset rdfs:label ?dataset_label ;
                        rdfs:subClassOf* ?top_level_class .

                ?metric rdfs:subPropertyOf* ito:performance_measure .
                ?metric rdfs:label ?metric_label .
                ?model ?metric ?result

                FILTER(?top_level_class != ito:Benchmarking) 
                FILTER(?top_level_class != ito:ITO_01524) 
            } ORDER by ?date
    """

    # Named `results` rather than `all` so the builtin all() is not shadowed.
    results = query(endpoint, q, numeric_cols = ["result"], date_cols = ["date"])

    # The dataset class label is split on its LAST " - ": the left part is kept
    # as the dataset label, the right part becomes the task label.
    # `n` must be passed by keyword: pandas >= 2.0 rejects positional arguments
    # after `pat` for str.rsplit.
    results[['dataset_label', 'task_label']] = results['dataset_label'].str.rsplit(' - ', n=1, expand=True)
    # Literal (non-regex) removal, independent of the pandas-version default for `regex`.
    results["task_label"] = results["task_label"].str.replace(" benchmarking", "", regex=False)

    # reorder columns (more hierarchical)
    results = results.reindex(columns=['date', 'top_level_class', 'top_level_class_label', 'task_label', 'dataset', 'dataset_label', 'paper', 'paper_label', 'model', 'model_label', 'metric', 'metric_label', 'result'])
    return results

def fix_polarities(df):
    """
    Attach a polarity to every row and flip the sign of negative-polarity results.

    The polarity table is read from "data/polarities.csv" (semicolon separated)
    and joined on `metric_label`. Results whose polarity is "neg" are multiplied
    by -1; metrics without an entry in the table are labelled "pos".

    Returns a new DataFrame with an added `polarity` column.
    """
    # The polarity file is extracted from the supplementary tables, see
    # https://github.com/OpenBioLink/ITO/blob/master/notebooks/barbosa-silva-etal-2022/data/polarities.csv
    polarity_table = (
        pd.read_csv("data/polarities.csv", sep=";")
        .drop_duplicates()
        .rename(columns={"metric": "metric_label"})
    )

    pol_all = df.merge(polarity_table, how="left", on="metric_label")

    # Negate the negative-polarity results.
    is_negative = pol_all["polarity"] == "neg"
    pol_all.loc[is_negative, "result"] = pol_all.loc[is_negative, "result"] * -1

    # Metrics missing from the table default to positive polarity.
    pol_all["polarity"] = pol_all["polarity"].fillna("pos")
    return pol_all

def add_katrin_groups(df, ito):
    """
    Add the manually curated task grouping to each row.

    The grouping table is read from "data/grouping_<ito>.csv" and joined to
    `df` on the lower-cased task label (left join, so rows without a grouping
    entry keep a missing `suggested_task_label`).

    Parameters:
        df: DataFrame with a `task_label` column. It is NOT modified.
        ito: identifier used to pick the grouping file.

    Returns:
        A new DataFrame with the columns of `df` plus the grouping columns,
        minus `id`, `Superclass_id`, `Superclass_label` and the temporary
        join key.
    """
    grouping_table = pd.read_csv("data/grouping_"+ito+".csv")

    # fix dirty data (literal replacement, independent of the pandas `regex` default)
    grouping_table["Suggested_label"] = grouping_table["Suggested_label"].str.replace(
        "Semantic segmenation", "Semantic segmentation", regex=False
    )

    grouping_table = grouping_table.rename(columns={'Class_Label': 'task_label_lower', 'Suggested_label': 'suggested_task_label'})
    grouping_table["task_label_lower"] = grouping_table["task_label_lower"].str.lower()

    # Build the join key on a copy so the caller's frame does not gain a
    # stray `task_label_lower` column as a side effect.
    keyed = df.assign(task_label_lower=df["task_label"].str.lower())

    group_all = keyed.merge(grouping_table, on='task_label_lower', how='left')
    group_all = group_all.drop(['id', 'Superclass_id', 'Superclass_label', "task_label_lower"], axis=1)
    return group_all

def extract_sota(df):
    """
    Reduce every benchmark/metric group to its state-of-the-art trajectory.

    Rows are grouped by top-level class, the three class levels, task,
    dataset and metric. Within each group only the rows that set a new
    best (highest) result, at monthly resolution, are kept.

    Returns a DataFrame with the grouping keys restored as columns via
    reset_index(); `date` holds "YYYY-MM" strings.
    """
    def running_best(group):
        # Collapse dates to monthly resolution.
        monthly = group.assign(date=group["date"].dt.strftime("%Y-%m").astype("string"))

        # Oldest month first and, inside a month, the highest result first, so that
        # dropping duplicate months keeps the best result of each month.
        best_per_month = (
            monthly
            .sort_values(by=["date", "result"], ascending=[True, False])
            .drop_duplicates(subset=["date"])
        )

        # Running maximum = best result reported so far.
        # see https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.cummax.html
        with_running_max = best_per_month.assign(result=best_per_month["result"].cummax())

        # A repeated running maximum means no improvement; keep only the first occurrence.
        return with_running_max.drop_duplicates(subset=["result"])

    # key for grouping
    grp = ["top_level_class", "top_level_class_label", "l1_label", "l2_label", "l3_label", "task_label", "dataset", "dataset_label", "metric"]
    value_columns = [column for column in df.columns if column not in grp]

    # aggregate groups
    sota = df.groupby(grp)[value_columns].apply(running_best).reset_index()
    return sota

def add_superclasses(endpoint, ito, df, up_to_lvl):
    """
    Add the labels of the first three class levels below the top-level class.

    Three SPARQL queries are sent, one per depth; the query for depth N returns
    the labels of levels 1..N for every `?dataset` below that level. The
    results are left-joined to `df` on `dataset`. Where a dataset has no
    level-2 (or level-3) ancestor, the label of the level above is reused.

    Parameters:
        endpoint: URL of the SPARQL endpoint, passed on to `query`.
        ito: local identifier of the top-level class, concatenated into the queries.
        df: DataFrame with a `dataset` column.
        up_to_lvl: accepted but not used by the body; three levels are always fetched.

    Returns:
        A new DataFrame with the added columns l1_label, l2_label and l3_label.
    """
    prefix = """
    PREFIX edam: <http://edamontology.org/>
    PREFIX obo:  <http://purl.obolibrary.org/obo/>
    PREFIX ito:  <https://identifiers.org/ito:>
    PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
    PREFIX foaf: <http://xmlns.com/foaf/0.1/>
    PREFIX oboInOwl: <http://www.geneontology.org/formats/oboInOwl#>
    """

    lvl1 = """
    SELECT DISTINCT ?dataset ?l1_label
    WHERE {
    BIND(ito:""" + ito + """ AS ?top_level_class)
    ?dataset rdfs:subClassOf+ ?l1 .
    ?l1 rdfs:subClassOf ?top_level_class .
    ?l1 rdfs:label ?l1_label .
    }
    """

    lvl2 = """
    SELECT DISTINCT ?dataset ?l1_label ?l2_label
    WHERE {
    BIND(ito:""" + ito + """ AS ?top_level_class)
    ?dataset rdfs:subClassOf+ ?l2 .
    ?l2 rdfs:subClassOf ?l1 .
    ?l1 rdfs:subClassOf ?top_level_class .
    ?l2 rdfs:label ?l2_label .
    ?l1 rdfs:label ?l1_label .
    }
    """

    lvl3 = """
    SELECT DISTINCT ?dataset ?l1_label ?l2_label ?l3_label
    WHERE {
    BIND(ito:""" + ito + """ AS ?top_level_class)
    ?dataset rdfs:subClassOf+ ?l3 .
    ?l3 rdfs:subClassOf ?l2 .
    ?l2 rdfs:subClassOf ?l1 .
    ?l1 rdfs:subClassOf ?top_level_class .
    ?l3 rdfs:label ?l3_label .
    ?l2 rdfs:label ?l2_label .
    ?l1 rdfs:label ?l1_label .
    }
    """

    # Run the queries in order of increasing depth.
    per_level = [query(endpoint, prefix + body) for body in (lvl1, lvl2, lvl3)]

    # Widen the level-1 table one level at a time, joining on every column
    # collected so far.
    classes = per_level[0]
    for deeper in per_level[1:]:
        classes = pd.merge(classes, deeper, how="left", on=list(classes.columns))

    df = pd.merge(df, classes, how="left", on="dataset")

    # Fall back to the parent level's label where a level is missing. l2 is
    # filled before l3 so that l3 can inherit an already back-filled l2.
    for child, parent in (("l2_label", "l1_label"), ("l3_label", "l2_label")):
        missing = df[child].isna()
        df.loc[missing, child] = df.loc[missing, parent]

    return df


In [5]:
# Build and export the SOTA curves for each top-level ITO class.
for ito in ["ITO_00101", "ITO_00141"]:
    print(f"Creating sota curves for {ito}")

    # Fetch all results, normalise metric polarity, attach the class hierarchy.
    df = ito_to_df(endpoint, ito)
    df = fix_polarities(df)
    df = add_superclasses(endpoint, ito, df, 3)
    # Full (pre-SOTA) table; index=False keeps the row index out of the file.
    df.to_csv(f"data/all_{ito}.csv", index=False)

    # From here on `df` only holds the rows that set a new state of the art.
    df = extract_sota(df)

    # Papers behind the SOTA results. to_csv() returns None when given a path,
    # so its return value is deliberately not bound to a name.
    df[["dataset_label", "paper", "paper_label"]].drop_duplicates().to_csv(f"data/sota_papers_{ito}.csv", index=False)

    # Slim export with shortened column names.
    out = df[["l1_label", "l2_label", "l3_label", "task_label", "dataset_label", "date", "model_label", "result", "metric_label", "polarity"]]
    out = out.rename(columns={
        "l1_label": "l1",
        "l2_label": "l2",
        "l3_label": "l3",
        "task_label": "task",
        "dataset_label": "dataset",
        "model_label": "model",
        "metric_label": "metric"
    })
    out.to_csv(f"data/sota_{ito}.csv", index=False)
    print(f"Saved sota curve to data/sota_{ito}.csv")



Creating sota curves for ITO_00101
Saved sota curve to data/sota_ITO_00101.csv
Creating sota curves for ITO_00141
Saved sota curve to data/sota_ITO_00141.csv


In [23]:
# Summary counts for one top-level class, computed from the exported full table.
ito = "ITO_00101"
# NOTE(review): `all` shadows the Python builtin all() for the rest of the kernel session.
# The name is kept so that any later cell relying on it keeps working; consider renaming.
all = pd.read_csv(f"data/all_{ito}.csv")
all = all[["date", "task_label", "dataset_label", "metric_label", "model_label", "result"]].drop_duplicates()

# Rows belonging to (task, dataset, metric) groups with at least three rows.
# Computed once and reused for both the benchmark and the task counts.
# NOTE(review): the filter counts de-duplicated result rows per group, not distinct
# dates; confirm this matches the "different time points" wording printed below.
ge_3 = all.groupby(["task_label", "dataset_label", "metric_label"]).filter(lambda x: len(x) >= 3)

len_ds = len(all["dataset_label"].unique())
len_ds_ge_3 = len(ge_3["dataset_label"].unique())

print("Benchmarks with ≥ 1 reported result:", len_ds)
print("Benchmarks with ≥ 3 results at different time points (% of above):", len_ds_ge_3, len_ds_ge_3/len_ds * 100)

len_t = len(all["task_label"].unique())
len_t_ge_3 = len(ge_3["task_label"].unique())

print("AI tasks with ≥ 1 reported result:", len_t)
print("AI tasks with ≥ 3 results at different time points (% of above):", len_t_ge_3, len_t_ge_3/len_t * 100)

Benchmarks with ≥ 1 reported result: 2447
Benchmarks with ≥ 3 results at different time points (% of above): 1274 52.063751532488766
AI tasks with ≥ 1 reported result: 601
AI tasks with ≥ 3 results at different time points (% of above): 386 64.22628951747087
