### 3) PLOT_0_POL - Ratio-corrected create CSV export from ITO
(Work in progress)

This plot is based on the original PLOT_0 notebook. It takes in consideration the floor and the ceiling values, asl well as the metrics (posivite vs. negative) to plot the curves.

This notebook is used to export the information from sparql into a csv file.

It should be executed as base for individual plot created with the notebooks
PLOT_1, PLOT_2, PLOT_3 and PLOT_4.



In [1]:
#Adjust display
from IPython.core.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))

In [2]:
#!pip install SPARQLWrapper
#!pip install pandas
#!pip install numpy
#!pip install matplotlib
#!pip install seaborn
#!pip install sparqlwrapper
#!pip install plotly

In [3]:
#import some modules
import os
import json
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.ticker import ScalarFormatter
import seaborn as sns
from SPARQLWrapper import SPARQLWrapper, N3, JSON
from rdflib import Graph
import plotly.graph_objects as go
import plotly.express as px
import re

%matplotlib inline

### Start function definitions

In [4]:
#Define here the end point  (i.e. where the blazergraph instance is running)

#endpoint = "http://localhost:9999/blazegraph/sparql" # SPARQL endpoint hosting ITO.owl


#current one
endpoint = "http://192.168.56.1:9999/blazegraph/sparql"
        
#endpoint = "http://149.148.106.153:9999/blazegraph/sparql"
prefixes = """
PREFIX edam: <http://edamontology.org/>
PREFIX obo:  <http://purl.obolibrary.org/obo/>
PREFIX ito:  <https://identifiers.org/ito#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX foaf: <http://xmlns.com/foaf/0.1/>
"""

def query(service, query, numeric_cols = []):
    """
    Helper function to convert SPARQL results into a Pandas data frame.
    """
    sparql = SPARQLWrapper(service)
    sparql.setQuery(query)
    sparql.setReturnFormat(JSON)
    result = sparql.query()

    processed_results = json.load(result.response)
    cols = processed_results['head']['vars']

    out = []
    for row in processed_results['results']['bindings']:
        item = []
        for c in cols:
            item.append(row.get(c, {}).get('value'))
        out.append(item)
        
    df = pd.DataFrame(out, columns=cols)
    for col in numeric_cols:
        df[col] = pd.to_numeric(df[col])
    
    return df


In [5]:
#Use this function to escape some desired_benchmark names that might contain special chars.
def escape(s):
    return s.translate(str.maketrans({  "'":   r"\'",
                                        '"':   r'\"',
                                        "\\":  r"\\",
                                        "\r":  r"\r",
                                        "\n":  r"\n"}))

#Query database for selected ito and get one ratio dataframe per desired measure. Example provided.
def create_ratio(desired_measure, desired_benchmark, ds_count, selected_ito, metricName_negative_list):
    
    #escape string here, this is needed once some names contain special chars in the query.
    desired_benchmark = escape(desired_benchmark)
    print("Creating ratio df for ",desired_measure,", ",desired_benchmark,", ds_count=",ds_count)
    
    query = """
    PREFIX edam: <http://edamontology.org/>
    PREFIX obo:  <http://purl.obolibrary.org/obo/>
    PREFIX ito:  <https://identifiers.org/ito:>
    PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
    PREFIX foaf: <http://xmlns.com/foaf/0.1/>

    SELECT *
    WHERE {
    
        ?paper a edam:data_0971 . 
        ?paper rdfs:label ?paperTitle. 
        ?paper obo:date ?date. 

        ?benchmark_process_individual 	rdfs:seeAlso ?paper ;
                                        rdfs:label ?model_label ;
                                        a ?benchmark_process_class . # this will create a place holder for the rdfs:type results that contains the information about the individual
        
        ?benchmark_process_class rdfs:label ?benchmark_process_class_label ;
                                 rdfs:subClassOf*   """+selected_ito+""" . # this limits for the NLP class
        

        ?performance_measure rdfs:subPropertyOf* ito:performance_measure .
        ?performance_measure rdfs:label ?metricName .



        ?performance_measure rdfs:label ?metricName .
        ?benchmark_process_individual ?performance_measure ?result .	

        FILTER regex(?metricName, "^"""+ desired_measure +"""$" ). # this searches a specific match
        FILTER regex(?benchmark_process_class_label, "^"""+desired_benchmark+"""$" ). # this searches a specific match



    } ORDER by ?date

    """

    #send query via API
    sparql = SPARQLWrapper(endpoint)
    sparql.setQuery(query)
    sparql.setReturnFormat(JSON)
    result = sparql.query()

    #process results as JSON
    processed_results = json.load(result.response)

    #Use accessory function to process results
    cols = processed_results['head']['vars']

    out = []
    for row in processed_results['results']['bindings']:
        item = []
        for c in cols:
            item.append(row.get(c, {}).get('value'))

        out.append(item)

    #this is the final df containing the results of the query
    df = pd.DataFrame(out, columns=cols)
    
    if len(df.index)==0:
        return None
           
    #Set date column to datetime
    df['date'] = pd.to_datetime(df['date'])

    #Keep only the model name
    df.new = df.replace(to_replace =' model in .*$', value = '', regex = True)

    #Change the result column to numeric type
    df.new["result"] = pd.to_numeric(df.new["result"]) 

    #Create dataframe to store state of art results
    sota = pd.DataFrame(columns = df.new.columns)

    
    ###
    if desired_measure in metricName_negative_list:
    
        #ATTENTION: this modification using the minus signal is necessary in order to plot correctly the curve.
        #Although the numbers are decreasing, the curve is showing an upward pattern.

        df.new["result"] = -df.new["result"]

        #Create dataframe to store state of art results
        sota = pd.DataFrame(columns = df.new.columns)

        best_value = min(df.new["result"])
        i=0

        for result in df.new["result"]:        
            if result >= best_value:
                best_value = result
                sota_add = pd.DataFrame(df.new.iloc[i]).transpose()
                sota = sota.append(sota_add)            
            i = i+1    
    
    else:
        #Create dataframe to store state of art results
        sota = pd.DataFrame(columns = df.new.columns)

        best_value = 0
        i=0

        for result in df.new["result"]:        
            if result > best_value:
                best_value = result
                sota_add = pd.DataFrame(df.new.iloc[i]).transpose()
                sota = sota.append(sota_add)            
            i = i+1    
    
    
    ###
    #/////commented block
    #best_value = 0
    #i=0

    #for result in df.new["result"]:        
    #        if result > best_value:
    #            best_value = result
    #            sota_add = pd.DataFrame(df.new.iloc[i]).transpose()
    #            sota = sota.append(sota_add)            
    #        i = i+1    

    #This variable is containing the data points to plot the state of the art line        
    #sota
    
    #/////commented block

    #this is a copy to use as hovertemplate below
    y=df.new["result"].astype(str)


    #create this data frame to store the ratios
    ratio_df = pd.DataFrame(columns = ["ds_count", 
                                       "task",
                                       "ds", 
                                       "date",
                                       "model_label",
                                       "value",
                                       "percent_of_max_sota",
                                       "gain",
                                       "ratio",
                                       "max_sota"])


    #This block collects the rate of the benchmarkings along the years and saves it
    #ratio_df = pd.DataFrame(columns = ["ds_count", "ds", "date", "value","gain","ratio","max_sota"])

    print("###SOTA RESULTS: "+str(len(sota["result"])))
    if (len(sota["result"]>0)) :
        max(sota["result"])
        min(sota["result"])

        i=0
        for res in sota["result"]:
            if sota.loc[sota.index[i], 'result'] <= max(sota["result"]) :
                if(i == 0):
                    gain = sota.loc[sota.index[i], 'result']
                    ratio = round( gain / (max(sota["result"] - sota.loc[sota.index[0], 'result'])  ) ,4)
                    
                else :
                    gain = round(sota.loc[sota.index[i], 'result'] - sota.loc[sota.index[i-1], 'result'],4)
                    ratio = round( gain / (max(sota["result"] - sota.loc[sota.index[0], 'result'])  ) ,4)
                    
                #this is only to check if the calculation is correct
                #print(str(sota.loc[sota.index[i], 'result'])+" - "+ str(sota.loc[sota.index[i-1], 'result']) + " == "+ str(gain) +" Ratio == "+str(ratio))

                
                
                
                #ratio = round( gain / max(sota["result"]) ,2)
                
                
                #total = round((sota.loc[sota.index[i+1], 'result'] - sota.loc[sota.index[i], 'result'] ),2)
                #print(sota["result"][i+1] - sota["result"][i])
                #OK print(total.astype(str)+ " : " + max(sota["result"]).astype(str) )

                #year=sota.loc[sota.index[i], 'date'].strftime('%Y-%m-%d')
                year=sota.loc[sota.index[i], 'date'].strftime('%Y-%m')
                value=sota.loc[sota.index[i], 'result'].astype(str)
                percent_of_max_sota=round((sota.loc[sota.index[i], 'result'] / max(sota["result"]) * 100),2)
                benchmarking=sota.loc[sota.index[i], 'benchmark_process_class_label']
                model_label=sota.loc[sota.index[i], 'model_label']

                #clears the name of the task

                task=re.sub("^.*\s-\s","",benchmarking)
                task=re.sub(" benchmarking","",task)




                #benchmarking = df.replace(to_replace =' model in .*$', value = '', regex = True)


                #reduce the name of the benchmark
                #benchmarking = re.sub(r'\-.*',"",benchmarking) 
                #re.sub(r'\.\.\..*',"",test)

                new_row = {'ds_count':ds_count,
                           'task':task,
                           'ds':benchmarking, 
                           'date':year,
                           'model_label':model_label,
                           'value':value, 
                           'percent_of_max_sota':percent_of_max_sota.astype(float),  
                           'gain':gain.astype(str), 
                           'ratio':ratio.astype(str), 
                           'max_sota':max(sota["result"]).astype(str)}

                ratio_df = ratio_df.append(new_row, ignore_index=True)

                i = i+1
    return ratio_df

    

In [6]:
def get_metrics_polarity():
    df_metric_all = pd.read_csv("df_metric_all.csv")

    df_metric_all = df_metric_all[df_metric_all["ranking"]<=2]
    df_metric_all = df_metric_all[df_metric_all["ranking"]>0]
    df_metric_all = df_metric_all[df_metric_all["metrics"]!="No. parameters"]

    #calculate polarity 
    #report df
    polarity_df = pd.DataFrame(columns=['task','datasets','metricName','rank_1', 'rank_2', 'polarity'])

    #iterate over tasks
    for task in df_metric_all["task"].unique():
        #iterate over datasets
        for dataset in df_metric_all[df_metric_all["task"]==task]["datasets"].unique():
            #iterate over metrics
            for metric in df_metric_all[df_metric_all["task"]==task]["metrics"].unique():

                #get ranks 1 and 2, compare and set polarity
                try:
                    rank_1 = df_metric_all[(df_metric_all["task"] == task) & (df_metric_all["datasets"] == dataset) & (df_metric_all["metrics"] == metric ) & (df_metric_all["ranking"] == 1 )]
                    rank_2 = df_metric_all[(df_metric_all["task"] == task) & (df_metric_all["datasets"] == dataset) & (df_metric_all["metrics"] == metric ) & (df_metric_all["ranking"] == 2 )]
                    polarity = float(rank_1.value.iloc[0]) - float(rank_2.value.iloc[0])
                    if(polarity >=0):
                        polarity = "pos"
                    else:
                        polarity = "neg"

                    #print("metricName:"+metric + "\trank_1:"+rank_1["value"].iloc[0]+"\trank_2:"+rank_2["value"].iloc[0]+"\tpolarity:"+polarity)

                    #save to report df
                    polarity_df = polarity_df.append({'task':task,
                                                      'datasets':dataset,
                                                      'metricName':metric,
                                                      'rank_1':rank_1["value"].iloc[0],
                                                      'rank_2':rank_2["value"].iloc[0],
                                                      'polarity':polarity},
                                                      ignore_index=True)
                #skip if metric is not present for such dataset    
                except:
                    next


    
    return(polarity_df)


In [7]:
#return ratio_df_all dataframe per desired measure
def get_ratio_df_all_per_measure(desired_measure, selected_ito, metricName_negative_list):

    
    query = """
        PREFIX edam: <http://edamontology.org/>
        PREFIX obo:  <http://purl.obolibrary.org/obo/>
        PREFIX ito:  <https://identifiers.org/ito:>
        PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
        PREFIX foaf: <http://xmlns.com/foaf/0.1/>

        SELECT *
        WHERE {
            ?paper a edam:data_0971 . 
            ?paper rdfs:label ?paperTitle. 
            ?paper obo:date ?date. 

            ?benchmark_process_individual 	rdfs:seeAlso ?paper ;
                                            rdfs:label ?model_label ;
                                            a ?benchmark_process_class . # this will create a place holder for the rdfs:type results that contains the information about the individual

            ?benchmark_process_class rdfs:label ?benchmark_process_class_label ;
                                     rdfs:subClassOf*  """+selected_ito+""" . # this limits for the NLP class


            ?performance_measure rdfs:subPropertyOf* ito:performance_measure .
            ?performance_measure rdfs:label ?metricName .



            ?performance_measure rdfs:label ?metricName .
            ?benchmark_process_individual ?performance_measure ?result .	


            FILTER regex(?metricName, "^"""+ desired_measure +"""$" ) . # this searches a specific match
            



        } ORDER by ?date



        """

    #send query via API
    sparql = SPARQLWrapper(endpoint)
    sparql.setQuery(query)
    sparql.setReturnFormat(JSON)
    result = sparql.query()

    #process results as JSON
    processed_results = json.load(result.response)

    #Use accessory function to process results
    cols = processed_results['head']['vars']

    out = []
    for row in processed_results['results']['bindings']:
        item = []
        for c in cols:
            item.append(row.get(c, {}).get('value'))

            out.append(item)


    #this is the final df containing the results of the query
    df2 = pd.DataFrame(out, columns=cols)

    #now get the unique list of benchmark datasets
    benchmark_process_class_label = df2["benchmark_process_class_label"].unique()

    #start here the ds_count, which is actually the unique combination of 'desired_measure' and 'desired_benchmark'
    ds_count = 1
    #this place holder will contain the plots to be contatenated in the final plot
    plt_data = []

    #create final ratio_df_all for all the combinations
    ratio_df_all = pd.DataFrame(columns = ["ds_count", 
                                           "task",
                                           "ds", 
                                           "date",
                                           "model_label",
                                           "value",
                                           "percent_of_max_sota",
                                           "gain",
                                           "ratio",
                                           "max_sota"])
    #fig = go.Figure()

    #iterate over the benchmark list to create the ratio_df_all, which will be used to plot later
    for desired_benchmark in benchmark_process_class_label:

        #print(desired_benchmark)
        ratio_df = create_ratio(desired_measure,desired_benchmark,ds_count, selected_ito, metricName_negative_list) #read from the memory to get this polarity information
        #print("ds_count: ",ds_count)
        #ratio_df = create_ratio(desired_measure,"Citeseer",ds_count)

        if ratio_df is None:
                print("null")  
        elif len(ratio_df.index)>0:
            #print(desired_benchmark)  
            ratio_df_all = ratio_df_all.append(ratio_df, ignore_index=True)
        
        #ds_count = ds_count + 1

    
    if len(ratio_df_all.index) > 0:
        
        print("number of sota per dataset/metric: ",len(ratio_df_all.index))
        #edit here the column with the benchmark name
        ratio_df_all = ratio_df_all.replace(to_replace =' \- .*', value = '', regex = True)

        #add a column representing the percentual of the max obtained value per value
        #ratio_df_all["percent"] = round(ratio_df_all["value"].astype(float)/max(ratio_df_all["total"].astype(float)),2)*100

        #add one extra column wihth the desired measure name in order to condendate the plot
        ratio_df_all["merge"]=desired_measure


        
    return ratio_df_all

In [8]:
def get_ratio_df_per_ito(metricName_df, metricName_negative_list):
    #this will concatenate all the get_ratio_df_all_per_measure into only one df.
    get_ratio_df_all_per_global = pd.DataFrame(columns = ["ds_count", 
                                                          "task",
                                                          "ds", 
                                                          "date",
                                                          "model_label",
                                                          "value",
                                                          "percent_of_max_sota",
                                                          "gain",
                                                          "ratio",
                                                          "max_sota",
                                                          "percent_of_max_metric"])

    for desired_measure in metricName_df:

        #desired_measure = "Accuracy"
        #this is necessary to query the database correctly, it will be editaded later in the code.
        desired_measure = re.escape(desired_measure).replace("\\", "\\\\")
        print("#######",desired_measure)

        #Call external function to get the ratio per metric for a selected ito
        ratio_df_all_per_measure=get_ratio_df_all_per_measure(desired_measure, selected_ito, metricName_negative_list)

        if len(ratio_df_all_per_measure)>0:
            #this percentage has to be calculated out of the function, 
            #because it congreate the maximum obtained value accross all benchmarks
            ratio_df_all_per_measure["value"]=ratio_df_all_per_measure["value"].astype(float)
            ratio_df_all_per_measure["percent_of_max_metric"] = round(ratio_df_all_per_measure["value"] / max(ratio_df_all_per_measure["value"]),2)

            #save the results of the function to the global dataframe
            get_ratio_df_all_per_global=get_ratio_df_all_per_global.append(ratio_df_all_per_measure)
    
    return(get_ratio_df_all_per_global)

In [9]:
#Query all the metrics collected for the selected selected_ito
def get_metrics_df(selected_ito):
    query = """
        PREFIX edam: <http://edamontology.org/>
        PREFIX obo:  <http://purl.obolibrary.org/obo/>
        PREFIX ito:  <https://identifiers.org/ito:>
        PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
        PREFIX foaf: <http://xmlns.com/foaf/0.1/>

        SELECT *
        WHERE {
                ?paper a edam:data_0971 . 
                ?paper rdfs:label ?paperTitle. 
                ?paper obo:date ?date. 

                ?benchmark_process_individual 	rdfs:seeAlso ?paper ;
                                                rdfs:label ?model_label ;
                                                a ?benchmark_process_class . # this will create a place holder for the rdfs:type results that contains the information about the individual

                ?benchmark_process_class rdfs:label ?benchmark_process_class_label ;
                                         rdfs:subClassOf* """+selected_ito+""" . # this limits for the NLP class

                ?performance_measure rdfs:subPropertyOf* ito:performance_measure .
                ?performance_measure rdfs:label ?metricName .


                ?performance_measure rdfs:label ?metricName .
                ?benchmark_process_individual ?performance_measure ?result
            } ORDER by ?date



        """

    #send query via API
    sparql = SPARQLWrapper(endpoint)
    sparql.setQuery(query)
    sparql.setReturnFormat(JSON)
    result = sparql.query()

    #process results as JSON
    processed_results = json.load(result.response)

    #Use accessory function to process results
    cols = processed_results['head']['vars']

    out = []
    for row in processed_results['results']['bindings']:
        item = []
        for c in cols:
            item.append(row.get(c, {}).get('value'))

            out.append(item)


    #this is the final df containing the results of the query
    metricName_df = pd.DataFrame(out, columns=cols)

    #get unique benchmark_process_class_label
    #benchmark_process_class_label = df2["benchmark_process_class_label"].unique()

    #get unique metricName
    metricName_df = metricName_df["metricName"].unique()


    print("Number of metrics: ",len(metricName_df))
    
    return(metricName_df)    

In [10]:
def plot_task_trajectory(ito, class_label):
    import pandas as pd
    input_file_name="get_ratio_df_all_per_global_"+ito+".csv"
    get_ratio_df_all_per_global = pd.read_csv(input_file_name)

    # drop first column
    get_ratio_df_all_per_global = get_ratio_df_all_per_global.drop(["Unnamed: 0"], axis=1)


    # change the column to name the facets of the plot
    get_ratio_df_all_per_global = get_ratio_df_all_per_global.rename(
        columns={"merge": "metricName"}
    )
    get_ratio_df_all_per_global["metricName"] = get_ratio_df_all_per_global[
        "metricName"
    ].str.replace("\\", "")

    get_ratio_df_all_per_global["unique_ds"] = (
        get_ratio_df_all_per_global["ds"].astype(str)
        + ", "
        + get_ratio_df_all_per_global["metricName"]
    )
    get_ratio_df_all_per_global["unique_task"] = (
        get_ratio_df_all_per_global["task"].astype(str)
        + ", "
        + get_ratio_df_all_per_global["metricName"]
    )

    get_ratio_df_all_per_global["unique_task_ds_metric"] = (
        get_ratio_df_all_per_global["task"].astype(str)
        + ", "
        + get_ratio_df_all_per_global["ds"].astype(str)
        + ", "
        + get_ratio_df_all_per_global["metricName"]
    )


    #calculate average data frame using 'RATIO' as criteria FOR unique_task
    #This graph is fine, however, it will display overlapped datasets used for the same task in the same trajectory.
    traj = get_ratio_df_all_per_global.copy()

    #task causing some problems
    traj = traj.drop(traj[traj["metricName"]=="Parameters"].index)

    
    

    # this will delete from the traj data frame, all the ds/tasks which the counts are equal to 1
    # pd.set_option("display.max_rows", None)
    count_df = pd.DataFrame(traj["ds"].value_counts())
    count_df[count_df.ds == 1].index

    
    # the symbol ~ selects reverse to .isin
    
    traj = traj[~traj["ds"].isin(count_df[count_df.ds == 1].index)]

    

    count_df = pd.DataFrame(traj["task"].value_counts())
    count_df[count_df.task == 1].index

    
    traj = traj[~traj["task"].isin(count_df[count_df.task == 1].index)]

    if(len(traj) == 0):
        return("Not enough points to plot trajectory")
    
    
    # this function is declared to add the anchor dots (white dots) to the trajectory
    def add_white(category):
        fig_traj.add_trace(
            go.Scatter(
                x=average_summary_OUT["date"],
                y=average_summary_OUT[category],
                mode="markers",
                name=None,
                marker=dict(
                    size=10,
                    color="white",
                    # opacity=0.5,
                ),
                hovertemplate=average_summary_OUT[category]
                + "<BR>category: "
                + average_summary_OUT[category]
                + "<BR>date: "
                + average_summary_OUT["date"].astype("string")
                + "<BR>ratio: "
                + average_summary_OUT["ratio"].astype("string"),
            )
        )

    # variable_to_plot = "ratio"
    # choose here which grouping variable to use
    average_summary = pd.DataFrame(traj.groupby(["task", "date"])["ratio"].mean())

    # DROP anchors
    # average_summary = average_summary.drop(average_summary[average_summary["ratio"]>0.5].index)


    average_summary.sort_values(by=["date"], ascending=True)

    average_summary.reset_index(inplace=True)
    # average_summary["date"]=pd.to_datetime(average_summary['date'])
    # average_summary["date"]=average_summary["date"].dt.year
    average_summary["in_trajectory"] = 1


    i = 0
    for t in average_summary.task.unique():
        sota_per = 0
        # here the date can't be unique, because we want to look for the best value per year
        for v in average_summary[average_summary["task"] == t].ratio:
            per = average_summary[
                (average_summary["task"] == t) & (average_summary["ratio"] == v)
            ].ratio.astype(float)
            per = per.iloc[0]
            if per >= sota_per:
                # print(per)
                sota_per = per
                average_summary.loc[i, "in_trajectory"] = "IN"
            else:
                # average_summary = average_summary.drop(i)
                average_summary.loc[i, "in_trajectory"] = "IN"  # change back to OUT
                # sota_per = per
            i = i + 1

    # Add OUT to high gain values, normally those of the first results. They will be displayed as white dots.
    # NOTE
    # Ignore this for heatmap
    #average_summary.loc[average_summary["ratio"] > 0.5, "in_trajectory"] = "OUT"

    average_summary["ratio"] = average_summary["ratio"].apply(lambda x: round(x, 2))
    # average_summary = average_summary.rename(columns={'gain': 'gain'})

    # needs to rename here to average of percentage of maximum sota

    
    ###NOW PLOT IT
    # try using plotly
    import pandas as pd
    import plotly.express as px
    from plotly.validators.scatter.marker import SymbolValidator

    average_summary_IN = average_summary[average_summary["in_trajectory"] == "IN"]
    average_summary_OUT = average_summary[average_summary["in_trajectory"] == "OUT"]
    
    
    # This block will take the values from average_summary_IN and delete those that have only one arrow per trajectory
    count_df = pd.DataFrame(average_summary_IN["task"].value_counts())
    count_df[count_df.task == 1].index
    average_summary_IN = average_summary_IN[
        ~average_summary_IN["task"].isin(count_df[count_df.task == 1].index)
    ]
    # this will delete from the traj data frame, all the tasks which the average_summary_IN counts are equal to 1
    # meaning single arrows will be excluded from the plot

    
    fig_traj = px.line(average_summary_IN, x="date", y="task", color="task")

    # this trace adds first all the dates with a data point attached to it
    # fig3_df=average_summary[average_summary["in_trajectory"]=="OUT"]
    # fig3_df["date"]=pd.to_datetime(fig3_df['date'])
    # fig3_df["date"]=fig3_df["date"].dt.year


    # then as use the average_summary df to add only the data that forms a trajectory#
    # this value/date is the average of the percentual of maximum value achieved for the metrics at that date.

    # This adds/remove those points with value < 1 and > 0.5
    

    fig_traj.add_trace(
        go.Scatter(
            x=average_summary_IN["date"],
            y=average_summary_IN["task"],
            mode="markers",
            name=None,
            hovertemplate=average_summary_IN["task"]
            + "<BR>task: "
            + average_summary_IN["task"]
            + "<BR>date: "
            + average_summary_IN["date"].astype("string")
            + "<BR>ratio: "
            + average_summary_IN["ratio"].astype("string"),
            marker=dict(
                size=15,  # alpha ratio
                symbol=48,  # https://plotly.com/python/marker-style/
                opacity=0.7,  # alpha ratio
                color=average_summary_IN["ratio"],  # set color equal to a variable
                colorscale="YlGn",  # one of plotly colorscales
                colorbar=dict(title="ratio", lenmode="pixels"),
                showscale=True,
            ),
        )
    )

    add_white("task")
    
    fig_traj.update_traces(
        marker=dict(line=dict(color="black", width=1)),
        line=dict(width=1, color="black"),
    )

    fig_traj.update_xaxes(showgrid=True, gridcolor="lightBlue", title="Year")
    fig_traj.update_yaxes(showgrid=True, gridcolor="lightBlue", title=ito+": "+class_label)

    fig_traj.update_layout(
        #title="Trajectory for ratio (task per year)",
        title_text='Trajectory for ratio (task per year).<BR><BR>Anchor points (ratio>0.5) removed, trajectories with single arrow removed.',
        showlegend=False,
        plot_bgcolor="white",
        height=2000,
        width=900,
        xaxis=dict(
            tickmode="auto",
        ),
    )  # set the background colour)

    # fig.update_layout(margin_pad=1)

    fig_traj.show()
    fig_traj.write_image("top_classes_trajectory_plots/"+ito+".png")
    
  

### End function definitions

In [11]:
endpoint

'http://192.168.56.1:9999/blazegraph/sparql'

In [12]:
#First select here the list with all top levels that will be used
query = """
        PREFIX ito: <https://identifiers.org/ito:>
        SELECT ?top_level_class ?class_label
        WHERE { ?top_level_class rdfs:subClassOf ito:ITO_01625 ;
                         rdfs:label ?class_label .
                FILTER(?top_level_class != ito:Benchmarking) 
                FILTER(?top_level_class != ito:ITO_01524) 
              }

        """

#send query via API
sparql = SPARQLWrapper(endpoint)
sparql.setQuery(query)
sparql.setReturnFormat(JSON)
result = sparql.query()

#process results as JSON
processed_results = json.load(result.response)

#Use accessory function to process results
cols = processed_results['head']['vars']

out = []
for row in processed_results['results']['bindings']:
    item = []
    for c in cols:
        item.append(row.get(c, {}).get('value'))
        out.append(item)

#this is the final df containing the results of the query
top_level = pd.DataFrame(out, columns=cols)

top_level = top_level.drop_duplicates().reset_index(drop=True)

top_level

Unnamed: 0,top_level_class,class_label
0,https://identifiers.org/ito:ITO_00101,Vision process
1,https://identifiers.org/ito:ITO_00113,Miscellaneous process
2,https://identifiers.org/ito:ITO_00115,Fundamental AI process
3,https://identifiers.org/ito:ITO_00126,Biomedical AI process
4,https://identifiers.org/ito:ITO_00131,Time Series process
5,https://identifiers.org/ito:ITO_00137,Graph process
6,https://identifiers.org/ito:ITO_00141,Natural Language Processing
7,https://identifiers.org/ito:ITO_00145,Audio process
8,https://identifiers.org/ito:ITO_00310,Adversarial process
9,https://identifiers.org/ito:ITO_00485,Computer code process


In [13]:
#Retrieve metrics used for ito
selected_ito = "ito:ITO_00141"
metricName_df = get_metrics_df(selected_ito)     
#remove here metrics with problems
metricName_df = metricName_df[metricName_df!="Accuracy (pose)"]
metricName_df = metricName_df[metricName_df!="F1 (Sequence)"]



Number of metrics:  250


In [16]:
pd.DataFrame(metricName_df)

Unnamed: 0,0
0,F1
1,Accuracy
2,PPL
3,% Test Accuracy
4,MRR
...,...
245,Cased sacreBLEU
246,ICAT Score
247,PA
248,DE


## Metrics polarities
### Start with the call for the metrics polarities

In [56]:
metrics_polarity[metrics_polarity["metricName"]=="video-to-text R@1"]

Unnamed: 0,task,datasets,metricName,rank_1,rank_2,polarity
577,Video Retrieval,MSR-VTT-1kA,video-to-text R@1,43.5,42.7,pos
601,Video Retrieval,MSVD,video-to-text R@1,58.7,62.0,neg


In [17]:
#get all negative metrics (takes 1-2min to run)
metrics_polarity = get_metrics_polarity()

metricName_negative_list = metrics_polarity[metrics_polarity["polarity"]=="neg"]["metricName"].unique()
metricName_negative_list = metricName_negative_list.tolist()

In [24]:
#setup some pandas display modes
pd.set_option("display.max_columns", None)
pd.set_option("display.max_rows", None)
pd.set_option('display.max_colwidth', None)

#get polarity report
polarity_df_report = pd.DataFrame(metrics_polarity.groupby(['metricName', 'polarity'])['datasets'].count())
#order
polarity_df_report = polarity_df_report.sort_index(ascending=True)

#display
polarity_df_report_2 = polarity_df_report.copy()

polarity_df_report_2.reset_index(inplace=True)  

#print the polarity report
polarity_df_report_2.head(10)



Unnamed: 0,metricName,polarity,datasets
0,RMSE (Subject-exposed),neg,1
1,three pixel error,neg,1
2,# of clusters (k),pos,1
3,% Test Accuracy,pos,1
4,0..5sec,neg,1
5,1-1,pos,2
6,10 sec,pos,1
7,10%,pos,1
8,10-20% Mask PSNR,pos,1
9,14 gestures accuracy,pos,3


In [27]:
#Summarise how many different polarities are observed for each metric
metrics_polarity_counts =  pd.DataFrame(polarity_df_report_2.groupby(['metricName'])['polarity'].count())

metrics_polarity_counts

Unnamed: 0_level_0,polarity
metricName,Unnamed: 1_level_1
RMSE (Subject-exposed),1
three pixel error,1
# of clusters (k),1
% Test Accuracy,1
0..5sec,1
1-1,1
10 sec,1
10%,1
10-20% Mask PSNR,1
14 gestures accuracy,1


In [28]:
#count how many metrics have been assigned to two polarities
len(metrics_polarity_counts[metrics_polarity_counts["polarity"]==2])


87

In [35]:
#metrics_polarity_counts[metrics_polarity_counts["polarity"]==2].index
pd.DataFrame(metrics_polarity_counts[metrics_polarity_counts["polarity"]==2].index)

Unnamed: 0,metricName
0,ACC@1-100Clients
1,AED
2,AKD
3,AP
4,AVERAGE MAE
5,Abs Rel
6,Acc
7,Accuracy
8,Accuracy (%)
9,Accuracy (Cross-Setup)


In [57]:
metrics_list=metrics_polarity_counts[metrics_polarity_counts["polarity"]==2].index
example = pd.DataFrame({"metrics":metrics_list})
example

Unnamed: 0,metrics
0,ACC@1-100Clients
1,AED
2,AKD
3,AP
4,AVERAGE MAE
5,Abs Rel
6,Acc
7,Accuracy
8,Accuracy (%)
9,Accuracy (Cross-Setup)


In [64]:
metrics_w_2_polarities=["ACC@1-100Clients",
"AED",
"AKD",
"AP",
"AVERAGE MAE",
"Abs Rel",
"Acc",
"Accuracy",
"Accuracy (%)",
"Accuracy (Cross-Setup)",
"Average MAE",
"B1",
"B2",
"B3",
"B4",
"BLEU",
"Class IOU",
"EER",
"EM",
"Edit",
"Error Rate",
"Exact Span F1",
"F-Measure (Seen)",
"F-Measure (Unseen)",
"F1",
"F1 score",
"F1-score",
"F1-score (Augmented)",
"F1@10%",
"F1@25%",
"FID",
"GAR @0.1% FAR Obfuscation",
"GAR @0.1% FAR Overall",
"HR@20",
"Hit@20",
"IS",
"Inception Score",
"IoU",
"IoU overall",
"Jaccard (Decay)",
"Jaccard (Recall)",
"Jaccard (Unseen)",
"L1",
"LPIPS",
"MAE",
"MAE [bpm, session-wise]",
"MAP",
"MAX E-MEASURE",
"MAX F-MEASURE",
"METEOR",
"MKR",
"MPJPE",
"PCK@0.2",
"PCKh",
"PQth",
"PSNR",
"PSNR-B",
"Precision",
"R-Prec",
"R@1",
"RMSE",
"ROUGE-2",
"ROUGE-L",
"Rank-5",
"Recall",
"Restaurant 2014 (F1)",
"SPICE",
"SSIM",
"SSIM (sRGB)",
"Speed (FPS)",
"Top 5 Accuracy",
"Top-5 Accuracy",
"Video-mAP 0.5",
"mAP",
"mAP IOU@0.6",
"mAP IOU@0.7",
"mIoU",
"max E-Measure",
"max E-measure",
"max F-Measure",
"text-to-video Mean Rank",
"text-to-video Median Rank",
"video-to-text R@1",
"video-to-text R@10",
"video-to-text R@5"]

metrics_w_2_polarities_curation=["pos",
"neg",
"neg",
"pos",
"neg",
"pos",
"pos",
"pos",
"pos",
"pos",
"neg",
"pos",
"pos",
"pos",
"pos",
"pos",
"pos",
"neg",
"pos",
"pos",
"neg",
"pos",
"pos",
"pos",
"pos",
"pos",
"pos",
"pos",
"pos",
"pos",
"neg",
"pos",
"pos",
"pos",
"pos",
"pos",
"pos",
"pos",
"pos",
"neg",
"pos",
"pos",
"neg",
"neg",
"neg",
"neg",
"pos",
"pos",
"pos",
"pos",
"neg",
"neg",
"pos",
"pos",
"pos",
"pos",
"pos",
"pos",
"pos",
"pos",
"neg",
"pos",
"pos",
"pos",
"pos",
"pos",
"pos",
"pos",
"pos",
"pos",
"pos",
"pos",
"pos",
"pos",
"pos",
"pos",
"pos",
"pos",
"pos",
"pos",
"neg",
"neg",
"pos",
"pos",
"pos"]


curation_metrics_w_2_polarities = {'metricName':metrics_w_2_polarities,'curatedPolarity':metrics_w_2_polarities_curation}

curation_metrics_w_2_polarities=pd.DataFrame(curation_metrics_w_2_polarities)

curation_metrics_w_2_polarities

Unnamed: 0,metricName,curatedPolarity
0,ACC@1-100Clients,pos
1,AED,neg
2,AKD,neg
3,AP,pos
4,AVERAGE MAE,neg
5,Abs Rel,pos
6,Acc,pos
7,Accuracy,pos
8,Accuracy (%),pos
9,Accuracy (Cross-Setup),pos


In [80]:
#get the metrics with one polarity
metrics_w_1_polarity = metrics_polarity_counts[metrics_polarity_counts["polarity"]==1].index
metrics_w_1_polarity

Index([' RMSE (Subject-exposed)', ' three pixel error', '# of clusters (k)',
       '% Test Accuracy', '0..5sec', '1-1', '10 sec', '10%',
       '10-20% Mask PSNR', '14 gestures accuracy',
       ...
       'sMOTSA', 'spl', 'tOF', 'text-to-video R@1', 'text-to-video R@10',
       'text-to-video R@5', 'text-to-video R@50',
       'validation mean average precision', 'video-to-text Median Rank',
       'δ1.25'],
      dtype='object', name='metricName', length=662)

In [81]:
metrics_w_1_polarity.tolist()

[' RMSE (Subject-exposed)',
 ' three pixel error',
 '# of clusters (k)',
 '% Test Accuracy',
 '0..5sec',
 '1-1',
 '10 sec',
 '10%',
 '10-20% Mask PSNR',
 '14 gestures accuracy',
 '20-30% Mask PSNR',
 '28 gestures accuracy',
 '3 sec',
 '3-fold Accuracy',
 '30 sec',
 '30-40% Mask PSNR',
 '3DIoU',
 '3DPCK',
 '40-50% Mask PSNR',
 '5..20sec',
 'AAA',
 'ABX-across',
 'ACC',
 'ACC@1-10Clients',
 'ACC@1-50Clients',
 'ACER',
 'ADD',
 'ADDS AUC',
 'AMT',
 'AMrTRE',
 'AOP',
 'AP 0.5',
 'AP Hard',
 'AP Medium',
 "AP at 10' Elevation error",
 "AP at 15' Azimuth error",
 'AP50',
 'AP75',
 'AP@0.15',
 'AP@0.7',
 'APH/L2',
 'APL',
 'APM',
 'APc',
 'APf',
 'APr',
 'AR',
 'AR50',
 'AR75',
 'AR@100',
 'ARI',
 'ARL',
 'ARM',
 'AUC',
 'AUC (ABPA)',
 'AUC (Aspergillus)',
 'AUC (Diabetes)',
 'AUC (E. Coli)',
 'AUC (I. Obstruction)',
 'AUC (K. Pneumonia)',
 'AUC (val)',
 'AUC-J',
 'AUC-J&F',
 'AUC-ROC',
 'AUC0.08 private',
 'AUC@0.1 (all)',
 'AUROC',
 'AVG',
 'AVG-CDS',
 'Accuracy ',
 'Accuracy (10-fold)',
 '

In [91]:
metrics_w_1_polarity_df = polarity_df_report_2[polarity_df_report_2['metricName'] .isin(metrics_w_1_polarity.tolist())][["metricName","polarity"]]
metrics_w_1_polarity_df.head(5)

Unnamed: 0,metricName,polarity
0,RMSE (Subject-exposed),neg
1,three pixel error,neg
2,# of clusters (k),pos
3,% Test Accuracy,pos
4,0..5sec,neg


In [92]:

#curation_metrics_w_2_polarities[curation_metrics_w_2_polarities["metricName"]=="AED"]
curation_metrics_w_2_polarities.head(5)


Unnamed: 0,metricName,curatedPolarity
0,ACC@1-100Clients,pos
1,AED,neg
2,AKD,neg
3,AP,pos
4,AVERAGE MAE,neg


In [95]:
def intersection(lst1, lst2):
    lst3 = [value for value in lst1 if value in lst2]
    return lst3

print(intersection(curation_metrics_w_2_polarities["metricName"].tolist(), metrics_w_1_polarity_df["metricName"].tolist()))


#Check this intersection

['Speed (FPS)']


In [None]:
#Continue from here (14-09-2021)

In [31]:
#CONTINUE FROM HERE... how to exclude the negative polarities that have two reported polarities

In [95]:
for met in metricName_df:
    #print(met)
    if met in metricName_negative_list:
        print(met + "\tneg")
    else:
        print(met + "\tpos")

F1	neg
Accuracy	neg
PPL	pos
% Test Accuracy	pos
MRR	pos
Accuracy (2 classes)	pos
MAP	neg
RE+ Micro F1	pos
RE Micro F1	pos
NER Micro F1	pos
BLEU score	pos
Avg F1	pos
Test perplexity	neg
Validation perplexity	pos
BLEU-1	pos
F1 score	neg
Pearson Correlation	pos
Spearman Correlation	pos
MSE	neg
P-at-1	pos
Mean Error Rate	neg
Accuracy (trained on 10k)	pos
Accuracy (trained on 1k)	pos
Error	pos
Percentage correct	pos
Average	neg
DVD	pos
Books	pos
Electronics	pos
Kitchen	pos
CNN	pos
Daily Mail	pos
LAS	pos
UAS	pos
POS	pos
R10-at-1	pos
R10-at-2	pos
R10-at-5	pos
R2-at-1	pos
% Train Accuracy	pos
Parameters	pos
P-at-10%	pos
P-at-30%	pos
CR	pos
ROUGE-1	pos
ROUGE-2	neg
ROUGE-L	neg
Restaurant (Acc)	pos
Laptop (Acc)	neg
Mean Acc (Restaurant + Laptop)	pos
BLEU	neg
Micro Precision	pos
In-KB Accuracy	pos
Bit per Character (BPC)	pos
Accuracy-CN	pos
Accuracy-NE	pos
Unigram Acc	pos
N-gram F1	pos
ROUGE	neg
Number of params	pos
Avg accuracy	pos
R-at-1	pos
R-at-10	pos
R-at-5	pos
Mean Rank	neg
Precision	neg
Rec

In [76]:
metricName_negative_list

['COL',
 'FDE',
 'absolute relative error',
 'RMSE',
 'D3R',
 'ORD',
 'δ1.25',
 'ORD ',
 'Abs Rel',
 'RMSE log',
 'SQ Rel',
 ' three pixel error',
 'SSIM',
 'NIQE',
 'FID',
 'PSNR',
 'FED',
 'LPIPS',
 'Fullset (public)',
 'Mean Error Rate',
 'Mean NME ',
 'Mean NME',
 'Error rate',
 'pose',
 'NME',
 'Mean Reconstruction Error (mm)',
 'Equal Error Rate',
 'MAE',
 'NLDA',
 'GAR @0.1% FAR Obfuscation',
 'GAR @0.1% FAR Overall',
 'GAR @1% FAR Impersonation',
 'GAR @1% FAR Obfuscation',
 'GAR @1% FAR Overall',
 'Reasonable Miss Rate',
 'Heavy MR^-2',
 'Partial MR^-2',
 'Reasonable MR^-2',
 'LCS',
 'MIoU (13 classes)',
 'Quality',
 'IS',
 'Kernel Inception Distance',
 'Top 1 Error',
 'mean Corruption Error (mCE)',
 'Top-1 Error Rate',
 'Word Error Rate (WER)',
 'Percentage error',
 'BLEU',
 'Mean Rank',
 'R@1',
 'Mean',
 'MultiWOZ (Inform)',
 'MultiWOZ (Success)',
 'Average MPJPE (mm)',
 'MSE',
 'MRPE',
 'OOB Rate (10^−3) ',
 'Path Length',
 'Step Change (10^−3)',
 'MAE (PM2.5)',
 'MAE (10% 

In [52]:
metricName_df=["Error"]
metricName_df


['Error']

In [73]:
selected_ito

'ito:ITO_00141'

In [91]:
#claculate ratio dataframe and save as csv
get_ratio_df_all_per_global = get_ratio_df_per_ito(metricName_df, metricName_negative_list)
csv_file_name="get_ratio_df_all_per_global_"+selected_ito+".AllPOL.csv"
csv_file_name=csv_file_name.replace("ito:", "")
get_ratio_df_all_per_global.to_csv(csv_file_name)

####### F1
Creating ratio df for  F1 ,  ACL-ARC - Citation Intent Classification benchmarking , ds_count= 1


  df.new = df.replace(to_replace =' model in .*$', value = '', regex = True)
  ratio = round( gain / (max(sota["result"] - sota.loc[sota.index[0], 'result'])  ) ,4)


###SOTA RESULTS: 1
-41.0 - -41.0 == -41.0 Ratio == -inf
Creating ratio df for  F1 ,  MRPC - Semantic Textual Similarity benchmarking , ds_count= 1
###SOTA RESULTS: 2
-85.9 - -83.1 == -85.9 Ratio == -30.6786
-83.1 - -85.9 == 2.8 Ratio == 1.0
Creating ratio df for  F1 ,  MSRP - Paraphrase Identification benchmarking , ds_count= 1
###SOTA RESULTS: 2
-81.48 - -81.3 == -81.48 Ratio == -452.6667
-81.3 - -81.48 == 0.18 Ratio == 1.0
Creating ratio df for  F1 ,  WebQuestions - Question Answering benchmarking , ds_count= 1
###SOTA RESULTS: 1
-29.7 - -29.7 == -29.7 Ratio == -inf
Creating ratio df for  F1 ,  SciERC - Named Entity Recognition benchmarking , ds_count= 1
###SOTA RESULTS: 2
-65.12 - -64.2 == -65.12 Ratio == -70.7826
-64.2 - -65.12 == 0.92 Ratio == 1.0
Creating ratio df for  F1 ,  SensEval 2 Lexical Sample - Word Sense Disambiguation benchmarking , ds_count= 1
###SOTA RESULTS: 1
-66.2 - -66.2 == -66.2 Ratio == -inf
Creating ratio df for  F1 ,  SensEval 3 Lexical Sample - Word Sense Dis

###SOTA RESULTS: 1
-71.6 - -71.6 == -71.6 Ratio == -inf
Creating ratio df for  F1 ,  CoNLL 2000 - Named Entity Recognition benchmarking , ds_count= 1
###SOTA RESULTS: 1
-90.34 - -90.34 == -90.34 Ratio == -inf
Creating ratio df for  F1 ,  ACE 2005 - Named Entity Recognition benchmarking , ds_count= 1
###SOTA RESULTS: 1
-72.2 - -72.2 == -72.2 Ratio == -inf
Creating ratio df for  F1 ,  GENIA - Named Entity Recognition benchmarking , ds_count= 1
###SOTA RESULTS: 2
-74.7 - -73.9 == -74.7 Ratio == -93.375
-73.9 - -74.7 == 0.8 Ratio == 1.0
Creating ratio df for  F1 ,  IEMOCAP - Multimodal Emotion Recognition benchmarking , ds_count= 1
###SOTA RESULTS: 1
-0.756 - -0.756 == -0.756 Ratio == -inf
Creating ratio df for  F1 ,  SearchQA - Open-Domain Question Answering benchmarking , ds_count= 1
###SOTA RESULTS: 2
-64.5 - -63.6 == -64.5 Ratio == -71.6667
-63.6 - -64.5 == 0.9 Ratio == 1.0
Creating ratio df for  F1 ,  Ontonotes v5 (English) - Entity Typing benchmarking , ds_count= 1
null
Creating rati

###SOTA RESULTS: 1
-84.4 - -84.4 == -84.4 Ratio == -inf
Creating ratio df for  F1 ,  ASOS.com user intent - Intent Detection benchmarking , ds_count= 1
###SOTA RESULTS: 2
-0.865 - -0.856 == -0.865 Ratio == -96.1111
-0.856 - -0.865 == 0.009 Ratio == 1.0
Creating ratio df for  F1 ,  _sem 2012 Shared Task: Sherlock Dataset - Negation Scope Resolution benchmarking , ds_count= 1
###SOTA RESULTS: 1
-91.59 - -91.59 == -91.59 Ratio == -inf
Creating ratio df for  F1 ,  BioScope : Full Papers - Negation Scope Resolution benchmarking , ds_count= 1
###SOTA RESULTS: 1
-94.4 - -94.4 == -94.4 Ratio == -inf
Creating ratio df for  F1 ,  SFU Review Corpus - Negation Scope Resolution benchmarking , ds_count= 1
###SOTA RESULTS: 1
-91.25 - -91.25 == -91.25 Ratio == -inf
Creating ratio df for  F1 ,  BioScope : Abstracts - Negation Scope Resolution benchmarking , ds_count= 1
###SOTA RESULTS: 1
-95.74 - -95.74 == -95.74 Ratio == -inf
Creating ratio df for  F1 ,  BioScope : Full Papers - Speculation Scope Reso

  ratio = round( gain / (max(sota["result"] - sota.loc[sota.index[0], 'result'])  ) ,4)


###SOTA RESULTS: 1
-69.6 - -69.6 == -69.6 Ratio == -inf
Creating ratio df for  F1 ,  SoSciSoCi - Named Entity Recognition benchmarking , ds_count= 1
###SOTA RESULTS: 1
-0.82 - -0.82 == -0.82 Ratio == -inf
number of sota per dataset/metric:  145
####### Accuracy
Creating ratio df for  Accuracy ,  SST-2 Binary classification - Sentiment Analysis benchmarking , ds_count= 1
###SOTA RESULTS: 3
-85.4 - -54.72 == -85.4 Ratio == -2.7836
-82.9 - -85.4 == 2.5 Ratio == 0.0815
-54.72 - -82.9 == 28.18 Ratio == 0.9185
Creating ratio df for  Accuracy ,  MSRP - Paraphrase Identification benchmarking , ds_count= 1
###SOTA RESULTS: 2
-72.75 - -71.5 == -72.75 Ratio == -58.2
-71.5 - -72.75 == 1.25 Ratio == 1.0
Creating ratio df for  Accuracy ,  MRPC - Semantic Textual Similarity benchmarking , ds_count= 1
###SOTA RESULTS: 2
-80.4 - -76.2 == -80.4 Ratio == -19.1429
-76.2 - -80.4 == 4.2 Ratio == 1.0
Creating ratio df for  Accuracy ,  SST-5 Fine-grained classification - Sentiment Analysis benchmarking , ds_c

###SOTA RESULTS: 3
-80.3 - -76.5 == -80.3 Ratio == -21.1316
-77.1 - -80.3 == 3.2 Ratio == 0.8421
-76.5 - -77.1 == 0.6 Ratio == 0.1579
Creating ratio df for  Accuracy ,  IEMOCAP - Emotion Recognition in Conversation benchmarking , ds_count= 1
###SOTA RESULTS: 1
-56.32 - -56.32 == -56.32 Ratio == -inf
Creating ratio df for  Accuracy ,  Ohsumed - Text Classification benchmarking , ds_count= 1
###SOTA RESULTS: 1
-36.2 - -36.2 == -36.2 Ratio == -inf
Creating ratio df for  Accuracy ,  GQA Test2019 - Visual Question Answering benchmarking , ds_count= 1
###SOTA RESULTS: 1
-49.74 - -49.74 == -49.74 Ratio == -inf
Creating ratio df for  Accuracy ,  ICSI Meeting Recorder Dialog Act (MRDA) corpus - Dialog Act Classification benchmarking , ds_count= 1
null
Creating ratio df for  Accuracy ,  WOS-11967 - Document Classification benchmarking , ds_count= 1
###SOTA RESULTS: 1
-86.07 - -86.07 == -86.07 Ratio == -inf
Creating ratio df for  Accuracy ,  WOS-5736 - Document Classification benchmarking , ds_co

  ratio = round( gain / (max(sota["result"] - sota.loc[sota.index[0], 'result'])  ) ,4)


###SOTA RESULTS: 1
-37.4 - -37.4 == -37.4 Ratio == -inf
Creating ratio df for  Accuracy ,  R8 - Text Classification benchmarking , ds_count= 1
###SOTA RESULTS: 1
-96.7 - -96.7 == -96.7 Ratio == -inf
Creating ratio df for  Accuracy ,  V-SNLI - Natural Language Inference benchmarking , ds_count= 1
###SOTA RESULTS: 1
-86.41 - -86.41 == -86.41 Ratio == -inf
Creating ratio df for  Accuracy ,  CMU-MOSEI - Multimodal Sentiment Analysis benchmarking , ds_count= 1
###SOTA RESULTS: 1
-76.9 - -76.9 == -76.9 Ratio == -inf
Creating ratio df for  Accuracy ,  LAMBADA - Language Modelling benchmarking , ds_count= 1
###SOTA RESULTS: 1
-56.25 - -56.25 == -56.25 Ratio == -inf
Creating ratio df for  Accuracy ,  CLEVR - Visual Question Answering benchmarking , ds_count= 1
###SOTA RESULTS: 2
-98.8 - -65.9 == -98.8 Ratio == -3.003
-65.9 - -98.8 == 32.9 Ratio == 1.0
Creating ratio df for  Accuracy ,  Query Wellformedness - Query Wellformedness benchmarking , ds_count= 1
###SOTA RESULTS: 1
-70.7 - -70.7 == -70

null
Creating ratio df for  Accuracy ,  Financial PhraseBank - Sentiment Analysis benchmarking , ds_count= 1
###SOTA RESULTS: 1
-86 - -86 == -86 Ratio == -inf
Creating ratio df for  Accuracy ,  WikiSQL - Semantic Parsing benchmarking , ds_count= 1
###SOTA RESULTS: 1
-89 - -89 == -89 Ratio == -inf
Creating ratio df for  Accuracy ,  CoNLL-Aida - Entity Linking benchmarking , ds_count= 1
###SOTA RESULTS: 1
-94.9 - -94.9 == -94.9 Ratio == -inf
Creating ratio df for  Accuracy ,  TAC-KBP 2010 - Entity Linking benchmarking , ds_count= 1
###SOTA RESULTS: 1
-89.8 - -89.8 == -89.8 Ratio == -inf
Creating ratio df for  Accuracy ,  MNIST - Handwritten Digit Recognition benchmarking , ds_count= 1
###SOTA RESULTS: 1
-96.95 - -96.95 == -96.95 Ratio == -inf
number of sota per dataset/metric:  172
####### PPL
Creating ratio df for  PPL ,  One Billion Word - Language Modelling benchmarking , ds_count= 1
###SOTA RESULTS: 2
51.3 - 52.9 == 51.3 Ratio == 32.0625
52.9 - 51.3 == 1.6 Ratio == 1.0
Creating ratio

###SOTA RESULTS: 5
80.8 - 88.6 == 80.8 Ratio == 10.359
83.4 - 80.8 == 2.6 Ratio == 0.3333
83.6 - 83.4 == 0.2 Ratio == 0.0256
88.4 - 83.6 == 4.8 Ratio == 0.6154
88.6 - 88.4 == 0.2 Ratio == 0.0256
Creating ratio df for  NER\\ Micro\\ F1 ,  CoNLL04 - Relation Extraction benchmarking , ds_count= 1
###SOTA RESULTS: 3
80.7 - 87.8 == 80.7 Ratio == 11.3662
85.6 - 80.7 == 4.9 Ratio == 0.6901
87.8 - 85.6 == 2.2 Ratio == 0.3099
number of sota per dataset/metric:  11
####### BLEU\\ score
Creating ratio df for  BLEU\\ score ,  WMT2014 English-French - Machine Translation benchmarking , ds_count= 1
###SOTA RESULTS: 12
34.54 - 45.6 == 34.54 Ratio == 3.123
36.2 - 34.54 == 1.66 Ratio == 0.1501
36.5 - 36.2 == 0.3 Ratio == 0.0271
37.5 - 36.5 == 1.0 Ratio == 0.0904
39.2 - 37.5 == 1.7 Ratio == 0.1537
39.9 - 39.2 == 0.7 Ratio == 0.0633
40.56 - 39.9 == 0.66 Ratio == 0.0597
41.3 - 40.56 == 0.74 Ratio == 0.0669
41.4 - 41.3 == 0.1 Ratio == 0.009
41.5 - 41.4 == 0.1 Ratio == 0.009
43.2 - 41.5 == 1.7 Ratio == 0.15

###SOTA RESULTS: 3
10.64 - 54.64 == 10.64 Ratio == 0.2418
54.37 - 10.64 == 43.73 Ratio == 0.9939
54.64 - 54.37 == 0.27 Ratio == 0.0061
Creating ratio df for  BLEU\\-1 ,  quora - Paraphrase Generation benchmarking , ds_count= 1
###SOTA RESULTS: 2
22.9 - 45.7 == 22.9 Ratio == 1.0044
45.7 - 22.9 == 22.8 Ratio == 1.0
Creating ratio df for  BLEU\\-1 ,  Visual Question Generation - Question Generation benchmarking , ds_count= 1
###SOTA RESULTS: 1
36 - 36 == 36 Ratio == inf
Creating ratio df for  BLEU\\-1 ,  DailyDialog - Text Generation benchmarking , ds_count= 1
###SOTA RESULTS: 1
14.17 - 14.17 == 14.17 Ratio == inf
Creating ratio df for  BLEU\\-1 ,  COCO - Image Captioning benchmarking , ds_count= 1
###SOTA RESULTS: 1
64.2 - 64.2 == 64.2 Ratio == inf
number of sota per dataset/metric:  13
####### F1\\ score
Creating ratio df for  F1\\ score ,  Penn Treebank - Constituency Parsing benchmarking , ds_count= 1
###SOTA RESULTS: 5
92.1 - 95.6 == 92.1 Ratio == 26.3143
93.8 - 92.1 == 1.7 Ratio == 

###SOTA RESULTS: 2
9.51 - 14.0 == 9.51 Ratio == 2.118
14.0 - 9.51 == 4.49 Ratio == 1.0
Creating ratio df for  Error ,  Yelp Binary classification - Sentiment Analysis benchmarking , ds_count= 1
###SOTA RESULTS: 1
4.88 - 4.88 == 4.88 Ratio == inf
Creating ratio df for  Error ,  Yelp Fine-grained classification - Sentiment Analysis benchmarking , ds_count= 1
###SOTA RESULTS: 2
37.95 - 46.8 == 37.95 Ratio == 4.2881
46.8 - 37.95 == 8.85 Ratio == 1.0
Creating ratio df for  Error ,  DBpedia - Text Classification benchmarking , ds_count= 1
###SOTA RESULTS: 1
1.55 - 1.55 == 1.55 Ratio == inf
Creating ratio df for  Error ,  TREC-50 - Text Classification benchmarking , ds_count= 1
###SOTA RESULTS: 1
2.8 - 2.8 == 2.8 Ratio == inf
Creating ratio df for  Error ,  Amazon-2 - Text Classification benchmarking , ds_count= 1
###SOTA RESULTS: 2
2.11 - 3.9 == 2.11 Ratio == 1.1788
3.9 - 2.11 == 1.79 Ratio == 1.0
Creating ratio df for  Error ,  Amazon-5 - Text Classification benchmarking , ds_count= 1
###SO

###SOTA RESULTS: 2
46.5 - 52.4 == 46.5 Ratio == 7.8814
52.4 - 46.5 == 5.9 Ratio == 1.0
Creating ratio df for  P\\-at\\-30% ,  NYT Corpus - Relation Extraction benchmarking , ds_count= 1
###SOTA RESULTS: 2
51.8 - 59.5 == 51.8 Ratio == 6.7273
59.5 - 51.8 == 7.7 Ratio == 1.0
number of sota per dataset/metric:  4
####### CR
Creating ratio df for  CR ,  Google Dataset - Sentence Compression benchmarking , ds_count= 1
###SOTA RESULTS: 2
0.38 - 0.43 == 0.38 Ratio == 7.6
0.43 - 0.38 == 0.05 Ratio == 1.0
number of sota per dataset/metric:  2
####### ROUGE\\-1
Creating ratio df for  ROUGE\\-1 ,  DUC 2004 Task 1 - Extractive Text Summarization benchmarking , ds_count= 1
###SOTA RESULTS: 1
26.55 - 26.55 == 26.55 Ratio == inf
Creating ratio df for  ROUGE\\-1 ,  GigaWord - Text Summarization benchmarking , ds_count= 1
###SOTA RESULTS: 6
30.88 - 38.9 == 30.88 Ratio == 3.8504
31.0 - 30.88 == 0.12 Ratio == 0.015
36.4 - 31.0 == 5.4 Ratio == 0.6733
37.57 - 36.4 == 1.17 Ratio == 0.1459
38.73 - 37.57 == 1.

###SOTA RESULTS: 1
88 - 88 == 88 Ratio == inf
Creating ratio df for  Restaurant\\ \\(Acc\\) ,  SemEval 2015 Task 12 - Aspect-Based Sentiment Analysis benchmarking , ds_count= 1
###SOTA RESULTS: 2
80.6 - 81.7 == 80.6 Ratio == 73.2727
81.7 - 80.6 == 1.1 Ratio == 1.0
number of sota per dataset/metric:  12
####### Laptop\\ \\(Acc\\)
Creating ratio df for  Laptop\\ \\(Acc\\) ,  SemEval 2014 Task 4 Sub Task 2 - Aspect-Based Sentiment Analysis benchmarking , ds_count= 1
###SOTA RESULTS: 10
68.13 - 82.29 == 68.13 Ratio == 4.8114
72.21 - 68.13 == 4.08 Ratio == 0.2881
74.49 - 72.21 == 2.28 Ratio == 0.161
75.24 - 74.49 == 0.75 Ratio == 0.053
76.01 - 75.24 == 0.77 Ratio == 0.0544
77.27 - 76.01 == 1.26 Ratio == 0.089
78.99 - 77.27 == 1.72 Ratio == 0.1215
79.93 - 78.99 == 0.94 Ratio == 0.0664
81.35 - 79.93 == 1.42 Ratio == 0.1003
82.29 - 81.35 == 0.94 Ratio == 0.0664
number of sota per dataset/metric:  10
####### Mean\\ Acc\\ \\(Restaurant\\ \\+\\ Laptop\\)
Creating ratio df for  Mean\\ Acc\\ \\(Res

null
number of sota per dataset/metric:  2
####### In\\-KB\\ Accuracy
Creating ratio df for  In\\-KB\\ Accuracy ,  AIDA-CoNLL - Entity Disambiguation benchmarking , ds_count= 1
###SOTA RESULTS: 3
93.1 - 95.0 == 93.1 Ratio == 49.0
94.7 - 93.1 == 1.6 Ratio == 0.8421
95.0 - 94.7 == 0.3 Ratio == 0.1579
number of sota per dataset/metric:  3
####### Bit\\ per\\ Character\\ \\(BPC\\)
Creating ratio df for  Bit\\ per\\ Character\\ \\(BPC\\) ,  Text8 - Language Modelling benchmarking , ds_count= 1
###SOTA RESULTS: 1
1.63 - 1.63 == 1.63 Ratio == inf
Creating ratio df for  Bit\\ per\\ Character\\ \\(BPC\\) ,  Hutter Prize - Language Modelling benchmarking , ds_count= 1
###SOTA RESULTS: 1
1.31 - 1.31 == 1.31 Ratio == inf
Creating ratio df for  Bit\\ per\\ Character\\ \\(BPC\\) ,  enwik8 - Language Modelling benchmarking , ds_count= 1
###SOTA RESULTS: 3
1.27 - 1.34 == 1.27 Ratio == 18.1429
1.32 - 1.27 == 0.05 Ratio == 0.7143
1.34 - 1.32 == 0.02 Ratio == 0.2857
Creating ratio df for  Bit\\ per\\ Cha

###SOTA RESULTS: 5
76.88 - 86.73 == 76.88 Ratio == 7.8051
78.1 - 76.88 == 1.22 Ratio == 0.1239
79.75 - 78.1 == 1.65 Ratio == 0.1675
80.63 - 79.75 == 0.88 Ratio == 0.0893
86.73 - 80.63 == 6.1 Ratio == 0.6193
Creating ratio df for  R\\-at\\-5 ,  Flickr30k Entities Test - Phrase Grounding benchmarking , ds_count= 1
###SOTA RESULTS: 2
84.22 - 84.98 == 84.22 Ratio == 110.8158
84.98 - 84.22 == 0.76 Ratio == 1.0
Creating ratio df for  R\\-at\\-5 ,  Flickr30k Entities Dev - Phrase Grounding benchmarking , ds_count= 1
###SOTA RESULTS: 1
84.49 - 84.49 == 84.49 Ratio == inf
number of sota per dataset/metric:  13
####### Mean\\ Rank
Creating ratio df for  Mean\\ Rank ,  VisDial v0.9 val - Visual Dialog benchmarking , ds_count= 1
###SOTA RESULTS: 1
5.84 - 5.84 == 5.84 Ratio == inf
number of sota per dataset/metric:  1
####### Precision
Creating ratio df for  Precision ,  Ubuntu Dialogue (Activity) - Dialog Generation benchmarking , ds_count= 1
null
Creating ratio df for  Precision ,  Ubuntu Dialogu

###SOTA RESULTS: 3
-64.1 - -59.95 == -64.1 Ratio == -15.4458
-62.5 - -64.1 == 1.6 Ratio == 0.3855
-59.95 - -62.5 == 2.55 Ratio == 0.6145
Creating ratio df for  EM ,  SQuAD1.1 - Question Answering benchmarking , ds_count= 1
###SOTA RESULTS: 1
-54.505 - -54.505 == -54.505 Ratio == -inf
Creating ratio df for  EM ,  SQuAD1.1 - Open-Domain Question Answering benchmarking , ds_count= 1
###SOTA RESULTS: 2
-66.2 - -65.5 == -66.2 Ratio == -94.5714
-65.5 - -66.2 == 0.7 Ratio == 1.0
Creating ratio df for  EM ,  NewsQA - Question Answering benchmarking , ds_count= 1
###SOTA RESULTS: 1
-43.7 - -43.7 == -43.7 Ratio == -inf
Creating ratio df for  EM ,  Quasart-T - Question Answering benchmarking , ds_count= 1
###SOTA RESULTS: 1
-37.7 - -37.7 == -37.7 Ratio == -inf
Creating ratio df for  EM ,  SearchQA - Open-Domain Question Answering benchmarking , ds_count= 1
###SOTA RESULTS: 1
-41.9 - -41.9 == -41.9 Ratio == -inf
Creating ratio df for  EM ,  TriviaQA - Question Answering benchmarking , ds_count= 1


###SOTA RESULTS: 2
9.91 - 19.03 == 9.91 Ratio == 1.0866
19.03 - 9.91 == 9.12 Ratio == 1.0
Creating ratio df for  P\\-at\\-5 ,  Medical domain - Hypernym Discovery benchmarking , ds_count= 1
###SOTA RESULTS: 3
20.71 - 36.77 == 20.71 Ratio == 1.2895
21.43 - 20.71 == 0.72 Ratio == 0.0448
36.77 - 21.43 == 15.34 Ratio == 0.9552
Creating ratio df for  P\\-at\\-5 ,  AAPD - Multi-Label Text Classification benchmarking , ds_count= 1
###SOTA RESULTS: 1
41.19 - 41.19 == 41.19 Ratio == inf
Creating ratio df for  P\\-at\\-5 ,  Amazon-12K - Multi-Label Text Classification benchmarking , ds_count= 1
###SOTA RESULTS: 1
63.16 - 63.16 == 63.16 Ratio == inf
Creating ratio df for  P\\-at\\-5 ,  Kan-Shan Cup - Multi-Label Text Classification benchmarking , ds_count= 1
###SOTA RESULTS: 1
25.88 - 25.88 == 25.88 Ratio == inf
Creating ratio df for  P\\-at\\-5 ,  Wiki-30K - Multi-Label Text Classification benchmarking , ds_count= 1
###SOTA RESULTS: 1
62.87 - 62.87 == 62.87 Ratio == inf
Creating ratio df for  P\

Creating ratio df for  Average\\ Recall ,  SemEval 2017 Task 4-A - Sentiment Analysis benchmarking , ds_count= 1
###SOTA RESULTS: 1
0.685 - 0.685 == 0.685 Ratio == inf
Creating ratio df for  Average\\ Recall ,  ArSAS - Sentiment Analysis benchmarking , ds_count= 1
###SOTA RESULTS: 1
0.9 - 0.9 == 0.9 Ratio == inf
Creating ratio df for  Average\\ Recall ,  ASTD - Sentiment Analysis benchmarking , ds_count= 1
###SOTA RESULTS: 1
0.62 - 0.62 == 0.62 Ratio == inf
number of sota per dataset/metric:  3
####### F1\\-score
Creating ratio df for  F1\\-score ,  SemEval - Sentiment Analysis benchmarking , ds_count= 1
###SOTA RESULTS: 1
0.685 - 0.685 == 0.685 Ratio == inf
Creating ratio df for  F1\\-score ,  GeNeVA (i-CLEVR) - Text-to-Image Generation benchmarking , ds_count= 1
null
Creating ratio df for  F1\\-score ,  GeNeVA (CoDraw) - Text-to-Image Generation benchmarking , ds_count= 1
null
Creating ratio df for  F1\\-score ,  200k Short Texts for Humor Detection - Humor Detection benchmarking , d

Creating ratio df for  Open ,  GQA Test2019 - Visual Question Answering benchmarking , ds_count= 1
###SOTA RESULTS: 3
34.83 - 47.64 == 34.83 Ratio == 2.719
45.47 - 34.83 == 10.64 Ratio == 0.8306
47.64 - 45.47 == 2.17 Ratio == 0.1694
number of sota per dataset/metric:  3
####### Execution\\ Accuracy
Creating ratio df for  Execution\\ Accuracy ,  WikiSQL - Code Generation benchmarking , ds_count= 1
###SOTA RESULTS: 6
35.9 - 89.2 == 35.9 Ratio == 0.6735
59.4 - 35.9 == 23.5 Ratio == 0.4409
68.0 - 59.4 == 8.6 Ratio == 0.1614
74.6 - 68.0 == 6.6 Ratio == 0.1238
82.6 - 74.6 == 8.0 Ratio == 0.1501
89.2 - 82.6 == 6.6 Ratio == 0.1238
number of sota per dataset/metric:  6
####### Exact\\ Match\\ Accuracy
Creating ratio df for  Exact\\ Match\\ Accuracy ,  WikiSQL - Code Generation benchmarking , ds_count= 1
###SOTA RESULTS: 5
23.4 - 83.7 == 23.4 Ratio == 0.3881
48.3 - 23.4 == 24.9 Ratio == 0.4129
62.8 - 48.3 == 14.5 Ratio == 0.2405
68.6 - 62.8 == 5.8 Ratio == 0.0962
83.7 - 68.6 == 15.1 Ratio == 0.2

###SOTA RESULTS: 1
31.1 - 31.1 == 31.1 Ratio == inf
number of sota per dataset/metric:  5
####### P\\-at\\-20
Creating ratio df for  P\\-at\\-20 ,  TREC Robust04 - Ad-Hoc Information Retrieval benchmarking , ds_count= 1
###SOTA RESULTS: 6
0.382 - 0.4667 == 0.382 Ratio == 4.51
0.389 - 0.382 == 0.007 Ratio == 0.0826
0.3948 - 0.389 == 0.0058 Ratio == 0.0685
0.4064 - 0.3948 == 0.0116 Ratio == 0.137
0.4287 - 0.4064 == 0.0223 Ratio == 0.2633
0.4667 - 0.4287 == 0.038 Ratio == 0.4486
number of sota per dataset/metric:  6
####### LPIPS
Creating ratio df for  LPIPS ,  Multi-Modal-CelebA-HQ - Text-to-Image Generation benchmarking , ds_count= 1
###SOTA RESULTS: 1
-0.512 - -0.512 == -0.512 Ratio == -inf
number of sota per dataset/metric:  1
####### Acc
Creating ratio df for  Acc ,  Multi-Modal-CelebA-HQ - Text-to-Image Generation benchmarking , ds_count= 1
###SOTA RESULTS: 1
-13.0 - -13.0 == -13.0 Ratio == -inf
number of sota per dataset/metric:  1
####### Real
Creating ratio df for  Real ,  Multi-

Creating ratio df for  Macro\\ F1 ,  Freebase FIGER - Entity Typing benchmarking , ds_count= 1
###SOTA RESULTS: 1
84.2 - 84.2 == 84.2 Ratio == inf
Creating ratio df for  Macro\\ F1 ,  FIGER - Entity Linking benchmarking , ds_count= 1
###SOTA RESULTS: 1
76.51 - 76.51 == 76.51 Ratio == inf
Creating ratio df for  Macro\\ F1 ,  NLP-TDMS (Exp, arXiv only) - Scientific Results Extraction benchmarking , ds_count= 1
null
Creating ratio df for  Macro\\ F1 ,  RCV1 - Text Classification benchmarking , ds_count= 1
###SOTA RESULTS: 1
60.1 - 60.1 == 60.1 Ratio == inf
Creating ratio df for  Macro\\ F1 ,  PWC Leaderboards (restricted) - Scientific Results Extraction benchmarking , ds_count= 1
null
number of sota per dataset/metric:  3
####### Micro\\ F1
Creating ratio df for  Micro\\ F1 ,  Freebase FIGER - Entity Typing benchmarking , ds_count= 1
###SOTA RESULTS: 1
85.7 - 85.7 == 85.7 Ratio == inf
Creating ratio df for  Micro\\ F1 ,  FIGER - Entity Linking benchmarking , ds_count= 1
###SOTA RESULTS: 1

###SOTA RESULTS: 1
66.2 - 66.2 == 66.2 Ratio == inf
number of sota per dataset/metric:  1
####### F1\\ \\(Short\\)
Creating ratio df for  F1\\ \\(Short\\) ,  Natural Questions - Question Answering benchmarking , ds_count= 1
###SOTA RESULTS: 1
52.1 - 52.1 == 52.1 Ratio == inf
number of sota per dataset/metric:  1
####### Bits\\ per\\ byte
Creating ratio df for  Bits\\ per\\ byte ,  The Pile - Language Modelling benchmarking , ds_count= 1
###SOTA RESULTS: 1
1.2253 - 1.2253 == 1.2253 Ratio == inf
number of sota per dataset/metric:  1
####### interest\\ \\(human\\)
Creating ratio df for  interest\\ \\(human\\) ,  Reddit (multi-ref) - Dialog Generation benchmarking , ds_count= 1
null
####### relevance\\ \\(human\\)
Creating ratio df for  relevance\\ \\(human\\) ,  Reddit (multi-ref) - Dialog Generation benchmarking , ds_count= 1
null
####### Accuracy\\ \\(3\\-way\\)
Creating ratio df for  Accuracy\\ \\(3\\-way\\) ,  SemEval 2014 Task 4 Subtask 4 - Aspect-Based Sentiment Analysis benchmarkin

Creating ratio df for  Restaurant\\ 2014\\ \\(F1\\) ,  SemEval - Aspect Term Extraction and Sentiment Classification benchmarking , ds_count= 1
###SOTA RESULTS: 1
70.72 - 70.72 == 70.72 Ratio == inf
number of sota per dataset/metric:  1
####### Laptop\\ 2014\\ \\(F1\\)
Creating ratio df for  Laptop\\ 2014\\ \\(F1\\) ,  SemEval - Aspect Term Extraction and Sentiment Classification benchmarking , ds_count= 1
###SOTA RESULTS: 1
61.73 - 61.73 == 61.73 Ratio == inf
number of sota per dataset/metric:  1
####### Restaurant\\ 2015\\ \\(F1\\)
Creating ratio df for  Restaurant\\ 2015\\ \\(F1\\) ,  SemEval - Aspect Term Extraction and Sentiment Classification benchmarking , ds_count= 1
###SOTA RESULTS: 1
60.22 - 60.22 == 60.22 Ratio == inf
number of sota per dataset/metric:  1
####### A1
Creating ratio df for  A1 ,  ANLI test - Natural Language Inference benchmarking , ds_count= 1
###SOTA RESULTS: 2
70.3 - 72.4 == 70.3 Ratio == 33.4762
72.4 - 70.3 == 2.1 Ratio == 1.0
number of sota per dataset/me

In [92]:
get_ratio_df_all_per_global[get_ratio_df_all_per_global["ds"]=="R8"]

Unnamed: 0,ds_count,task,ds,date,model_label,value,percent_of_max_sota,gain,ratio,max_sota,percent_of_max_metric,merge
104,1,Text Classification,R8,2018-06,TextEnt-full,-96.7,100.0,-96.7,-inf,-96.7,312.94,Accuracy
0,1,Text Classification,R8,2018-06,TextEnt-full,91.0,99.24,91.0,130.0,91.7,0.99,F\\-measure
1,1,Text Classification,R8,2019-09,NABoE-full,91.7,100.0,0.7,1.0,91.7,1.0,F\\-measure


In [62]:
i=0 ###try the rest
while i < len(top_level["top_level_class"]):
    print(top_level["top_level_class"][i])
    i = i+1

https://identifiers.org/ito:ITO_00101
https://identifiers.org/ito:ITO_00113
https://identifiers.org/ito:ITO_00115
https://identifiers.org/ito:ITO_00126
https://identifiers.org/ito:ITO_00131
https://identifiers.org/ito:ITO_00137
https://identifiers.org/ito:ITO_00141
https://identifiers.org/ito:ITO_00145
https://identifiers.org/ito:ITO_00310
https://identifiers.org/ito:ITO_00485
https://identifiers.org/ito:ITO_00491
https://identifiers.org/ito:ITO_00528
https://identifiers.org/ito:ITO_00600
https://identifiers.org/ito:ITO_00873
https://identifiers.org/ito:ITO_01532
https://identifiers.org/ito:ITO_00506x


In [87]:
#This block executes three steps:
#1) Retrieves the metrics list for one selected ito (normally a top class);
#2) Calculates the ratio dataframe for each metric to plot the trajectory (saves it as csv);
#3) Plots the trajectory pot for the ITO.
i=0
while i < len(top_level["top_level_class"]):
    print(top_level["top_level_class"][i])
    selected_ito = top_level["top_level_class"][i].replace("https://identifiers.org/","")
    class_label = top_level["class_label"][i]
    
    
    #Retrieve metrics used for ito
    metricName_df = get_metrics_df(selected_ito)     
    #remove here metrics with problems
    metricName_df = metricName_df[metricName_df!="Accuracy (pose)"]
    metricName_df = metricName_df[metricName_df!="F1 (Sequence)"]
    
    #claculate ratio dataframe and save as csv
    get_ratio_df_all_per_global = get_ratio_df_per_ito(metricName_df)
    csv_file_name="get_ratio_df_all_per_global_"+selected_ito+".csv"
    csv_file_name=csv_file_name.replace("ito:", "")
    get_ratio_df_all_per_global.to_csv(csv_file_name)
    
    #plot trajectory
    ito = selected_ito.replace("ito:","")
    plot_task_trajectory(ito, class_label)
    
    i = i+1

https://identifiers.org/ito:ITO_00101
Number of metrics:  607
####### AMrTRE
Creating ratio df for  AMrTRE ,  CIMA-10k - BIRL: Benchmark on Image Registration methods with Landmark validations benchmarking , ds_count= 1



Pandas doesn't allow columns to be created via a new attribute name - see https://pandas.pydata.org/pandas-docs/stable/indexing.html#attribute-access



###SOTA RESULTS: 1
number of sota per dataset/metric:  1
####### MMrTRE
Creating ratio df for  MMrTRE ,  CIMA-10k - BIRL: Benchmark on Image Registration methods with Landmark validations benchmarking , ds_count= 1
###SOTA RESULTS: 1
number of sota per dataset/metric:  1
####### Mean\\ target\\ overlap\\ ratio
Creating ratio df for  Mean\\ target\\ overlap\\ ratio ,  CUMC12 - Diffeomorphic Medical Image Registration benchmarking , ds_count= 1
###SOTA RESULTS: 2
number of sota per dataset/metric:  2
####### RMSE
Creating ratio df for  RMSE ,  Automatic Cardiac Diagnosis Challenge (ACDC) - Diffeomorphic Medical Image Registration benchmarking , ds_count= 1
null
Creating ratio df for  RMSE ,  NYU-Depth V2 - Monocular Depth Estimation benchmarking , ds_count= 1
###SOTA RESULTS: 1
Creating ratio df for  RMSE ,  CARPK - Object Counting benchmarking , ds_count= 1
###SOTA RESULTS: 3
Creating ratio df for  RMSE ,  Mid-Air Dataset - Monocular Depth Estimation benchmarking , ds_count= 1
###SOTA R

###SOTA RESULTS: 3
Creating ratio df for  Accuracy ,  Cambridge - Hand Gesture Recognition benchmarking , ds_count= 1
###SOTA RESULTS: 2
Creating ratio df for  Accuracy ,  UT - Human Interaction Recognition benchmarking , ds_count= 1
###SOTA RESULTS: 3
Creating ratio df for  Accuracy ,  MNIST - Image Classification benchmarking , ds_count= 1
###SOTA RESULTS: 3
Creating ratio df for  Accuracy ,  FER2013 - Facial Expression Recognition benchmarking , ds_count= 1
###SOTA RESULTS: 2
Creating ratio df for  Accuracy ,  ImageNet - 0-Shot - Few-Shot Image Classification benchmarking , ds_count= 1
###SOTA RESULTS: 2
Creating ratio df for  Accuracy ,  ImageNet-10 - Image Clustering benchmarking , ds_count= 1
###SOTA RESULTS: 4
Creating ratio df for  Accuracy ,  CIFAR-10 - Image Clustering benchmarking , ds_count= 1
###SOTA RESULTS: 5
Creating ratio df for  Accuracy ,  Imagenet-dog-15 - Image Clustering benchmarking , ds_count= 1
###SOTA RESULTS: 4
Creating ratio df for  Accuracy ,  Tiny-ImageNet

###SOTA RESULTS: 1
Creating ratio df for  Accuracy ,  CUB Birds - Image Clustering benchmarking , ds_count= 1
###SOTA RESULTS: 3
Creating ratio df for  Accuracy ,  Stanford Dogs - Image Clustering benchmarking , ds_count= 1
###SOTA RESULTS: 4
Creating ratio df for  Accuracy ,  Stanford Cars - Image Clustering benchmarking , ds_count= 1
###SOTA RESULTS: 4
Creating ratio df for  Accuracy ,  Flowers-102 - 0-Shot - Few-Shot Image Classification benchmarking , ds_count= 1
###SOTA RESULTS: 1
Creating ratio df for  Accuracy ,  HMDBfull-to-UCF - Domain Adaptation benchmarking , ds_count= 1
###SOTA RESULTS: 2
Creating ratio df for  Accuracy ,  VisDA2017 - Domain Adaptation benchmarking , ds_count= 1
###SOTA RESULTS: 4
Creating ratio df for  Accuracy ,  VQA v2 test-dev - Visual Question Answering benchmarking , ds_count= 1
###SOTA RESULTS: 9
Creating ratio df for  Accuracy ,  ReferIt - Phrase Grounding benchmarking , ds_count= 1
###SOTA RESULTS: 1
Creating ratio df for  Accuracy ,  Flickr30k Ent

null
Creating ratio df for  Accuracy ,  TuSimple - Lane Detection benchmarking , ds_count= 1
###SOTA RESULTS: 3
Creating ratio df for  Accuracy ,  STL-10 - Semi-Supervised Image Classification benchmarking , ds_count= 1
###SOTA RESULTS: 2
Creating ratio df for  Accuracy ,  IJB-B - Face Identification benchmarking , ds_count= 1
###SOTA RESULTS: 1
Creating ratio df for  Accuracy ,  IJB-A - Face Identification benchmarking , ds_count= 1
###SOTA RESULTS: 2
Creating ratio df for  Accuracy ,  MovieQA - Video Story QA benchmarking , ds_count= 1
###SOTA RESULTS: 2
Creating ratio df for  Accuracy ,  PETA - Pedestrian Attribute Recognition benchmarking , ds_count= 1
###SOTA RESULTS: 2
Creating ratio df for  Accuracy ,  PA-100K - Pedestrian Attribute Recognition benchmarking , ds_count= 1
###SOTA RESULTS: 2
Creating ratio df for  Accuracy ,  RAP - Pedestrian Attribute Recognition benchmarking , ds_count= 1
###SOTA RESULTS: 2
Creating ratio df for  Accuracy ,  SFEW - Facial Expression Recognition 

###SOTA RESULTS: 1
Creating ratio df for  Accuracy ,  LboroHAR - Multimodal Activity Recognition benchmarking , ds_count= 1
###SOTA RESULTS: 2
Creating ratio df for  Accuracy ,  street2shop - topwear - Image Retrieval benchmarking , ds_count= 1
###SOTA RESULTS: 1
Creating ratio df for  Accuracy ,  Northwestern University - Hand Gesture Recognition benchmarking , ds_count= 1
###SOTA RESULTS: 1
Creating ratio df for  Accuracy ,  CapgMyo DB-c - Gesture Recognition benchmarking , ds_count= 1
###SOTA RESULTS: 1
Creating ratio df for  Accuracy ,  Ninapro DB-1 8 gestures - Gesture Recognition benchmarking , ds_count= 1
###SOTA RESULTS: 1
Creating ratio df for  Accuracy ,  Ninapro DB-1 12 gestures - Gesture Recognition benchmarking , ds_count= 1
###SOTA RESULTS: 1
Creating ratio df for  Accuracy ,  CapgMyo DB-b - Gesture Recognition benchmarking , ds_count= 1
###SOTA RESULTS: 1
Creating ratio df for  Accuracy ,  FERG - Facial Expression Recognition benchmarking , ds_count= 1
###SOTA RESULTS: 1

null
Creating ratio df for  Percentage\\ correct ,  COCO Visual Question Answering (VQA) abstract 1.0 multiple choice - Visual Question Answering benchmarking , ds_count= 1
null
Creating ratio df for  Percentage\\ correct ,  COCO Visual Question Answering (VQA) real images 1.0 open ended - Visual Question Answering benchmarking , ds_count= 1
null
Creating ratio df for  Percentage\\ correct ,  COCO Visual Question Answering (VQA) real images 1.0 multiple choice - Visual Question Answering benchmarking , ds_count= 1
null
Creating ratio df for  Percentage\\ correct ,  Visual7W - Visual Question Answering benchmarking , ds_count= 1
###SOTA RESULTS: 2
Creating ratio df for  Percentage\\ correct ,  Visual Genome (subjects) - Visual Question Answering benchmarking , ds_count= 1
null
Creating ratio df for  Percentage\\ correct ,  Visual Genome (pairs) - Visual Question Answering benchmarking , ds_count= 1
null
Creating ratio df for  Percentage\\ correct ,  cifar10, 250 Labels - Semi-Supervised

###SOTA RESULTS: 1
Creating ratio df for  Top\\-1\\ Accuracy ,  VTAB-1k - Image Classification benchmarking , ds_count= 1
###SOTA RESULTS: 7
Creating ratio df for  Top\\-1\\ Accuracy ,  ObjectNet - Image Classification benchmarking , ds_count= 1
###SOTA RESULTS: 1
Creating ratio df for  Top\\-1\\ Accuracy ,  EPIC-KITCHENS-55 - Action Recognition benchmarking , ds_count= 1
###SOTA RESULTS: 1
Creating ratio df for  Top\\-1\\ Accuracy ,  EgoGesture - Action Recognition benchmarking , ds_count= 1
###SOTA RESULTS: 1
number of sota per dataset/metric:  26
####### Average\\ MPJPE\\ \\(mm\\)
Creating ratio df for  Average\\ MPJPE\\ \\(mm\\) ,  Human3.6M - 3D Human Pose Estimation benchmarking , ds_count= 1
###SOTA RESULTS: 1
Creating ratio df for  Average\\ MPJPE\\ \\(mm\\) ,  Human3.6M - Monocular 3D Human Pose Estimation benchmarking , ds_count= 1
###SOTA RESULTS: 2
Creating ratio df for  Average\\ MPJPE\\ \\(mm\\) ,  Total Capture - 3D Human Pose Estimation benchmarking , ds_count= 1
###SOT

###SOTA RESULTS: 1
Creating ratio df for  Rank\\-1 ,  UAV-Human - Person Re-Identification benchmarking , ds_count= 1
###SOTA RESULTS: 2
Creating ratio df for  Rank\\-1 ,  DukeTracklet - Person Re-Identification benchmarking , ds_count= 1
###SOTA RESULTS: 2
Creating ratio df for  Rank\\-1 ,  Market-1501->MSMT17 - Unsupervised Person Re-Identification benchmarking , ds_count= 1
###SOTA RESULTS: 1
Creating ratio df for  Rank\\-1 ,  DukeMTMC-reID->MSMT17 - Unsupervised Person Re-Identification benchmarking , ds_count= 1
###SOTA RESULTS: 1
Creating ratio df for  Rank\\-1 ,  iLIDS-VID - Person Re-Identification benchmarking , ds_count= 1
###SOTA RESULTS: 2
Creating ratio df for  Rank\\-1 ,  DukeMTMC-reID->Market-1501 - Unsupervised Person Re-Identification benchmarking , ds_count= 1
###SOTA RESULTS: 1
Creating ratio df for  Rank\\-1 ,  Market-1501->DukeMTMC-reID - Unsupervised Person Re-Identification benchmarking , ds_count= 1
###SOTA RESULTS: 2
number of sota per dataset/metric:  83
#####

###SOTA RESULTS: 4
Creating ratio df for  Average\\ Accuracy ,  Office-31 - Domain Adaptation benchmarking , ds_count= 1
###SOTA RESULTS: 4
Creating ratio df for  Average\\ Accuracy ,  Scan2CAD - 3D Reconstruction benchmarking , ds_count= 1
###SOTA RESULTS: 2
Creating ratio df for  Average\\ Accuracy ,  ScanNet - Scene Segmentation benchmarking , ds_count= 1
###SOTA RESULTS: 2
Creating ratio df for  Average\\ Accuracy ,  BP4D - Facial Action Unit Detection benchmarking , ds_count= 1
###SOTA RESULTS: 2
Creating ratio df for  Average\\ Accuracy ,  UAV-Human - Skeleton Based Action Recognition benchmarking , ds_count= 1
###SOTA RESULTS: 2
Creating ratio df for  Average\\ Accuracy ,  EGTEA - Egocentric Activity Recognition benchmarking , ds_count= 1
###SOTA RESULTS: 3
number of sota per dataset/metric:  20
####### Mean\\ IoU
Creating ratio df for  Mean\\ IoU ,  PASCAL VOC 2012 test - Semantic Segmentation benchmarking , ds_count= 1
###SOTA RESULTS: 10
Creating ratio df for  Mean\\ IoU ,  S

###SOTA RESULTS: 3
Creating ratio df for  AP ,  KITTI Pedestrian Moderate val - Birds Eye View Object Detection benchmarking , ds_count= 1
###SOTA RESULTS: 1
Creating ratio df for  AP ,  KITTI Cyclist Moderate val - Birds Eye View Object Detection benchmarking , ds_count= 1
###SOTA RESULTS: 1
Creating ratio df for  AP ,  KITTI Cyclist Easy val - Birds Eye View Object Detection benchmarking , ds_count= 1
###SOTA RESULTS: 1
Creating ratio df for  AP ,  KITTI Cars Hard - Birds Eye View Object Detection benchmarking , ds_count= 1
###SOTA RESULTS: 3
Creating ratio df for  AP ,  KITTI Cyclist Hard val - Birds Eye View Object Detection benchmarking , ds_count= 1
###SOTA RESULTS: 1
Creating ratio df for  AP ,  KITTI Pedestrian Hard val - Birds Eye View Object Detection benchmarking , ds_count= 1
###SOTA RESULTS: 1
Creating ratio df for  AP ,  KITTI Pedestrians Easy - 3D Object Detection benchmarking , ds_count= 1
###SOTA RESULTS: 3
Creating ratio df for  AP ,  KITTI Cars Moderate - 3D Object D

###SOTA RESULTS: 3
Creating ratio df for  mIoU ,  ADE20K-Outdoor Labels-to-Photos - Image-to-Image Translation benchmarking , ds_count= 1
###SOTA RESULTS: 3
Creating ratio df for  mIoU ,  Cityscapes Labels-to-Photo - Image-to-Image Translation benchmarking , ds_count= 1
###SOTA RESULTS: 4
Creating ratio df for  mIoU ,  ADE20K Labels-to-Photos - Image-to-Image Translation benchmarking , ds_count= 1
###SOTA RESULTS: 3
Creating ratio df for  mIoU ,  S3DIS - 3D Instance Segmentation benchmarking , ds_count= 1
###SOTA RESULTS: 1
Creating ratio df for  mIoU ,  2018 Data Science Bowl - Medical Image Segmentation benchmarking , ds_count= 1
###SOTA RESULTS: 1
Creating ratio df for  mIoU ,  Cityscapes val - Panoptic Segmentation benchmarking , ds_count= 1
###SOTA RESULTS: 2
Creating ratio df for  mIoU ,  SYNTHIA-to-Cityscapes - Domain Adaptation benchmarking , ds_count= 1
###SOTA RESULTS: 2
Creating ratio df for  mIoU ,  MICHE - Iris Segmentation benchmarking , ds_count= 1
###SOTA RESULTS: 1
Cre

###SOTA RESULTS: 3
Creating ratio df for  Recall ,  3DMatch Benchmark - Point Cloud Registration benchmarking , ds_count= 1
###SOTA RESULTS: 2
Creating ratio df for  Recall ,  ICDAR 2015 - Scene Text Detection benchmarking , ds_count= 1
###SOTA RESULTS: 8
Creating ratio df for  Recall ,  COCO-Text - Scene Text Detection benchmarking , ds_count= 1
###SOTA RESULTS: 4
Creating ratio df for  Recall ,  Anatomical Tracings of Lesions After Stroke (ATLAS) - Lesion Segmentation benchmarking , ds_count= 1
null
Creating ratio df for  Recall ,  MSRA-TD500 - Scene Text Detection benchmarking , ds_count= 1
###SOTA RESULTS: 4
Creating ratio df for  Recall ,  Total-Text - Scene Text Detection benchmarking , ds_count= 1
###SOTA RESULTS: 4
Creating ratio df for  Recall ,  ICDAR 2017 MLT - Scene Text Detection benchmarking , ds_count= 1
###SOTA RESULTS: 4
Creating ratio df for  Recall ,  SCUT-CTW1500 - Scene Text Detection benchmarking , ds_count= 1
###SOTA RESULTS: 3
Creating ratio df for  Recall ,  Pu

###SOTA RESULTS: 1
number of sota per dataset/metric:  13
####### Frame\\ \\(fps\\)
Creating ratio df for  Frame\\ \\(fps\\) ,  Cityscapes test - Real-Time Semantic Segmentation benchmarking , ds_count= 1
###SOTA RESULTS: 6
Creating ratio df for  Frame\\ \\(fps\\) ,  CamVid - Real-Time Semantic Segmentation benchmarking , ds_count= 1
###SOTA RESULTS: 4
Creating ratio df for  Frame\\ \\(fps\\) ,  MSCOCO - Real-time Instance Segmentation benchmarking , ds_count= 1
###SOTA RESULTS: 1
number of sota per dataset/metric:  11
####### PSNR
Creating ratio df for  PSNR ,  Urban100 - 4x upscaling - Image Super-Resolution benchmarking , ds_count= 1
###SOTA RESULTS: 9
Creating ratio df for  PSNR ,  Set5 - 4x upscaling - Image Super-Resolution benchmarking , ds_count= 1
###SOTA RESULTS: 11
Creating ratio df for  PSNR ,  FFHQ 1024 x 1024 - 4x upscaling - Image Super-Resolution benchmarking , ds_count= 1
###SOTA RESULTS: 3
Creating ratio df for  PSNR ,  Manga109 - 4x upscaling - Image Super-Resolution

###SOTA RESULTS: 1
Creating ratio df for  PSNR ,  Clip300 sigma15 - Grayscale Image Denoising benchmarking , ds_count= 1
###SOTA RESULTS: 1
Creating ratio df for  PSNR ,  Set12 sigma15 - Grayscale Image Denoising benchmarking , ds_count= 1
###SOTA RESULTS: 3
Creating ratio df for  PSNR ,  Clip300 sigma35 - Grayscale Image Denoising benchmarking , ds_count= 1
###SOTA RESULTS: 1
Creating ratio df for  PSNR ,  Clip300 sigma25 - Grayscale Image Denoising benchmarking , ds_count= 1
###SOTA RESULTS: 1
Creating ratio df for  PSNR ,  Clip300 sigma60 - Grayscale Image Denoising benchmarking , ds_count= 1
###SOTA RESULTS: 1
Creating ratio df for  PSNR ,  BSD68 sigma75 - Grayscale Image Denoising benchmarking , ds_count= 1
###SOTA RESULTS: 1
Creating ratio df for  PSNR ,  Clip300 sigma50 - Grayscale Image Denoising benchmarking , ds_count= 1
###SOTA RESULTS: 1
Creating ratio df for  PSNR ,  Set12 sigma25 - Grayscale Image Denoising benchmarking , ds_count= 1
###SOTA RESULTS: 1
Creating ratio df f

Creating ratio df for  SSIM ,  Set5 - 4x upscaling - Image Super-Resolution benchmarking , ds_count= 1
###SOTA RESULTS: 6
Creating ratio df for  SSIM ,  FFHQ 256 x 256 - 4x upscaling - Image Super-Resolution benchmarking , ds_count= 1
###SOTA RESULTS: 3
Creating ratio df for  SSIM ,  FFHQ 1024 x 1024 - 4x upscaling - Image Super-Resolution benchmarking , ds_count= 1
###SOTA RESULTS: 4
Creating ratio df for  SSIM ,  Vid4 - 4x upscaling - Video Super-Resolution benchmarking , ds_count= 1
###SOTA RESULTS: 6
Creating ratio df for  SSIM ,  Manga109 - 4x upscaling - Image Super-Resolution benchmarking , ds_count= 1
###SOTA RESULTS: 6
Creating ratio df for  SSIM ,  Urban100 - 4x upscaling - Image Super-Resolution benchmarking , ds_count= 1
###SOTA RESULTS: 9
Creating ratio df for  SSIM ,  BSD100 - 4x upscaling - Image Super-Resolution benchmarking , ds_count= 1
###SOTA RESULTS: 5
Creating ratio df for  SSIM ,  Set14 - 4x upscaling - Image Super-Resolution benchmarking , ds_count= 1
###SOTA RE

###SOTA RESULTS: 1
Creating ratio df for  SSIM ,  SOTS Indoor - Image Dehazing benchmarking , ds_count= 1
###SOTA RESULTS: 1
Creating ratio df for  SSIM ,  SOTS Outdoor - Image Dehazing benchmarking , ds_count= 1
###SOTA RESULTS: 1
Creating ratio df for  SSIM ,  Middlebury - Video Frame Interpolation benchmarking , ds_count= 1
###SOTA RESULTS: 1
number of sota per dataset/metric:  126
####### FID
Creating ratio df for  FID ,  FFHQ 256 x 256 - 4x upscaling - Image Super-Resolution benchmarking , ds_count= 1
###SOTA RESULTS: 3
Creating ratio df for  FID ,  FFHQ 1024 x 1024 - 4x upscaling - Image Super-Resolution benchmarking , ds_count= 1
###SOTA RESULTS: 3
Creating ratio df for  FID ,  CUB 128 x 128 - Image Generation benchmarking , ds_count= 1
###SOTA RESULTS: 2
Creating ratio df for  FID ,  Stanford Dogs - Image Generation benchmarking , ds_count= 1
###SOTA RESULTS: 2
Creating ratio df for  FID ,  Stanford Cars - Image Generation benchmarking , ds_count= 1
###SOTA RESULTS: 2
Creating 

###SOTA RESULTS: 1
Creating ratio df for  FID ,  LSUN Car 512 x 384 - Image Generation benchmarking , ds_count= 1
###SOTA RESULTS: 1
Creating ratio df for  FID ,  AFHQ - Image-to-Image Translation benchmarking , ds_count= 1
###SOTA RESULTS: 1
Creating ratio df for  FID ,  CelebA-HQ - Image-to-Image Translation benchmarking , ds_count= 1
###SOTA RESULTS: 2
Creating ratio df for  FID ,  Deep-Fashion - Image-to-Image Translation benchmarking , ds_count= 1
###SOTA RESULTS: 1
number of sota per dataset/metric:  96
####### MOVIE
Creating ratio df for  MOVIE ,  Vid4 - 4x upscaling - Video Super-Resolution benchmarking , ds_count= 1
###SOTA RESULTS: 2
number of sota per dataset/metric:  2
####### MS\\-SSIM
Creating ratio df for  MS\\-SSIM ,  FFHQ 256 x 256 - 4x upscaling - Image Super-Resolution benchmarking , ds_count= 1
###SOTA RESULTS: 3
Creating ratio df for  MS\\-SSIM ,  FFHQ 1024 x 1024 - 4x upscaling - Image Super-Resolution benchmarking , ds_count= 1
###SOTA RESULTS: 3
Creating ratio d

###SOTA RESULTS: 1
Creating ratio df for  Dice\\ Score ,  NIH - Lung Nodule Segmentation benchmarking , ds_count= 1
###SOTA RESULTS: 1
Creating ratio df for  Dice\\ Score ,  BRATS 2018 val - Brain Tumor Segmentation benchmarking , ds_count= 1
###SOTA RESULTS: 1
Creating ratio df for  Dice\\ Score ,  HSVM - Medical Image Segmentation benchmarking , ds_count= 1
###SOTA RESULTS: 1
Creating ratio df for  Dice\\ Score ,  CHAOS MRI Dataset - Medical Image Segmentation benchmarking , ds_count= 1
###SOTA RESULTS: 1
Creating ratio df for  Dice\\ Score ,  Brain MRI segmentation - Brain Image Segmentation benchmarking , ds_count= 1
###SOTA RESULTS: 1
Creating ratio df for  Dice\\ Score ,  Lung Nodule  - Lung Nodule Segmentation benchmarking , ds_count= 1
###SOTA RESULTS: 1
number of sota per dataset/metric:  29
####### Warping\\ Error
Creating ratio df for  Warping\\ Error ,  ISBI 2012 EM Segmentation - Medical Image Segmentation benchmarking , ds_count= 1
###SOTA RESULTS: 1
number of sota per da

###SOTA RESULTS: 1
Creating ratio df for  text\\-to\\-video\\ Median\\ Rank ,  MSVD - Video Retrieval benchmarking , ds_count= 1
###SOTA RESULTS: 1
Creating ratio df for  text\\-to\\-video\\ Median\\ Rank ,  DiDeMo - Video Retrieval benchmarking , ds_count= 1
###SOTA RESULTS: 1
Creating ratio df for  text\\-to\\-video\\ Median\\ Rank ,  ActivityNet - Video Retrieval benchmarking , ds_count= 1
###SOTA RESULTS: 1
number of sota per dataset/metric:  8
####### text\\-to\\-video\\ R\\-at\\-1
Creating ratio df for  text\\-to\\-video\\ R\\-at\\-1 ,  YouCook2 - Video Retrieval benchmarking , ds_count= 1
###SOTA RESULTS: 2
Creating ratio df for  text\\-to\\-video\\ R\\-at\\-1 ,  MSR-VTT - Video Retrieval benchmarking , ds_count= 1
###SOTA RESULTS: 5
Creating ratio df for  text\\-to\\-video\\ R\\-at\\-1 ,  LSMDC - Video Retrieval benchmarking , ds_count= 1
###SOTA RESULTS: 4
Creating ratio df for  text\\-to\\-video\\ R\\-at\\-1 ,  MSR-VTT-1kA - Video Retrieval benchmarking , ds_count= 1
###SOTA 

###SOTA RESULTS: 1
Creating ratio df for  FPS ,  K2HPD - 3D Pose Estimation benchmarking , ds_count= 1
###SOTA RESULTS: 1
Creating ratio df for  FPS ,  ICVL Hands - Hand Pose Estimation benchmarking , ds_count= 1
###SOTA RESULTS: 1
Creating ratio df for  FPS ,  NYU Hands - Hand Pose Estimation benchmarking , ds_count= 1
###SOTA RESULTS: 1
number of sota per dataset/metric:  12
####### AP75
Creating ratio df for  AP75 ,  SKU-110K - Dense Object Detection benchmarking , ds_count= 1
###SOTA RESULTS: 4
Creating ratio df for  AP75 ,  COCO test-dev - Object Detection benchmarking , ds_count= 1
###SOTA RESULTS: 10
Creating ratio df for  AP75 ,  COCO test-dev - Keypoint Detection benchmarking , ds_count= 1
###SOTA RESULTS: 6
Creating ratio df for  AP75 ,  COCO test-dev - Multi-Person Pose Estimation benchmarking , ds_count= 1
###SOTA RESULTS: 4
Creating ratio df for  AP75 ,  COCO test-dev - Pose Estimation benchmarking , ds_count= 1
###SOTA RESULTS: 9
Creating ratio df for  AP75 ,  COCO miniva

###SOTA RESULTS: 1
Creating ratio df for  AP50 ,  nuScenes-F - 3D Object Detection benchmarking , ds_count= 1
###SOTA RESULTS: 1
number of sota per dataset/metric:  47
####### MOS
Creating ratio df for  MOS ,  Set5 - 4x upscaling - Image Super-Resolution benchmarking , ds_count= 1
###SOTA RESULTS: 3
Creating ratio df for  MOS ,  BSD100 - 4x upscaling - Image Super-Resolution benchmarking , ds_count= 1
###SOTA RESULTS: 2
Creating ratio df for  MOS ,  Set14 - 4x upscaling - Image Super-Resolution benchmarking , ds_count= 1
###SOTA RESULTS: 2
Creating ratio df for  MOS ,  CelebA + AFLW Unaligned - Face Alignment benchmarking , ds_count= 1
null
Creating ratio df for  MOS ,  CelebA Aligned - Face Alignment benchmarking , ds_count= 1
###SOTA RESULTS: 1
number of sota per dataset/metric:  8
####### Inception\\ score
Creating ratio df for  Inception\\ score ,  CIFAR-10 - Conditional Image Generation benchmarking , ds_count= 1
###SOTA RESULTS: 7
Creating ratio df for  Inception\\ score ,  CIFAR

###SOTA RESULTS: 1
number of sota per dataset/metric:  1
####### Mean\\ NME
Creating ratio df for  Mean\\ NME ,  AFLW2000-3D - Face Alignment benchmarking , ds_count= 1
###SOTA RESULTS: 1
Creating ratio df for  Mean\\ NME ,  Florence - 3D Face Reconstruction benchmarking , ds_count= 1
###SOTA RESULTS: 1
Creating ratio df for  Mean\\ NME ,  AFLW2000-3D - 3D Face Reconstruction benchmarking , ds_count= 1
###SOTA RESULTS: 2
Creating ratio df for  Mean\\ NME ,  AFLW-Full - Face Alignment benchmarking , ds_count= 1
###SOTA RESULTS: 1
Creating ratio df for  Mean\\ NME ,  AFLW-LFPA - Face Alignment benchmarking , ds_count= 1
###SOTA RESULTS: 1
Creating ratio df for  Mean\\ NME ,  AFLW-Full - Facial Landmark Detection benchmarking , ds_count= 1
###SOTA RESULTS: 2
Creating ratio df for  Mean\\ NME ,  AFLW-Front - Facial Landmark Detection benchmarking , ds_count= 1
###SOTA RESULTS: 1
Creating ratio df for  Mean\\ NME ,  AFLW - Face Alignment benchmarking , ds_count= 1
###SOTA RESULTS: 1
Creatin

###SOTA RESULTS: 2
number of sota per dataset/metric:  9
####### F1@25%
Creating ratio df for  F1@25% ,  GTEA - Action Segmentation benchmarking , ds_count= 1
###SOTA RESULTS: 5
Creating ratio df for  F1@25% ,  Breakfast - Action Segmentation benchmarking , ds_count= 1
###SOTA RESULTS: 2
Creating ratio df for  F1@25% ,  50 Salads - Action Segmentation benchmarking , ds_count= 1
###SOTA RESULTS: 2
number of sota per dataset/metric:  9
####### F1@10%
Creating ratio df for  F1@10% ,  GTEA - Action Segmentation benchmarking , ds_count= 1
###SOTA RESULTS: 5
Creating ratio df for  F1@10% ,  50 Salads - Action Segmentation benchmarking , ds_count= 1
###SOTA RESULTS: 2
Creating ratio df for  F1@10% ,  Breakfast - Action Segmentation benchmarking , ds_count= 1
###SOTA RESULTS: 2
number of sota per dataset/metric:  9
####### F\\-measure
Creating ratio df for  F\\-measure ,  DUTS-TE - RGB Salient Object Detection benchmarking , ds_count= 1
###SOTA RESULTS: 5
Creating ratio df for  F\\-measure ,  

###SOTA RESULTS: 2
Creating ratio df for  Overall\\ Accuracy ,  ScanObjectNN - 3D Point Cloud Classification benchmarking , ds_count= 1
###SOTA RESULTS: 3
Creating ratio df for  Overall\\ Accuracy ,  Salinas Scene - Hyperspectral Image Classification benchmarking , ds_count= 1
###SOTA RESULTS: 1
Creating ratio df for  Overall\\ Accuracy ,  RAF-DB - Facial Expression Recognition benchmarking , ds_count= 1
###SOTA RESULTS: 1
number of sota per dataset/metric:  16
####### m\\-reIRMSE
Creating ratio df for  m\\-reIRMSE ,  COCO count-test - Object Counting benchmarking , ds_count= 1
###SOTA RESULTS: 2
number of sota per dataset/metric:  2
####### m\\-reIRMSE\\-nz
Creating ratio df for  m\\-reIRMSE\\-nz ,  COCO count-test - Object Counting benchmarking , ds_count= 1
###SOTA RESULTS: 2
Creating ratio df for  m\\-reIRMSE\\-nz ,  Pascal VOC 2007 count-test - Object Counting benchmarking , ds_count= 1
###SOTA RESULTS: 2
number of sota per dataset/metric:  4
####### m\\-relRMSE
Creating ratio df 

Creating ratio df for  F\\-measure\\ \\(Recall\\) ,  DAVIS 2017 (val) - Semi-Supervised Video Object Segmentation benchmarking , ds_count= 1
null
Creating ratio df for  F\\-measure\\ \\(Recall\\) ,  DAVIS 2016 - Unsupervised Video Object Segmentation benchmarking , ds_count= 1
###SOTA RESULTS: 2
number of sota per dataset/metric:  7
####### Jaccard\\ \\(Decay\\)
Creating ratio df for  Jaccard\\ \\(Decay\\) ,  DAVIS 2016 - Semi-Supervised Video Object Segmentation benchmarking , ds_count= 1
###SOTA RESULTS: 1
Creating ratio df for  Jaccard\\ \\(Decay\\) ,  DAVIS 2017 (test-dev) - Semi-Supervised Video Object Segmentation benchmarking , ds_count= 1
null
Creating ratio df for  Jaccard\\ \\(Decay\\) ,  DAVIS 2017 (val) - Semi-Supervised Video Object Segmentation benchmarking , ds_count= 1
null
Creating ratio df for  Jaccard\\ \\(Decay\\) ,  DAVIS 2016 - Unsupervised Video Object Segmentation benchmarking , ds_count= 1
###SOTA RESULTS: 1
number of sota per dataset/metric:  2
####### Jaccard

###SOTA RESULTS: 2
number of sota per dataset/metric:  2
####### LPIPS
Creating ratio df for  LPIPS ,  FFHQ 512 x 512 - 4x upscaling - Image Super-Resolution benchmarking , ds_count= 1
###SOTA RESULTS: 2
Creating ratio df for  LPIPS ,  Edge-to-Handbags - Image Reconstruction benchmarking , ds_count= 1
###SOTA RESULTS: 1
Creating ratio df for  LPIPS ,  Edge-to-Shoes - Image Reconstruction benchmarking , ds_count= 1
###SOTA RESULTS: 1
Creating ratio df for  LPIPS ,  FFHQ 512 x 512 - 16x upscaling - Face Hallucination benchmarking , ds_count= 1
###SOTA RESULTS: 1
Creating ratio df for  LPIPS ,  Multi-Modal-CelebA-HQ - Text-to-Image Generation benchmarking , ds_count= 1
###SOTA RESULTS: 2
Creating ratio df for  LPIPS ,  Deep-Fashion - Pose Transfer benchmarking , ds_count= 1
###SOTA RESULTS: 1
Creating ratio df for  LPIPS ,  Edge-to-Clothes - Image Reconstruction benchmarking , ds_count= 1
###SOTA RESULTS: 1
Creating ratio df for  LPIPS ,  CelebA-HQ - Image-to-Image Translation benchmarkin

###SOTA RESULTS: 9
Creating ratio df for  APM ,  MSCOCO - Real-time Instance Segmentation benchmarking , ds_count= 1
###SOTA RESULTS: 2
number of sota per dataset/metric:  41
####### Test\\ AP
Creating ratio df for  Test\\ AP ,  COCO - Keypoint Detection benchmarking , ds_count= 1
###SOTA RESULTS: 3
number of sota per dataset/metric:  3
####### O\\ \\(Average\\ of\\ Measures\\)
Creating ratio df for  O\\ \\(Average\\ of\\ Measures\\) ,  YouTube-VOS - Visual Object Tracking benchmarking , ds_count= 1
###SOTA RESULTS: 1
number of sota per dataset/metric:  1
####### AR
Creating ratio df for  AR ,  COCO test-dev - Keypoint Detection benchmarking , ds_count= 1
###SOTA RESULTS: 6
Creating ratio df for  AR ,  COCO test-dev - Pose Estimation benchmarking , ds_count= 1
###SOTA RESULTS: 7
Creating ratio df for  AR ,  COCO test-challenge - Keypoint Detection benchmarking , ds_count= 1
###SOTA RESULTS: 5
Creating ratio df for  AR ,  nuScenes-F - 3D Object Detection benchmarking , ds_count= 1
###SO

Creating ratio df for  Average\\ 3D\\ Error ,  Florence - 3D Face Reconstruction benchmarking , ds_count= 1
###SOTA RESULTS: 1
Creating ratio df for  Average\\ 3D\\ Error ,  ICVL Hands - Hand Pose Estimation benchmarking , ds_count= 1
###SOTA RESULTS: 2
Creating ratio df for  Average\\ 3D\\ Error ,  MSRA Hands - Hand Pose Estimation benchmarking , ds_count= 1
###SOTA RESULTS: 1
Creating ratio df for  Average\\ 3D\\ Error ,  NYU Hands - Hand Pose Estimation benchmarking , ds_count= 1
###SOTA RESULTS: 2
Creating ratio df for  Average\\ 3D\\ Error ,  HANDS 2017 - Hand Pose Estimation benchmarking , ds_count= 1
###SOTA RESULTS: 2
Creating ratio df for  Average\\ 3D\\ Error ,  HANDS 2019 - Hand Pose Estimation benchmarking , ds_count= 1
###SOTA RESULTS: 1
number of sota per dataset/metric:  9
####### Number\\ of\\ Views
Creating ratio df for  Number\\ of\\ Views ,  Human3.6M - Weakly-supervised 3D Human Pose Estimation benchmarking , ds_count= 1
###SOTA RESULTS: 2
number of sota per dataset

###SOTA RESULTS: 2
Creating ratio df for  Average\\ Recall ,  EGTEA - Long-tail Learning benchmarking , ds_count= 1
###SOTA RESULTS: 1
Creating ratio df for  Average\\ Recall ,  ScanNet - Scene Recognition benchmarking , ds_count= 1
###SOTA RESULTS: 1
Creating ratio df for  Average\\ Recall ,  3DMatch Benchmark - 3D Feature Matching benchmarking , ds_count= 1
###SOTA RESULTS: 1
number of sota per dataset/metric:  5
####### Video\\-mAP\\ 0\\.2
Creating ratio df for  Video\\-mAP\\ 0\\.2 ,  UCF101-24 - Action Detection benchmarking , ds_count= 1
###SOTA RESULTS: 2
number of sota per dataset/metric:  2
####### Video\\-mAP\\ 0\\.1
Creating ratio df for  Video\\-mAP\\ 0\\.1 ,  UCF101-24 - Action Detection benchmarking , ds_count= 1
###SOTA RESULTS: 2
number of sota per dataset/metric:  2
####### Mean\\ ADD
Creating ratio df for  Mean\\ ADD ,  LineMOD - 6D Pose Estimation using RGB benchmarking , ds_count= 1
###SOTA RESULTS: 5
Creating ratio df for  Mean\\ ADD ,  YCB-Video - 6D Pose Estimatio

###SOTA RESULTS: 1
Creating ratio df for  Rank\\-10 ,  DukeMTMC-reID->MSMT17 - Unsupervised Person Re-Identification benchmarking , ds_count= 1
###SOTA RESULTS: 1
Creating ratio df for  Rank\\-10 ,  Market-1501 - Person Re-Identification benchmarking , ds_count= 1
###SOTA RESULTS: 1
Creating ratio df for  Rank\\-10 ,  MARS - Person Re-Identification benchmarking , ds_count= 1
###SOTA RESULTS: 2
Creating ratio df for  Rank\\-10 ,  DukeMTMC-reID->Market-1501 - Unsupervised Person Re-Identification benchmarking , ds_count= 1
###SOTA RESULTS: 1
Creating ratio df for  Rank\\-10 ,  Market-1501->DukeMTMC-reID - Unsupervised Person Re-Identification benchmarking , ds_count= 1
###SOTA RESULTS: 2
Creating ratio df for  Rank\\-10 ,  DukeMTMC-reID - Person Re-Identification benchmarking , ds_count= 1
###SOTA RESULTS: 2
Creating ratio df for  Rank\\-10 ,  iLIDS-VID - Person Re-Identification benchmarking , ds_count= 1
###SOTA RESULTS: 1
number of sota per dataset/metric:  19
####### AUC0\\.08\\ pri

null
number of sota per dataset/metric:  7
####### Log\\-Spectral\\ Distance
Creating ratio df for  Log\\-Spectral\\ Distance ,  Piano - Audio Super-Resolution benchmarking , ds_count= 1
###SOTA RESULTS: 1
Creating ratio df for  Log\\-Spectral\\ Distance ,  VCTK Multi-Speaker - Audio Super-Resolution benchmarking , ds_count= 1
###SOTA RESULTS: 1
Creating ratio df for  Log\\-Spectral\\ Distance ,  Voice Bank corpus (VCTK) - Audio Super-Resolution benchmarking , ds_count= 1
null
number of sota per dataset/metric:  2
####### Interpolation\\ Error
Creating ratio df for  Interpolation\\ Error ,  Middlebury - Video Frame Interpolation benchmarking , ds_count= 1
###SOTA RESULTS: 1
number of sota per dataset/metric:  1
####### PSNR\\-B
Creating ratio df for  PSNR\\-B ,  LIVE1 (Quality 10 Color) - JPEG Artifact Correction benchmarking , ds_count= 1
null
Creating ratio df for  PSNR\\-B ,  LIVE1 (Quality 20 Grayscale) - JPEG Artifact Correction benchmarking , ds_count= 1
null
Creating ratio df fo

###SOTA RESULTS: 1
number of sota per dataset/metric:  4
####### 10%
Creating ratio df for  10% ,  J-HMBD Early Action - Skeleton Based Action Recognition benchmarking , ds_count= 1
###SOTA RESULTS: 2
Creating ratio df for  10% ,  2019_test set - Face Anonymization benchmarking , ds_count= 1
###SOTA RESULTS: 1
number of sota per dataset/metric:  3
####### ADDS\\ AUC
Creating ratio df for  ADDS\\ AUC ,  YCB-Video - 6D Pose Estimation benchmarking , ds_count= 1
###SOTA RESULTS: 3
number of sota per dataset/metric:  3
####### Mean\\ ADD\\-S
Creating ratio df for  Mean\\ ADD\\-S ,  YCB-Video - 6D Pose Estimation using RGBD benchmarking , ds_count= 1
###SOTA RESULTS: 2
Creating ratio df for  Mean\\ ADD\\-S ,  YCB-Video - 6D Pose Estimation using RGB benchmarking , ds_count= 1
###SOTA RESULTS: 1
number of sota per dataset/metric:  3
####### fwIOU
Creating ratio df for  fwIOU ,  SYNTHIA Fall-to-Winter - Image-to-Image Translation benchmarking , ds_count= 1
###SOTA RESULTS: 1
Creating ratio df

###SOTA RESULTS: 1
Creating ratio df for  PQst ,  COCO test-dev - Panoptic Segmentation benchmarking , ds_count= 1
###SOTA RESULTS: 3
number of sota per dataset/metric:  4
####### Top\\-1\\ \\(%\\)
Creating ratio df for  Top\\-1\\ \\(%\\) ,  Moments in Time Dataset - Multimodal Activity Recognition benchmarking , ds_count= 1
###SOTA RESULTS: 4
Creating ratio df for  Top\\-1\\ \\(%\\) ,  DukeMTMC-reID->Market-1501 - Unsupervised Person Re-Identification benchmarking , ds_count= 1
###SOTA RESULTS: 1
Creating ratio df for  Top\\-1\\ \\(%\\) ,  Market-1501->MSMT17 - Unsupervised Person Re-Identification benchmarking , ds_count= 1
###SOTA RESULTS: 1
Creating ratio df for  Top\\-1\\ \\(%\\) ,  DukeMTMC-reID->MSMT17 - Unsupervised Person Re-Identification benchmarking , ds_count= 1
###SOTA RESULTS: 1
number of sota per dataset/metric:  7
####### Top\\-5\\ \\(%\\)
Creating ratio df for  Top\\-5\\ \\(%\\) ,  Moments in Time Dataset - Multimodal Activity Recognition benchmarking , ds_count= 1
##

###SOTA RESULTS: 1
Creating ratio df for  Error ,  Kuzushiji-MNIST - Image Classification benchmarking , ds_count= 1
###SOTA RESULTS: 1
number of sota per dataset/metric:  2
####### CD
Creating ratio df for  CD ,  Pix3D - 3D Shape Reconstruction benchmarking , ds_count= 1
###SOTA RESULTS: 1
number of sota per dataset/metric:  1
####### EMD
Creating ratio df for  EMD ,  Pix3D - 3D Shape Reconstruction benchmarking , ds_count= 1
###SOTA RESULTS: 1
number of sota per dataset/metric:  1
####### v2v\\ error
Creating ratio df for  v2v\\ error ,  Expressive hands and faces dataset (EHF). - Multimodal Emotion Recognition benchmarking , ds_count= 1
null
####### mAP\\ \\(at\\-0\\.1,\\ Through\\-wall\\)
Creating ratio df for  mAP\\ \\(at\\-0\\.1,\\ Through\\-wall\\) ,  RF-MMD - RF-based Pose Estimation benchmarking , ds_count= 1
###SOTA RESULTS: 2
Creating ratio df for  mAP\\ \\(at\\-0\\.1,\\ Through\\-wall\\) ,  3DMatch Benchmark - Low-Light Image Enhancement benchmarking , ds_count= 1
###SOTA R

Creating ratio df for  Normalized\\ cPSNR ,  PROBA-V - Multi-Frame Super-Resolution benchmarking , ds_count= 1
###SOTA RESULTS: 2
number of sota per dataset/metric:  2
####### mAP\\ \\(at\\-0\\.1,\\ Visible\\)
Creating ratio df for  mAP\\ \\(at\\-0\\.1,\\ Visible\\) ,  RF-MMD - RF-based Pose Estimation benchmarking , ds_count= 1
###SOTA RESULTS: 2
number of sota per dataset/metric:  2
####### Rank\\-1\\ Recognition\\ Rate
Creating ratio df for  Rank\\-1\\ Recognition\\ Rate ,  MORPH Album2 - Age-Invariant Face Recognition benchmarking , ds_count= 1
###SOTA RESULTS: 2
number of sota per dataset/metric:  2
####### Fl\\-all
Creating ratio df for  Fl\\-all ,  KITTI 2015 - Optical Flow Estimation benchmarking , ds_count= 1
###SOTA RESULTS: 2
number of sota per dataset/metric:  2
####### GPU\\ sec
Creating ratio df for  GPU\\ sec ,  OASIS+ADIBE+ADHD200+MCIC+PPMI+HABS+HarvardGSP - Diffeomorphic Medical Image Registration benchmarking , ds_count= 1
null
####### MIoU\\ \\(13\\ classes\\)
Creati

###SOTA RESULTS: 3
Creating ratio df for  28\\ gestures\\ accuracy ,  SHREC 2017 - Hand Gesture Recognition benchmarking , ds_count= 1
###SOTA RESULTS: 1
number of sota per dataset/metric:  4
####### Pixel\\ Accuracy
Creating ratio df for  Pixel\\ Accuracy ,  ADE20K val - Semantic Segmentation benchmarking , ds_count= 1
###SOTA RESULTS: 1
number of sota per dataset/metric:  1
####### FSF
Creating ratio df for  FSF ,  FaceForensics - DeepFake Detection benchmarking , ds_count= 1
###SOTA RESULTS: 1
number of sota per dataset/metric:  1
####### Total\\ Accuracy
Creating ratio df for  Total\\ Accuracy ,  FaceForensics - DeepFake Detection benchmarking , ds_count= 1
###SOTA RESULTS: 1
number of sota per dataset/metric:  1
####### DF
Creating ratio df for  DF ,  FaceForensics - DeepFake Detection benchmarking , ds_count= 1
###SOTA RESULTS: 1
number of sota per dataset/metric:  1
####### FS
Creating ratio df for  FS ,  FaceForensics - DeepFake Detection benchmarking , ds_count= 1
###SOTA RESU

###SOTA RESULTS: 1
number of sota per dataset/metric:  1
####### ADE\\ \\(in\\ world\\ coordinates\\)
Creating ratio df for  ADE\\ \\(in\\ world\\ coordinates\\) ,  Stanford Drone - Trajectory Prediction benchmarking , ds_count= 1
###SOTA RESULTS: 1
number of sota per dataset/metric:  1
####### ARm
Creating ratio df for  ARm ,  nuScenes-FB - 3D Object Detection benchmarking , ds_count= 1
###SOTA RESULTS: 1
Creating ratio df for  ARm ,  nuScenes-F - 3D Object Detection benchmarking , ds_count= 1
###SOTA RESULTS: 1
number of sota per dataset/metric:  2
####### ARs
Creating ratio df for  ARs ,  nuScenes-F - 3D Object Detection benchmarking , ds_count= 1
###SOTA RESULTS: 1
Creating ratio df for  ARs ,  nuScenes-FB - 3D Object Detection benchmarking , ds_count= 1
###SOTA RESULTS: 1
number of sota per dataset/metric:  2
####### coverage
Creating ratio df for  coverage ,  Human Righst Archive (HRA) - Displaced People Recognition benchmarking , ds_count= 1
null
####### ROUGE
Creating ratio df 

###SOTA RESULTS: 2
number of sota per dataset/metric:  5
####### NI
Creating ratio df for  NI ,  mtrl-auto-uav - Autonomous Flight (Dense Forest) benchmarking , ds_count= 1
null
####### ATV
Creating ratio df for  ATV ,  KITTI Horizon - Horizon Line Estimation benchmarking , ds_count= 1
###SOTA RESULTS: 1
number of sota per dataset/metric:  1
####### No\\.\\ parameters
Creating ratio df for  No\\.\\ parameters ,  SHREC 2017 track on 3D Hand Gesture Recognition - Skeleton Based Action Recognition benchmarking , ds_count= 1
###SOTA RESULTS: 1
Creating ratio df for  No\\.\\ parameters ,  JHMDB (2D poses only) - Skeleton Based Action Recognition benchmarking , ds_count= 1
null
number of sota per dataset/metric:  1
####### MPJAE
Creating ratio df for  MPJAE ,  3D Poses in the Wild Challenge - 3D Human Pose Estimation benchmarking , ds_count= 1
###SOTA RESULTS: 2
number of sota per dataset/metric:  2
####### MRPE
Creating ratio df for  MRPE ,  Human3.6M - 3D Absolute Human Pose Estimation ben

###SOTA RESULTS: 1
number of sota per dataset/metric:  1
####### ADE
Creating ratio df for  ADE ,  Citywalks - Multiple Object Forecasting benchmarking , ds_count= 1
###SOTA RESULTS: 1
number of sota per dataset/metric:  1
####### AIOU
Creating ratio df for  AIOU ,  Citywalks - Multiple Object Forecasting benchmarking , ds_count= 1
###SOTA RESULTS: 1
number of sota per dataset/metric:  1
####### MPVPE
Creating ratio df for  MPVPE ,  3DPW - 3D Human Pose Estimation benchmarking , ds_count= 1
###SOTA RESULTS: 1
number of sota per dataset/metric:  1
####### Real\\ World\\ Accuracy
Creating ratio df for  Real\\ World\\ Accuracy ,  GesturePod - Gesture Recognition benchmarking , ds_count= 1
###SOTA RESULTS: 1
number of sota per dataset/metric:  1
####### IOU25
Creating ratio df for  IOU25 ,  NOCS-REAL275 - 6D Pose Estimation benchmarking , ds_count= 1
###SOTA RESULTS: 1
number of sota per dataset/metric:  1
####### 5°5\\ cm
Creating ratio df for  5°5\\ cm ,  NOCS-REAL275 - 6D Pose Estimatio


The default value of regex will change from True to False in a future version. In addition, single character regular expressions will*not* be treated as literal strings when regex=True.



https://identifiers.org/ito:ITO_00113
Number of metrics:  64
####### Accuracy
Creating ratio df for  Accuracy ,  GPS - Trajectory Prediction benchmarking , ds_count= 1
###SOTA RESULTS: 1
Creating ratio df for  Accuracy 


Pandas doesn't allow columns to be created via a new attribute name - see https://pandas.pydata.org/pandas-docs/stable/indexing.html#attribute-access



,  Android Malware Dataset - Malware Detection benchmarking , ds_count= 1
###SOTA RESULTS: 2
Creating ratio df for  Accuracy ,  IEMOCAP - Emotion Recognition in Conversation benchmarking , ds_count= 1
###SOTA RESULTS: 5
Creating ratio df for  Accuracy ,  MELD - Emotion Recognition in Conversation benchmarking , ds_count= 1
###SOTA RESULTS: 2
Creating ratio df for  Accuracy ,  Bing News - Click-Through Rate Prediction benchmarking , ds_count= 1
###SOTA RESULTS: 1
Creating ratio df for  Accuracy ,  Book-Crossing - Click-Through Rate Prediction benchmarking , ds_count= 1
###SOTA RESULTS: 1
Creating ratio df for  Accuracy ,  MovieLens 1M - Click-Through Rate Prediction benchmarking , ds_count= 1
###SOTA RESULTS: 1
Creating ratio df for  Accuracy ,  KDD  - Network Intrusion Detection benchmarking , ds_count= 1
###SOTA RESULTS: 1
Creating ratio df for  Accuracy ,  Last.FM - Click-Through Rate Prediction benchmarking , ds_count= 1
###SOTA RESULTS: 1
Creating ratio df for  Accuracy ,  Children

Creating ratio df for  Percentage\\ correct ,  Metamath set.mm - Automated Theorem Proving benchmarking , ds_count= 1
###SOTA RESULTS: 1
Creating ratio df for  Percentage\\ correct ,  HOList benchmark - Automated Theorem Proving benchmarking , ds_count= 1
###SOTA RESULTS: 3
Creating ratio df for  Percentage\\ correct ,  CoqGym - Automated Theorem Proving benchmarking , ds_count= 1
###SOTA RESULTS: 1
number of sota per dataset/metric:  5
####### Perplexity
Creating ratio df for  Perplexity ,  allrecipes.com - Recipe Generation benchmarking , ds_count= 1
###SOTA RESULTS: 1
Creating ratio df for  Perplexity ,  Now You\'re Cooking! - Recipe Generation benchmarking , ds_count= 1
###SOTA RESULTS: 1
number of sota per dataset/metric:  2
####### BLEU
Creating ratio df for  BLEU ,  allrecipes.com - Recipe Generation benchmarking , ds_count= 1
###SOTA RESULTS: 1
number of sota per dataset/metric:  1
####### MAE
Creating ratio df for  MAE ,  QM9 - Formation Energy benchmarking , ds_count= 1
###SO

###SOTA RESULTS: 1
Creating ratio df for  Decidability ,  UBI-Fights - Semi-supervised Anomaly Detection benchmarking , ds_count= 1
###SOTA RESULTS: 1
number of sota per dataset/metric:  2
####### Time\\ \\(ms\\)
Creating ratio df for  Time\\ \\(ms\\) ,  Non-Linear Elasticity Benchmark - Stress-Strain Relation benchmarking , ds_count= 1
###SOTA RESULTS: 1
number of sota per dataset/metric:  1
####### nDCG\\-at\\-100
Creating ratio df for  nDCG\\-at\\-100 ,  MovieLens 20M - Recommendation Systems benchmarking , ds_count= 1
###SOTA RESULTS: 4
Creating ratio df for  nDCG\\-at\\-100 ,  Netflix - Recommendation Systems benchmarking , ds_count= 1
###SOTA RESULTS: 4
Creating ratio df for  nDCG\\-at\\-100 ,  Million Song Dataset - Recommendation Systems benchmarking , ds_count= 1
###SOTA RESULTS: 2
number of sota per dataset/metric:  10
####### Recall\\-at\\-20
Creating ratio df for  Recall\\-at\\-20 ,  MovieLens 20M - Recommendation Systems benchmarking , ds_count= 1
###SOTA RESULTS: 5
Creati

###SOTA RESULTS: 1
Creating ratio df for  AUC\\ \\(outlier\\ ratio\\ =\\ 0\\.5\\) ,  Reuters-21578 - Unsupervised Anomaly Detection benchmarking , ds_count= 1
###SOTA RESULTS: 1
number of sota per dataset/metric:  4
####### Micro\\-F1
Creating ratio df for  Micro\\-F1 ,  EC - Emotion Recognition in Conversation benchmarking , ds_count= 1
###SOTA RESULTS: 2
Creating ratio df for  Micro\\-F1 ,  DailyDialog - Emotion Recognition in Conversation benchmarking , ds_count= 1
###SOTA RESULTS: 1
number of sota per dataset/metric:  3
####### Error\\ rate
Creating ratio df for  Error\\ rate ,  ContactDB - Grasp Contact Prediction benchmarking , ds_count= 1
###SOTA RESULTS: 2
number of sota per dataset/metric:  2
####### FDE\\ \\(in\\ world\\ coordinates\\)
Creating ratio df for  FDE\\ \\(in\\ world\\ coordinates\\) ,  Stanford Drone - Trajectory Prediction benchmarking , ds_count= 1
###SOTA RESULTS: 1
number of sota per dataset/metric:  1
####### ADE\\ \\(in\\ world\\ coordinates\\)
Creating rati


The default value of regex will change from True to False in a future version. In addition, single character regular expressions will*not* be treated as literal strings when regex=True.



https://identifiers.org/ito:ITO_00115
Number of metrics:  164
####### Accuracy
Creating ratio df for  Accuracy ,  UWA3D - Skeleton Based Action Recognition benchmarking , ds_count= 1



Pandas doesn't allow columns to be created via a new attribute name - see https://pandas.pydata.org/pandas-docs/stable/indexing.html#attribute-access



###SOTA RESULTS: 3
Creating ratio df for  Accuracy ,  CAD-120 - Skeleton Based Action Recognition benchmarking , ds_count= 1
###SOTA RESULTS: 2
Creating ratio df for  Accuracy ,  Office-Home - Unsupervised Domain Adaptation benchmarking , ds_count= 1
###SOTA RESULTS: 3
Creating ratio df for  Accuracy ,  ImageNet - 0-Shot - Few-Shot Image Classification benchmarking , ds_count= 1
###SOTA RESULTS: 2
Creating ratio df for  Accuracy ,  Olympic-to-HMDBsmall - Domain Adaptation benchmarking , ds_count= 1
###SOTA RESULTS: 3
Creating ratio df for  Accuracy ,  HMDBsmall-to-UCF - Domain Adaptation benchmarking , ds_count= 1
###SOTA RESULTS: 3
Creating ratio df for  Accuracy ,  UCF-to-Olympic - Domain Adaptation benchmarking , ds_count= 1
###SOTA RESULTS: 2
Creating ratio df for  Accuracy ,  UCF-to-HMDBsmall - Domain Adaptation benchmarking , ds_count= 1
###SOTA RESULTS: 2
Creating ratio df for  Accuracy ,  UT-Kinect - Skeleton Based Action Recognition benchmarking , ds_count= 1
###SOTA RESULTS: 

null
Creating ratio df for  Accuracy ,  FC100 5-way (5-shot) - Few-Shot Image Classification benchmarking , ds_count= 1
null
Creating ratio df for  Accuracy ,  FC100 5-way (1-shot) - Few-Shot Image Classification benchmarking , ds_count= 1
null
Creating ratio df for  Accuracy ,  Mini-ImageNet - 1-Shot Learning - Few-Shot Image Classification benchmarking , ds_count= 1
###SOTA RESULTS: 3
Creating ratio df for  Accuracy ,  ImageNet - Neural Architecture Search benchmarking , ds_count= 1
###SOTA RESULTS: 7
Creating ratio df for  Accuracy ,  Potsdam-3 - Unsupervised Semantic Segmentation benchmarking , ds_count= 1
###SOTA RESULTS: 1
Creating ratio df for  Accuracy ,  COCO-Stuff-15 - Unsupervised Semantic Segmentation benchmarking , ds_count= 1
###SOTA RESULTS: 1
Creating ratio df for  Accuracy ,  Potsdam - Unsupervised Semantic Segmentation benchmarking , ds_count= 1
###SOTA RESULTS: 1
Creating ratio df for  Accuracy ,  COCO-Stuff-3 - Unsupervised Semantic Segmentation benchmarking , ds_co

null
####### Accuracy\\ \\(CS\\)
Creating ratio df for  Accuracy\\ \\(CS\\) ,  NTU RGB+D - Skeleton Based Action Recognition benchmarking , ds_count= 1
null
Creating ratio df for  Accuracy\\ \\(CS\\) ,  NTU RGB+D - Action Recognition benchmarking , ds_count= 1
null
Creating ratio df for  Accuracy\\ \\(CS\\) ,  Varying-view RGB-D Action-Skeleton - Skeleton Based Action Recognition benchmarking , ds_count= 1
###SOTA RESULTS: 4
number of sota per dataset/metric:  4
####### Average\\ Accuracy
Creating ratio df for  Average\\ Accuracy ,  VIRAT Ground 2.0 - Action Recognition benchmarking , ds_count= 1
###SOTA RESULTS: 1
Creating ratio df for  Average\\ Accuracy ,  Office-Caltech - Domain Adaptation benchmarking , ds_count= 1
###SOTA RESULTS: 4
Creating ratio df for  Average\\ Accuracy ,  Office-31 - Domain Adaptation benchmarking , ds_count= 1
###SOTA RESULTS: 4
Creating ratio df for  Average\\ Accuracy ,  ScanNet - Scene Segmentation benchmarking , ds_count= 1
###SOTA RESULTS: 2
Creating r

###SOTA RESULTS: 1
Creating ratio df for  Validation\\ mIoU ,  Cityscapes 2% labeled - Semi-Supervised Semantic Segmentation benchmarking , ds_count= 1
###SOTA RESULTS: 1
Creating ratio df for  Validation\\ mIoU ,  PASCAL Context 12.5% labeled - Semi-Supervised Semantic Segmentation benchmarking , ds_count= 1
###SOTA RESULTS: 1
Creating ratio df for  Validation\\ mIoU ,  Cityscapes 5% labeled - Semi-Supervised Semantic Segmentation benchmarking , ds_count= 1
###SOTA RESULTS: 1
number of sota per dataset/metric:  23
####### Accuracy\\ \\(RGB\\+pose\\)
Creating ratio df for  Accuracy\\ \\(RGB\\+pose\\) ,  J-HMDB - Skeleton Based Action Recognition benchmarking , ds_count= 1
###SOTA RESULTS: 6
number of sota per dataset/metric:  6
####### Time\\ \\(ms\\)
Creating ratio df for  Time\\ \\(ms\\) ,  Cityscapes test - Real-Time Semantic Segmentation benchmarking , ds_count= 1
###SOTA RESULTS: 1
Creating ratio df for  Time\\ \\(ms\\) ,  CamVid - Real-Time Semantic Segmentation benchmarking , ds

###SOTA RESULTS: 3
Creating ratio df for  mAP ,  ActEV - Activity Prediction benchmarking , ds_count= 1
###SOTA RESULTS: 1
number of sota per dataset/metric:  24
####### Accuracy\\ \\(Cross\\-Subject\\)
Creating ratio df for  Accuracy\\ \\(Cross\\-Subject\\) ,  NTU RGB+D 120 - Skeleton Based Action Recognition benchmarking , ds_count= 1
null
Creating ratio df for  Accuracy\\ \\(Cross\\-Subject\\) ,  NTU RGB+D 120 - Action Recognition benchmarking , ds_count= 1
null
####### Accuracy\\ \\(Cross\\-Setup\\)
Creating ratio df for  Accuracy\\ \\(Cross\\-Setup\\) ,  NTU RGB+D 120 - Skeleton Based Action Recognition benchmarking , ds_count= 1
null
Creating ratio df for  Accuracy\\ \\(Cross\\-Setup\\) ,  NTU RGB+D 120 - Action Recognition benchmarking , ds_count= 1
null
####### Accuracy\\ \\(CV\\ I\\)
Creating ratio df for  Accuracy\\ \\(CV\\ I\\) ,  Varying-view RGB-D Action-Skeleton - Skeleton Based Action Recognition benchmarking , ds_count= 1
###SOTA RESULTS: 4
number of sota per dataset/me

###SOTA RESULTS: 5
number of sota per dataset/metric:  5
####### mAcc
Creating ratio df for  mAcc ,  S3DIS Area5 - Semantic Segmentation benchmarking , ds_count= 1
###SOTA RESULTS: 5
Creating ratio df for  mAcc ,  S3DIS - Semantic Segmentation benchmarking , ds_count= 1
###SOTA RESULTS: 5
Creating ratio df for  mAcc ,  S3DIS - 3D Semantic Segmentation benchmarking , ds_count= 1
###SOTA RESULTS: 1
number of sota per dataset/metric:  11
####### oAcc
Creating ratio df for  oAcc ,  S3DIS - Semantic Segmentation benchmarking , ds_count= 1
###SOTA RESULTS: 4
Creating ratio df for  oAcc ,  S3DIS Area5 - Semantic Segmentation benchmarking , ds_count= 1
###SOTA RESULTS: 1
Creating ratio df for  oAcc ,  Semantic3D - Semantic Segmentation benchmarking , ds_count= 1
###SOTA RESULTS: 2
number of sota per dataset/metric:  7
####### Test\\ Score
Creating ratio df for  Test\\ Score ,  ADE20K - Semantic Segmentation benchmarking , ds_count= 1
###SOTA RESULTS: 5
number of sota per dataset/metric:  5
###

Creating ratio df for  rank\\-1 ,  Market to Duke - Unsupervised Domain Adaptation benchmarking , ds_count= 1
###SOTA RESULTS: 3
Creating ratio df for  rank\\-1 ,  Duke to Market - Unsupervised Domain Adaptation benchmarking , ds_count= 1
###SOTA RESULTS: 3
Creating ratio df for  rank\\-1 ,  Duke to MSMT - Unsupervised Domain Adaptation benchmarking , ds_count= 1
###SOTA RESULTS: 3
Creating ratio df for  rank\\-1 ,  Market to MSMT - Unsupervised Domain Adaptation benchmarking , ds_count= 1
###SOTA RESULTS: 3
number of sota per dataset/metric:  12
####### rank\\-5
Creating ratio df for  rank\\-5 ,  Market to Duke - Unsupervised Domain Adaptation benchmarking , ds_count= 1
###SOTA RESULTS: 3
Creating ratio df for  rank\\-5 ,  Duke to Market - Unsupervised Domain Adaptation benchmarking , ds_count= 1
###SOTA RESULTS: 3
Creating ratio df for  rank\\-5 ,  Market to MSMT - Unsupervised Domain Adaptation benchmarking , ds_count= 1
###SOTA RESULTS: 2
Creating ratio df for  rank\\-5 ,  Duke to 

###SOTA RESULTS: 2
number of sota per dataset/metric:  2
####### MACs
Creating ratio df for  MACs ,  ImageNet - Neural Architecture Search benchmarking , ds_count= 1
###SOTA RESULTS: 2
number of sota per dataset/metric:  2
####### NLL
Creating ratio df for  NLL ,  CIFAR-10 - Density Estimation benchmarking , ds_count= 1
###SOTA RESULTS: 1
Creating ratio df for  NLL ,  UCI MINIBOONE - Density Estimation benchmarking , ds_count= 1
###SOTA RESULTS: 1
Creating ratio df for  NLL ,  BSDS300 - Density Estimation benchmarking , ds_count= 1
###SOTA RESULTS: 0
Creating ratio df for  NLL ,  UCI HEPMASS - Density Estimation benchmarking , ds_count= 1
###SOTA RESULTS: 1
Creating ratio df for  NLL ,  UCI POWER - Density Estimation benchmarking , ds_count= 1
###SOTA RESULTS: 0
Creating ratio df for  NLL ,  MNIST - Density Estimation benchmarking , ds_count= 1
###SOTA RESULTS: 2
Creating ratio df for  NLL ,  Caltech-101 - Density Estimation benchmarking , ds_count= 1
###SOTA RESULTS: 1
Creating ratio 

###SOTA RESULTS: 1
Creating ratio df for  P\\-at\\-5 ,  EUR-Lex - Multi-Label Text Classification benchmarking , ds_count= 1
###SOTA RESULTS: 2
Creating ratio df for  P\\-at\\-5 ,  Wiki-30K - Multi-Label Text Classification benchmarking , ds_count= 1
###SOTA RESULTS: 1
Creating ratio df for  P\\-at\\-5 ,  Kan-Shan Cup - Multi-Label Text Classification benchmarking , ds_count= 1
###SOTA RESULTS: 1
number of sota per dataset/metric:  6
####### P\\-at\\-3
Creating ratio df for  P\\-at\\-3 ,  AAPD - Multi-Label Text Classification benchmarking , ds_count= 1
###SOTA RESULTS: 1
Creating ratio df for  P\\-at\\-3 ,  Amazon-12K - Multi-Label Text Classification benchmarking , ds_count= 1
###SOTA RESULTS: 1
Creating ratio df for  P\\-at\\-3 ,  Kan-Shan Cup - Multi-Label Text Classification benchmarking , ds_count= 1
###SOTA RESULTS: 1
Creating ratio df for  P\\-at\\-3 ,  Wiki-30K - Multi-Label Text Classification benchmarking , ds_count= 1
###SOTA RESULTS: 1
Creating ratio df for  P\\-at\\-3 ,  


The default value of regex will change from True to False in a future version. In addition, single character regular expressions will*not* be treated as literal strings when regex=True.



https://identifiers.org/ito:ITO_00126
Number of metrics:  94
####### Accuracy
Creating ratio df for  Accuracy ,  MIT-BIH AF - Atrial Fibrillation Detection benchmarking , ds_count= 1



Pandas doesn't allow columns to be created via a new attribute name - see https://pandas.pydata.org/pandas-docs/stable/indexing.html#attribute-access



###SOTA RESULTS: 3
Creating ratio df for  Accuracy ,  PTB dataset, ECG lead II - Myocardial Infarction Detection benchmarking , ds_count= 1
###SOTA RESULTS: 2
Creating ratio df for  Accuracy ,  SEED-IV - Electroencephalogram (EEG) process benchmarking , ds_count= 1
null
Creating ratio df for  Accuracy ,  SEED - Electroencephalogram (EEG) process benchmarking , ds_count= 1
null
Creating ratio df for  Accuracy ,  JIGSAWS - Surgical Skills Evaluation benchmarking , ds_count= 1
###SOTA RESULTS: 2
Creating ratio df for  Accuracy ,  MISTIC-SIL - Surgical Skills Evaluation benchmarking , ds_count= 1
###SOTA RESULTS: 1
Creating ratio df for  Accuracy ,  Sleep-EDF - Sleep Stage Detection benchmarking , ds_count= 1
###SOTA RESULTS: 2
Creating ratio df for  Accuracy ,  ALS EMG (University of Copenhagen) - ALS Detection benchmarking , ds_count= 1
null
Creating ratio df for  Accuracy ,  LIDC-IDRI - Lung Nodule Classification benchmarking , ds_count= 1
###SOTA RESULTS: 2
Creating ratio df for  Accur

###SOTA RESULTS: 2
Creating ratio df for  Dice\\ Score ,  ISLES-2015 - Lesion Segmentation benchmarking , ds_count= 1
###SOTA RESULTS: 1
Creating ratio df for  Dice\\ Score ,  BRATS-2015 - Brain Tumor Segmentation benchmarking , ds_count= 1
###SOTA RESULTS: 2
Creating ratio df for  Dice\\ Score ,  PROMISE 2012 - Volumetric Medical Image Segmentation benchmarking , ds_count= 1
###SOTA RESULTS: 1
Creating ratio df for  Dice\\ Score ,  TCIA Pancreas-CT - 3D Medical Imaging Segmentation benchmarking , ds_count= 1
###SOTA RESULTS: 1
Creating ratio df for  Dice\\ Score ,  BRATS-2017 val - Brain Tumor Segmentation benchmarking , ds_count= 1
###SOTA RESULTS: 2
Creating ratio df for  Dice\\ Score ,  BRATS-2014 - Brain Tumor Segmentation benchmarking , ds_count= 1
###SOTA RESULTS: 1
Creating ratio df for  Dice\\ Score ,  iSEG 2017 Challenge - Infant Brain MRI Segmentation benchmarking , ds_count= 1
###SOTA RESULTS: 1
Creating ratio df for  Dice\\ Score ,  iSEG 2017 Challenge - Medical Image Segm

###SOTA RESULTS: 1
Creating ratio df for  IoU ,  Cell - Medical Image Segmentation benchmarking , ds_count= 1
###SOTA RESULTS: 1
number of sota per dataset/metric:  5
####### Mean\\ IoU
Creating ratio df for  Mean\\ IoU ,  PhC-U373 - Cell Segmentation benchmarking , ds_count= 1
###SOTA RESULTS: 1
Creating ratio df for  Mean\\ IoU ,  DIC-HeLa - Cell Segmentation benchmarking , ds_count= 1
###SOTA RESULTS: 1
Creating ratio df for  Mean\\ IoU ,  ISIC 2017 - Lesion Segmentation benchmarking , ds_count= 1
###SOTA RESULTS: 1
number of sota per dataset/metric:  3
####### F1\\-score
Creating ratio df for  F1\\-score ,  CRAG - Colorectal Gland Segmentation: benchmarking , ds_count= 1
###SOTA RESULTS: 2
Creating ratio df for  F1\\-score ,  Cell17 - Nuclear Segmentation benchmarking , ds_count= 1
###SOTA RESULTS: 3
number of sota per dataset/metric:  5
####### F1\\ score
Creating ratio df for  F1\\ score ,  DRIVE - Retinal Vessel Segmentation benchmarking , ds_count= 1
###SOTA RESULTS: 3
Creating

Creating ratio df for  Acc ,  OCT2017 - Retinal OCT Disease Classification benchmarking , ds_count= 1
###SOTA RESULTS: 3
Creating ratio df for  Acc ,  Srinivasan2014 - Retinal OCT Disease Classification benchmarking , ds_count= 1
###SOTA RESULTS: 2
Creating ratio df for  Acc ,  LIDC-IDRI - Lung Nodule Classification benchmarking , ds_count= 1
###SOTA RESULTS: 1
number of sota per dataset/metric:  6
####### Sensitivity
Creating ratio df for  Sensitivity ,  OCT2017 - Retinal OCT Disease Classification benchmarking , ds_count= 1
###SOTA RESULTS: 3
Creating ratio df for  Sensitivity ,  CHF database - Congestive Heart Failure detection benchmarking , ds_count= 1
###SOTA RESULTS: 1
number of sota per dataset/metric:  4
####### Q8
Creating ratio df for  Q8 ,  CB513 - Protein Secondary Structure Prediction benchmarking , ds_count= 1
###SOTA RESULTS: 2
Creating ratio df for  Q8 ,  CullPDB - Protein Secondary Structure Prediction benchmarking , ds_count= 1
###SOTA RESULTS: 1
Creating ratio df fo

###SOTA RESULTS: 1
Creating ratio df for  mIoU ,  Montgomery County - Lung Nodule Segmentation benchmarking , ds_count= 1
###SOTA RESULTS: 1
Creating ratio df for  mIoU ,  DRIVE - Retinal Vessel Segmentation benchmarking , ds_count= 1
###SOTA RESULTS: 1
number of sota per dataset/metric:  7
####### GPU\\ sec
Creating ratio df for  GPU\\ sec ,  OASIS+ADIBE+ADHD200+MCIC+PPMI+HABS+HarvardGSP - Diffeomorphic Medical Image Registration benchmarking , ds_count= 1
null
####### Accuracy\\(10\\-fold\\)
Creating ratio df for  Accuracy\\(10\\-fold\\) ,  LIDC-IDRI - Lung Nodule Classification benchmarking , ds_count= 1
###SOTA RESULTS: 1
number of sota per dataset/metric:  1
####### F1\\ \\(Set\\)
Creating ratio df for  F1\\ \\(Set\\) ,  \"Cardiologist-level\" 12-rhythm ECG dataset - Arrhythmia Detection benchmarking , ds_count= 1
###SOTA RESULTS: 1
number of sota per dataset/metric:  1
####### VInfo
Creating ratio df for  VInfo ,  ISBI 2012 EM Segmentation - Medical Image Segmentation benchmarkin


The default value of regex will change from True to False in a future version. In addition, single character regular expressions will*not* be treated as literal strings when regex=True.



https://identifiers.org/ito:ITO_00131
Number of metrics:  70
####### MAE\\ \\(10%\\ missing\\)
Creating ratio df for  MAE\\ \\(10%\\ missing\\) ,  UCI localization data - Multivariate Time Series Imputation benchmarking , ds_count= 1



Pandas doesn't allow columns to be created via a new attribute name - see https://pandas.pydata.org/pandas-docs/stable/indexing.html#attribute-access



###SOTA RESULTS: 1
number of sota per dataset/metric:  1
####### MAE\\ \\(10%\\ of\\ data\\ as\\ GT\\)
Creating ratio df for  MAE\\ \\(10%\\ of\\ data\\ as\\ GT\\) ,  PhysioNet Challenge 2012 - Multivariate Time Series Imputation benchmarking , ds_count= 1
###SOTA RESULTS: 1
number of sota per dataset/metric:  1
####### MAE\\ \\(PM2\\.5\\)
Creating ratio df for  MAE\\ \\(PM2\\.5\\) ,  Beijing Air Quality - Multivariate Time Series Imputation benchmarking , ds_count= 1
###SOTA RESULTS: 1
number of sota per dataset/metric:  1
####### MSE\\ \\(10%\\ missing\\)
Creating ratio df for  MSE\\ \\(10%\\ missing\\) ,  KDD CUP Challenge 2018 - Multivariate Time Series Imputation benchmarking , ds_count= 1
###SOTA RESULTS: 1
number of sota per dataset/metric:  1
####### Accuracy
Creating ratio df for  Accuracy ,  UWA3D - Skeleton Based Action Recognition benchmarking , ds_count= 1
###SOTA RESULTS: 3
Creating ratio df for  Accuracy ,  CAD-120 - Skeleton Based Action Recognition benchmarking , ds_co

###SOTA RESULTS: 1
number of sota per dataset/metric:  1
####### Accuracy\\ \\(Cross\\-Setup\\)
Creating ratio df for  Accuracy\\ \\(Cross\\-Setup\\) ,  NTU RGB+D 120 - Skeleton Based Action Recognition benchmarking , ds_count= 1
null
####### Accuracy\\ \\(CV\\ I\\)
Creating ratio df for  Accuracy\\ \\(CV\\ I\\) ,  Varying-view RGB-D Action-Skeleton - Skeleton Based Action Recognition benchmarking , ds_count= 1
###SOTA RESULTS: 4
number of sota per dataset/metric:  4
####### Accuracy\\ \\(CV\\ II\\)
Creating ratio df for  Accuracy\\ \\(CV\\ II\\) ,  Varying-view RGB-D Action-Skeleton - Skeleton Based Action Recognition benchmarking , ds_count= 1
###SOTA RESULTS: 5
number of sota per dataset/metric:  5
####### Accuracy\\ \\(AV\\ I\\)
Creating ratio df for  Accuracy\\ \\(AV\\ I\\) ,  Varying-view RGB-D Action-Skeleton - Skeleton Based Action Recognition benchmarking , ds_count= 1
###SOTA RESULTS: 5
number of sota per dataset/metric:  5
####### Accuracy\\ \\(AV\\ II\\)
Creating ratio df f

###SOTA RESULTS: 1
number of sota per dataset/metric:  1
####### NMI\\ \\(physiology_12_hours\\)
Creating ratio df for  NMI\\ \\(physiology_12_hours\\) ,  eICU Collaborative Research Database - Time Series Clustering benchmarking , ds_count= 1
###SOTA RESULTS: 1
number of sota per dataset/metric:  1
####### NMI\\ \\(physiology_24_hours\\)
Creating ratio df for  NMI\\ \\(physiology_24_hours\\) ,  eICU Collaborative Research Database - Time Series Clustering benchmarking , ds_count= 1
###SOTA RESULTS: 1
number of sota per dataset/metric:  1
####### mse\\ \\(10\\^\\-3\\)
Creating ratio df for  mse\\ \\(10\\^\\-3\\) ,  PhysioNet Challenge 2012 - Multivariate Time Series Imputation benchmarking , ds_count= 1
###SOTA RESULTS: 2
Creating ratio df for  mse\\ \\(10\\^\\-3\\) ,  PhysioNet Challenge 2012 - Multivariate Time Series Forecasting benchmarking , ds_count= 1
###SOTA RESULTS: 1
number of sota per dataset/metric:  3
####### MSE\\ stdev
Creating ratio df for  MSE\\ stdev ,  PhysioNet Chal


The default value of regex will change from True to False in a future version. In addition, single character regular expressions will*not* be treated as literal strings when regex=True.



https://identifiers.org/ito:ITO_00137
Number of metrics:  42
####### Accuracy
Creating ratio df for  Accuracy ,  PROTEINS - Graph Classification benchmarking , ds_count= 1



Pandas doesn't allow columns to be created via a new attribute name - see https://pandas.pydata.org/pandas-docs/stable/indexing.html#attribute-access



###SOTA RESULTS: 10
Creating ratio df for  Accuracy ,  HIV-fMRI-77  - Graph Classification benchmarking , ds_count= 1
###SOTA RESULTS: 4
Creating ratio df for  Accuracy ,  HIV-DTI-77 - Graph Classification benchmarking , ds_count= 1
###SOTA RESULTS: 3
Creating ratio df for  Accuracy ,  BP-fMRI-97 - Graph Classification benchmarking , ds_count= 1
###SOTA RESULTS: 3
Creating ratio df for  Accuracy ,  BlogCatalog - Node Classification benchmarking , ds_count= 1
###SOTA RESULTS: 4
Creating ratio df for  Accuracy ,  Wikipedia - Node Classification benchmarking , ds_count= 1
###SOTA RESULTS: 3
Creating ratio df for  Accuracy ,  NEURON-BINARY - Graph Classification benchmarking , ds_count= 1
###SOTA RESULTS: 2
Creating ratio df for  Accuracy ,  NEURON-Average - Graph Classification benchmarking , ds_count= 1
###SOTA RESULTS: 2
Creating ratio df for  Accuracy ,  NEURON-MULTI - Graph Classification benchmarking , ds_count= 1
###SOTA RESULTS: 2
Creating ratio df for  Accuracy ,  FRANKENSTEIN - G

Creating ratio df for  Accuracy ,  Cancer - Graph Classification benchmarking , ds_count= 1
###SOTA RESULTS: 1
Creating ratio df for  Accuracy ,  Wine - Graph Classification benchmarking , ds_count= 1
###SOTA RESULTS: 1
Creating ratio df for  Accuracy ,  Digits - Graph Classification benchmarking , ds_count= 1
###SOTA RESULTS: 1
Creating ratio df for  Accuracy ,  Cora - Graph Classification benchmarking , ds_count= 1
###SOTA RESULTS: 1
Creating ratio df for  Accuracy ,  20NEWS - Graph Classification benchmarking , ds_count= 1
###SOTA RESULTS: 1
Creating ratio df for  Accuracy ,  Citeseer - Graph Classification benchmarking , ds_count= 1
###SOTA RESULTS: 1
Creating ratio df for  Accuracy ,  Citeseer (biased evaluation) - Link Prediction benchmarking , ds_count= 1
null
Creating ratio df for  Accuracy ,  Cora (biased evaluation) - Link Prediction benchmarking , ds_count= 1
null
Creating ratio df for  Accuracy ,  Pubmed (biased evaluation) - Link Prediction benchmarking , ds_count= 1
null


###SOTA RESULTS: 1
Creating ratio df for  MRR ,  FB15k-237 - Knowledge Graph Completion benchmarking , ds_count= 1
###SOTA RESULTS: 1
Creating ratio df for  MRR ,  NELL-995 - Link Prediction benchmarking , ds_count= 1
###SOTA RESULTS: 1
Creating ratio df for  MRR ,  ICEWS14 - Link Prediction benchmarking , ds_count= 1
###SOTA RESULTS: 2
Creating ratio df for  MRR ,  ICEWS05-15 - Link Prediction benchmarking , ds_count= 1
###SOTA RESULTS: 2
Creating ratio df for  MRR ,  YAGO15k - Link Prediction benchmarking , ds_count= 1
###SOTA RESULTS: 2
number of sota per dataset/metric:  34
####### MR
Creating ratio df for  MR ,  FB15k - Link Prediction benchmarking , ds_count= 1
###SOTA RESULTS: 2
Creating ratio df for  MR ,  WN18 - Link Prediction benchmarking , ds_count= 1
###SOTA RESULTS: 3
Creating ratio df for  MR ,  FB15k (filtered) - Link Prediction benchmarking , ds_count= 1
null
Creating ratio df for  MR ,  WN18 (filtered) - Link Prediction benchmarking , ds_count= 1
null
Creating ratio d

###SOTA RESULTS: 1
Creating ratio df for  AUC ,  Gnutella - Link Prediction benchmarking , ds_count= 1
###SOTA RESULTS: 1
Creating ratio df for  AUC ,  Cit-HepPH - Link Prediction benchmarking , ds_count= 1
###SOTA RESULTS: 1
Creating ratio df for  AUC ,  Wiki-Vote - Link Prediction benchmarking , ds_count= 1
###SOTA RESULTS: 1
Creating ratio df for  AUC ,  Douban - Link Prediction benchmarking , ds_count= 1
###SOTA RESULTS: 2
Creating ratio df for  AUC ,  IMDb - Link Prediction benchmarking , ds_count= 1
###SOTA RESULTS: 1
Creating ratio df for  AUC ,  DBLP - Link Prediction benchmarking , ds_count= 1
###SOTA RESULTS: 2
Creating ratio df for  AUC ,  Yelp - Link Prediction benchmarking , ds_count= 1
###SOTA RESULTS: 2
Creating ratio df for  AUC ,  MIT - Link Prediction benchmarking , ds_count= 1
###SOTA RESULTS: 1
Creating ratio df for  AUC ,  Epinions - Link Sign Prediction benchmarking , ds_count= 1
###SOTA RESULTS: 1
Creating ratio df for  AUC ,  Bitcoin-Alpha - Link Sign Prediction

####### PR\\ AUC
Creating ratio df for  PR\\ AUC ,  YouTube - Link Prediction benchmarking , ds_count= 1
###SOTA RESULTS: 1
Creating ratio df for  PR\\ AUC ,  Alibaba - Link Prediction benchmarking , ds_count= 1
###SOTA RESULTS: 1
Creating ratio df for  PR\\ AUC ,  Alibaba-S - Link Prediction benchmarking , ds_count= 1
###SOTA RESULTS: 1
Creating ratio df for  PR\\ AUC ,  Amazon - Link Prediction benchmarking , ds_count= 1
###SOTA RESULTS: 1
Creating ratio df for  PR\\ AUC ,  Twitter - Link Prediction benchmarking , ds_count= 1
###SOTA RESULTS: 1
number of sota per dataset/metric:  5
####### Mean\\ Accuracy
Creating ratio df for  Mean\\ Accuracy ,  MUTAG - Graph Classification benchmarking , ds_count= 1
###SOTA RESULTS: 1
number of sota per dataset/metric:  1
####### Macro\\-F1\\ \\(60%\\ training\\ data\\)
Creating ratio df for  Macro\\-F1\\ \\(60%\\ training\\ data\\) ,  DBLP (PACT) 14k - Heterogeneous Node Classification benchmarking , ds_count= 1
null



The default value of regex will change from True to False in a future version. In addition, single character regular expressions will*not* be treated as literal strings when regex=True.



https://identifiers.org/ito:ITO_00141
Number of metrics:  250
####### F1
Creating ratio df for  F1 ,  ACL-ARC - Citation Intent Classification benchmarking , ds_count= 1



Pandas doesn't allow columns to be created via a new attribute name - see https://pandas.pydata.org/pandas-docs/stable/indexing.html#attribute-access



###SOTA RESULTS: 4
Creating ratio df for  F1 ,  MSRP - Paraphrase Identification benchmarking , ds_count= 1
###SOTA RESULTS: 2
Creating ratio df for  F1 ,  MRPC - Semantic Textual Similarity benchmarking , ds_count= 1
###SOTA RESULTS: 1
Creating ratio df for  F1 ,  WebQuestions - Question Answering benchmarking , ds_count= 1
###SOTA RESULTS: 3
Creating ratio df for  F1 ,  SciERC - Named Entity Recognition benchmarking , ds_count= 1
###SOTA RESULTS: 3
Creating ratio df for  F1 ,  SensEval 2 Lexical Sample - Word Sense Disambiguation benchmarking , ds_count= 1
###SOTA RESULTS: 3
Creating ratio df for  F1 ,  SensEval 3 Lexical Sample - Word Sense Disambiguation benchmarking , ds_count= 1
###SOTA RESULTS: 2
Creating ratio df for  F1 ,  SimpleQuestions - Question Answering benchmarking , ds_count= 1
###SOTA RESULTS: 1
Creating ratio df for  F1 ,  MSRA - Chinese Word Segmentation benchmarking , ds_count= 1
###SOTA RESULTS: 1
Creating ratio df for  F1 ,  Google Dataset - Sentence Compression 

###SOTA RESULTS: 2
Creating ratio df for  F1 ,  Reuters-21578 - Document Classification benchmarking , ds_count= 1
###SOTA RESULTS: 2
Creating ratio df for  F1 ,  JNLPBA - Relation Extraction benchmarking , ds_count= 1
###SOTA RESULTS: 1
Creating ratio df for  F1 ,  SciERC - Relation Extraction benchmarking , ds_count= 1
###SOTA RESULTS: 2
Creating ratio df for  F1 ,  ScienceCite - Sentence Classification benchmarking , ds_count= 1
###SOTA RESULTS: 2
Creating ratio df for  F1 ,  Paper Field - Sentence Classification benchmarking , ds_count= 1
###SOTA RESULTS: 1
Creating ratio df for  F1 ,  SciCite - Citation Intent Classification benchmarking , ds_count= 1
###SOTA RESULTS: 1
Creating ratio df for  F1 ,  WetLab - Named Entity Recognition benchmarking , ds_count= 1
###SOTA RESULTS: 1
Creating ratio df for  F1 ,  WLPC - Relation Extraction benchmarking , ds_count= 1
###SOTA RESULTS: 1
Creating ratio df for  F1 ,  WLPC - Named Entity Recognition benchmarking , ds_count= 1
###SOTA RESULTS: 

###SOTA RESULTS: 4
Creating ratio df for  Accuracy ,  Amazon Review Full - Sentiment Analysis benchmarking , ds_count= 1
###SOTA RESULTS: 2
Creating ratio df for  Accuracy ,  Amazon Review Polarity - Sentiment Analysis benchmarking , ds_count= 1
###SOTA RESULTS: 2
Creating ratio df for  Accuracy ,  MR - Sentiment Analysis benchmarking , ds_count= 1
###SOTA RESULTS: 3
Creating ratio df for  Accuracy ,  Quora Question Pairs - Paraphrase Identification benchmarking , ds_count= 1
###SOTA RESULTS: 6
Creating ratio df for  Accuracy ,  MSRVTT-QA - Visual Question Answering benchmarking , ds_count= 1
###SOTA RESULTS: 4
Creating ratio df for  Accuracy ,  MSVD-QA - Visual Question Answering benchmarking , ds_count= 1
###SOTA RESULTS: 4
Creating ratio df for  Accuracy ,  SARC (all-bal) - Sarcasm Detection benchmarking , ds_count= 1
null
Creating ratio df for  Accuracy ,  SARC (pol-bal) - Sarcasm Detection benchmarking , ds_count= 1
null
Creating ratio df for  Accuracy ,  RumourEval - Stance Detec

###SOTA RESULTS: 1
Creating ratio df for  Accuracy ,  CODAH - Question Answering benchmarking , ds_count= 1
###SOTA RESULTS: 1
Creating ratio df for  Accuracy ,  Yelp-14 - Document Classification benchmarking , ds_count= 1
###SOTA RESULTS: 1
Creating ratio df for  Accuracy ,  XNLI Chinese - Natural Language Inference benchmarking , ds_count= 1
###SOTA RESULTS: 3
Creating ratio df for  Accuracy ,  XNLI Chinese Dev - Natural Language Inference benchmarking , ds_count= 1
###SOTA RESULTS: 3
Creating ratio df for  Accuracy ,  Yelp-2 - Text Classification benchmarking , ds_count= 1
###SOTA RESULTS: 2
Creating ratio df for  Accuracy ,  FIGER - Entity Linking benchmarking , ds_count= 1
###SOTA RESULTS: 1
Creating ratio df for  Accuracy ,  RTE - Natural Language Inference benchmarking , ds_count= 1
###SOTA RESULTS: 4
Creating ratio df for  Accuracy ,  QNLI - Natural Language Inference benchmarking , ds_count= 1
###SOTA RESULTS: 4
Creating ratio df for  Accuracy ,  IMDb-M - Document Classificati

###SOTA RESULTS: 3
Creating ratio df for  BLEU\\ score ,  WMT2016 German-English - Machine Translation benchmarking , ds_count= 1
###SOTA RESULTS: 2
Creating ratio df for  BLEU\\ score ,  WMT2016 Romanian-English - Machine Translation benchmarking , ds_count= 1
###SOTA RESULTS: 2
Creating ratio df for  BLEU\\ score ,  WMT2016 Czech-English - Machine Translation benchmarking , ds_count= 1
###SOTA RESULTS: 1
Creating ratio df for  BLEU\\ score ,  WMT2016 Russian-English - Machine Translation benchmarking , ds_count= 1
###SOTA RESULTS: 1
Creating ratio df for  BLEU\\ score ,  WMT2016 English-Russian - Machine Translation benchmarking , ds_count= 1
###SOTA RESULTS: 1
Creating ratio df for  BLEU\\ score ,  IWSLT2015 Thai-English - Machine Translation benchmarking , ds_count= 1
###SOTA RESULTS: 1
Creating ratio df for  BLEU\\ score ,  IWSLT2014 German-English - Machine Translation benchmarking , ds_count= 1
###SOTA RESULTS: 4
Creating ratio df for  BLEU\\ score ,  IWSLT2015 English-German - 

###SOTA RESULTS: 1
Creating ratio df for  P\\-at\\-1 ,  Amazon-12K - Multi-Label Text Classification benchmarking , ds_count= 1
###SOTA RESULTS: 1
Creating ratio df for  P\\-at\\-1 ,  Kan-Shan Cup - Multi-Label Text Classification benchmarking , ds_count= 1
###SOTA RESULTS: 1
Creating ratio df for  P\\-at\\-1 ,  RCV1 - Text Classification benchmarking , ds_count= 1
###SOTA RESULTS: 1
number of sota per dataset/metric:  21
####### Mean\\ Error\\ Rate
Creating ratio df for  Mean\\ Error\\ Rate ,  bAbi - Question Answering benchmarking , ds_count= 1
###SOTA RESULTS: 2
number of sota per dataset/metric:  2
####### Accuracy\\ \\(trained\\ on\\ 10k\\)
Creating ratio df for  Accuracy\\ \\(trained\\ on\\ 10k\\) ,  bAbi - Question Answering benchmarking , ds_count= 1
###SOTA RESULTS: 2
number of sota per dataset/metric:  2
####### Accuracy\\ \\(trained\\ on\\ 1k\\)
Creating ratio df for  Accuracy\\ \\(trained\\ on\\ 1k\\) ,  bAbi - Question Answering benchmarking , ds_count= 1
###SOTA RESULTS: 

###SOTA RESULTS: 1
Creating ratio df for  ROUGE\\-1 ,  Debatepedia - Query-Based Extractive Summarization benchmarking , ds_count= 1
###SOTA RESULTS: 2
Creating ratio df for  ROUGE\\-1 ,  CNN / Daily Mail - Document Summarization benchmarking , ds_count= 1
###SOTA RESULTS: 5
Creating ratio df for  ROUGE\\-1 ,  Multi-News - Multi-Document Summarization benchmarking , ds_count= 1
###SOTA RESULTS: 1
Creating ratio df for  ROUGE\\-1 ,  RASG - Reader-Aware Summarization benchmarking , ds_count= 1
###SOTA RESULTS: 1
Creating ratio df for  ROUGE\\-1 ,  MTS - Timeline Summarization benchmarking , ds_count= 1
###SOTA RESULTS: 1
Creating ratio df for  ROUGE\\-1 ,  X-Sum - Text Summarization benchmarking , ds_count= 1
###SOTA RESULTS: 2
number of sota per dataset/metric:  32
####### ROUGE\\-2
Creating ratio df for  ROUGE\\-2 ,  DUC 2004 Task 1 - Text Summarization benchmarking , ds_count= 1
###SOTA RESULTS: 4
Creating ratio df for  ROUGE\\-2 ,  DUC 2004 Task 1 - Extractive Text Summarization benc

###SOTA RESULTS: 1
Creating ratio df for  BLEU ,  WebNLG Full - Data-to-Text Generation benchmarking , ds_count= 1
###SOTA RESULTS: 1
Creating ratio df for  BLEU ,  ViGGO - Data-to-Text Generation benchmarking , ds_count= 1
###SOTA RESULTS: 1
Creating ratio df for  BLEU ,  Cleaned E2E NLG Challenge - Data-to-Text Generation benchmarking , ds_count= 1
###SOTA RESULTS: 1
Creating ratio df for  BLEU ,  IWSLT2015 Chinese-English - Machine Translation benchmarking , ds_count= 1
###SOTA RESULTS: 1
number of sota per dataset/metric:  68
####### Micro\\ Precision
Creating ratio df for  Micro\\ Precision ,  TAC2010 - Entity Disambiguation benchmarking , ds_count= 1
###SOTA RESULTS: 2
Creating ratio df for  Micro\\ Precision ,  NLP-TDMS (Exp, arXiv only) - Scientific Results Extraction benchmarking , ds_count= 1
null
Creating ratio df for  Micro\\ Precision ,  PWC Leaderboards (restricted) - Scientific Results Extraction benchmarking , ds_count= 1
null
number of sota per dataset/metric:  2
#####

###SOTA RESULTS: 1
Creating ratio df for  Recall ,  FewRel - Relation Extraction benchmarking , ds_count= 1
###SOTA RESULTS: 1
Creating ratio df for  Recall ,  Open Entity - Entity Typing benchmarking , ds_count= 1
###SOTA RESULTS: 1
Creating ratio df for  Recall ,  20NEWS - Text Classification benchmarking , ds_count= 1
###SOTA RESULTS: 1
Creating ratio df for  Recall ,  SoSciSoCi - Named Entity Recognition benchmarking , ds_count= 1
###SOTA RESULTS: 1
number of sota per dataset/metric:  5
####### EM\\ \\(Quasar\\-T\\)
Creating ratio df for  EM\\ \\(Quasar\\-T\\) ,  Quasar - Open-Domain Question Answering benchmarking , ds_count= 1
###SOTA RESULTS: 2
number of sota per dataset/metric:  2
####### F1\\ \\(Quasar\\-T\\)
Creating ratio df for  F1\\ \\(Quasar\\-T\\) ,  Quasar - Open-Domain Question Answering benchmarking , ds_count= 1
###SOTA RESULTS: 3
number of sota per dataset/metric:  3
####### Joint
Creating ratio df for  Joint ,  Second dialogue state tracking challenge - Dialog Stat

###SOTA RESULTS: 4
number of sota per dataset/metric:  4
####### Sentiment
Creating ratio df for  Sentiment ,  Sentihood - Aspect-Based Sentiment Analysis benchmarking , ds_count= 1
###SOTA RESULTS: 3
number of sota per dataset/metric:  3
####### P\\-at\\-5
Creating ratio df for  P\\-at\\-5 ,  Music domain - Hypernym Discovery benchmarking , ds_count= 1
###SOTA RESULTS: 2
Creating ratio df for  P\\-at\\-5 ,  Medical domain - Hypernym Discovery benchmarking , ds_count= 1
###SOTA RESULTS: 3
Creating ratio df for  P\\-at\\-5 ,  General - Hypernym Discovery benchmarking , ds_count= 1
###SOTA RESULTS: 2
Creating ratio df for  P\\-at\\-5 ,  AAPD - Multi-Label Text Classification benchmarking , ds_count= 1
###SOTA RESULTS: 1
Creating ratio df for  P\\-at\\-5 ,  Amazon-12K - Multi-Label Text Classification benchmarking , ds_count= 1
###SOTA RESULTS: 1
Creating ratio df for  P\\-at\\-5 ,  Wiki-30K - Multi-Label Text Classification benchmarking , ds_count= 1
###SOTA RESULTS: 1
Creating ratio df 

Creating ratio df for  MAE\\ \\(Arousal\\) ,  SEMAINE - Emotion Recognition in Conversation benchmarking , ds_count= 1
###SOTA RESULTS: 1
number of sota per dataset/metric:  1
####### MAE\\ \\(Expectancy\\)
Creating ratio df for  MAE\\ \\(Expectancy\\) ,  SEMAINE - Emotion Recognition in Conversation benchmarking , ds_count= 1
###SOTA RESULTS: 2
number of sota per dataset/metric:  2
####### MAE\\ \\(Power\\)
Creating ratio df for  MAE\\ \\(Power\\) ,  SEMAINE - Emotion Recognition in Conversation benchmarking , ds_count= 1
###SOTA RESULTS: 2
number of sota per dataset/metric:  2
####### Weighted\\-F1
Creating ratio df for  Weighted\\-F1 ,  MELD - Emotion Recognition in Conversation benchmarking , ds_count= 1
###SOTA RESULTS: 4
Creating ratio df for  Weighted\\-F1 ,  IEMOCAP - Emotion Recognition in Conversation benchmarking , ds_count= 1
###SOTA RESULTS: 4
number of sota per dataset/metric:  8
####### UA
Creating ratio df for  UA ,  IEMOCAP - Multimodal Emotion Recognition benchmarking

Creating ratio df for  Target\\ Binary\\ F1 ,  MPQA - Fine-Grained Opinion Analysis benchmarking , ds_count= 1
###SOTA RESULTS: 2
number of sota per dataset/metric:  2
####### Mean\\ F1\\ \\(WSJ\\)
Creating ratio df for  Mean\\ F1\\ \\(WSJ\\) ,  PTB - Constituency Grammar Induction benchmarking , ds_count= 1
###SOTA RESULTS: 4
number of sota per dataset/metric:  4
####### Max\\ F1\\ \\(WSJ\\)
Creating ratio df for  Max\\ F1\\ \\(WSJ\\) ,  PTB - Constituency Grammar Induction benchmarking , ds_count= 1
###SOTA RESULTS: 6
number of sota per dataset/metric:  6
####### Exact\\ Span\\ F1
Creating ratio df for  Exact\\ Span\\ F1 ,  CoNLL 2000 - Chunking benchmarking , ds_count= 1
###SOTA RESULTS: 2
Creating ratio df for  Exact\\ Span\\ F1 ,  STM-corpus - Scientific Concept Extraction benchmarking , ds_count= 1
###SOTA RESULTS: 2
number of sota per dataset/metric:  4
####### nDCG\\-at\\-20
Creating ratio df for  nDCG\\-at\\-20 ,  TREC Robust04 - Ad-Hoc Information Retrieval benchmarking , ds_

###SOTA RESULTS: 1
number of sota per dataset/metric:  1
####### MAE
Creating ratio df for  MAE ,  CMU-MOSEI - Multimodal Sentiment Analysis benchmarking , ds_count= 1
###SOTA RESULTS: 1
number of sota per dataset/metric:  1
####### Dev
Creating ratio df for  Dev ,  SWAG - Common Sense Reasoning benchmarking , ds_count= 1
###SOTA RESULTS: 3
number of sota per dataset/metric:  3
####### In\\-domain
Creating ratio df for  In\\-domain ,  CoQA - Question Answering benchmarking , ds_count= 1
###SOTA RESULTS: 5
number of sota per dataset/metric:  5
####### Overall
Creating ratio df for  Overall ,  CoQA - Question Answering benchmarking , ds_count= 1
###SOTA RESULTS: 5
number of sota per dataset/metric:  5
####### Out\\-of\\-domain
Creating ratio df for  Out\\-of\\-domain ,  CoQA - Question Answering benchmarking , ds_count= 1
###SOTA RESULTS: 6
number of sota per dataset/metric:  6
####### F1\\-Score
Creating ratio df for  F1\\-Score ,  CoQA - Generative Question Answering benchmarking , ds_

Creating ratio df for  nDCG\\-at\\-5 ,  AAPD - Multi-Label Text Classification benchmarking , ds_count= 1
###SOTA RESULTS: 1
Creating ratio df for  nDCG\\-at\\-5 ,  Kan-Shan Cup - Multi-Label Text Classification benchmarking , ds_count= 1
###SOTA RESULTS: 1
Creating ratio df for  nDCG\\-at\\-5 ,  Wiki-30K - Multi-Label Text Classification benchmarking , ds_count= 1
###SOTA RESULTS: 1
Creating ratio df for  nDCG\\-at\\-5 ,  Amazon-12K - Multi-Label Text Classification benchmarking , ds_count= 1
###SOTA RESULTS: 1
Creating ratio df for  nDCG\\-at\\-5 ,  EUR-Lex - Multi-Label Text Classification benchmarking , ds_count= 1
###SOTA RESULTS: 2
Creating ratio df for  nDCG\\-at\\-5 ,  RCV1 - Text Classification benchmarking , ds_count= 1
###SOTA RESULTS: 1
number of sota per dataset/metric:  7
####### nDCG\\-at\\-3
Creating ratio df for  nDCG\\-at\\-3 ,  AAPD - Multi-Label Text Classification benchmarking , ds_count= 1
###SOTA RESULTS: 1
Creating ratio df for  nDCG\\-at\\-3 ,  Amazon-12K - Mul

Creating ratio df for  SPICE ,  Flickr30k Captions test - Image Captioning benchmarking , ds_count= 1
###SOTA RESULTS: 1
Creating ratio df for  SPICE ,  COCO Captions test - Image Captioning benchmarking , ds_count= 1
###SOTA RESULTS: 1
number of sota per dataset/metric:  2
####### Weighted\\ Macro\\-F1
Creating ratio df for  Weighted\\ Macro\\-F1 ,  EmoryNLP - Emotion Recognition in Conversation benchmarking , ds_count= 1
###SOTA RESULTS: 1
number of sota per dataset/metric:  1
####### Cased\\ sacreBLEU
Creating ratio df for  Cased\\ sacreBLEU ,  IWSLT2017 French-English - Machine Translation benchmarking , ds_count= 1
###SOTA RESULTS: 1
Creating ratio df for  Cased\\ sacreBLEU ,  IWSLT2017 Arabic-English - Machine Translation benchmarking , ds_count= 1
###SOTA RESULTS: 1
Creating ratio df for  Cased\\ sacreBLEU ,  IWSLT2017 English-Arabic - Machine Translation benchmarking , ds_count= 1
###SOTA RESULTS: 1
Creating ratio df for  Cased\\ sacreBLEU ,  IWSLT2017 English-French - Machine 


The default value of regex will change from True to False in a future version. In addition, single character regular expressions will*not* be treated as literal strings when regex=True.



https://identifiers.org/ito:ITO_00145
Number of metrics:  29
####### Percentage\\ error
Creating ratio df for  Percentage\\ error ,  TIMIT - Speech Recognition benchmarking , ds_count= 1



Pandas doesn't allow columns to be created via a new attribute name - see https://pandas.pydata.org/pandas-docs/stable/indexing.html#attribute-access



###SOTA RESULTS: 3
Creating ratio df for  Percentage\\ error ,  swb_hub_500 WER fullSWBCH - Speech Recognition benchmarking , ds_count= 1
###SOTA RESULTS: 1
Creating ratio df for  Percentage\\ error ,  Switchboard + Hub500 - Speech Recognition benchmarking , ds_count= 1
null
Creating ratio df for  Percentage\\ error ,  CHiME real - Noisy Speech Recognition benchmarking , ds_count= 1
###SOTA RESULTS: 1
Creating ratio df for  Percentage\\ error ,  VoxForge Indian - Accented Speech Recognition benchmarking , ds_count= 1
###SOTA RESULTS: 1
Creating ratio df for  Percentage\\ error ,  VoxForge Commonwealth - Accented Speech Recognition benchmarking , ds_count= 1
###SOTA RESULTS: 1
Creating ratio df for  Percentage\\ error ,  VoxForge American-Canadian - Accented Speech Recognition benchmarking , ds_count= 1
###SOTA RESULTS: 1
Creating ratio df for  Percentage\\ error ,  CHiME clean - Noisy Speech Recognition benchmarking , ds_count= 1
###SOTA RESULTS: 1
Creating ratio df for  Percentage\\ e

###SOTA RESULTS: 1
number of sota per dataset/metric:  1
####### SIR
Creating ratio df for  SIR ,  MUSIC (multi-source) - Audio Source Separation benchmarking , ds_count= 1
null
Creating ratio df for  SIR ,  AudioSet - Audio Source Separation benchmarking , ds_count= 1
###SOTA RESULTS: 1
number of sota per dataset/metric:  1
####### NSDR
Creating ratio df for  NSDR ,  AV-Bench - Wooden Horse - Audio Denoising benchmarking , ds_count= 1
###SOTA RESULTS: 1
Creating ratio df for  NSDR ,  AV-Bench - Guitar Solo - Audio Denoising benchmarking , ds_count= 1
###SOTA RESULTS: 1
Creating ratio df for  NSDR ,  AV-Bench - Violin Yanni - Audio Denoising benchmarking , ds_count= 1
###SOTA RESULTS: 1
number of sota per dataset/metric:  3
####### Angular\\ Error
Creating ratio df for  Angular\\ Error ,  SOFA - Direction of Arrival Estimation benchmarking , ds_count= 1
###SOTA RESULTS: 1
number of sota per dataset/metric:  1
####### Audio\\ Quality\\ MOS
Creating ratio df for  Audio\\ Quality\\ MOS , 


The default value of regex will change from True to False in a future version. In addition, single character regular expressions will*not* be treated as literal strings when regex=True.



https://identifiers.org/ito:ITO_00310
Number of metrics:  1
####### Accuracy
Creating ratio df for  Accuracy ,  ImageNet (targeted PGD, max perturbation=16) - Adversarial Defense benchmarking , ds_count= 1
null
Creating ratio df for  Accuracy ,  ImageNet - Adversarial Defense benchmarking , ds_count= 1



Pandas doesn't allow columns to be created via a new attribute name - see https://pandas.pydata.org/pandas-docs/stable/indexing.html#attribute-access



###SOTA RESULTS: 1
Creating ratio df for  Accuracy ,  CAAD 2018 - Adversarial Defense benchmarking , ds_count= 1
###SOTA RESULTS: 1
Creating ratio df for  Accuracy ,  CIFAR-10 - Adversarial Defense benchmarking , ds_count= 1
###SOTA RESULTS: 1
number of sota per dataset/metric:  3
https://identifiers.org/ito:ITO_00485



The default value of regex will change from True to False in a future version. In addition, single character regular expressions will*not* be treated as literal strings when regex=True.



Number of metrics:  9
####### BLEU\\-4
Creating ratio df for  BLEU\\-4 ,  WikiSQL - SQL-to-Text benchmarking , ds_count= 1



Pandas doesn't allow columns to be created via a new attribute name - see https://pandas.pydata.org/pandas-docs/stable/indexing.html#attribute-access



###SOTA RESULTS: 2
number of sota per dataset/metric:  2
####### Accuracy
Creating ratio df for  Accuracy ,  Django - Code Generation benchmarking , ds_count= 1
###SOTA RESULTS: 2
Creating ratio df for  Accuracy ,  MNIST - Feature Selection benchmarking , ds_count= 1
###SOTA RESULTS: 1
Creating ratio df for  Accuracy ,  ISOLET - Feature Selection benchmarking , ds_count= 1
###SOTA RESULTS: 1
Creating ratio df for  Accuracy ,  Coil-20 - Feature Selection benchmarking , ds_count= 1
###SOTA RESULTS: 1
Creating ratio df for  Accuracy ,  Fashion-MNIST - Feature Selection benchmarking , ds_count= 1
###SOTA RESULTS: 1
Creating ratio df for  Accuracy ,  Activity - Feature Selection benchmarking , ds_count= 1
###SOTA RESULTS: 1
Creating ratio df for  Accuracy ,  Mice Protein - Feature Selection benchmarking , ds_count= 1
###SOTA RESULTS: 1
number of sota per dataset/metric:  8
####### 14\\ gestures\\ accuracy
Creating ratio df for  14\\ gestures\\ accuracy ,  100 sleep nights of 8 caregivers - 


The default value of regex will change from True to False in a future version. In addition, single character regular expressions will*not* be treated as literal strings when regex=True.



https://identifiers.org/ito:ITO_00491
Number of metrics:  17
####### Percentage\\ correct
Creating ratio df for  Percentage\\ correct ,  COCO Visual Question Answering (VQA) abstract images 1.0 open ended - Visual Question Answering benchmarking , ds_count= 1
null
Creating ratio df for  Percentage\\ correct ,  COCO Visual Question Answering (VQA) real images 1.0 open ended - Visual Question Answering benchmarking , ds_count= 1
null
Creating ratio df for  Percentage\\ correct ,  COCO Visual Question Answering (VQA) abstract 1.0 multiple choice - Visual Question Answering benchmarking , ds_count= 1
null
Creating ratio df for  Percentage\\ correct ,  COCO Visual Question Answering (VQA) real images 2.0 open ended - Visual Question Answering benchmarking , ds_count= 1
null
Creating ratio df for  Percentage\\ correct ,  COCO Visual Question Answering (VQA) real images 1.0 multiple choice - Visual Question Answering benchmarking , ds_count= 1
null
Creating ratio df for  Percentage\\ correct 


Pandas doesn't allow columns to be created via a new attribute name - see https://pandas.pydata.org/pandas-docs/stable/indexing.html#attribute-access



###SOTA RESULTS: 2
Creating ratio df for  Percentage\\ correct ,  Visual Genome (pairs) - Visual Question Answering benchmarking , ds_count= 1
null
Creating ratio df for  Percentage\\ correct ,  Visual Genome (subjects) - Visual Question Answering benchmarking , ds_count= 1
null
number of sota per dataset/metric:  2
####### Accuracy
Creating ratio df for  Accuracy ,  VQA v1 test-std - Visual Question Answering benchmarking , ds_count= 1
###SOTA RESULTS: 5
Creating ratio df for  Accuracy ,  VQA v1 test-dev - Visual Question Answering benchmarking , ds_count= 1
###SOTA RESULTS: 6
Creating ratio df for  Accuracy ,  VQA v2 test-dev - Visual Question Answering benchmarking , ds_count= 1
###SOTA RESULTS: 9
Creating ratio df for  Accuracy ,  MSVD-QA - Visual Question Answering benchmarking , ds_count= 1
###SOTA RESULTS: 4
Creating ratio df for  Accuracy ,  MSRVTT-QA - Visual Question Answering benchmarking , ds_count= 1
###SOTA RESULTS: 4
Creating ratio df for  Accuracy ,  GQA Test2019 - Visu


The default value of regex will change from True to False in a future version. In addition, single character regular expressions will*not* be treated as literal strings when regex=True.



https://identifiers.org/ito:ITO_00528
Number of metrics:  24
####### Hits\\-at\\-10
Creating ratio df for  Hits\\-at\\-10 ,  FB15k - Link Prediction benchmarking , ds_count= 1



Pandas doesn't allow columns to be created via a new attribute name - see https://pandas.pydata.org/pandas-docs/stable/indexing.html#attribute-access



###SOTA RESULTS: 6
Creating ratio df for  Hits\\-at\\-10 ,  FB122 - Link Prediction benchmarking , ds_count= 1
###SOTA RESULTS: 3
Creating ratio df for  Hits\\-at\\-10 ,  WN18 - Link Prediction benchmarking , ds_count= 1
###SOTA RESULTS: 5
Creating ratio df for  Hits\\-at\\-10 ,  WN18RR - Link Prediction benchmarking , ds_count= 1
###SOTA RESULTS: 4
Creating ratio df for  Hits\\-at\\-10 ,  FB15k-237 - Link Prediction benchmarking , ds_count= 1
###SOTA RESULTS: 7
Creating ratio df for  Hits\\-at\\-10 ,  FB15k (filtered) - Link Prediction benchmarking , ds_count= 1
null
Creating ratio df for  Hits\\-at\\-10 ,  WN18 (filtered) - Link Prediction benchmarking , ds_count= 1
null
Creating ratio df for  Hits\\-at\\-10 ,  YAGO3-10 - Link Prediction benchmarking , ds_count= 1
###SOTA RESULTS: 2
Creating ratio df for  Hits\\-at\\-10 ,  YAGO37 - Link Prediction benchmarking , ds_count= 1
###SOTA RESULTS: 1
Creating ratio df for  Hits\\-at\\-10 ,  AKSW-bib - Link Prediction benchmarking , ds_count=

###SOTA RESULTS: 1
Creating ratio df for  Accuracy ,  Citeseer - Link Prediction benchmarking , ds_count= 1
###SOTA RESULTS: 2
Creating ratio df for  Accuracy ,  WordNet - Link Prediction benchmarking , ds_count= 1
###SOTA RESULTS: 3
Creating ratio df for  Accuracy ,  Citeseer (biased evaluation) - Link Prediction benchmarking , ds_count= 1
null
Creating ratio df for  Accuracy ,  Cora (biased evaluation) - Link Prediction benchmarking , ds_count= 1
null
Creating ratio df for  Accuracy ,  Pubmed (biased evaluation) - Link Prediction benchmarking , ds_count= 1
null
Creating ratio df for  Accuracy ,  PPI - Link Prediction benchmarking , ds_count= 1
###SOTA RESULTS: 1
number of sota per dataset/metric:  9
####### Mean\\ AP
Creating ratio df for  Mean\\ AP ,  NELL-995 - Link Prediction benchmarking , ds_count= 1
###SOTA RESULTS: 1
number of sota per dataset/metric:  1
####### AUC
Creating ratio df for  AUC ,  Cora - Link Prediction benchmarking , ds_count= 1
###SOTA RESULTS: 2
Creating rati


The default value of regex will change from True to False in a future version. In addition, single character regular expressions will*not* be treated as literal strings when regex=True.



https://identifiers.org/ito:ITO_00600
Number of metrics:  10
####### 5\\ fold\\ cross\\ validation
Creating ratio df for  5\\ fold\\ cross\\ validation ,  Cornell Grasp Dataset - Robotic Grasping benchmarking , ds_count= 1
###SOTA RESULTS: 4



Pandas doesn't allow columns to be created via a new attribute name - see https://pandas.pydata.org/pandas-docs/stable/indexing.html#attribute-access



number of sota per dataset/metric:  4
####### spl
Creating ratio df for  spl ,  R2R - Visual Navigation benchmarking , ds_count= 1
###SOTA RESULTS: 3
Creating ratio df for  spl ,  VLN Challenge - Vision and Language Navigation benchmarking , ds_count= 1
###SOTA RESULTS: 3
Creating ratio df for  spl ,  Gibson PointGoal Navigation - PointGoal Navigation benchmarking , ds_count= 1
###SOTA RESULTS: 1
Creating ratio df for  spl ,  Cooperative Vision-and-Dialogue Navigation - Visual Navigation benchmarking , ds_count= 1
###SOTA RESULTS: 1
Creating ratio df for  spl ,  Help, Anna! (HANNA) - Visual Navigation benchmarking , ds_count= 1
null
number of sota per dataset/metric:  8
####### Medium\\ Human\\-Normalized\\ Score
Creating ratio df for  Medium\\ Human\\-Normalized\\ Score ,  Dmlab-30 - Visual Navigation benchmarking , ds_count= 1
###SOTA RESULTS: 1
number of sota per dataset/metric:  1
####### length
Creating ratio df for  length ,  VLN Challenge - Vision and Language Navigation benchma


The default value of regex will change from True to False in a future version. In addition, single character regular expressions will*not* be treated as literal strings when regex=True.



https://identifiers.org/ito:ITO_00873
Number of metrics:  10
####### Score
Creating ratio df for  Score ,  Atari 2600 Space Invaders - Playing Atari Games benchmarking , ds_count= 1



Pandas doesn't allow columns to be created via a new attribute name - see https://pandas.pydata.org/pandas-docs/stable/indexing.html#attribute-access



###SOTA RESULTS: 9
Creating ratio df for  Score ,  Atari 2600 Atlantis - Playing Atari Games benchmarking , ds_count= 1
###SOTA RESULTS: 4
Creating ratio df for  Score ,  Atari 2600 Tennis - Playing Atari Games benchmarking , ds_count= 1
###SOTA RESULTS: 5
Creating ratio df for  Score ,  Atari 2600 Robotank - Playing Atari Games benchmarking , ds_count= 1
###SOTA RESULTS: 8
Creating ratio df for  Score ,  Atari 2600 Asterix - Playing Atari Games benchmarking , ds_count= 1
###SOTA RESULTS: 5
Creating ratio df for  Score ,  Atari 2600 Zaxxon - Playing Atari Games benchmarking , ds_count= 1
###SOTA RESULTS: 7
Creating ratio df for  Score ,  Atari 2600 Freeway - Playing Atari Games benchmarking , ds_count= 1
###SOTA RESULTS: 7
Creating ratio df for  Score ,  Atari 2600 Kung-Fu Master - Playing Atari Games benchmarking , ds_count= 1
###SOTA RESULTS: 4
Creating ratio df for  Score ,  Atari 2600 Tutankham - Playing Atari Games benchmarking , ds_count= 1
###SOTA RESULTS: 9
Creating ratio df fo

###SOTA RESULTS: 1
Creating ratio df for  Score ,  Acrobot (limited sensors) - Continuous Control benchmarking , ds_count= 1
null
Creating ratio df for  Score ,  Mountain Car - Continuous Control benchmarking , ds_count= 1
###SOTA RESULTS: 0
Creating ratio df for  Score ,  Inverted Pendulum (limited sensors) - Continuous Control benchmarking , ds_count= 1
null
Creating ratio df for  Score ,  Acrobot - Continuous Control benchmarking , ds_count= 1
###SOTA RESULTS: 0
Creating ratio df for  Score ,  Inverted Pendulum (system identifications) - Continuous Control benchmarking , ds_count= 1
null
Creating ratio df for  Score ,  Inverted Pendulum - Continuous Control benchmarking , ds_count= 1
###SOTA RESULTS: 1
Creating ratio df for  Score ,  Mountain Car (system identifications) - Continuous Control benchmarking , ds_count= 1
null
Creating ratio df for  Score ,  Acrobot (noisy observations) - Continuous Control benchmarking , ds_count= 1
null
Creating ratio df for  Score ,  Hopper - Continu


The default value of regex will change from True to False in a future version. In addition, single character regular expressions will*not* be treated as literal strings when regex=True.



https://identifiers.org/ito:ITO_01532
Number of metrics:  11
####### APS
Creating ratio df for  APS ,  MusicNet - Music Transcription benchmarking , ds_count= 1



Pandas doesn't allow columns to be created via a new attribute name - see https://pandas.pydata.org/pandas-docs/stable/indexing.html#attribute-access



###SOTA RESULTS: 1
number of sota per dataset/metric:  1
####### Number\\ of\\ params
Creating ratio df for  Number\\ of\\ params ,  MusicNet - Music Transcription benchmarking , ds_count= 1
###SOTA RESULTS: 2
number of sota per dataset/metric:  2
####### NLL
Creating ratio df for  NLL ,  Nottingham - Music Modeling benchmarking , ds_count= 1
###SOTA RESULTS: 2
Creating ratio df for  NLL ,  JSB Chorales - Music Modeling benchmarking , ds_count= 1
###SOTA RESULTS: 1
number of sota per dataset/metric:  3
####### SDR\\ \\(avg\\)
Creating ratio df for  SDR\\ \\(avg\\) ,  MUSDB18 - Music Source Separation benchmarking , ds_count= 1
###SOTA RESULTS: 4
number of sota per dataset/metric:  4
####### SDR\\ \\(vocals\\)
Creating ratio df for  SDR\\ \\(vocals\\) ,  MUSDB18 - Music Source Separation benchmarking , ds_count= 1
###SOTA RESULTS: 3
number of sota per dataset/metric:  3
####### SDR\\ \\(drums\\)
Creating ratio df for  SDR\\ \\(drums\\) ,  MUSDB18 - Music Source Separation benchmarking ,


The default value of regex will change from True to False in a future version. In addition, single character regular expressions will*not* be treated as literal strings when regex=True.



https://identifiers.org/ito:ITO_00506x
Number of metrics:  13
####### Average\\ Cross\\-Ent
Creating ratio df for  Average\\ Cross\\-Ent ,  Event2Mind dev - Common Sense Reasoning benchmarking , ds_count= 1



Pandas doesn't allow columns to be created via a new attribute name - see https://pandas.pydata.org/pandas-docs/stable/indexing.html#attribute-access



###SOTA RESULTS: 1
Creating ratio df for  Average\\ Cross\\-Ent ,  Event2Mind test - Common Sense Reasoning benchmarking , ds_count= 1
###SOTA RESULTS: 2
number of sota per dataset/metric:  3
####### Score
Creating ratio df for  Score ,  Winograd Schema Challenge - Common Sense Reasoning benchmarking , ds_count= 1
###SOTA RESULTS: 2
number of sota per dataset/metric:  2
####### Test
Creating ratio df for  Test ,  SWAG - Common Sense Reasoning benchmarking , ds_count= 1
###SOTA RESULTS: 3
number of sota per dataset/metric:  3
####### Dev
Creating ratio df for  Dev ,  SWAG - Common Sense Reasoning benchmarking , ds_count= 1
###SOTA RESULTS: 2
number of sota per dataset/metric:  2
####### 1\\ in\\ 10\\ R\\-at\\-5
Creating ratio df for  1\\ in\\ 10\\ R\\-at\\-5 ,  Visual Dialog v0.9 - Common Sense Reasoning benchmarking , ds_count= 1
###SOTA RESULTS: 1
Creating ratio df for  1\\ in\\ 10\\ R\\-at\\-5 ,  Visual Dialog  v0.9 - Common Sense Reasoning benchmarking , ds_count= 1
###SOTA RESULTS:


The default value of regex will change from True to False in a future version. In addition, single character regular expressions will*not* be treated as literal strings when regex=True.



In [44]:
#Just plot the graphs (quicker)
i=0
while i < len(top_level["top_level_class"]):
    print(top_level["top_level_class"][i])
    selected_ito = top_level["top_level_class"][i].replace("https://identifiers.org/","")
    class_label = top_level["class_label"][i]
    
    #plot trajectory
    ito = selected_ito.replace("ito:","")
    plot_task_trajectory(ito, class_label)
    
    i = i+1

https://identifiers.org/ito:ITO_00310
https://identifiers.org/ito:ITO_00485



The default value of regex will change from True to False in a future version. In addition, single character regular expressions will*not* be treated as literal strings when regex=True.


The default value of regex will change from True to False in a future version. In addition, single character regular expressions will*not* be treated as literal strings when regex=True.



https://identifiers.org/ito:ITO_00491



The default value of regex will change from True to False in a future version. In addition, single character regular expressions will*not* be treated as literal strings when regex=True.



https://identifiers.org/ito:ITO_00528



The default value of regex will change from True to False in a future version. In addition, single character regular expressions will*not* be treated as literal strings when regex=True.



https://identifiers.org/ito:ITO_00600



The default value of regex will change from True to False in a future version. In addition, single character regular expressions will*not* be treated as literal strings when regex=True.



https://identifiers.org/ito:ITO_00873



The default value of regex will change from True to False in a future version. In addition, single character regular expressions will*not* be treated as literal strings when regex=True.



https://identifiers.org/ito:ITO_01532



The default value of regex will change from True to False in a future version. In addition, single character regular expressions will*not* be treated as literal strings when regex=True.



https://identifiers.org/ito:ITO_00506x



The default value of regex will change from True to False in a future version. In addition, single character regular expressions will*not* be treated as literal strings when regex=True.



## Important change 
this percentage has to be calculated out of the function, because it congregates the maximum obtained value across all benchmarks.
