In [4]:
import json
import pandas as pd
import matplotlib.pyplot as plt
import requests as r

## Dumping DANS

First, we use the pyDataverse library to connect to the DANS repository (it's a dataverse repository)

https://pydataverse.readthedocs.io/en/latest/user/basic-usage.html#connect-to-native-api

In [12]:
from pyDataverse.api import NativeApi
api = NativeApi("https://ssh.datastations.nl/")

api.get_info_version().json()

{'status': 'OK',
 'data': {'version': '6.3', 'build': 'DANS-DataStation-PATCH-9'}}

We dump the entire repository by getting the contents of the special dataverse ":root". Warning: this takes like 15 mins.

In [13]:
try:
    with open("dans.json", "rb") as f:
        dans = json.load(f)

except FileNotFoundError:
    tree = api.get_dataverse_contents(":root")

    dans = tree.json()

    with open("dans.json", "wb") as f:
        f.write(tree.read())

Move the data into pandas to start working with it

In [14]:
df_dans = pd.DataFrame.from_dict(dans["data"])

df_dans

Unnamed: 0,id,identifier,persistentUrl,protocol,authority,publisher,publicationDate,storageIdentifier,type
0,1920,dans-zhr-eswk,https://doi.org/10.17026/dans-zhr-eswk,doi,10.17026,DANS Data Station Social Sciences and Humanities,1990-01-01,surf://10.17026/dans-zhr-eswk,dataset
1,1921,dans-xeh-f6xm,https://doi.org/10.17026/dans-xeh-f6xm,doi,10.17026,DANS Data Station Social Sciences and Humanities,1995-01-01,surf://10.17026/dans-xeh-f6xm,dataset
2,1924,dans-zct-en83,https://doi.org/10.17026/dans-zct-en83,doi,10.17026,DANS Data Station Social Sciences and Humanities,1999-01-01,surf://10.17026/dans-zct-en83,dataset
3,1949,dans-xjk-kqxa,https://doi.org/10.17026/dans-xjk-kqxa,doi,10.17026,DANS Data Station Social Sciences and Humanities,2004-01-01,surf://10.17026/dans-xjk-kqxa,dataset
4,2082,dans-xpa-uek9,https://doi.org/10.17026/dans-xpa-uek9,doi,10.17026,DANS Data Station Social Sciences and Humanities,1993-01-01,surf://10.17026/dans-xpa-uek9,dataset
...,...,...,...,...,...,...,...,...,...
8687,615257,SS/GIJTA1,https://doi.org/10.17026/SS/GIJTA1,doi,10.17026,DANS Data Station Social Sciences and Humanities,2025-05-28,surf://10.17026/SS/GIJTA1,dataset
8688,615280,SS/IHYCRX,https://doi.org/10.17026/SS/IHYCRX,doi,10.17026,DANS Data Station Social Sciences and Humanities,2025-05-13,surf://10.17026/SS/IHYCRX,dataset
8689,615286,SS/ZY6QIP,https://doi.org/10.17026/SS/ZY6QIP,doi,10.17026,DANS Data Station Social Sciences and Humanities,2025-05-12,surf://10.17026/SS/ZY6QIP,dataset
8690,615288,SS/TM4SW5,https://doi.org/10.17026/SS/TM4SW5,doi,10.17026,DANS Data Station Social Sciences and Humanities,2025-05-09,surf://10.17026/SS/TM4SW5,dataset


In [15]:
df_dans.sample(10)["persistentUrl"]

7384        https://doi.org/10.17026/SS/VIO8NJ
5618    https://doi.org/10.17026/dans-zmt-qkd9
3290    https://doi.org/10.17026/dans-zgy-zwu7
5585    https://doi.org/10.17026/dans-zcj-frmz
6493    https://doi.org/10.17026/dans-xda-rz3n
3266    https://doi.org/10.17026/dans-znv-8mtc
3905    https://doi.org/10.17026/dans-28g-f5cg
603     https://doi.org/10.17026/dans-236-th32
6555    https://doi.org/10.17026/dans-xmy-3p9e
2205    https://doi.org/10.17026/dans-zed-sfxc
Name: persistentUrl, dtype: object

In [35]:
def fair_checker(pid: str):
    res = r.get("https://fair-checker.france-bioinformatique.fr/api/check/legacy/metrics_all", params={"url": pid})
    return {metric["metric"]: int(metric["score"]) for metric in res.json()}

In [43]:
sample = df_dans.sample(100)["persistentUrl"]
results: pd.DataFrame = sample.apply(fair_checker).apply(pd.Series) / 2

pd.concat([sample, results, results.mean(axis=1)], axis=1)

Unnamed: 0,persistentUrl,F1A,F1B,F2A,F2B,A1.1,A1.2,I1,I2,I3,R1.1,R1.2,R1.3,0
4262,https://doi.org/10.17026/dans-xdg-3b26,1.0,1.0,0.5,0.5,1.0,1.0,0.5,0.5,1.0,1.0,1.0,0.5,0.791667
4774,https://doi.org/10.17026/dans-zcd-jsgk,1.0,1.0,0.5,0.5,1.0,1.0,0.5,0.5,1.0,1.0,1.0,0.5,0.791667
6342,https://doi.org/10.17026/dans-29s-97f7,1.0,1.0,0.5,0.5,1.0,1.0,0.5,0.5,0.0,1.0,1.0,0.5,0.708333
7048,https://doi.org/10.17026/dans-xy7-cehx,1.0,1.0,0.5,0.5,1.0,1.0,0.5,0.5,0.0,1.0,1.0,0.5,0.708333
4393,https://doi.org/10.17026/dans-x9k-jbj9,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.166667
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4158,https://doi.org/10.17026/dans-x56-4b5d,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.166667
2405,https://doi.org/10.17026/dans-2zq-tzw2,1.0,1.0,0.5,0.5,1.0,1.0,0.5,0.5,1.0,1.0,1.0,0.5,0.791667
4700,https://doi.org/10.17026/dans-xzp-swpa,1.0,1.0,0.5,0.5,1.0,1.0,0.5,0.5,0.0,1.0,1.0,0.5,0.708333
4151,https://doi.org/10.17026/dans-246-w8mc,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.166667


In [55]:
final = pd.concat([
    results.loc[:, results.columns.str.startswith("F")].mean(axis=1).to_frame("Findability"),
    results.loc[:, results.columns.str.startswith("A")].mean(axis=1).to_frame("Accessibility"),
    results.loc[:, results.columns.str.startswith("I")].mean(axis=1).to_frame("Interoperability"),
    results.loc[:, results.columns.str.startswith("R")].mean(axis=1).to_frame("Reusability"),
    results.mean(axis=1).to_frame("Overall"),
], axis=1)

final

Unnamed: 0,Findability,Accessibility,Interoperability,Reusability,Overall
4262,0.75,1.0,0.666667,0.833333,0.791667
4774,0.75,1.0,0.666667,0.833333,0.791667
6342,0.75,1.0,0.333333,0.833333,0.708333
7048,0.75,1.0,0.333333,0.833333,0.708333
4393,0.25,0.5,0.000000,0.000000,0.166667
...,...,...,...,...,...
4158,0.25,0.5,0.000000,0.000000,0.166667
2405,0.75,1.0,0.666667,0.833333,0.791667
4700,0.75,1.0,0.333333,0.833333,0.708333
4151,0.25,0.5,0.000000,0.000000,0.166667


In [60]:
final_melted = final.melt(var_name="metric", value_name="value")

bins = [0, 0.25, 0.5, 0.75, 1.0]
bin_labels = ['0%-25%', '25%-50%', '50%-75%', '75%-100%']

final_melted['bin'] = pd.cut(final_melted['value'], bins=bins, labels=bin_labels, include_lowest=True)

stats = pd.crosstab(final_melted["metric"], final_melted["bin"]).loc[final.columns]

stats

bin,0%-25%,25%-50%,50%-75%,75%-100%
Findability,10,5,85,0
Accessibility,0,15,0,85
Interoperability,10,70,20,0
Reusability,14,1,0,85
Overall,10,4,67,19


In [41]:
results.to_latex()

'\\begin{tabular}{lrrrrrrrrrrrr}\n\\toprule\n & F1A & F1B & F2A & F2B & A1.1 & A1.2 & I1 & I2 & I3 & R1.1 & R1.2 & R1.3 \\\\\n\\midrule\n1723 & 1.000000 & 1.000000 & 0.500000 & 0.500000 & 1.000000 & 1.000000 & 0.500000 & 0.500000 & 0.000000 & 1.000000 & 1.000000 & 0.500000 \\\\\n5157 & 1.000000 & 0.000000 & 0.500000 & 0.500000 & 1.000000 & 0.000000 & 0.500000 & 0.500000 & 0.000000 & 0.000000 & 0.000000 & 0.500000 \\\\\n881 & 1.000000 & 1.000000 & 0.500000 & 0.500000 & 1.000000 & 1.000000 & 0.500000 & 0.500000 & 1.000000 & 1.000000 & 1.000000 & 0.500000 \\\\\n1979 & 1.000000 & 1.000000 & 0.500000 & 0.500000 & 1.000000 & 1.000000 & 0.500000 & 0.500000 & 0.000000 & 1.000000 & 1.000000 & 0.500000 \\\\\n6831 & 1.000000 & 1.000000 & 0.500000 & 0.500000 & 1.000000 & 1.000000 & 0.500000 & 0.500000 & 0.000000 & 1.000000 & 1.000000 & 0.500000 \\\\\n6679 & 1.000000 & 1.000000 & 0.500000 & 0.500000 & 1.000000 & 1.000000 & 0.500000 & 0.500000 & 0.000000 & 1.000000 & 1.000000 & 0.500000 \\\\\n2325 &

In [1]:
FUJI_URL = "http://localhost:1071/fuji/api/v1/evaluate"

In [2]:
def fuji(pid: str):
    headers = {
        "accept": "application/json",
        "Authorization": "Basic bWFydmVsOndvbmRlcndvbWFu",
        "Content-Type": "application/json",
    }

    req = {"object_identifier": pid, "test_debug": True, "use_datacite": True}
    res = r.post(FUJI_URL, json=req, headers=headers)

    return res.json()

In [6]:
result = fuji("https://doi.org/10.17026/dans-xdg-3b26")

result

{'test_id': 'cc7ce62e01a4afcdfb7087131374d04d93265883',
 'request': {'object_identifier': 'https://doi.org/10.17026/dans-xdg-3b26',
  'test_debug': True,
  'use_datacite': True,
  'normalized_object_identifier': '10.17026/dans-xdg-3b26'},
 'resolved_url': 'https://ssh.datastations.nl/dataset.xhtml?persistentId=doi:10.17026/dans-xdg-3b26',
 'start_timestamp': '2025-06-17T02:40:46Z',
 'end_timestamp': '2025-06-17T02:40:51Z',
 'metric_specification': 'https://doi.org/10.5281/zenodo.6461229',
 'software_version': '3.5.1',
 'total_metrics': 16,
 'summary': {'score_earned': {'A': 1.0,
   'F': 6.5,
   'I': 3.0,
   'R': 5.0,
   'A1': 1.0,
   'F1': 2.0,
   'F2': 2.0,
   'F3': 0.5,
   'F4': 2.0,
   'I1': 2.0,
   'I2': 0.0,
   'I3': 1.0,
   'R1': 1.0,
   'R1.1': 2.0,
   'R1.2': 1.0,
   'R1.3': 1.0,
   'FAIR': 15.5},
  'score_total': {'A': 3,
   'F': 7,
   'I': 4,
   'R': 10,
   'A1': 3,
   'F1': 2,
   'F2': 2,
   'F3': 1,
   'F4': 2,
   'I1': 2,
   'I2': 1,
   'I3': 1,
   'R1': 4,
   'R1.1': 2,
 