In [1]:
import json
import pickle
import pandas as pd
import requests as rq
import matplotlib.pyplot as plt

In [2]:
class Invenio:
    def __init__(self, _url: str, _token: str):
        self.url = _url
        self.token = _token

    def get_auth_header(self) -> str:
        return 'Bearer ' + self.token

    def get_default_headers(self, content_type: str = 'application/json') -> dict:
        return {
            'Authorization': self.get_auth_header(),
            "Content-Type": content_type,
        }

    def get_response(self, req):
        response = rq.get(
            url=req,
            headers={"Authorization": self.get_auth_header()},
            verify=False
        )

        if response.status_code != 200:
            raise Exception(f'Failed to get all invenio records: {response.text}')

        return response.json()

    def get_all_records(self) -> list:
        rsp = self.get_response(f"{self.url}/api/records?q=access.status:open")
        ret = [rsp]

        while 'next' in rsp['links'] and rsp['links']['next'] is not '':
            rsp = self.get_response(rsp['links']['next'])
            ret.append(rsp)

        return ret

    def get_all_record_ids(self) -> list:
        pages = self.get_all_records()
        records = []
        for page in pages:
            for record in page['hits']['hits']:
                records.append(record['id'])

        return records

  while 'next' in rsp['links'] and rsp['links']['next'] is not '':
  while 'next' in rsp['links'] and rsp['links']['next'] is not '':


In [3]:
FUJI_URL = "http://localhost:1071/fuji/api/v1/evaluate"
FUJI_REQUEST_HEADER = {
    "accept": "application/json",
    "Authorization": "Basic bWFydmVsOndvbmRlcndvbWFu",
    "Content-Type": "application/json"
}
FUJI_REQUEST_BODY = {
    "test_debug": True,
    "metadata_service_endpoint": "http://ws.pangaea.de/oai/provider",
    "metadata_service_type": "oai_pmh",
    "use_datacite": True,
    "use_github": False,
    "metric_version": "metrics_v0.5"
}

INVENIO_URL = "https://test.researchdata.tuwien.at/records"
ENHANCED_URL = "http://localhost:9090/metadata"

def get_fuji_score(url: str) -> dict:
    body = FUJI_REQUEST_BODY.copy()

    body["object_identifier"] = url

    r = rq.post(
        url=FUJI_URL,
        headers=FUJI_REQUEST_HEADER,
        json=body
    )

    if r.status_code != 200:
        raise Exception(f"failed to retrieve fuji score for: {url}, {r.text}")

    return r.json()

In [5]:
invenio = Invenio("https://test.researchdata.tuwien.at", "nyBmTpaBhVYX1bWuHLBeCcb9fzR8wW5CZeRedT0wPFiZyrrnMJORRzvLDCPJ")
record_ids = invenio.get_all_record_ids()

scores = []

for record_id in record_ids:
    print(f"evaluating the scores for {record_id}")

    invenio_score = get_fuji_score(f"{INVENIO_URL}/{record_id}")
    enhanced_score = get_fuji_score(f"{ENHANCED_URL}/{record_id}")

    print(f"got scores: {invenio_score['summary']['score_percent']['FAIR']} vs {enhanced_score['summary']['score_percent']['FAIR']}")

    scores.append({
        "record_id": record_id,
        "invenio_url": f"{INVENIO_URL}/{record_id}",
        "enhanced_url": f"{ENHANCED_URL}/{record_id}",
        "invenio_score": invenio_score,
        "enhanced_score": enhanced_score
    })

    with open(f"invenio_scores/{record_id}.json", "w") as f:
        json.dump(invenio_score, f)

    with open(f"enhanced_scores/{record_id}.json", "w") as f:
        json.dump(enhanced_score, f)



KeyboardInterrupt: 

In [None]:
with open('scores.pkl', 'wb') as f:
    pickle.dump(scores, f)