## Parsing FAIR evaluation metrics

In [1]:
import pandas as pd
import json
import re
import requests

In [2]:
# # Function to fetch fair evaluations
# def fetch_fair_evaluation(doi):
#     url = "https://api.fair-enough.semanticscience.org/evaluations"
#     headers = {
#         "accept": "application/json",
#         "Content-Type": "application/json"
#     }
#     data = {
#         "subject": doi,
#         "collection": "fair-enough-metadata"
#     }
#     response = requests.post(url, headers=headers, json=data)
    
#     if response.status_code != 200:
#         print(f"Failed to fetch data for DOI: {doi}. Status code: {response.status_code}")
#         return None  # Return None for failed requests

### from files

|DOI | Example | File Name |
|---|---|---|
| https://doi.org/10.3233/FAIA200871 | Example 1: Publication metadata | fair_metrics_1_publication.json |
| https://doi.org/10.1145/3184558.3191543 | Example 2: Publication metadata  | fair_metrics_2_publication.json |
|https://doi.org/10.5281/zenodo.8148685| Example 3: Software metadata | fair_metrics_3_software.json |
|https://doi.org/10.34894/Q80QUE| Example 4: Dataset metadata | fair_metrics_4_dataset.json |




In [3]:
# Load the JSON data from the file
with open("data_sample/fair_metrics_1_publication.json", "r") as file:
    data_sample = json.load(file)

In [4]:
attributes = [
    "https://w3id.org/fair-enough/metrics/tests/f1-metadata-identifier-persistent",
    "https://w3id.org/fair-enough/metrics/tests/f1-metadata-identifier-unique",
    "https://w3id.org/fair-enough/metrics/tests/a1-metadata-authorization",
    "https://w3id.org/fair-enough/metrics/tests/a1-metadata-protocol",
    "https://w3id.org/fair-enough/metrics/tests/f2-structured-metadata",
    "https://w3id.org/fair-enough/metrics/tests/f1-data-identifier-persistent",
    "https://w3id.org/fair-enough/metrics/tests/f3-metadata-identifier-in-metadata",
    "https://w3id.org/fair-enough/metrics/tests/i3-metadata-contains-outward-links",
    "https://w3id.org/fair-enough/metrics/tests/r1-includes-license",
    "https://w3id.org/fair-enough/metrics/tests/f2-grounded-metadata",
    "https://w3id.org/fair-enough/metrics/tests/a1-data-protocol",
    "https://w3id.org/fair-enough/metrics/tests/a1-data-authorization",
    "https://w3id.org/fair-enough/metrics/tests/a2-metadata-persistent",
    "https://w3id.org/fair-enough/metrics/tests/f3-data-identifier-in-metadata",
    "https://w3id.org/fair-enough/metrics/tests/i1-data-knowledge-representation-structured",
    "https://w3id.org/fair-enough/metrics/tests/i1-metadata-knowledge-representation-structured",
    "https://w3id.org/fair-enough/metrics/tests/f4-searchable",
    "https://w3id.org/fair-enough/metrics/tests/i1-data-knowledge-representation-semantic",
    "https://w3id.org/fair-enough/metrics/tests/i2-fair-vocabularies-known",
    "https://w3id.org/fair-enough/metrics/tests/i1-metadata-knowledge-representation-semantic",
    "https://w3id.org/fair-enough/metrics/tests/r1-includes-standard-license",
    "https://w3id.org/fair-enough/metrics/tests/i2-fair-vocabularies-resolve"
]

rows = []

In [5]:
for attribute in attributes:
    metric_results = data_sample["contains"].get(attribute, [])
    if not metric_results:
        continue
    
    metric_result = metric_results[0]
    value = metric_result.get("http://semanticscience.org/resource/SIO_000300", [{}])[0].get("@value", None)
    comment = metric_result.get("http://schema.org/comment", [{}])[0].get("@value", None)

    # Extracting the last prompt from the comment
    if comment:
        matches = re.findall(r"(INFO|SUCCESS|FAILURE): \[\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\] .+", comment)
        if matches:
            comment = matches[-1]  # Taking the last match

    # Build each row
    rows.append({
        "_id": data_sample["_id"],
        "subject": data_sample["subject"],
        "created_at": data_sample["created_at"],
        "name": data_sample["name"],
        "fair_metric": attribute.split("/")[-1],  # Get the last part of the URL for a cleaner metric name
        "value": value,
        "comment": comment
    })

# Convert rows to DataFrame
df = pd.DataFrame(rows)


In [6]:
df.head()

Unnamed: 0,_id,subject,created_at,name,fair_metric,value,comment
0,01d0b632dfc56a13da4a6bb5886b6862e31d03fa,https://doi.org/10.3233/FAIA200871,2023-09-03T10:22:07,Evaluation of https://doi.org/10.3233/FAIA200871,f1-metadata-identifier-persistent,1,SUCCESS
1,01d0b632dfc56a13da4a6bb5886b6862e31d03fa,https://doi.org/10.3233/FAIA200871,2023-09-03T10:22:07,Evaluation of https://doi.org/10.3233/FAIA200871,f1-metadata-identifier-unique,1,SUCCESS
2,01d0b632dfc56a13da4a6bb5886b6862e31d03fa,https://doi.org/10.3233/FAIA200871,2023-09-03T10:22:07,Evaluation of https://doi.org/10.3233/FAIA200871,a1-metadata-authorization,1,SUCCESS
3,01d0b632dfc56a13da4a6bb5886b6862e31d03fa,https://doi.org/10.3233/FAIA200871,2023-09-03T10:22:07,Evaluation of https://doi.org/10.3233/FAIA200871,a1-metadata-protocol,1,INFO
4,01d0b632dfc56a13da4a6bb5886b6862e31d03fa,https://doi.org/10.3233/FAIA200871,2023-09-03T10:22:07,Evaluation of https://doi.org/10.3233/FAIA200871,f2-structured-metadata,1,SUCCESS


In [7]:
df.to_excel("FAIR_metrics_report.xls")

  df.to_excel("FAIR_metrics_report.xls")
