# Comparing the metadata records of repositories to the corresponding records in DataCite

## Import

In [1]:
import json
import os
import time
from datetime import datetime

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests
import seaborn as sns

## Get records from Figshare

In [2]:
# Figshare personal access token, taken from the FIGSHARE_ACCESS_TOKEN
# environment variable (None when the variable is not set)
figshare_access_token = os.environ.get("FIGSHARE_ACCESS_TOKEN")

In [6]:
# Adapted from the example available here https://help.figshare.com/article/how-to-use-the-figshare-api#search-ids
# Search Figshare for the datasets posted inside the date window below.
# Unlike Zenodo, the search does not return all the metadata of each item,
# so this step is only used to collect the ids of the matching items.
BASE_URL = "https://api.figshare.com/v2"
results = []

date_after = "2024-11-01"
date_before = "2024-12-31"

search_logic = (
    f":item_type:dataset AND :posted_after:{date_after}"
    f" AND :posted_before:{date_before}"
)
# Build the payload as a dict instead of hand-assembling JSON text and
# re-parsing it (concatenation breaks as soon as a value contains a quote).
y = {"search_for": search_logic}
# Same JSON text the cell previously exposed under this name.
query = json.dumps(y)

# Request at most 10 pages of 1000 results each.
for j in range(1, 11):
    response = requests.post(
        f"{BASE_URL}/articles/search?page_size=1000&page={j}",
        params=y,
        timeout=60,  # do not hang forever on a stalled connection
    )
    r = json.loads(response.content)
    if not isinstance(r, list):
        # A non-list payload (e.g. an error object) must not be passed to
        # extend(): that would append its keys (plain strings) to `results`.
        print(
            f"Figshare search stopped at page {j}: "
            f"HTTP {response.status_code} {r}"
        )
        break
    if not r:
        # Empty page: nothing left to fetch.
        break
    results.extend(r)

In [None]:
# Use the Figshare ids collected by the search to fetch the full metadata
# record of each item, then save everything to outputs/figshare.json.
dict_results = {}
count = 0

# The header is the same for every request, so build it once. Leave it out
# when no token is configured instead of sending the literal "token None".
api_call_headers = {}
if figshare_access_token:
    api_call_headers["Authorization"] = "token " + str(figshare_access_token)

for result in results:
    # Skip anything in `results` that is not a search hit (a dict with an id).
    if not isinstance(result, dict):
        continue
    figshare_id = result["id"]
    r = requests.get(
        f"{BASE_URL}/articles/{figshare_id}",
        headers=api_call_headers,
        timeout=60,  # do not hang forever on a stalled connection
    )
    metadata = json.loads(r.text)
    # Records are keyed by a running integer index, not by Figshare id.
    dict_results[count] = metadata
    count += 1

# Make sure the output directory exists so the downloaded records are not
# lost to a FileNotFoundError at the very last step.
os.makedirs("outputs", exist_ok=True)
with open("outputs/figshare.json", "w", encoding="utf-8") as f:
    json.dump(dict_results, f, ensure_ascii=False, indent=4)

## Get corresponding records from DataCite