## Extract the datasets used by a paper with the Crossref API

Example request for the paper "Exploring Smoothness...": http://api.crossref.org/works/10.1007/978-3-031-16443-9_4

In [7]:
import requests

# Papers to inspect, as a mapping: paper title -> paper DOI
papers = {
    "Exploring Smoothness and Class-Separation for Semi-supervised Medical Image Segmentation": "10.1007/978-3-031-16443-9_4",
    "MaxStyle: Adversarial Style Composition for Robust Medical Image Segmentation": "10.1007/978-3-031-16443-9_15",
    "Swin-Unet: Unet-like Pure Transformer for Medical Image Segmentation": "10.1007/978-3-031-25066-8_9",
    "nnFormer: Volumetric Medical Image Segmentation via a 3D Transformer": "10.48550/arXiv.2109.03201",
    "TransUNet: Transformers Make Strong Encoders for Medical Image Segmentation": "10.48550/arXiv.2102.04306",
}

# Known datasets, as a mapping: dataset name -> DOI
datasets = {
    "ACDC": "10.1109/TMI.2018.2837502",
    "LA": "10.1016/j.media.2020.101832",
    "PROMISE12": "10.1016/j.media.2013.12.002",
    "MSCMRSeg": "10.48550/arxiv.2006.12434",
    "M&Ms": "10.1109/tmi.2021.3090082",
    "Medical Decathlon": "10.1038/s41467-022-30695-9",
    "I2CVB": "10.1016/j.compbiomed.2015.02.009",
    "BRATS": "10.1109/tmi.2014.2377694",
}

In [8]:
# Base URL of the Crossref REST API "works" endpoint; a paper DOI is appended to it.
base_url = "https://api.crossref.org/works/"

# Intended to restrict the response to the referenced works only.
# NOTE(review): this dict is not sent with the requests below. "referenced_works"
# does not look like a Crossref field name (the code below reads the list from
# "reference") -- confirm against the Crossref documentation before passing it on.
query_param = {
    "select":"referenced_works"
}

# Seconds to wait for Crossref before giving up on a paper, so that a stalled
# connection cannot hang the cell indefinitely.
request_timeout = 30

# DOIs are case-insensitive, so index the datasets by lower-cased DOI. Each entry
# is a list so that several dataset names sharing one DOI are all kept.
datasets_by_doi = {}
for dataset_name, dataset_doi in datasets.items():
    datasets_by_doi.setdefault(dataset_doi.lower(), []).append(dataset_name)

# Will be filled with the datasets found among the references of each paper
datasets_used = {p: [] for p in papers}

for paper_name, paper_doi in papers.items():
    try:
        response = requests.get(base_url + paper_doi, timeout=request_timeout)
    except requests.RequestException as error:
        # Network failure or timeout: report it and carry on with the next paper.
        print(f"{paper_name} could not be retrieved: {error}")
        continue

    if response.status_code == 404:
        print(f"{paper_name} not found")
        continue
    if response.status_code != 200:
        # Rate limiting or a server error is not the same as an unknown DOI.
        print(f"{paper_name} request failed (HTTP {response.status_code})")
        continue

    # The "reference" key may be missing from the record; treat that as an
    # empty reference list instead of raising KeyError.
    references = response.json()["message"].get("reference", [])
    found = datasets_used[paper_name]
    for ref in references:
        # References without a "DOI" key cannot be matched and yield no dataset.
        for dataset_name in datasets_by_doi.get(ref.get("DOI", "").lower(), []):
            # A paper may cite the same work more than once; list each dataset once.
            if dataset_name not in found:
                found.append(dataset_name)

datasets_used

nnFormer: Volumetric Medical Image Segmentation via a 3D Transformer not found
TransUNet: Transformers Make Strong Encoders for Medical Image Segmentation not found


{'Exploring Smoothness and Class-Separation for Semi-supervised Medical Image Segmentation': ['ACDC',
  'LA'],
 'MaxStyle: Adversarial Style Composition for Robust Medical Image Segmentation': ['I2CVB',
  'PROMISE12'],
 'Swin-Unet: Unet-like Pure Transformer for Medical Image Segmentation': [],
 'nnFormer: Volumetric Medical Image Segmentation via a 3D Transformer': [],
 'TransUNet: Transformers Make Strong Encoders for Medical Image Segmentation': []}