https://support.datacite.org/docs/api-get-lists

In [1]:
import requests
import pandas as pd
import numpy as np
import time
from dataCite_fun import getDataCiteCitations_relationTypes
from crossRef_fun import getPublicationInfo

In [None]:
datacite_url = 'https://api.datacite.org/events'
# Equivalent URL form: https://api.datacite.org/events?page[cursor]=1&page[size]=1000
# These are query-string filters, not HTTP headers. The second positional
# argument of requests.get() is `params`, so pass the dict by keyword under a
# name that says what it is.
event_filters = {
    'prefix': '10.5285',
    'page[cursor]': '1',
    'page[size]': '10'
}
# A timeout stops the cell from hanging indefinitely if the API does not answer.
r = requests.get(datacite_url, params=event_filters, timeout=30)
print(r.status_code)
events_payload = r.json()  # parse the response body once and reuse it
print(events_payload.keys())
events_payload['data']

In [None]:
# Look up one DOI on the DataCite /dois endpoint to see which attributes are available.
# Columns we hope to end up with:
# ['data_publisher', 'data_doi', 'data_title', 'data_authors', 'relation_type_id', 'publication_doi', 'publication_type', 'publication_title', 'publication_authors']

pubDOI = '10.5285/6fcc17ad-425b-4367-bd23-c4133a38e359'
doi_endpoint = 'https://api.datacite.org/dois/' + pubDOI
r = requests.get(doi_endpoint, headers={'client-id': 'bl.nerc'})
print(r.status_code)
doi_attributes = r.json()['data']['attributes']
print(doi_attributes.keys())
doi_attributes


## Workflow

In [2]:
# DataCite relation-type-id values queried in this workflow
relation_type_id_list = [
    'is-cited-by',
    'is-referenced-by',
    'is-supplement-to',
    'is-part-of',
]

In [3]:
# Collect the DataCite events for every relation type in relation_type_id_list.
# The helper (imported from dataCite_fun) prints its progress per relation type:
# total records, total pages and the HTTP status of each page request.
dataCite_df_relationTypes = getDataCiteCitations_relationTypes.getDataCiteCitations_relationTypes(relation_type_id_list)

is-cited-by
Total records: 19
Total pages: 1
Status:  200
Page:  1
Final page
is-referenced-by
Total records: 1188
Total pages: 2
Status:  200
Page:  1
https://api.datacite.org/events?page%5Bcursor%5D=MTY3NTk0MDk2OTEyOCw1NGQ3ZDZhZi04OTdjLTQzNzMtOGJmMi1iMzczY2VjNjljNzc&page%5Bsize%5D=1000&prefix=10.5285&relation-type-id=is-referenced-by
Status:  200
Page:  2
Final page
is-supplement-to
Total records: 8
Total pages: 1
Status:  200
Page:  1
Final page
is-part-of
Total records: 626
Total pages: 1
Status:  200
Page:  1
Final page
Done!


In [5]:
# Inspect the column names and a preview of the collected events table
print(dataCite_df_relationTypes.columns)
dataCite_df_relationTypes

Index(['id', 'subj-id', 'obj-id', 'source-id', 'relation-type-id',
       'occurred-at', 'Page endpoint'],
      dtype='object')


Unnamed: 0,id,subj-id,obj-id,source-id,relation-type-id,occurred-at,Page endpoint
0,26a0d78f-5efe-4ac9-9aed-6654d2a3af61,https://doi.org/10.5285/2641515f-5b76-445c-a93...,https://doi.org/10.1016/j.scitotenv.2012.05.023,datacite-crossref,is-cited-by,2019-08-02T01:05:30.000Z,https://api.datacite.org/events?page%5Bcursor%...
1,8875aad2-cbde-41c3-93b3-d63cb21f4d2d,https://doi.org/10.5285/6feac38a-5847-46f9-84e...,https://doi.org/10.1002/2015gl065750,datacite-crossref,is-cited-by,2019-08-01T11:28:04.000Z,https://api.datacite.org/events?page%5Bcursor%...
2,6fbc8dba-128b-47a1-a137-8acc251cf530,https://doi.org/10.5285/db55406b-c9a1-4a9e-88c...,https://doi.org/10.17863/cam.20713,datacite-related,is-cited-by,2019-10-14T08:32:43.000Z,https://api.datacite.org/events?page%5Bcursor%...
3,587dccf9-8c7f-4593-88f9-7a82cb80cdbe,https://doi.org/10.5285/5321bc6e-be35-4ed3-9b5...,https://doi.org/10.1111/1365-2656.12728,datacite-crossref,is-cited-by,2019-10-01T19:01:02.000Z,https://api.datacite.org/events?page%5Bcursor%...
4,fb23ff88-1148-4505-829f-c5289a5a035a,https://doi.org/10.5285/c4ecfe25-12f2-453b-ad1...,https://doi.org/10.5194/cp-2017-18,datacite-crossref,is-cited-by,2019-08-01T11:27:16.000Z,https://api.datacite.org/events?page%5Bcursor%...
...,...,...,...,...,...,...,...
1836,fd1a53b9-3c4e-43e7-857a-61add92a61d0,https://doi.org/10.5285/6fcc17ad-425b-4367-bd2...,https://gtr.ukri.org/projects?ref=ne%2Fg013187...,datacite-url,is-part-of,2023-03-14T11:30:50.000Z,https://api.datacite.org/events?page%5Bcursor%...
1837,a63cb7b5-5caf-4ece-a436-2ffe0c80c652,https://doi.org/10.5285/6fcc17ad-425b-4367-bd2...,https://gtr.ukri.org/projects?ref=ne%2Fg014159...,datacite-url,is-part-of,2023-03-14T11:30:50.000Z,https://api.datacite.org/events?page%5Bcursor%...
1838,24afe93c-f4c0-4539-8196-8d5c499cdfdf,https://doi.org/10.5285/6fcc17ad-425b-4367-bd2...,https://www.bas.ac.uk/project/basal-conditions...,datacite-url,is-part-of,2023-03-14T11:30:50.000Z,https://api.datacite.org/events?page%5Bcursor%...
1839,f04dd43a-d799-4d31-aae9-b13c25357048,https://doi.org/10.5285/beda45d1-dd33-4666-886...,https://gtr.ukri.org/projects?ref=ne%2Fr000719...,datacite-url,is-part-of,2023-04-03T10:12:22.000Z,https://api.datacite.org/events?page%5Bcursor%...


In [6]:
# Remove the resolver prefix from the DOI URLs so the columns hold bare DOIs.
# Vectorised .str.replace with regex=False does the same literal replacement as
# the previous per-row loops, without the duplicated code, and passes missing
# values through instead of raising on them. Values that are not doi.org URLs
# (e.g. project web pages) are left unchanged.
DOI_RESOLVER_PREFIX = 'https://doi.org/'

dataCite_df_relationTypes = (
    dataCite_df_relationTypes
    .assign(
        data_doi=lambda events: events['subj-id'].str.replace(DOI_RESOLVER_PREFIX, '', regex=False),
        # named pub_doi to work in the getPublicationInfo function
        # TODO: subject and object seem to be the wrong way round for some DataCite events - double check this
        pub_doi=lambda events: events['obj-id'].str.replace(DOI_RESOLVER_PREFIX, '', regex=False),
    )
    # the original URL columns are no longer needed once the bare DOIs exist
    .drop(columns=['subj-id', 'obj-id'])
)

In [9]:
# Check that subj-id/obj-id have been replaced by the data_doi/pub_doi columns
print(dataCite_df_relationTypes.columns)
dataCite_df_relationTypes

Index(['id', 'source-id', 'relation-type-id', 'occurred-at', 'Page endpoint',
       'data_doi', 'pub_doi'],
      dtype='object')


Unnamed: 0,id,source-id,relation-type-id,occurred-at,Page endpoint,data_doi,pub_doi
0,26a0d78f-5efe-4ac9-9aed-6654d2a3af61,datacite-crossref,is-cited-by,2019-08-02T01:05:30.000Z,https://api.datacite.org/events?page%5Bcursor%...,10.5285/2641515f-5b76-445c-a936-1da51bf365ad,10.1016/j.scitotenv.2012.05.023
1,8875aad2-cbde-41c3-93b3-d63cb21f4d2d,datacite-crossref,is-cited-by,2019-08-01T11:28:04.000Z,https://api.datacite.org/events?page%5Bcursor%...,10.5285/6feac38a-5847-46f9-84e4-e7e9d291f935,10.1002/2015gl065750
2,6fbc8dba-128b-47a1-a137-8acc251cf530,datacite-related,is-cited-by,2019-10-14T08:32:43.000Z,https://api.datacite.org/events?page%5Bcursor%...,10.5285/db55406b-c9a1-4a9e-88c2-2abbcb4bcad3,10.17863/cam.20713
3,587dccf9-8c7f-4593-88f9-7a82cb80cdbe,datacite-crossref,is-cited-by,2019-10-01T19:01:02.000Z,https://api.datacite.org/events?page%5Bcursor%...,10.5285/5321bc6e-be35-4ed3-9b56-25598d61ac8f,10.1111/1365-2656.12728
4,fb23ff88-1148-4505-829f-c5289a5a035a,datacite-crossref,is-cited-by,2019-08-01T11:27:16.000Z,https://api.datacite.org/events?page%5Bcursor%...,10.5285/c4ecfe25-12f2-453b-ad19-49a19e90ee32,10.5194/cp-2017-18
...,...,...,...,...,...,...,...
1836,fd1a53b9-3c4e-43e7-857a-61add92a61d0,datacite-url,is-part-of,2023-03-14T11:30:50.000Z,https://api.datacite.org/events?page%5Bcursor%...,10.5285/6fcc17ad-425b-4367-bd23-c4133a38e359,https://gtr.ukri.org/projects?ref=ne%2Fg013187...
1837,a63cb7b5-5caf-4ece-a436-2ffe0c80c652,datacite-url,is-part-of,2023-03-14T11:30:50.000Z,https://api.datacite.org/events?page%5Bcursor%...,10.5285/6fcc17ad-425b-4367-bd23-c4133a38e359,https://gtr.ukri.org/projects?ref=ne%2Fg014159...
1838,24afe93c-f4c0-4539-8196-8d5c499cdfdf,datacite-url,is-part-of,2023-03-14T11:30:50.000Z,https://api.datacite.org/events?page%5Bcursor%...,10.5285/6fcc17ad-425b-4367-bd23-c4133a38e359,https://www.bas.ac.uk/project/basal-conditions...
1839,f04dd43a-d799-4d31-aae9-b13c25357048,datacite-url,is-part-of,2023-04-03T10:12:22.000Z,https://api.datacite.org/events?page%5Bcursor%...,10.5285/beda45d1-dd33-4666-8861-b4b91af0180f,https://gtr.ukri.org/projects?ref=ne%2Fr000719...


In [10]:
# Show the events whose data_doi is not under the 10.5285 prefix
test = dataCite_df_relationTypes.loc[
    lambda events: ~events['data_doi'].str.startswith("10.5285")
]
test

Unnamed: 0,id,source-id,relation-type-id,occurred-at,Page endpoint,data_doi,pub_doi
1207,f57fcf57-30bd-4ff4-9318-5d4295c7c7af,datacite-related,is-supplement-to,2022-12-09T16:00:17.000Z,https://api.datacite.org/events?page%5Bcursor%...,10.17882/45461,10.5285/f8f4dc5f-2eed-24ef-e044-000b5de50f38
1213,4aaa4623-a5a7-48ef-8959-0f7be103c519,datacite-related,is-supplement-to,2020-04-07T08:42:21.000Z,https://api.datacite.org/events?page%5Bcursor%...,10.5518/778,10.5285/7e0b2d81-ee71-48d6-a901-3b417d482072
1214,69ad6c6c-bcab-41ce-8edd-d09674410bb7,datacite-related,is-supplement-to,2021-11-02T03:11:14.000Z,https://api.datacite.org/events?page%5Bcursor%...,10.5061/dryad.t76hdr7zv,10.5285/6a408415-0575-49c6-af69-b568e343266d
1727,ff7d11a9-344c-41e2-aefa-c16508fa8228,crossref,is-part-of,2022-01-31T13:42:31.000Z,https://api.datacite.org/events?page%5Bcursor%...,10.5194/amt-15-503-2022,10.5285/602f11d9a2034dae9d0a7356f9aeaf45
1729,34b36790-52cf-4e5e-8afa-d7fcd54a758b,crossref,is-part-of,2022-02-07T09:15:18.000Z,https://api.datacite.org/events?page%5Bcursor%...,10.5194/essd-14-411-2022,10.5285/17c2ce31784048de93996275ee976fff
...,...,...,...,...,...,...,...
1827,4d439940-3e10-4b21-a14e-ea8f1c08f2d0,crossref,is-part-of,2023-01-11T06:31:13.000Z,https://api.datacite.org/events?page%5Bcursor%...,10.5194/hess-25-5355-2021,10.5285/505d1e0c-ab60-4a60-b448-68c5bbae403e
1828,37db00fd-b3b9-44bf-acff-bbb6de14efb5,crossref,is-part-of,2023-01-12T07:16:00.000Z,https://api.datacite.org/events?page%5Bcursor%...,10.5194/essd-2022-244,10.5285/4014370f-8eb2-492b-a5f3-6dc68bf12c1e
1829,1486e388-b4b7-4832-b446-439fa8b06dc6,crossref,is-part-of,2023-01-18T12:44:05.000Z,https://api.datacite.org/events?page%5Bcursor%...,10.5194/os-19-57-2023,10.5285/c6612cbe-50b3-0cff-e053-6c86abc09f8f
1830,736a3eb8-2455-4bab-843a-dcefc7d47bf6,crossref,is-part-of,2023-01-24T06:40:23.000Z,https://api.datacite.org/events?page%5Bcursor%...,10.5194/os-19-77-2023,10.5285/f173b9c1-bb50-0b75-e053-6c86abc02a4a


In [11]:
# Keep only the events whose data_doi starts with "10.5285"
dataCite_df_relationTypes = dataCite_df_relationTypes.loc[
    lambda events: events['data_doi'].str.startswith('10.5285')
]

In [12]:
# Column check after filtering on the DOI prefix
print(dataCite_df_relationTypes.columns)

Index(['id', 'source-id', 'relation-type-id', 'occurred-at', 'Page endpoint',
       'data_doi', 'pub_doi'],
      dtype='object')


In [None]:
# Get dataset metadata from the DataCite /dois endpoint for every event row and
# combine it with the event fields into dataCite_df.
info_list = []
# Sent as query-string parameters. The previous positional call bound this dict
# to requests.get()'s `params` argument despite it being named `headers`, so it
# is now passed by keyword under an accurate name (same request on the wire).
query_params = {'client-id': 'bl.nerc'}
api_url = 'https://api.datacite.org/dois/'
event_columns = ['source-id', 'relation-type-id', 'occurred-at', 'Page endpoint', 'data_doi', 'pub_doi']

# A single session re-uses the HTTPS connection across the per-DOI requests.
with requests.Session() as session:
    for (source_id, relation_type_id, occurred_at, Page_endpoint, data_doi, pub_doi) in \
            dataCite_df_relationTypes[event_columns].itertuples(index=False, name=None):
        try:
            # The request is inside the try block so that one failed or timed-out
            # call produces an "error" row instead of aborting the whole loop.
            r = session.get(api_url + data_doi, params=query_params, timeout=30)
            print(r.status_code, data_doi)

            # Parse the response body once instead of once per field.
            attributes = r.json()['data']['attributes']

            # process author info
            author_list = [creator['name'] for creator in attributes['creators']]

            info_list.append([
                attributes['publisher'],
                data_doi,
                attributes['titles'][0]['title'],
                author_list,
                attributes['publicationYear'],
                attributes['dates'],
                attributes['registered'],
                source_id, relation_type_id, pub_doi, occurred_at, Page_endpoint
            ])
        except Exception as e:
            # Best effort: record a placeholder row (same layout as before) but
            # report why the lookup failed rather than swallowing it silently.
            print(f"Could not get metadata for {data_doi}: {e!r}")
            info_list.append(["error",data_doi,"error","error","error","error","error","error","error",pub_doi,"error","error"])

columns = ['data_publisher', 'data_doi', 'data_title', 'data_authors', 'publicationYear', 'dates', 'registered',
           'source-id', 'relation-type-id', 'pub_doi', 'occurred-at', 'Page endpoint']
dataCite_df = pd.DataFrame(info_list, columns = columns)
print("Done!")
    

In [21]:
# Column check on the dataset metadata table
dataCite_df.columns

Index(['data_publisher', 'data_doi', 'data_title', 'data_authors',
       'publicationYear', 'dates', 'registered', 'source-id',
       'relation-type-id', 'pub_doi', 'occurred-at', 'Page endpoint'],
      dtype='object')

In [17]:
# Save the dataset metadata table to CSV (without the index) so it can be reloaded from disk
dataCite_df.to_csv("dataCite_events_df.csv", index = False)

In [19]:
# Reload the saved dataset metadata table from disk.
# NOTE(review): after the CSV round trip, list-valued columns such as data_authors
# and dates presumably come back as plain strings - confirm before parsing them.
dataCite_df = pd.read_csv("dataCite_events_df.csv")

In [23]:
# Rename pub_doi to subj_id before calling getPublicationInfo (imported from crossRef_fun).
# NOTE(review): presumably the helper reads the publication DOI from a subj_id column - confirm.
dataCite_df_temp = dataCite_df.rename(columns={"pub_doi": "subj_id"})
dataCite_df_publication_meta = getPublicationInfo.getPublicationInfo(dataCite_df_temp)

10.1016/j.scitotenv.2012.05.023
10.1002/2015gl065750
10.17863/cam.20713
10.1111/1365-2656.12728
10.5194/cp-2017-18
10.1002/2016gl068130
10.1029/2007gl032529
10.1029/2009gl040104
10.1029/2009jd012263
10.1002/2015gl065750
10.3189/172756494794587438
10.1029/2007gl032529
10.1029/2009gl040104
10.1111/1365-2656.12798
10.5194/cp-2017-18
10.1029/2018jc013982
10.1175/jcli-d-17-0320.1
10.1111/ele.13129
10.1016/j.jenvrad.2017.06.024
10.1029/2018jc014464
10.5285/4859dc19-e8e9-4148-8c50-cb2ab16dc696
10.5285/65abc40d-e256-414b-8b50-a5569556d1be
10.5285/c11bdb27-df44-4b56-8f4c-afc51b6e1e3a
10.1016/j.cub.2017.04.034
10.5194/essd-9-445-2017
10.1111/2041-210x.12779
10.1080/13658816.2016.1158823
10.5285/902fc4d8-db74-46c9-b7eb-c988f5325903
10.5194/hess-22-2023-2018
10.1111/1365-2656.12489
10.5285/a87b7897-354c-4435-a1bc-e6053e7569e0
10.5285/0579d4a8-e315-41d7-af43-25fb50c7d3da
10.1016/j.agsy.2019.02.015
10.1016/j.jhydrol.2018.07.034
10.5194/hess-22-2023-2018
10.1016/j.ejrh.2015.05.014
10.5194/hess-22-202

In [24]:
# Preview the table with the publication columns (pub_Title, pub_authors, publisher) added
dataCite_df_publication_meta

Unnamed: 0,data_publisher,data_doi,data_title,data_authors,publicationYear,dates,registered,source-id,relation-type-id,subj_id,occurred-at,Page endpoint,pub_Title,pub_authors,publisher
0,NERC EDS Environmental Information Data Centre,10.5285/2641515f-5b76-445c-a936-1da51bf365ad,Exposure of burrowing mammals to Radon Rn-222 ...,"['Beresford, N.A.', 'Barnett, C.L.', 'Vives I ...",2012,"[{'date': '2012-05-29', 'dateType': 'Submitted...",2012-05-29T13:58:37.000Z,datacite-crossref,is-cited-by,10.1016/j.scitotenv.2012.05.023,2019-08-02T01:05:30.000Z,https://api.datacite.org/events?page%5Bcursor%...,API request failed,API request failed,API request failed
1,"Polar Data Centre; British Antarctic Survey, N...",10.5285/6feac38a-5847-46f9-84e4-e7e9d291f935,Snow accumulation from the Bryan Coast ice cor...,"['Thomas, Elizabeth']",2017,"[{'date': '2010', 'dateType': 'Collected'}, {'...",2017-07-10T16:43:38.000Z,datacite-crossref,is-cited-by,10.1002/2015gl065750,2019-08-01T11:28:04.000Z,https://api.datacite.org/events?page%5Bcursor%...,API request failed,API request failed,API request failed
2,NERC EDS Environmental Information Data Centre,10.5285/db55406b-c9a1-4a9e-88c2-2abbcb4bcad3,Foraging behaviour of Parus major held in temp...,"['Thorogood, R', 'Kokko, H', 'Mappes, J']",2017,"[{'date': '2017-10-20', 'dateType': 'Submitted...",2017-10-20T09:10:28.000Z,datacite-related,is-cited-by,10.17863/cam.20713,2019-10-14T08:32:43.000Z,https://api.datacite.org/events?page%5Bcursor%...,API request failed,API request failed,API request failed
3,NERC EDS Environmental Information Data Centre,10.5285/5321bc6e-be35-4ed3-9b56-25598d61ac8f,Invertebrate activity data from an experiment ...,"['Griffiths, H.M.', 'Ashton, L.A.', 'Walker, A...",2017,"[{'date': '2017-06-29', 'dateType': 'Submitted...",2017-06-29T14:31:55.000Z,datacite-crossref,is-cited-by,10.1111/1365-2656.12728,2019-10-01T19:01:02.000Z,https://api.datacite.org/events?page%5Bcursor%...,API request failed,API request failed,API request failed
4,"Polar Data Centre, Natural Environment Researc...",10.5285/c4ecfe25-12f2-453b-ad19-49a19e90ee32,Antarctic regional snow accumulation composite...,"['Thomas, Elizabeth']",2017,"[{'date': '2017-07', 'dateType': 'Accepted'}, ...",2017-07-13T10:43:51.000Z,datacite-crossref,is-cited-by,10.5194/cp-2017-18,2019-08-01T11:27:16.000Z,https://api.datacite.org/events?page%5Bcursor%...,API request failed,API request failed,API request failed
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1762,NERC EDS UK Polar Data Centre,10.5285/6fcc17ad-425b-4367-bd23-c4133a38e359,"Shear wave splitting catalogue, Rutford Ice St...","['Kufner, Sofia-Katerina', 'Brisbourne, Alex',...",2022,"[{'date': '2018-11-20/2019-02-16', 'dateType':...",2022-06-24T11:04:24.000Z,datacite-url,is-part-of,https://gtr.ukri.org/projects?ref=ne%2Fg013187...,2023-03-14T11:30:50.000Z,https://api.datacite.org/events?page%5Bcursor%...,Info not given,Info not given,Info not given
1763,NERC EDS UK Polar Data Centre,10.5285/6fcc17ad-425b-4367-bd23-c4133a38e359,"Shear wave splitting catalogue, Rutford Ice St...","['Kufner, Sofia-Katerina', 'Brisbourne, Alex',...",2022,"[{'date': '2018-11-20/2019-02-16', 'dateType':...",2022-06-24T11:04:24.000Z,datacite-url,is-part-of,https://gtr.ukri.org/projects?ref=ne%2Fg014159...,2023-03-14T11:30:50.000Z,https://api.datacite.org/events?page%5Bcursor%...,Info not given,Info not given,Info not given
1764,NERC EDS UK Polar Data Centre,10.5285/6fcc17ad-425b-4367-bd23-c4133a38e359,"Shear wave splitting catalogue, Rutford Ice St...","['Kufner, Sofia-Katerina', 'Brisbourne, Alex',...",2022,"[{'date': '2018-11-20/2019-02-16', 'dateType':...",2022-06-24T11:04:24.000Z,datacite-url,is-part-of,https://www.bas.ac.uk/project/basal-conditions...,2023-03-14T11:30:50.000Z,https://api.datacite.org/events?page%5Bcursor%...,Info not given,Info not given,Info not given
1765,NERC EDS UK Polar Data Centre,10.5285/beda45d1-dd33-4666-8861-b4b91af0180f,Simulated changes in East Antarctic mass balan...,"['Jordan, James', 'Stokes, Chris', 'Miles, Ber...",2023,"[{'date': '2020-01-01/2220-01-01', 'dateType':...",2023-02-24T11:53:54.000Z,datacite-url,is-part-of,https://gtr.ukri.org/projects?ref=ne%2Fr000719...,2023-04-03T10:12:22.000Z,https://api.datacite.org/events?page%5Bcursor%...,Info not given,Info not given,Info not given


In [25]:
# Map the free-text DataCite publisher names onto standard data centre names.
# Each rule is (keywords, standard name); rules are tried in order and the first
# rule with any keyword found in the lower-cased publisher name wins.
PUBLISHER_RULES = [
    (('polar',), 'Polar Data Centre (PDC)'),
    (('atmospheric', 'badc', 'earth'), 'Centre for Environmental Data Analysis (CEDA)'),
    (('oceanographic',), 'British Oceanographic Data Centre (BODC)'),
    (('geological', 'geoscience'), 'National Geoscience Data Centre (NGDC)'),
    (('environmental information',), 'Environmental Information Data Centre (EIDC)'),
    (('environmental data',), 'Centre for Environmental Data Analysis (CEDA)'),
]


def standardise_data_publisher(dataCentreName):
    """Return the standard data centre name for a raw publisher string.

    Parameters
    ----------
    dataCentreName : object
        Raw value from the data_publisher column.

    Returns
    -------
    object
        The standard name of the first matching rule in PUBLISHER_RULES, or the
        input unchanged when no rule matches or the value is not a string
        (e.g. NaN or None for a missing publisher).
    """
    # Non-string values (missing publishers) have no text to match on.
    if not isinstance(dataCentreName, str):
        return dataCentreName

    # Lower-case first because the `in` substring test is case sensitive.
    dataCentreName_lower = dataCentreName.lower()
    for keywords, standard_name in PUBLISHER_RULES:
        if any(keyword in dataCentreName_lower for keyword in keywords):
            return standard_name
    return dataCentreName


newPublisherLst = [
    standardise_data_publisher(dataCentreName)
    for dataCentreName in dataCite_df_publication_meta['data_publisher']
]

# Drop the raw column and append the processed one under the same name, so
# data_publisher ends up as the last column exactly as before.
dataCite_df_publication_meta = (
    dataCite_df_publication_meta
    .drop(columns=['data_publisher'])
    .assign(data_publisher=newPublisherLst)
)

In [28]:
# Save the combined dataset and publication metadata table to CSV (without the index)
dataCite_df_publication_meta.to_csv("dataCite_df_events_publication_meta.csv", index = False)

In [None]:
# Preview the final table after the data_publisher names have been standardised
dataCite_df_publication_meta

In [None]:
# ChatGPT recommendation
import requests
from requests.adapters import HTTPAdapter
from requests.exceptions import RequestException
from urllib3.util.retry import Retry
import numpy as np
import pandas as pd
import time
    
def getDataCiteCitations_relationTypes(relation_type_id_list):

    column_names = ["id", "subj-id", "obj-id", "source-id", "relation-type-id", "occurred-at", "Page endpoint"]
    dataCite_info_relationTypes = []  # create an empty list in which all the DataCite info will be placed
    
    # set up retry mechanism for requests
    retry_strategy = Retry(
        total=5,
        status_forcelist=[429, 500, 502, 503, 504],
        backoff_factor=1
    )
    adapter = HTTPAdapter(max_retries=retry_strategy)
    http = requests.Session()
    http.mount("https://", adapter)
    http.mount("http://", adapter)

    for relation_type_id in relation_type_id_list:

        dataCite_info = []

        # send a request to get initial info from DataCite
        headers = {
        'prefix': '10.5285',
        'page[cursor]': '1',
        'page[size]': '1000',
        'relation-type-id': relation_type_id
        }

        # retry request a few times before giving up
        for retry_count in range(5):
            try:
                r = http.get('https://api.datacite.org/events', headers=headers)
                r.raise_for_status()
                break
            except RequestException as e:
                print(f"Error occurred while making request: {e}")
                if retry_count < 4:
                    print(f"Retrying request in {2 ** retry_count} seconds...")
                    time.sleep(2 ** retry_count)
                else:
                    raise e

        print(relation_type_id)

        # determine the total number of pages and dataset records
        totalPages = r.json()['meta']['total-pages']
        totalRecords = r.json()['meta']['total']
        print("Total records:", totalRecords)
        print("Total pages:", totalPages)

        # create array from 1 to total number of pages to loop through
        pages = np.arange(1,totalPages+1)
        # set next page url
        if totalPages > 1:
            next_url = r.json()['links']['next']
        else:
            pass
            

        #loop through pages
        for p in pages:
            if p == 1:
                url = 'https://api.datacite.org/events?page[cursor]=1'
            else:
                url = next_url

            # make the API request and print the status code in case of an error
            headers = {'prefix': '10.5285',
                       'page[size]': '1000',
                      '
