In [1]:
import os
import xml.etree.ElementTree as etree
from re import match, search, compile, I

import requests

# Step 1 - Clean list of DOIs

DOIs should be provided in a valid format, but they are often entered as URLs
prefixed with `https://doi.org/`. Sometimes URLs to assets that are not valid
DOIs are provided; these should raise a warning but otherwise be ignored.

In [31]:
# Load the candidate DOIs, one per line, from the CSV file; surrounding
# whitespace (including the trailing newline) is stripped from each entry.
file_path = 'list_of_doi.csv'
with open(file_path, 'r') as csv_file:
    dois = [line.strip() for line in csv_file]

In [32]:
# Pattern adapted from https://www.crossref.org/blog/dois-and-matching-regular-expressions/
# It is written as a raw string so the backslashes reach the regex engine
# untouched, and the dot after "10" is escaped so that only a literal "10."
# prefix is accepted (an unescaped dot would also accept e.g. "1001234/file"
# taken from an asset URL that is not a DOI).
pattern = compile(r"10\.\d{4,9}/[-._;()/:A-Z0-9]+", I)

valid = []
# `search` (rather than a full match) extracts a DOI embedded in a longer
# string such as a https://doi.org/ URL; entries without one are reported and
# skipped.
for doi in dois:
    # Not named `match`, which would shadow the `match` imported from `re`.
    found = pattern.search(doi)
    if found:
        valid.append(found.group())
    else:
        print(f"{doi} is not a DOI.")

# Step 2 - Prepare query string

In [162]:
# Build the query string for the search request: JSON output, filtered by a
# comma-separated list of the first 20 cleaned DOIs.
query = "?format=json&doi=" + ",".join(valid[:20])
# Single-DOI example: query = "?format=json&doi=10.1038/s41597-023-02529-w"

In [163]:
# Show the assembled query string.
query

'?format=json&doi=10.1016/j.esr.2021.100799,10.21203/rs.3.rs-1160025/v1,10.5281/zenodo.4728143,10.5281/zenodo.4650810,10.5281/zenodo.4725486,10.5281/zenodo.4650986,10.5281/zenodo.4650942,10.5281/zenodo.4725445,10.5281/zenodo.4650822,10.5281/zenodo.4650968,10.5281/zenodo.4725466,10.5281/zenodo.4737634,10.5281/zenodo.4737640,10.5281/zenodo.4725462,10.5281/zenodo.4652804,10.5281/zenodo.4650904,10.5281/zenodo.4725456,10.5281/zenodo.4737638,10.5281/zenodo.4650876,10.5281/zenodo.4737642'

# Step 3 - Retrieve matching results from OpenAire

In [164]:
# Read the access token from the environment instead of storing it in the
# notebook. Any token that was ever saved in this file should be treated as
# leaked and revoked, because notebooks (and their history) are routinely shared.
token = os.environ.get("OPENAIRE_ACCESS_TOKEN")
# NOTE(review): when no token is set the request is sent without an
# Authorization header — confirm that OpenAIRE's limits for unauthenticated
# calls are acceptable for this workflow.
headers = {"Authorization": f"Bearer {token}"} if token else {}

api_url = "https://api.openaire.eu/search/researchProducts"
# A timeout stops a stalled connection from hanging the kernel indefinitely.
response = requests.get(api_url + query, headers=headers, timeout=30)
# Fail here, with the HTTP status, rather than later while parsing the body.
response.raise_for_status()
response

<Response [200]>

In [165]:
# Inspect the Content-Type header of the response.
response.headers["Content-Type"]

'application/json;charset=UTF-8'

In [166]:
# Display the decoded JSON body of the response.
response.json()

{'response': {'header': {'query': {'$': '(oaftype exact result) and ((pidclassid exact "doi" and pid exact "10.1016/j.esr.2021.100799") or (pidclassid exact "doi" and pid exact "10.21203/rs.3.rs-1160025/v1") or (pidclassid exact "doi" and pid exact "10.5281/zenodo.4728143") or (pidclassid exact "doi" and pid exact "10.5281/zenodo.4650810") or (pidclassid exact "doi" and pid exact "10.5281/zenodo.4725486") or (pidclassid exact "doi" and pid exact "10.5281/zenodo.4650986") or (pidclassid exact "doi" and pid exact "10.5281/zenodo.4650942") or (pidclassid exact "doi" and pid exact "10.5281/zenodo.4725445") or (pidclassid exact "doi" and pid exact "10.5281/zenodo.4650822") or (pidclassid exact "doi" and pid exact "10.5281/zenodo.4650968") or (pidclassid exact "doi" and pid exact "10.5281/zenodo.4725466") or (pidclassid exact "doi" and pid exact "10.5281/zenodo.4737634") or (pidclassid exact "doi" and pid exact "10.5281/zenodo.4737640") or (pidclassid exact "doi" and pid exact "10.5281/zenod

In [167]:
# Print the raw `oaf:result` record of every hit returned by the search.
for hit in response.json()['response']['results']['result']:
    record = hit['metadata']['oaf:entity']['oaf:result']
    print(record)

{'collectedfrom': [{'@name': 'Crossref', '@id': 'openaire____::081b82f96300b6a6e3d282bad31cb6e2'}, {'@name': 'UnpayWall', '@id': 'openaire____::8ac8380272269217cb09a928c8caa993'}], 'originalId': [{'$': '10.21203/rs.3.rs-1160025/v1'}, {'$': '50|doiboost____::642e67d314304eb5dd5c5eb991372b4b'}], 'pid': {'@classid': 'doi', '@classname': 'Digital Object Identifier', '@schemeid': 'dnet:pid_types', '@schemename': 'dnet:pid_types', '$': '10.21203/rs.3.rs-1160025/v1'}, 'measure': [{'@id': 'influence', '@score': '3.317285E-9', '@class': 'C5'}, {'@id': 'popularity', '@score': '3.0997787E-9', '@class': 'C5'}, {'@id': 'influence_alt', '@score': '0', '@class': 'C5'}, {'@id': 'popularity_alt', '@score': '0.0', '@class': 'C5'}, {'@id': 'impulse', '@score': '0', '@class': 'C5'}], 'title': {'@classid': 'main title', '@classname': 'main title', '@schemeid': 'dnet:dataCite_title', '@schemename': 'dnet:dataCite_title', '$': 'Future low-carbon electricity in Africa: how much material is needed?'}, 'bestacc

In [178]:
import re
from html import unescape

# Matches a single HTML/XML tag. `[^>]*` is used instead of `.*?` so that a tag
# whose attributes wrap onto a new line is still matched ('.' stops at '\n').
CLEANR = re.compile(r'<[^>]*>')

def clean_html(raw_html):
    """Remove HTML markup from a string.

    Tags are deleted and HTML entities (e.g. ``&amp;``) are converted to the
    characters they represent.

    Parameters
    ----------
    raw_html : str or None
        Text that may contain HTML tags and entities.

    Returns
    -------
    str
        The text with tags removed and entities unescaped; an empty string
        when ``raw_html`` is ``None`` or empty.
    """
    if not raw_html:
        return ''
    return unescape(CLEANR.sub('', raw_html))

def _as_list(node):
    """Wrap ``node`` in a list unless it already is one; ``None`` gives ``[]``."""
    if node is None:
        return []
    return node if isinstance(node, list) else [node]


def _first_text(node):
    """Return the first non-empty ``'$'`` value in a dict or list of dicts, else ``''``."""
    for item in _as_list(node):
        if isinstance(item, dict) and item.get('$'):
            return item['$']
    return ''


# Print title, authors, publisher, journal and abstract for every search hit.
for result in response.json()['response']['results']['result']:

    entity = result['metadata']['oaf:entity']['oaf:result']

    # `title` is either a single dict or a list of dicts; from a list only the
    # entries classified as 'main title' are printed.
    title = entity['title']
    if isinstance(title, list):
        for candidate in title:
            if candidate.get('@classid') == 'main title':
                print(candidate['$'])
    else:
        print(title['$'])

    # Every remaining field is read with a default so that a record lacking
    # one of them does not abort the whole loop with a KeyError.
    authors = entity.get('creator', [])
    publisher = entity.get('publisher', {'$': ''})
    journal = entity.get('journal', {'$': ''})
    abstract = entity.get('description', {'$': ''})

    # A lone creator may arrive as a dict rather than a list (assumption —
    # confirm against the API). Iterating a dict would yield its string keys,
    # so the value is normalised to a list before formatting.
    all_authors = []
    for author in _as_list(authors):
        if not isinstance(author, dict):
            continue
        name = author.get("$", "No name")
        orcid = author.get('@orcid', "No ORCID")
        all_authors.append(f"{name} ({orcid})")
    print(", ".join(all_authors))

    # These fields may be a dict, a list of dicts, or lack a '$' entry;
    # `_first_text` reduces each case to a plain (possibly empty) string.
    print(_first_text(publisher))
    print(_first_text(journal))
    print(clean_html(_first_text(abstract)))


Future low-carbon electricity in Africa: how much material is needed?
Karla Cervantes Barron (0000-0001-9185-3022), Maaike E Hakker (No ORCID), Jonathan M Cullen (0000-0003-4347-5025)
Research Square Platform LLC

Abstract         African countries are expected to experience some of the worst climate effects, while trying to provide higher electricity access and increase wellbeing.Concrete, steel, and aluminium pre­sent the largest opportunities for action, given their high mass or embodied emissions projections.Embodied emissions related to material use for electricity plants are evaluated in three scenarios: a refer­ence scenario, and two scenarios related to the Paris Agreement (where renewable energy increases), resulting in higher embodied emissions as renewables are integrated.Pursuing strategies to increase the use of renewables should be done along material efficiency strategies to reach the total low-carbon potential.
CCG Starter Data Kit: Algeria
Cannone, Carla (No ORCID), Al