In [1]:
import time
import requests
import pandas as pd


In [3]:
relCollectedFromDatasourceId = catalog.load('params:openaire_researchproduct_collectedfrom_datasource_fetch_options.relCollectedFromDatasourceId')
env = 'dev'
access_token = catalog.load('params:openaire_researchproduct_collectedfrom_datasource_fetch_options.access_token')
refresh_token = catalog.load('params:openaire_researchproduct_collectedfrom_datasource_fetch_options.refresh_token')

In [4]:
def refresh_access_token(refresh_token):
    """Obtiene un nuevo access_token usando el refresh_token."""
    refresh_url = f"https://services.openaire.eu/uoa-user-management/api/users/getAccessToken?refreshToken={refresh_token}"
    response = requests.get(refresh_url)
    if response.status_code == 200:
        return response.json().get("access_token")
    else:
        raise Exception(f"Failed to refresh token: {response.status_code}")

def fetch_openaire_researchproduct_collectedfrom_datasource(relCollectedFromDatasourceId, access_token, refresh_token, env):
    cursor = '*'
    base_url = 'https://api.openaire.eu/graph/researchProducts'
    iteration_limit = 5
    iteration_count = 0
    page_size = 50  # Ajustar este valor según sea necesario
    max_retries = 5  # Máximo número de reintentos en caso de error 429
    retry_wait = 5   # Tiempo inicial de espera entre reintentos en segundos
    
    def get_headers():
        return {
            "accept": "application/json",
            'Authorization': f'Bearer {access_token}'
        }
    
    params = {
        "relCollectedFromDatasourceId": relCollectedFromDatasourceId,  # Búsqueda por institución
        "pageSize": page_size,
        "cursor": cursor
    }
    
    while True:
        response = requests.get(base_url, headers=get_headers(), params=params)
        
        # Si el token es inválido o expiró, intentar renovarlo
        if response.status_code == 403:
            print("Access token expired or invalid. Refreshing token...")
            access_token = refresh_access_token(refresh_token)
            continue  # Reintentar la solicitud con el nuevo token
        
        if response.status_code != 200:
            raise Exception(f"Failed to retrieve data: {response.status_code}")
        
        api_response = response.json()
        print(f'Iteration count: {iteration_count}')
        print(f'GET {response.url}')
        
        # Crear DataFrame con las columnas del primer resultado
        df = pd.DataFrame.from_dict(api_response['results'])
        
        # Actualizar cursor
        cursor = api_response['header'].get('nextCursor', None)
        params["cursor"] = cursor
        
        # Bucle para iterar con el cursor
        while cursor:
            if env == 'dev' and iteration_count >= iteration_limit:
                break
            
            iteration_count += 1
            print(f'Iteration count: {iteration_count}')
            print(f'GET {response.url}')
            
            time.sleep(2)
            
            # Reintentos en caso de error 429
            retries = 0
            while retries < max_retries:
                response = requests.get(base_url, headers=get_headers(), params=params)
                
                if response.status_code == 403:
                    print("Access token expired during execution. Refreshing token...")
                    access_token = refresh_access_token(refresh_token)
                    continue  # Reintentar con el nuevo token
                
                if response.status_code == 429:
                    retries += 1
                    print(f"Rate limit hit. Retry {retries}/{max_retries}. Waiting {retry_wait} seconds...")
                    time.sleep(retry_wait)
                    retry_wait *= 2  # Incrementar el tiempo de espera exponencialmente
                else:
                    break
            
            if response.status_code != 200:
                print(f"Failed to retrieve data at iteration {iteration_count}: {response.status_code}")
                break
            
            api_response = response.json()
            
            if not api_response.get('results'):
                print("No more results. Stopping iteration.")
                break
            
            df_tmp = pd.DataFrame.from_dict(api_response['results'])
            df = pd.concat([df, df_tmp])
            
            cursor = api_response['header'].get('nextCursor', None)
            params["cursor"] = cursor
        
        return df, df.head(1000)


In [6]:
df, df_dev = fetch_openaire_researchproduct_collectedfrom_datasource(relCollectedFromDatasourceId, access_token, refresh_token, env)

Access token expired or invalid. Refreshing token...
Iteration count: 0
GET https://api.openaire.eu/graph/researchProducts?relCollectedFromDatasourceId=opendoar____%3A%3A2222f99fe4ee52bacba5f5406ff033aa&pageSize=50&cursor=%2A
Iteration count: 1
GET https://api.openaire.eu/graph/researchProducts?relCollectedFromDatasourceId=opendoar____%3A%3A2222f99fe4ee52bacba5f5406ff033aa&pageSize=50&cursor=%2A
Iteration count: 2
GET https://api.openaire.eu/graph/researchProducts?relCollectedFromDatasourceId=opendoar____%3A%3A2222f99fe4ee52bacba5f5406ff033aa&pageSize=50&cursor=AoI%2FD2RlZHVwX3dmXzAwMjo6MGFkYzJlMjhiMTU1YTQ2NjQzYTIxMDQ5M2IxMmFkODgIP4AAAA%3D%3D
Iteration count: 3
GET https://api.openaire.eu/graph/researchProducts?relCollectedFromDatasourceId=opendoar____%3A%3A2222f99fe4ee52bacba5f5406ff033aa&pageSize=50&cursor=AoI%2FD2RlZHVwX3dmXzAwMjo6MTM2MmE5MjFhY2FjODEzZGE0NjVhYzhiZmFlMzQ5NTkIP4AAAA%3D%3D
Iteration count: 4
GET https://api.openaire.eu/graph/researchProducts?relCollectedFromDatasourceI

In [5]:
df

Unnamed: 0,author,openAccessColor,publiclyFunded,type,language,subjects,mainTitle,description,publicationDate,publisher,format,bestAccessRight,id,originalId,indicators,instance,isGreen,isInDiamondJournal,contributor,country
0,"[{'fullName': 'Miniussi, Claudio L.', 'name': ...",,,other,"{'code': 'Español', 'label': 'Español'}","[{'subject': {'scheme': 'keyword', 'value': 'a...",Aplicacion de la fluorescencia de Rayos X al a...,[La fluorescencia de Rayos X puede ser aplicad...,1969-01-01,Laboratorio de Entrenamiento Multidisciplinari...,"[application/pdf, 11 p.]","{'code': 'c_abf2', 'label': 'OPEN', 'scheme': ...",dedup_wf_002::0084e248427bc09d1ea962ee8254febc,"[oai:digital.cic.gba.gob.ar:11746/755, 50|od__...","{'citationImpact': {'citationCount': 0.0, 'inf...","[{'type': 'Other ORP type', 'url': ['https://d...",,,,
1,"[{'fullName': 'De Giusti, Marisa Raquel', 'nam...",,,other,"{'code': 'und', 'label': 'Undetermined'}","[{'subject': {'scheme': 'keyword', 'value': 'C...",2º Taller para la generación de perfiles de in...,[Objetivos del taller: - Entender la necesida...,2018-10-01,,"[application/pdf, 55 diap.]","{'code': 'c_abf2', 'label': 'OPEN', 'scheme': ...",dedup_wf_002::00df9d3169981ad636b245c4eb46fd2e,[50|od______9441::40276da60c9d26730d7d2c5d53f9...,"{'citationImpact': {'citationCount': 0.0, 'inf...","[{'type': 'Unknown', 'url': ['http://digital.c...",,,,
2,"[{'fullName': 'Pesado, Patricia Mabel', 'name'...",,,other,"{'code': 'und', 'label': 'Undetermined'}","[{'subject': {'scheme': 'keyword', 'value': 'I...",Calidad en el desarrollo de Sistemas de Software,[El proyecto de investigación y desarrollo “Si...,2006-06-01,,"[application/pdf, 4 p.]","{'code': 'c_abf2', 'label': 'OPEN', 'scheme': ...",dedup_wf_002::02486530ee27ea6d8fa443ce0bf3156e,[50|od______9441::d0e0daf7569603798aa06f76cdc1...,"{'citationImpact': {'citationCount': 0.0, 'inf...","[{'type': 'Unknown', 'url': ['http://digital.c...",,,,
3,"[{'fullName': 'Almirón, Adriana E.', 'name': '...",,,other,"{'code': 'und', 'label': 'Undetermined'}","[{'subject': {'scheme': 'keyword', 'value': 'P...","First record of Rineloricaria reisi, R. stella...","[Rineloricaria reisi, R. stellata, and R. zain...",2014-01-01,,"[application/pdf, p. 1528-1530]","{'code': 'c_abf2', 'label': 'OPEN', 'scheme': ...",dedup_wf_002::030e478a4a6206520f0c95a165c4582b,[50|od______9441::25284dcd824f3211323f138deb3d...,"{'citationImpact': {'citationCount': 0.0, 'inf...","[{'type': 'Unknown', 'url': ['http://digital.c...",,,,
4,"[{'fullName': 'Rascio, Vicente J. D.', 'name':...",,,other,"{'code': 'und', 'label': 'Undetermined'}","[{'subject': {'scheme': 'keyword', 'value': 'r...",Contribución al estudio del comportamiento de ...,[Formulaciones antiincrustantes fueron estudia...,1971-01-01,Laboratorio de Entrenamiento Multidisciplinari...,"[application/pdf, p. 77-147]","{'code': 'c_abf2', 'label': 'OPEN', 'scheme': ...",dedup_wf_002::04d710c7907d0fc982ef0df1528ac550,"[oai:digital.cic.gba.gob.ar:11746/591, 50|od__...","{'citationImpact': {'citationCount': 0.0, 'inf...","[{'license': 'CC BY', 'type': 'Unknown', 'url'...",,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
45,[{'fullName': 'Comisión de Investigaciones Cie...,,,other,"{'code': 'Español', 'label': 'Español'}","[{'subject': {'scheme': 'keyword', 'value': 'A...",Acta n° 1515,[Incluye los siguientes anexos: - Anexo 2.1. B...,2020-11-19,,[application/pdf],"{'code': 'c_abf2', 'label': 'OPEN', 'scheme': ...",od______9441::02f1a7708a0ad499a99819b6db55f1f3,"[oai:digital.cic.gba.gob.ar:11746/10735, 50|od...","{'citationImpact': {'citationCount': 0.0, 'inf...","[{'type': 'Other ORP type', 'url': ['https://d...",,,,
46,"[{'fullName': 'Caprari, Juan J.', 'name': 'Jua...",,,other,"{'code': 'und', 'label': 'Undetermined'}","[{'subject': {'scheme': 'keyword', 'value': 'I...",Efecto del ensayo de inmersión alternada sobre...,[El presente trabajo es el tercero de una seri...,1980-01-01,,"[application/pdf, p. 119-151]","{'code': 'c_abf2', 'label': 'OPEN', 'scheme': ...",od______9441::02fd0e72152b88e7ac2f1bed7fba04bb,[50|od______9441::02fd0e72152b88e7ac2f1bed7fba...,"{'citationImpact': {'citationCount': 0.0, 'inf...","[{'type': 'Unknown', 'url': ['http://digital.c...",,,,
47,"[{'fullName': 'Voisin, Axel Iván', 'name': 'Ax...",,,other,"{'code': 'und', 'label': 'Undetermined'}","[{'subject': {'scheme': 'keyword', 'value': 'A...",Extracción y balance de nutrientes para trigo ...,[El desplazamiento de los sistemas de producci...,2016-09-01,,"[application/pdf, 1 p.]","{'code': 'c_abf2', 'label': 'OPEN', 'scheme': ...",od______9441::03110795eb060ab071c876557ba7b7b1,[50|od______9441::03110795eb060ab071c876557ba7...,"{'citationImpact': {'citationCount': 0.0, 'inf...","[{'type': 'Unknown', 'url': ['http://digital.c...",,,,
48,"[{'fullName': 'Betancur, T.', 'name': 'T.', 's...",,,other,"{'code': 'und', 'label': 'Undetermined'}","[{'subject': {'scheme': 'keyword', 'value': 'h...",Métodos para evaluar interacciones entre cuerp...,[La comprensión de la dinámica de ecosistemas ...,2012-01-01,,"[application/pdf, 5 p.]","{'code': 'c_abf2', 'label': 'OPEN', 'scheme': ...",od______9441::031137adcb518537962c55dbba14058e,"[oai:digital.cic.gba.gob.ar:11746/4028, 50|od_...","{'citationImpact': {'citationCount': 0.0, 'inf...","[{'type': 'Unknown', 'url': ['http://digital.c...",,,,
