In [1]:
from datetime import date
import pandas as pd
import requests
import time

env = 'dev'

filter_param = catalog.load('params:openaire_fetch_options.filter_param')
filter_value = catalog.load('params:openaire_fetch_options.filter_value')
access_token = catalog.load('params:openaire_fetch_options.access_token')
refresh_token = catalog.load('params:openaire_fetch_options.refresh_token')

In [2]:
def refresh_access_token(refresh_token):
    """Obtiene un nuevo access_token usando el refresh_token."""
    refresh_url = f"https://services.openaire.eu/uoa-user-management/api/users/getAccessToken?refreshToken={refresh_token}"
    response = requests.get(refresh_url)
    if response.status_code == 200:
        return response.json().get("access_token")
    else:
        raise Exception(f"Failed to refresh token: {response.status_code}")

In [3]:
def openaire_fetch_researchproduct(filter_param, filter_value, access_token, refresh_token, env):
    cursor = '*'
    base_url = 'https://api.openaire.eu/graph/v2/researchProducts'
    iteration_limit = 5
    iteration_count = 0
    page_size = 50         # Ajustar según sea necesario
    max_retries = 5        # Máximo número de reintentos en caso de error 429
    retry_wait = 5         # Tiempo inicial de espera entre reintentos (segundos)
    max_refresh_attempts = 3  # Máximo número de intentos para refrescar el token
    refresh_attempts = 0

    request_headers = {
        "accept": "application/json",
        "Authorization": f"Bearer {access_token}"
    }

    query_params = {
        filter_param: filter_value,
        "pageSize": page_size,
        "cursor": cursor
    }

    while True:
        response = requests.get(base_url, headers=request_headers, params=query_params)

        # Si el token es inválido o expiró, intentar renovarlo
        if response.status_code == 403:
            if refresh_attempts >= max_refresh_attempts:
                raise Exception("Máximo de intentos para refrescar el token alcanzado. Abortando.")
            print("Access token expired or invalid. Refreshing token...")
            new_token = refresh_access_token(refresh_token)
            if not new_token:
                raise Exception("No se pudo refrescar el access token.")
            access_token = new_token
            request_headers["Authorization"] = f"Bearer {access_token}"
            refresh_attempts += 1
            continue  # Reintenta la solicitud con el nuevo token

        if response.status_code != 200:
            raise Exception(f"Failed to retrieve data: {response.status_code}")

        # Restablecemos el contador de refrescos al tener una respuesta exitosa
        refresh_attempts = 0

        api_response = response.json()
        print(f"Iteration count: {iteration_count}")
        print(f"GET {response.url}")

        # Crear DataFrame con el primer bloque de resultados
        df = pd.DataFrame.from_dict(api_response["results"])

        # Actualizar cursor
        cursor = api_response["header"].get("nextCursor", None)
        query_params["cursor"] = cursor

        # Bucle para iterar con el cursor
        while cursor:
            if env == "dev" and iteration_count >= iteration_limit:
                break

            iteration_count += 1
            print(f"Iteration count: {iteration_count}")
            print(f"GET {response.url}")
            time.sleep(2)

            # Reintentos en caso de error 429
            retries = 0
            while retries < max_retries:
                response = requests.get(base_url, headers=request_headers, params=query_params)

                if response.status_code == 403:
                    if refresh_attempts >= max_refresh_attempts:
                        raise Exception("Máximo de intentos para refrescar el token alcanzado durante la ejecución. Abortando.")
                    print("Access token expired during execution. Refreshing token...")
                    new_token = refresh_access_token(refresh_token)
                    if not new_token:
                        raise Exception("No se pudo refrescar el access token durante la ejecución.")
                    access_token = new_token
                    request_headers["Authorization"] = f"Bearer {access_token}"
                    refresh_attempts += 1
                    continue  # Reintenta con el nuevo token

                if response.status_code == 429:
                    retries += 1
                    print(f"Rate limit hit. Retry {retries}/{max_retries}. Waiting {retry_wait} seconds...")
                    time.sleep(retry_wait)
                    retry_wait *= 2  # Incremento exponencial del tiempo de espera
                else:
                    break

            if response.status_code != 200:
                print(f"Failed to retrieve data at iteration {iteration_count}: {response.status_code}")
                break

            # Restablecer contador de refrescos tras respuesta exitosa
            refresh_attempts = 0

            api_response = response.json()

            if not api_response.get("results"):
                print("No more results. Stopping iteration.")
                break

            df_tmp = pd.DataFrame.from_dict(api_response["results"])
            df = pd.concat([df, df_tmp], ignore_index=True)

            cursor = api_response["header"].get("nextCursor", None)
            query_params["cursor"] = cursor

        df[filter_param] = filter_value

        df['load_datetime'] = date.today()

        return df, df.head(1000)



In [4]:
df, df_dev = openaire_fetch_researchproduct(filter_param, filter_value, access_token, refresh_token, env)

Access token expired or invalid. Refreshing token...
Iteration count: 0
GET https://api.openaire.eu/graph/v2/researchProducts?relOrganizationId=openorgs____%3A%3A40b9f835648a3e0d057d6917dd7e54d5&pageSize=50&cursor=%2A
Iteration count: 1
GET https://api.openaire.eu/graph/v2/researchProducts?relOrganizationId=openorgs____%3A%3A40b9f835648a3e0d057d6917dd7e54d5&pageSize=50&cursor=%2A
Iteration count: 2
GET https://api.openaire.eu/graph/v2/researchProducts?relOrganizationId=openorgs____%3A%3A40b9f835648a3e0d057d6917dd7e54d5&pageSize=50&cursor=AoI%2FD2E5YWM1MGY1NzZhYTo6ODk3OTRhMmFmNzEzMzBkYzNlY2QyOGExNjQwNGExNzIIP4AAAA%3D%3D
Iteration count: 3
GET https://api.openaire.eu/graph/v2/researchProducts?relOrganizationId=openorgs____%3A%3A40b9f835648a3e0d057d6917dd7e54d5&pageSize=50&cursor=AoI%2FD2E5YWM1MGY1NzZhYTo6ZjU3ZmZiZmE4OWI1MGQ3NDU1NTQ0ZTVkOGJkZWM5MmQIP4AAAA%3D%3D
Iteration count: 4
GET https://api.openaire.eu/graph/v2/researchProducts?relOrganizationId=openorgs____%3A%3A40b9f835648a3e0d057d

In [5]:
df

Unnamed: 0,authors,openAccessColor,publiclyFunded,type,language,countries,subjects,mainTitle,subTitle,descriptions,...,indicators,projects,organizations,communities,collectedFrom,instances,isGreen,isInDiamondJournal,relOrganizationId,load_datetime
0,"[{'fullName': 'Susevich, María Laura', 'name':...",gold,False,publication,"{'code': 'spa', 'label': 'Spanish; Castilian'}",,"[{'subject': {'scheme': 'keyword', 'value': 'p...",Dicistrovirus from the pollinator community fo...,,[Los Dicistrovirus son una familia de virus qu...,...,"{'citationImpact': {'citationCount': 0.0, 'inf...",,[{'legalName': 'National University of La Pamp...,"[{'code': 'knowmad', 'label': 'Knowmad Institu...",[{'key': 'openaire____::0b74b6a356bbf23c245f9a...,"[{'alternateIdentifiers': [{'scheme': 'doi', '...",False,False,openorgs____::40b9f835648a3e0d057d6917dd7e54d5,2025-09-04
1,"[{'fullName': 'Steffen, Kevin Denis', 'name': ...",gold,False,publication,"{'code': 'spa', 'label': 'Spanish; Castilian'}",,"[{'subject': {'scheme': 'keyword', 'value': 'g...",Variability in the growth rates of Saanen kids...,,[This study was carried out to determine the v...,...,"{'citationImpact': {'citationCount': 0.0, 'inf...",,[{'legalName': 'Universidad Nacional de La Pam...,,[{'key': 'openaire____::0b74b6a356bbf23c245f9a...,"[{'alternateIdentifiers': [{'scheme': 'doi', '...",False,False,openorgs____::40b9f835648a3e0d057d6917dd7e54d5,2025-09-04
2,"[{'fullName': 'Paredes A., Manuel', 'name': 'M...",gold,False,publication,"{'code': 'spa', 'label': 'Spanish; Castilian'}",,"[{'subject': {'scheme': 'keyword', 'value': 'l...",Effects of dietary inclusion of alfalfa meal o...,,[Se utilizaron 240 pollos de engorde hembras H...,...,"{'citationImpact': {'citationCount': 0.0, 'inf...",,[{'legalName': 'Universidad Nacional de La Pam...,,[{'key': 'openaire____::0b74b6a356bbf23c245f9a...,"[{'alternateIdentifiers': [{'scheme': 'doi', '...",False,False,openorgs____::40b9f835648a3e0d057d6917dd7e54d5,2025-09-04
3,"[{'fullName': 'Evangelista, Carolina', 'name':...",,False,publication,"{'code': 'esl/spa', 'label': 'Spanish'}",,"[{'subject': {'scheme': 'keyword', 'value': 'C...",Estudio de la viabilidad del algoritmo super-t...,,[En este trabajo se presenta el diseño de un c...,...,"{'citationImpact': {'citationCount': 0.0, 'inf...",[{'id': 'corda_______::2ce3ff89b03fd4595fb14dc...,[{'legalName': 'Spanish National Research Coun...,,[{'key': 'openaire____::4cb2a3eb94033446c37331...,"[{'pids': [{'scheme': 'handle', 'value': '1026...",True,False,openorgs____::40b9f835648a3e0d057d6917dd7e54d5,2025-09-04
4,"[{'fullName': 'Ferral, Anabella', 'name': 'Ana...",,False,publication,"{'code': 'eng', 'label': 'English'}",,"[{'subject': {'scheme': 'keyword', 'value': 'W...",Bringing satellite and nanotechnologies together,,"[Nowadays, we witness remarkable technological...",...,,,[{'legalName': 'National University of La Plat...,,[{'key': 'openaire____::4cb2a3eb94033446c37331...,"[{'license': 'CC BY', 'accessRight': {'code': ...",True,False,openorgs____::40b9f835648a3e0d057d6917dd7e54d5,2025-09-04
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
295,"[{'fullName': 'Pérez, Verónica', 'name': 'Vero...",,False,publication,"{'code': 'esl/spa', 'label': 'Spanish'}","[{'code': 'AR', 'label': 'Argentina', 'provena...","[{'subject': {'scheme': 'keyword', 'value': 'M...",A 90 años de la aparición del colectivo : Refl...,,[El artículo aborda el modo como se ha desarro...,...,"{'citationImpact': {'citationCount': 0.0, 'inf...",,[{'legalName': 'National University of La Plat...,,[{'key': 'opendoar____::dc6e224a8d74ce03bf3011...,"[{'type': 'Article', 'urls': ['http://sedici.u...",True,False,openorgs____::40b9f835648a3e0d057d6917dd7e54d5,2025-09-04
296,"[{'fullName': 'Otaola, Clara', 'name': 'Clara'...",,False,publication,"{'code': 'esl/spa', 'label': 'Spanish'}","[{'code': 'AR', 'label': 'Argentina', 'provena...","[{'subject': {'scheme': 'keyword', 'value': 'A...",Procesos de intensificación y análisis tafonóm...,,[En este artículo presentamos los resultados d...,...,"{'citationImpact': {'citationCount': 0.0, 'inf...",,[{'legalName': 'National Scientific and Techni...,"[{'code': 'knowmad', 'label': 'Knowmad Institu...",[{'key': 'openaire____::df45502607927471ecf8a6...,"[{'type': 'Article', 'urls': ['http://sedici.u...",True,False,openorgs____::40b9f835648a3e0d057d6917dd7e54d5,2025-09-04
297,"[{'fullName': 'Marchegiani, Mariana Rosa', 'na...",,False,publication,"{'code': 'esl/spa', 'label': 'Spanish'}","[{'code': 'AR', 'label': 'Argentina', 'provena...","[{'subject': {'scheme': 'keyword', 'value': 'P...",Residencia Docente en tiempos de Pandemia: un ...,,[El Trabajo Final Integrador presenta la propu...,...,"{'citationImpact': {'citationCount': 0.0, 'inf...",,[{'legalName': 'National University of La Plat...,,[{'key': 'openaire____::df45502607927471ecf8a6...,"[{'type': 'Doctoral thesis', 'urls': ['http://...",True,False,openorgs____::40b9f835648a3e0d057d6917dd7e54d5,2025-09-04
298,"[{'fullName': 'Seré, Pablo Ricardo', 'name': '...",,False,publication,"{'code': 'esl/spa', 'label': 'Spanish'}","[{'code': 'AR', 'label': 'Argentina', 'provena...","[{'subject': {'scheme': 'keyword', 'value': 'I...",Efecto de la concentración del silano MTMO sob...,,[Los pretratamientos superficiales a base de s...,...,"{'citationImpact': {'citationCount': 0.0, 'inf...",,[{'legalName': 'National University of La Plat...,,[{'key': 'opendoar____::dc6e224a8d74ce03bf3011...,"[{'type': 'Article', 'urls': ['http://sedici.u...",True,False,openorgs____::40b9f835648a3e0d057d6917dd7e54d5,2025-09-04
