In [1]:
import time
import requests
import pandas as pd


In [2]:
filter = catalog.load('params:openaire_researchproduct_collectedfrom_datasource_fetch_options.filter')
filter_value = catalog.load('params:openaire_researchproduct_collectedfrom_datasource_fetch_options.filter_value')
env = 'dev'
access_token = catalog.load('params:openaire_researchproduct_collectedfrom_datasource_fetch_options.access_token')
refresh_token = catalog.load('params:openaire_researchproduct_collectedfrom_datasource_fetch_options.refresh_token')

In [3]:
def refresh_access_token(refresh_token):
    """Obtiene un nuevo access_token usando el refresh_token."""
    refresh_url = f"https://services.openaire.eu/uoa-user-management/api/users/getAccessToken?refreshToken={refresh_token}"
    response = requests.get(refresh_url)
    if response.status_code == 200:
        return response.json().get("access_token")
    else:
        raise Exception(f"Failed to refresh token: {response.status_code}")

def fetch_openaire_graph_researchproduct(filter, filter_value, access_token, refresh_token, env):
    cursor = '*'
    base_url = 'https://api.openaire.eu/graph/researchProducts'
    iteration_limit = 5
    iteration_count = 0
    page_size = 50  # Ajustar este valor según sea necesario
    max_retries = 5  # Máximo número de reintentos en caso de error 429
    retry_wait = 5   # Tiempo inicial de espera entre reintentos en segundos
    
    def get_headers():
        return {
            "accept": "application/json",
            'Authorization': f'Bearer {access_token}'
        }
    
    params = {
        filter: filter_value,  # Búsqueda por institución
        "pageSize": page_size,
        "cursor": cursor
    }
    
    while True:
        response = requests.get(base_url, headers=get_headers(), params=params)
        
        # Si el token es inválido o expiró, intentar renovarlo
        if response.status_code == 403:
            print("Access token expired or invalid. Refreshing token...")
            access_token = refresh_access_token(refresh_token)
            continue  # Reintentar la solicitud con el nuevo token
        
        if response.status_code != 200:
            raise Exception(f"Failed to retrieve data: {response.status_code}")
        
        api_response = response.json()
        print(f'Iteration count: {iteration_count}')
        print(f'GET {response.url}')
        
        # Crear DataFrame con las columnas del primer resultado
        df = pd.DataFrame.from_dict(api_response['results'])
        
        # Actualizar cursor
        cursor = api_response['header'].get('nextCursor', None)
        params["cursor"] = cursor
        
        # Bucle para iterar con el cursor
        while cursor:
            if env == 'dev' and iteration_count >= iteration_limit:
                break
            
            iteration_count += 1
            print(f'Iteration count: {iteration_count}')
            print(f'GET {response.url}')
            
            time.sleep(2)
            
            # Reintentos en caso de error 429
            retries = 0
            while retries < max_retries:
                response = requests.get(base_url, headers=get_headers(), params=params)
                
                if response.status_code == 403:
                    print("Access token expired during execution. Refreshing token...")
                    access_token = refresh_access_token(refresh_token)
                    continue  # Reintentar con el nuevo token
                
                if response.status_code == 429:
                    retries += 1
                    print(f"Rate limit hit. Retry {retries}/{max_retries}. Waiting {retry_wait} seconds...")
                    time.sleep(retry_wait)
                    retry_wait *= 2  # Incrementar el tiempo de espera exponencialmente
                else:
                    break
            
            if response.status_code != 200:
                print(f"Failed to retrieve data at iteration {iteration_count}: {response.status_code}")
                break
            
            api_response = response.json()
            
            if not api_response.get('results'):
                print("No more results. Stopping iteration.")
                break
            
            df_tmp = pd.DataFrame.from_dict(api_response['results'])
            df = pd.concat([df, df_tmp])
            
            cursor = api_response['header'].get('nextCursor', None)
            params["cursor"] = cursor
        
        return df, df.head(1000)


In [4]:
df, df_dev = fetch_openaire_graph_researchproduct(filter, filter_value, access_token, refresh_token, env)

Access token expired or invalid. Refreshing token...
Iteration count: 0
GET https://api.openaire.eu/graph/researchProducts?relOrganizationId=openorgs____%3A%3A40b9f835648a3e0d057d6917dd7e54d5&pageSize=50&cursor=%2A
Iteration count: 1
GET https://api.openaire.eu/graph/researchProducts?relOrganizationId=openorgs____%3A%3A40b9f835648a3e0d057d6917dd7e54d5&pageSize=50&cursor=%2A
Iteration count: 2
GET https://api.openaire.eu/graph/researchProducts?relOrganizationId=openorgs____%3A%3A40b9f835648a3e0d057d6917dd7e54d5&pageSize=50&cursor=AoI%2FD2RlZHVwX3dmXzAwMjo6MDAzOTVjZDIxYTA1ZDI0YTBhMjc3ZGRlNzBlM2MyZjQIP4AAAA%3D%3D
Iteration count: 3
GET https://api.openaire.eu/graph/researchProducts?relOrganizationId=openorgs____%3A%3A40b9f835648a3e0d057d6917dd7e54d5&pageSize=50&cursor=AoI%2FD2RlZHVwX3dmXzAwMjo6MDA3MTZmMjkxNGE0NGMyMzA2YWI4MDMzMjgwZWQ1ODAIP4AAAA%3D%3D
Iteration count: 4
GET https://api.openaire.eu/graph/researchProducts?relOrganizationId=openorgs____%3A%3A40b9f835648a3e0d057d6917dd7e54d5&pa

In [5]:
df

Unnamed: 0,author,openAccessColor,publiclyFunded,type,language,subjects,mainTitle,description,publicationDate,publisher,...,id,originalId,indicators,instance,isGreen,isInDiamondJournal,contributor,pid,country,contactPerson
0,"[{'fullName': 'Steffen, Kevin Denis', 'name': ...",gold,False,publication,"{'code': 'spa', 'label': 'Spanish; Castilian'}","[{'subject': {'scheme': 'keyword', 'value': 'g...",Variability in the growth rates of Saanen kids...,[This study was carried out to determine the v...,2023-12-18,"Universidad Nacional Mayor de San Marcos, Facu...",...,4dc99724cf04::95ea5df70a451a0487e051faa6c0a646,"[oai:ojs.csi.unmsm:article/25240, 50|4dc99724c...","{'citationImpact': {'citationCount': 0.0, 'inf...","[{'alternateIdentifier': [{'scheme': 'doi', 'v...",False,False,,,,
1,"[{'fullName': 'Evangelista, Carolina', 'name':...",,False,publication,"{'code': 'Español', 'label': 'Español'}","[{'subject': {'scheme': 'keyword', 'value': 'C...",Estudio de la viabilidad del algoritmo super-t...,[En este trabajo se presenta el diseño de un c...,2013-01-01,,...,RECOLECTA___::24cb4438d1afe299e63cfdea4a31911f,[50|RECOLECTA___::24cb4438d1afe299e63cfdea4a31...,"{'citationImpact': {'citationCount': 0.0, 'inf...","[{'pid': [{'scheme': 'handle', 'value': '10261...",True,False,"[European Commission, Ministerio de Economía y...","[{'scheme': 'handle', 'value': '10261/97737'}]",,
2,"[{'fullName': 'Mostoghiu, Robert', 'name': 'Ro...",,False,publication,"{'code': 'eng', 'label': 'English'}","[{'subject': {'scheme': 'keyword', 'value': 'S...",The Three Hundred project: The gas disruption ...,[We analyse the gas content evolution of infal...,2021-03-01,Oxford University Press (OUP),...,core_ac_uk__::a391293fe8bd6c9c6c203ee15e8c2e8f,[oai:nottingham-repository.worktribe.com:53501...,"{'citationImpact': {'citationCount': 0.0, 'inf...","[{'type': 'Article', 'url': ['https://nottingh...",True,False,,,,
3,"[{'fullName': 'Rubio, Damián', 'name': 'Damiá...",,False,publication,"{'code': 'esl/spa', 'label': 'Spanish'}","[{'subject': {'scheme': 'keyword', 'value': 'G...",Evolución del sistema de gestión de incidentes...,[Esta tesina detalla el desarrollo de un siste...,2023-06-23,,...,dedup_wf_002::00004d08998bcb6f35ce5d53811f12e9,"[oai:sedici.unlp.edu.ar:10915/154923, 50|od___...","{'citationImpact': {'citationCount': 0.0, 'inf...","[{'type': 'Doctoral thesis', 'url': ['http://s...",True,False,"[Lanfranco, Einar Felipe, Venosa, Paula]",,"[{'code': 'AR', 'label': 'Argentina', 'provena...",
4,"[{'fullName': 'Ojeda Hidalgo, José F.', 'name'...",gold,False,publication,"{'code': 'eng', 'label': 'English'}","[{'subject': {'scheme': 'keyword', 'value': 'A...",Responsabilidad Social Corporativa en Empresas...,[El presente artículo presenta un conjunto de ...,2013-12-01,Universidad Nacional de La Plata,...,dedup_wf_002::0000eb33b6be05d67799615dd117e5b2,[oai:doaj.org/article:a86e0a7dd7ad4000ae40ad07...,"{'citationImpact': {'citationCount': 0.0, 'inf...","[{'type': 'Article', 'url': ['https://doaj.org...",False,False,,,"[{'code': 'AR', 'label': 'Argentina', 'provena...",
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
45,"[{'fullName': 'Lasta, Jorge Augusto', 'name': ...",,False,publication,"{'code': 'esl/spa', 'label': 'Spanish'}","[{'subject': {'scheme': 'keyword', 'value': 'C...",Respuesta inmunológica de bovinos vacunados si...,[Se estudia la respuesta inmunológica de bovin...,1978-01-01,,...,dedup_wf_002::01504e314a4f352cc244d64c6cb72a79,[50|od______1329::3d938f84a5fcb447e48259a71e15...,"{'citationImpact': {'citationCount': 0.0, 'inf...","[{'type': 'Doctoral thesis', 'url': ['http://s...",True,False,"[Gimeno, Emilio Juan]",,"[{'code': 'AR', 'label': 'Argentina', 'provena...",
46,[{'fullName': 'Revista Institucional de la Fac...,,False,publication,"{'code': 'esl/spa', 'label': 'Spanish'}","[{'subject': {'scheme': 'keyword', 'value': 'C...","Workshop: ""Fluctuaciones macroeconómicas y cri...",[Perry G. Mehrling es doctor en Economía de la...,2014-12-01,,...,dedup_wf_002::0150d22e4775af3c7965c14921f4176f,[50|od______1329::bd16c62b0c656e595ffdbc725b03...,"{'citationImpact': {'citationCount': 0.0, 'inf...","[{'type': 'Article', 'url': ['http://sedici.un...",True,False,,,"[{'code': 'AR', 'label': 'Argentina', 'provena...",
47,"[{'fullName': 'Cei, José Miguel Alfredo María'...",,False,publication,"{'code': 'eng', 'label': 'English'}","[{'subject': {'scheme': 'keyword', 'value': 'A...",Primeros apuntes ecológicos y herpetológicos s...,[Geological and ecological aspects of an extra...,1970-01-01,,...,dedup_wf_002::0152767df678ac928375ebd4d6079725,[50|od______1329::ef5d80f7bdd894c3807786e0c048...,"{'citationImpact': {'citationCount': 0.0, 'inf...","[{'type': 'Article', 'url': ['http://sedici.un...",False,False,,,"[{'code': 'AR', 'label': 'Argentina', 'provena...",
48,"[{'fullName': 'Cilento, Laura', 'name': 'Laura...",gold,False,publication,"{'code': 'esl/spa', 'label': 'Spanish'}","[{'subject': {'scheme': 'keyword', 'value': 'e...","“Serie del Encuentro”, primera colección del C...",[La reflexión sobre la práctica editorial y lo...,2013-01-01,Universidad Nacional de La Plata,...,dedup_wf_002::0152976d6d89c7781aa7447192d99498,[oai:doaj.org/article:637f323d009e4f64be0e3ae6...,"{'citationImpact': {'citationCount': 0.0, 'inf...","[{'type': 'Article', 'url': ['https://doaj.org...",True,False,,,"[{'code': 'AR', 'label': 'Argentina', 'provena...",
