In [1]:
import requests
import pandas as pd
import time
pd.set_option('display.max_columns', None)


In [2]:
# NOTE(review): `catalog` is not defined in any cell visible here — presumably it is
# injected by the project's notebook integration (e.g. a Kedro Jupyter session); TODO confirm.
# Load the ROR identifier of the institution whose works are fetched further down.
institution_ror = catalog.load('params:openalex_fetch_options.institution_ror')
# The environment is hard-coded to 'dev'; the catalog lookup for it is currently disabled.
#env = catalog.load('params:fetch_options.env')
env = 'dev'

In [3]:
# Display the environment value that the fetch call below will receive.
env

[32m'dev'[0m

In [4]:
import requests
import pandas as pd
import time

def clean_work_dataframe(df):
    """Return a copy of ``df`` without the abstract inverted-index columns.

    Only the columns that actually exist in ``df`` are removed, so a frame
    that lacks them is returned as an unchanged copy. The input frame itself
    is never modified.
    """
    unwanted = ("abstract_inverted_index", "abstract_inverted_index_v3")
    existing = set(df.columns)
    # Restrict the drop list to labels that are present so ``drop`` never raises.
    present = [label for label in unwanted if label in existing]
    return df.drop(columns=present)

def fetch_work_openalex(institution_ror, env, iteration_limit=5, pause_seconds=1):
    """Download the works OpenAlex lists for one institution, page by page.

    Pages of up to 200 works are requested from the OpenAlex ``/works``
    endpoint with cursor pagination. Every page is turned into a DataFrame,
    passed through ``clean_work_dataframe`` and all pages are concatenated
    once the loop ends.

    The download is best-effort: a failed request or an undecodable response
    body is reported with ``print`` and stops the loop, and whatever was
    collected up to that point is still returned.

    Args:
        institution_ror: ROR identifier inserted into the
            ``institutions.ror`` filter of the request URL.
        env: Environment name. When it equals ``'dev'`` the loop stops after
            ``iteration_limit`` pages.
        iteration_limit: Maximum number of pages fetched when ``env`` is
            ``'dev'``. Defaults to 5.
        pause_seconds: Seconds to sleep between two consecutive page
            requests. Defaults to 1.

    Returns:
        tuple: ``(df, df.head(1000))`` where ``df`` holds every fetched row.
        ``df`` is an empty DataFrame when no page could be fetched.
    """
    base_url = 'https://api.openalex.org/works?filter=institutions.ror:{}&cursor={}&per-page=200'
    cursor = '*'  # value sent with the first request; later pages use meta.next_cursor
    iteration_count = 0
    all_dataframes = []  # one DataFrame per page, concatenated once at the end

    # The context manager closes the session (and its pooled connections) on
    # every exit path, including the early ``break`` statements below.
    with requests.Session() as session:
        while True:
            url = base_url.format(institution_ror, cursor)
            print(f'Iteration count: {iteration_count}')
            print(f'GET {url}')

            try:
                response = session.get(url, timeout=10)
                response.raise_for_status()
            except requests.RequestException as e:
                print(f"Error en la solicitud: {e}")
                break

            # Decode in a separate ``try``: recent requests versions raise a
            # JSON error that is both a RequestException and a ValueError, so a
            # shared ``try`` would always report it as a request failure.
            try:
                api_response = response.json()
            except ValueError:
                print("Error al decodificar JSON.")
                break

            # A missing or empty ``results`` list means there is nothing left to read.
            results = api_response.get('results') if isinstance(api_response, dict) else None
            if not results:
                print("No hay más datos disponibles.")
                break

            df_tmp = pd.DataFrame.from_dict(results)
            df_tmp = clean_work_dataframe(df_tmp)
            all_dataframes.append(df_tmp)

            # Advance the cursor; a missing/empty cursor marks the last page.
            cursor = (api_response.get('meta') or {}).get('next_cursor')
            if not cursor:
                break

            # Cap the number of pages when running in the 'dev' environment.
            iteration_count += 1
            if env == 'dev' and iteration_count >= iteration_limit:
                break

            time.sleep(pause_seconds)  # pause between requests to respect API limits

    # Build the final frame with a single concat over the collected pages.
    df = pd.concat(all_dataframes, ignore_index=True) if all_dataframes else pd.DataFrame()

    return df, df.head(1000)


In [5]:
# Run the download: `df` holds every fetched row, `df_dev` its first 1000 rows.
df, df_dev = fetch_work_openalex(institution_ror, env)

Iteration count: 0
GET https://api.openalex.org/works?filter=institutions.ror:https://ror.org/03cqe8w59&cursor=*&per-page=200


Iteration count: 1
GET https://api.openalex.org/works?filter=institutions.ror:https://ror.org/03cqe8w59&cursor=IlsxMDAuMCwgNTE3LCAnaHR0cHM6Ly9vcGVuYWxleC5vcmcvVzQzOTU1MzQ5NTInXSI=&per-page=200
Iteration count: 2
GET https://api.openalex.org/works?filter=institutions.ror:https://ror.org/03cqe8w59&cursor=IlsxMDAuMCwgMzU0LCAnaHR0cHM6Ly9vcGVuYWxleC5vcmcvVzIwMDg3NjI1MDgnXSI=&per-page=200
Iteration count: 3
GET https://api.openalex.org/works?filter=institutions.ror:https://ror.org/03cqe8w59&cursor=IlsxMDAuMCwgMjcwLCAnaHR0cHM6Ly9vcGVuYWxleC5vcmcvVzIwMjY1NDAwMjInXSI=&per-page=200
Iteration count: 4
GET https://api.openalex.org/works?filter=institutions.ror:https://ror.org/03cqe8w59&cursor=IlsxMDAuMCwgMjE1LCAnaHR0cHM6Ly9vcGVuYWxleC5vcmcvVzI1NTM3NjMyMjgnXSI=&per-page=200


In [6]:
# Preview the first five fetched works.
df.head(5)

Unnamed: 0,id,doi,title,display_name,publication_year,publication_date,ids,language,primary_location,type,type_crossref,indexed_in,open_access,authorships,institution_assertions,countries_distinct_count,institutions_distinct_count,corresponding_author_ids,corresponding_institution_ids,apc_list,apc_paid,fwci,has_fulltext,fulltext_origin,cited_by_count,citation_normalized_percentile,cited_by_percentile_year,biblio,is_retracted,is_paratext,primary_topic,topics,keywords,concepts,mesh,locations_count,locations,best_oa_location,sustainable_development_goals,grants,datasets,versions,referenced_works_count,referenced_works,related_works,cited_by_api_url,counts_by_year,updated_date,created_date,is_authors_truncated
0,https://openalex.org/W2140131090,https://doi.org/10.1016/j.foreco.2009.09.001,A global overview of drought and heat-induced ...,A global overview of drought and heat-induced ...,2009,2009-10-22,{'openalex': 'https://openalex.org/W2140131090...,en,"{'is_oa': False, 'landing_page_url': 'https://...",article,journal-article,[crossref],"{'is_oa': True, 'oa_status': 'green', 'oa_url'...","[{'author_position': 'first', 'author': {'id':...",[],13,13,[],[],"{'value': 3460, 'currency': 'USD', 'value_usd'...",,85.497,True,pdf,6811,"{'value': 0.99993, 'is_in_top_1_percent': True...","{'min': 99, 'max': 100}","{'volume': '259', 'issue': '4', 'first_page': ...",False,False,"{'id': 'https://openalex.org/T10555', 'display...","[{'id': 'https://openalex.org/T10555', 'displa...",[{'id': 'https://openalex.org/keywords/global-...,"[{'id': 'https://openalex.org/C132651083', 'wi...",[],5,"[{'is_oa': False, 'landing_page_url': 'https:/...","{'is_oa': True, 'landing_page_url': 'https://h...","[{'display_name': 'Climate action', 'id': 'htt...",[],[],[],233,"[https://openalex.org/W1444856208, https://ope...","[https://openalex.org/W4321354035, https://ope...",https://api.openalex.org/works?filter=cites:W2...,"[{'year': 2025, 'cited_by_count': 28}, {'year'...",2025-02-03T20:11:47.091148,2016-06-24,
1,https://openalex.org/W2762087180,https://doi.org/10.1016/j.cell.2017.09.021,Ferroptosis: A Regulated Cell Death Nexus Link...,Ferroptosis: A Regulated Cell Death Nexus Link...,2017,2017-10-01,{'openalex': 'https://openalex.org/W2762087180...,en,"{'is_oa': True, 'landing_page_url': 'https://d...",review,journal-article,"[crossref, pubmed]","{'is_oa': True, 'oa_status': 'bronze', 'oa_url...","[{'author_position': 'first', 'author': {'id':...",[],6,29,[https://openalex.org/A5030207693],[https://openalex.org/I78577930],"{'value': 10100, 'currency': 'USD', 'value_usd...",,55.273,True,pdf,5203,"{'value': 0.999666, 'is_in_top_1_percent': Tru...","{'min': 99, 'max': 100}","{'volume': '171', 'issue': '2', 'first_page': ...",False,False,"{'id': 'https://openalex.org/T11297', 'display...","[{'id': 'https://openalex.org/T11297', 'displa...",[{'id': 'https://openalex.org/keywords/cell-me...,"[{'id': 'https://openalex.org/C86803240', 'wik...","[{'descriptor_ui': 'D000818', 'descriptor_name...",6,"[{'is_oa': True, 'landing_page_url': 'https://...","{'is_oa': True, 'landing_page_url': 'https://d...",[{'display_name': 'Good health and well-being'...,[{'funder': 'https://openalex.org/F4320311602'...,[],[],113,"[https://openalex.org/W1561232794, https://ope...","[https://openalex.org/W46528357, https://opena...",https://api.openalex.org/works?filter=cites:W2...,"[{'year': 2025, 'cited_by_count': 73}, {'year'...",2025-02-03T13:13:18.515620,2017-10-20,
2,https://openalex.org/W2151692850,https://doi.org/10.1111/j.1096-0031.2008.00217.x,"TNT, a free program for phylogenetic analysis","TNT, a free program for phylogenetic analysis",2008,2008-07-11,{'openalex': 'https://openalex.org/W2151692850...,en,"{'is_oa': True, 'landing_page_url': 'https://d...",article,journal-article,[crossref],"{'is_oa': True, 'oa_status': 'bronze', 'oa_url...","[{'author_position': 'first', 'author': {'id':...",[],3,4,[],[],"{'value': 3810, 'currency': 'USD', 'value_usd'...",,190.857,True,ngrams,5105,"{'value': 0.999928, 'is_in_top_1_percent': Tru...","{'min': 99, 'max': 100}","{'volume': '24', 'issue': '5', 'first_page': '...",False,False,"{'id': 'https://openalex.org/T10385', 'display...","[{'id': 'https://openalex.org/T10385', 'displa...","[{'id': 'https://openalex.org/keywords/tree', ...","[{'id': 'https://openalex.org/C61423126', 'wik...",[],2,"[{'is_oa': True, 'landing_page_url': 'https://...","{'is_oa': True, 'landing_page_url': 'https://d...",[],[],[],[],38,"[https://openalex.org/W1527858535, https://ope...","[https://openalex.org/W262455470, https://open...",https://api.openalex.org/works?filter=cites:W2...,"[{'year': 2025, 'cited_by_count': 10}, {'year'...",2025-02-05T01:39:35.627205,2016-06-24,
3,https://openalex.org/W4211046286,https://doi.org/10.1088/1748-0221/3/08/s08003,The ATLAS Experiment at the CERN Large Hadron ...,The ATLAS Experiment at the CERN Large Hadron ...,2008,2008-08-14,{'openalex': 'https://openalex.org/W4211046286...,en,"{'is_oa': True, 'landing_page_url': 'https://d...",article,journal-article,[crossref],"{'is_oa': True, 'oa_status': 'green', 'oa_url'...","[{'author_position': 'first', 'author': {'id':...",[],36,188,[],[],,,53.846,True,pdf,3806,"{'value': 0.999911, 'is_in_top_1_percent': Tru...","{'min': 99, 'max': 100}","{'volume': '3', 'issue': '08', 'first_page': '...",False,False,"{'id': 'https://openalex.org/T11044', 'display...","[{'id': 'https://openalex.org/T11044', 'displa...",[{'id': 'https://openalex.org/keywords/atlas-d...,"[{'id': 'https://openalex.org/C87668248', 'wik...",[],9,"[{'is_oa': True, 'landing_page_url': 'https://...","{'is_oa': True, 'landing_page_url': 'https://d...",[],[],[],[],154,"[https://openalex.org/W1578925023, https://ope...","[https://openalex.org/W4385358068, https://ope...",https://api.openalex.org/works?filter=cites:W4...,"[{'year': 2025, 'cited_by_count': 9}, {'year':...",2025-02-12T09:13:14.545546,2022-02-13,True
4,https://openalex.org/W2130811469,https://doi.org/10.1038/nature14324,Global effects of land use on local terrestria...,Global effects of land use on local terrestria...,2015,2015-03-31,{'openalex': 'https://openalex.org/W2130811469...,en,"{'is_oa': False, 'landing_page_url': 'https://...",article,journal-article,"[crossref, pubmed]","{'is_oa': True, 'oa_status': 'green', 'oa_url'...","[{'author_position': 'first', 'author': {'id':...",[],5,15,[https://openalex.org/A5047278242],[https://openalex.org/I164576550],"{'value': 9750, 'currency': 'EUR', 'value_usd'...",,187.999,True,pdf,3462,"{'value': 0.999892, 'is_in_top_1_percent': Tru...","{'min': 99, 'max': 100}","{'volume': '520', 'issue': '7545', 'first_page...",False,False,"{'id': 'https://openalex.org/T10005', 'display...","[{'id': 'https://openalex.org/T10005', 'displa...",[{'id': 'https://openalex.org/keywords/rarefac...,"[{'id': 'https://openalex.org/C53565203', 'wik...","[{'descriptor_ui': 'D044822', 'descriptor_name...",12,"[{'is_oa': False, 'landing_page_url': 'https:/...","{'is_oa': True, 'landing_page_url': 'http://hd...",[],[],[],[],383,"[https://openalex.org/W103754345, https://open...","[https://openalex.org/W2968623938, https://ope...",https://api.openalex.org/works?filter=cites:W2...,"[{'year': 2025, 'cited_by_count': 25}, {'year'...",2025-02-11T23:42:13.644403,2016-06-24,
