In [1]:
import time
from datetime import date
from urllib.parse import urlencode

import pandas as pd
import requests
# Lift the column display limit so wide DataFrames are rendered without truncation.
pd.set_option('display.max_columns', None)


In [2]:
# Read the institution ROR identifier from the catalog parameters.
# NOTE(review): `catalog` is not defined anywhere in this notebook; presumably it is
# injected by the host session (e.g. a Kedro Jupyter kernel) — confirm before
# running on a fresh kernel.
institution_ror = catalog.load('params:openalex_fetch_options.institution_ror')
# The catalog lookup for the environment is disabled; `env` is pinned to 'dev' below,
# which caps the number of pages fetched by fetch_openalex_work.
#env = catalog.load('params:fetch_options.env')
env = 'dev'

In [3]:
# Echo the active environment so the run mode is visible in the notebook output.
env

[32m'dev'[0m

In [4]:
def clean_work_dataframe(df):
    """Return a new DataFrame without the abstract inverted-index columns.

    Only the unwanted columns that are actually present in ``df`` are
    dropped, so a frame that lacks them is returned as an unchanged copy.
    The input frame itself is never modified.
    """
    unwanted = {"abstract_inverted_index", "abstract_inverted_index_v3"}
    # Restrict to existing columns so ``drop`` never raises on a missing label.
    present = unwanted & set(df.columns)
    return df.drop(columns=present)

def fetch_openalex_work(institution_ror, env, iteration_limit=5, per_page=200, pause_seconds=1):
    """Fetch the OpenAlex works of one institution, following cursor pagination.

    Pages are requested until the API returns no results, no ``next_cursor``
    is provided, a request/JSON error occurs, or (only when ``env == 'dev'``)
    the page cap is reached. Errors stop the loop without raising: whatever
    was collected up to that point is still returned.

    Args:
        institution_ror: ROR identifier used in the ``institutions.ror`` filter.
        env: Environment name; ``'dev'`` limits the download to
            ``iteration_limit`` pages.
        iteration_limit: Maximum number of pages fetched when ``env == 'dev'``.
        per_page: Value sent as the ``per-page`` query parameter.
        pause_seconds: Delay, in seconds, between consecutive page requests.

    Returns:
        tuple: ``(df, df.head(1000))`` where ``df`` holds every fetched work
        (cleaned by ``clean_work_dataframe``) plus a ``load_datetime`` column
        set to today's date. ``df`` is empty when nothing could be fetched.
    """
    endpoint = 'https://api.openalex.org/works'
    cursor = '*'
    iteration_count = 0
    all_dataframes = []  # Per-page frames, concatenated once after the loop

    # The context manager closes the pooled connections on every exit path.
    with requests.Session() as session:
        while True:
            # Percent-encode the values: cursors are opaque tokens that may
            # contain characters such as '+' or '=' which are unsafe in a
            # query string. ':', '/' and '*' stay literal so the logged URL
            # remains readable.
            query = urlencode(
                {
                    'filter': f'institutions.ror:{institution_ror}',
                    'cursor': cursor,
                    'per-page': per_page,
                },
                safe=':/*',
            )
            url = f'{endpoint}?{query}'
            print(f'Iteration count: {iteration_count}')
            print(f'GET {url}')

            try:
                response = session.get(url, timeout=10)
                response.raise_for_status()
                api_response = response.json()
            except requests.RequestException as e:
                print(f"Error en la solicitud: {e}")
                break
            except ValueError:
                print("Error al decodificar JSON.")
                break

            # Stop when the page carries no results
            if 'results' not in api_response or not api_response['results']:
                print("No hay más datos disponibles.")
                break

            page_df = clean_work_dataframe(pd.DataFrame(api_response['results']))
            all_dataframes.append(page_df)

            # Advance the cursor; tolerate a missing or null 'meta' object
            cursor = (api_response.get('meta') or {}).get('next_cursor')
            if not cursor:
                break

            # Page cap applies only in the 'dev' environment
            iteration_count += 1
            if env == 'dev' and iteration_count >= iteration_limit:
                break

            time.sleep(pause_seconds)  # Throttle requests between pages

    # Build the final frame with a single concatenation
    df = pd.concat(all_dataframes, ignore_index=True) if all_dataframes else pd.DataFrame()

    # NOTE(review): despite the column name, this stores a date (no time part).
    df['load_datetime'] = date.today()

    return df, df.head(1000)


In [5]:
# Run the fetch: `df` holds every retrieved work, `df_dev` is its first 1000 rows.
df, df_dev = fetch_openalex_work(institution_ror, env)

Iteration count: 0
GET https://api.openalex.org/works?filter=institutions.ror:https://ror.org/01tjs6929&cursor=*&per-page=200
Iteration count: 1
GET https://api.openalex.org/works?filter=institutions.ror:https://ror.org/01tjs6929&cursor=IlsxMDAuMCwgMjIyLCAnaHR0cHM6Ly9vcGVuYWxleC5vcmcvVzIxNDIxMTk3NjMnXSI=&per-page=200
Iteration count: 2
GET https://api.openalex.org/works?filter=institutions.ror:https://ror.org/01tjs6929&cursor=Ils5OS4wLCAyMDksICdodHRwczovL29wZW5hbGV4Lm9yZy9XMjAxODcwMjkyNSddIg==&per-page=200
Iteration count: 3
GET https://api.openalex.org/works?filter=institutions.ror:https://ror.org/01tjs6929&cursor=Ils5OS4wLCAxNDAsICdodHRwczovL29wZW5hbGV4Lm9yZy9XMzA0MTIzMDQyMyddIg==&per-page=200
Iteration count: 4
GET https://api.openalex.org/works?filter=institutions.ror:https://ror.org/01tjs6929&cursor=Ils5OS4wLCAxMDYsICdodHRwczovL29wZW5hbGV4Lm9yZy9XMjc0ODE1NjAxNSddIg==&per-page=200


In [6]:
# Preview the first five fetched works.
df.head(5)

Unnamed: 0,id,doi,title,display_name,publication_year,publication_date,ids,language,primary_location,type,type_crossref,indexed_in,open_access,authorships,institution_assertions,countries_distinct_count,institutions_distinct_count,corresponding_author_ids,corresponding_institution_ids,apc_list,apc_paid,fwci,is_authors_truncated,has_fulltext,fulltext_origin,cited_by_count,citation_normalized_percentile,cited_by_percentile_year,biblio,is_retracted,is_paratext,primary_topic,topics,keywords,concepts,mesh,locations_count,locations,best_oa_location,sustainable_development_goals,grants,datasets,versions,referenced_works_count,referenced_works,related_works,cited_by_api_url,counts_by_year,updated_date,created_date,load_datetime
0,https://openalex.org/W4211046286,https://doi.org/10.1088/1748-0221/3/08/s08003,The ATLAS Experiment at the CERN Large Hadron ...,The ATLAS Experiment at the CERN Large Hadron ...,2008,2008-08-14,{'openalex': 'https://openalex.org/W4211046286...,en,"{'is_oa': True, 'landing_page_url': 'https://d...",article,journal-article,[crossref],"{'is_oa': True, 'oa_status': 'green', 'oa_url'...","[{'author_position': 'first', 'author': {'id':...",[],36,188,[],[],,,53.865,True,True,pdf,3903,"{'value': 0.999907, 'is_in_top_1_percent': Tru...","{'min': 99, 'max': 100}","{'volume': '3', 'issue': '08', 'first_page': '...",False,False,"{'id': 'https://openalex.org/T11044', 'display...","[{'id': 'https://openalex.org/T11044', 'displa...",[{'id': 'https://openalex.org/keywords/atlas-d...,"[{'id': 'https://openalex.org/C87668248', 'wik...",[],9,"[{'is_oa': True, 'landing_page_url': 'https://...","{'is_oa': True, 'landing_page_url': 'https://d...",[],[],[],[],154,"[https://openalex.org/W1578925023, https://ope...","[https://openalex.org/W4385358068, https://ope...",https://api.openalex.org/works?filter=cites:W4...,"[{'year': 2025, 'cited_by_count': 95}, {'year'...",2025-09-06T09:41:56.891629,2022-02-13,2025-09-08
1,https://openalex.org/W4292403095,https://doi.org/10.1051/0004-6361:20041864,The Leiden/Argentine/Bonn (LAB) Survey of Gala...,The Leiden/Argentine/Bonn (LAB) Survey of Gala...,2005,2005-08-29,{'openalex': 'https://openalex.org/W4292403095...,en,"{'is_oa': True, 'landing_page_url': 'https://d...",article,journal-article,"[arxiv, crossref, datacite]","{'is_oa': True, 'oa_status': 'bronze', 'oa_url...","[{'author_position': 'first', 'author': {'id':...",[],2,4,[],[],,,20.876,,True,pdf,3232,"{'value': 0.999902, 'is_in_top_1_percent': Tru...","{'min': 99, 'max': 100}","{'volume': '440', 'issue': '2', 'first_page': ...",False,False,"{'id': 'https://openalex.org/T10818', 'display...","[{'id': 'https://openalex.org/T10818', 'displa...",[{'id': 'https://openalex.org/keywords/galacti...,"[{'id': 'https://openalex.org/C121332964', 'wi...",[],8,"[{'is_oa': True, 'landing_page_url': 'https://...","{'is_oa': True, 'landing_page_url': 'https://d...",[],[],[],[https://openalex.org/W4292403095],7,"[https://openalex.org/W1539512060, https://ope...","[https://openalex.org/W4296758524, https://ope...",https://api.openalex.org/works?filter=cites:W4...,"[{'year': 2025, 'cited_by_count': 37}, {'year'...",2025-09-01T06:55:20.685347,2022-08-20,2025-09-08
2,https://openalex.org/W2501864044,https://doi.org/10.1093/mnras/stx721,The clustering of galaxies in the completed SD...,The clustering of galaxies in the completed SD...,2017,2017-03-24,{'openalex': 'https://openalex.org/W2501864044...,en,"{'is_oa': True, 'landing_page_url': 'https://d...",article,journal-article,"[arxiv, crossref, datacite]","{'is_oa': True, 'oa_status': 'bronze', 'oa_url...","[{'author_position': 'first', 'author': {'id':...",[],12,57,"[https://openalex.org/A5102897520, https://ope...","[https://openalex.org/I148283060, https://open...","{'value': 2310, 'currency': 'GBP', 'value_usd'...",,118.1,,True,pdf,2768,"{'value': 0.999951, 'is_in_top_1_percent': Tru...","{'min': 99, 'max': 100}","{'volume': '470', 'issue': '3', 'first_page': ...",False,False,"{'id': 'https://openalex.org/T10026', 'display...","[{'id': 'https://openalex.org/T10026', 'displa...",[{'id': 'https://openalex.org/keywords/baryon-...,"[{'id': 'https://openalex.org/C121332964', 'wi...",[],21,"[{'is_oa': True, 'landing_page_url': 'https://...","{'is_oa': True, 'landing_page_url': 'https://d...",[],[],[],[https://openalex.org/W2501864044],234,"[https://openalex.org/W1483545773, https://ope...","[https://openalex.org/W3148180306, https://ope...",https://api.openalex.org/works?filter=cites:W2...,"[{'year': 2025, 'cited_by_count': 209}, {'year...",2025-09-02T08:13:28.728906,2016-08-23,2025-09-08
3,https://openalex.org/W2098406858,,The ATLAS Experiment at the CERN Large Hadron ...,The ATLAS Experiment at the CERN Large Hadron ...,2020,2020-02-23,{'openalex': 'https://openalex.org/W2098406858...,en,"{'is_oa': False, 'landing_page_url': 'https://...",book,book,[],"{'is_oa': False, 'oa_status': 'closed', 'oa_ur...","[{'author_position': 'first', 'author': {'id':...",[],32,170,[],[],,,19.933,True,False,,2468,"{'value': 0.671875, 'is_in_top_1_percent': Fal...","{'min': 99, 'max': 100}","{'volume': None, 'issue': None, 'first_page': ...",False,False,"{'id': 'https://openalex.org/T10048', 'display...","[{'id': 'https://openalex.org/T10048', 'displa...",[{'id': 'https://openalex.org/keywords/atlas-d...,"[{'id': 'https://openalex.org/C87668248', 'wik...",[],1,"[{'is_oa': False, 'landing_page_url': 'https:/...",,[],[],[],[],220,"[https://openalex.org/W1578925023, https://ope...","[https://openalex.org/W3121642222, https://ope...",https://api.openalex.org/works?filter=cites:W2...,"[{'year': 2022, 'cited_by_count': 12}, {'year'...",2025-09-08T09:47:59.233631,2016-06-24,2025-09-08
4,https://openalex.org/W2012331775,https://doi.org/10.1038/nature10158,Genome sequence and analysis of the tuber crop...,Genome sequence and analysis of the tuber crop...,2011,2011-07-01,{'openalex': 'https://openalex.org/W2012331775...,en,"{'is_oa': True, 'landing_page_url': 'https://d...",article,journal-article,"[crossref, pubmed]","{'is_oa': True, 'oa_status': 'hybrid', 'oa_url...","[{'author_position': 'first', 'author': {'id':...",[],12,19,[],[],"{'value': 9750, 'currency': 'EUR', 'value_usd'...","{'value': 9750, 'currency': 'EUR', 'value_usd'...",148.969,,True,ngrams,2028,"{'value': 0.999978, 'is_in_top_1_percent': Tru...","{'min': 99, 'max': 100}","{'volume': '475', 'issue': '7355', 'first_page...",False,False,"{'id': 'https://openalex.org/T11771', 'display...","[{'id': 'https://openalex.org/T11771', 'displa...",[{'id': 'https://openalex.org/keywords/sequenc...,"[{'id': 'https://openalex.org/C137580998', 'wi...","[{'descriptor_ui': 'D018745', 'descriptor_name...",6,"[{'is_oa': True, 'landing_page_url': 'https://...","{'is_oa': True, 'landing_page_url': 'https://d...","[{'id': 'https://metadata.un.org/sdg/2', 'disp...",[],[],[],55,"[https://openalex.org/W1214546200, https://ope...","[https://openalex.org/W3178279768, https://ope...",https://api.openalex.org/works?filter=cites:W2...,"[{'year': 2025, 'cited_by_count': 53}, {'year'...",2025-09-01T11:16:46.729607,2016-06-24,2025-09-08
