In [7]:
import requests
import pandas as pd
import xml.etree.ElementTree as ET
import time
from datetime import date

In [8]:
# Harvest settings read from the project parameters.
# NOTE(review): `catalog` is not defined in any visible cell -- presumably it is
# injected by the notebook session (e.g. a Kedro context); confirm before a fresh run.
base_url = catalog.load('params:oai_fetch_options.base_url')
context = catalog.load('params:oai_fetch_options.context')
# When env is 'dev', oai_extract_items stops paginating after its iteration limit.
env = 'dev'

In [9]:
# Show the loaded endpoint so the reader can confirm which repository is harvested.
base_url

[32m'https://ri.conicet.gov.ar/oai/'[0m

In [None]:
def get_oai_records(base_url, timeout=30):
    """Fetch one URL and pause before returning, to throttle the harvest.

    After the request completes, the function sleeps for as long as the
    request itself took, so a slower server automatically gets a lower
    request rate.

    Args:
        base_url: Full URL to request (endpoint plus query string).
        timeout: Seconds handed to ``requests.get`` as ``timeout``. Without
            it a stalled connection would block the notebook indefinitely.

    Returns:
        The ``requests.Response`` when the status code is 200, otherwise
        ``None`` (the status code is printed).

    Raises:
        requests.RequestException: Propagated unchanged from ``requests.get``
            on connection failures or when ``timeout`` expires.
    """
    # A monotonic clock cannot go backwards, so the measured duration is never
    # negative (time.sleep raises ValueError for negative values).
    start_time = time.monotonic()
    response = requests.get(base_url, timeout=timeout)
    elapsed_time = time.monotonic() - start_time

    print(f"Sleeping for {elapsed_time:.2f} seconds")
    time.sleep(elapsed_time)

    if response.status_code == 200:
        return response

    print(f"Error: {response.status_code}")
    return None

def parse_oai_response(xml_data):
    """Turn an OAI-PMH XML payload into a list of per-record field dicts.

    Every ``oai:record`` that has an ``oai:metadata`` child yields one dict.
    The dict maps the local name of each Dublin Core (``dc:``) element found
    under the metadata to the list of text values of that element, in
    document order. Records without metadata are skipped. An element with no
    text contributes ``None`` to its list.

    Args:
        xml_data: XML document as accepted by ``ET.fromstring``.

    Returns:
        A list of ``{tag: [text, ...]}`` dictionaries, one per record.
    """
    ns = {
        'oai': 'http://www.openarchives.org/OAI/2.0/',
        'dc': 'http://purl.org/dc/elements/1.1/',
    }
    tree_root = ET.fromstring(xml_data)

    parsed = []
    for rec in tree_root.iterfind('.//oai:record', ns):
        meta = rec.find('.//oai:metadata', ns)
        if meta is None:
            continue

        fields = {}
        for node in meta.iterfind('.//dc:*', ns):
            # Tags arrive as '{namespace-uri}local'; keep only the local part.
            local_name = node.tag.rpartition('}')[2]
            fields.setdefault(local_name, []).append(node.text)
        parsed.append(fields)

    return parsed

def _fetch_oai_page(url):
    """Download one ListRecords page and return its parsed records."""
    response = get_oai_records(url)
    if response is None:
        # get_oai_records prints the status and returns None on a non-200
        # reply; fail loudly here instead of crashing on `None.text`.
        raise RuntimeError(f"OAI request failed: {url}")
    return parse_oai_response(response.text)


def oai_extract_items(base_url, context, env, page_size=100, iteration_limit=2):
    """Harvest records page by page and return them as a DataFrame.

    Pages are requested with a client-built resumption token of the form
    ``oai_dc////<offset>``, where the offset grows by ``page_size`` per page.
    Harvesting stops at the first page that yields no records.

    NOTE(review): OAI-PMH treats resumption tokens as server-issued values.
    Building them locally relies on this repository's token format and on
    ``page_size`` matching the server's page size -- confirm.

    Args:
        base_url: Root URL of the OAI service; a trailing slash is tolerated.
        context: Path segment appended to ``base_url`` (e.g. ``request``).
        env: When equal to ``'dev'``, at most ``iteration_limit`` follow-up
            pages are fetched after the first one.
        page_size: Offset increment between consecutive pages.
        iteration_limit: Follow-up page cap applied only in ``'dev'``.

    Returns:
        ``pandas.DataFrame`` with one row per record; each cell holds the
        list of values produced by ``parse_oai_response`` for that field.

    Raises:
        RuntimeError: If a page request does not return HTTP 200.
    """
    # Join without producing '//' when base_url already ends with a slash.
    endpoint = f"{base_url.rstrip('/')}/{context.lstrip('/')}"
    url_prefix = f"{endpoint}?verb=ListRecords&resumptionToken=oai_dc////"

    items = []
    offset = 0
    follow_up_pages = 0

    records = _fetch_oai_page(url_prefix + str(offset))
    # Test the page just fetched, so no extra request follows an empty page.
    while records:
        items.extend(records)

        if env == 'dev' and follow_up_pages >= iteration_limit:
            break

        offset += page_size
        url = url_prefix + str(offset)
        print(url)
        records = _fetch_oai_page(url)
        follow_up_pages += 1

    return pd.DataFrame(items)


In [None]:
# Run the harvest; with env == 'dev' pagination stops after the iteration limit.
df = oai_extract_items(base_url, context, env)

Sleeping for 0.24 seconds
https://ri.conicet.gov.ar/oai//request?verb=ListRecords&resumptionToken=oai_dc////100
Sleeping for 0.24 seconds
https://ri.conicet.gov.ar/oai//request?verb=ListRecords&resumptionToken=oai_dc////200
Sleeping for 0.20 seconds


In [12]:
# Display the harvested records; each cell holds the list of values for that dc field.
df

Unnamed: 0,identifier,title,creator,subject,description,date,type,language,relation,rights,format,publisher,contributor,coverage,source
0,"[http://hdl.handle.net/11336/179477, Cassiodor...",[Paisajes mesetarios en Patagonia: Tecnología ...,"[Cassiodoro, Gisela Eva]","[MESETAS, TECNOLOGÍA, USO DEL ESPACIO, LOGÍSTI...",[La disponibilidad de recursos hídricos en Pat...,[2016-12],"[info:eu-repo/semantics/article, info:ar-repo/...",[spa],[info:eu-repo/semantics/altIdentifier/url/http...,"[info:eu-repo/semantics/openAccess, https://cr...","[application/pdf, application/pdf]",[Instituto Nacional de Antropología y Pensamie...,,,
1,"[http://hdl.handle.net/11336/116972, Nuñez, Pa...",[Distancias entre la ecología y la praxis ambi...,"[Nuñez, Paula Gabriela]","[ECOLOGÍA, PRAXIS AMBIENTAL, ECOFEMINISMO, TEO...",[Este libro recorre un problema que cotidianam...,[2011],"[info:eu-repo/semantics/publishedVersion, info...",[spa],[info:eu-repo/semantics/altIdentifier/url/http...,"[info:eu-repo/semantics/openAccess, https://cr...","[application/pdf, application/pdf, application...",[Universidad Nacional de La Plata],,,
2,"[http://hdl.handle.net/11336/23263, Lipoma, Ma...",[Plant community resilience in the face of fir...,"[Lipoma, Maria Lucrecia, Gurvich, Diego Ezequi...","[Firefire, Functional Redundancy, Plan Functio...",[The ability of communities or ecosystems to r...,[2016-02],"[info:eu-repo/semantics/article, info:ar-repo/...",[eng],[info:eu-repo/semantics/altIdentifier/url/http...,"[info:eu-repo/semantics/openAccess, https://cr...","[application/pdf, application/pdf, application...","[Wiley Blackwell Publishing, Inc]",,,
3,"[http://hdl.handle.net/11336/215533, Marchetti...",[La puesta en acto de las políticas de formaci...,"[Marchetti, Braian, Bazán, Sonia]","[POLÍTICAS PÚBLICAS EDUCATIVAS, FORMACIÓN DOCE...",[El presente artículo recupera las conclusione...,[2022-11],"[info:eu-repo/semantics/article, info:ar-repo/...",[spa],[info:eu-repo/semantics/altIdentifier/doi/10.3...,"[info:eu-repo/semantics/openAccess, https://cr...","[application/pdf, application/pdf]",[Universidad de Buenos Aires. Facultad de Filo...,,,
4,"[http://hdl.handle.net/11336/109802, Giovambat...",[Introducción a la Genética Forense No-Humana],"[Giovambattista, Guillermo, Barrientos, Laura ...","[GENÉTICA FORENSE, IDENTIFICACIÓN GENÉTICA, ID...",[El presente libro nace como producto de una c...,[2015],"[info:eu-repo/semantics/publishedVersion, info...",[spa],[info:eu-repo/semantics/altIdentifier/url/http...,"[info:eu-repo/semantics/openAccess, https://cr...","[application/pdf, application/pdf, application...",[Universidad Nacional de La Plata. Facultad de...,"[Peral Garcia, Pilar, Giovambattista, Guillerm...",,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
290,"[http://hdl.handle.net/11336/88817, Martin, An...",[Prospective multicentre evaluation of the dir...,"[Martin, Anandi, Imperiale, Belén Rocío, Ravol...","[MULTIDRUG RESISTANCE, MYCOBACTERIUM TUBERCULO...",[Objectives: To perform a multicentre study ev...,[2014-02],"[info:eu-repo/semantics/article, info:ar-repo/...",[eng],[info:eu-repo/semantics/altIdentifier/doi/10.1...,"[info:eu-repo/semantics/openAccess, https://cr...","[application/pdf, application/pdf]",[Oxford University Press],,,
291,"[http://hdl.handle.net/11336/11072, Espinosa, ...",[Una iglesia primitiva e internacional: el per...,"[Espinosa, Mariana Esther]","[HERMANOS LIBRES, MISIONES, INTERNACIONALISMO,...",[Este artículo busca aportar conocimiento sobr...,[2014-01],"[info:eu-repo/semantics/article, info:ar-repo/...",[spa],[info:eu-repo/semantics/altIdentifier/url/http...,"[info:eu-repo/semantics/openAccess, https://cr...","[application/pdf, application/pdf]",[Asociación de Cientistas Sociales de la Relig...,,,
292,"[http://hdl.handle.net/11336/173007, Schaller,...",[Reorganización agraria y expansión territoria...,"[Schaller, Enrique Cesar, Almiron, Adrian Alej...","[Tierras fiscales, Tenencia de la tierra, Chac...",[En el trabajo se analiza la política de tierr...,[2021],"[info:eu-repo/semantics/publishedVersion, info...",[spa],[info:eu-repo/semantics/altIdentifier/url/http...,"[info:eu-repo/semantics/openAccess, https://cr...","[application/pdf, application/pdf]",[Imprenta Corintios],"[Carini, Gabriel Fernando, Poggetti, Rocío Sol...",,
293,"[http://hdl.handle.net/11336/134034, Ames, Mar...",[Los límites de la tolerancia religiosa en la ...,"[Ames, Maria Cecilia]","[RELIGIÓN ROMANA, CULTURA CLÁSICA, CONTROL SOC...",[En el año 186 a.C. fueron duramente reprimido...,[2008-12],"[info:eu-repo/semantics/article, info:ar-repo/...",[spa],[info:eu-repo/semantics/altIdentifier/url/http...,"[info:eu-repo/semantics/openAccess, https://cr...","[application/pdf, application/msword, applicat...",[Trotta],,,
