In [1]:
import requests
import pandas as pd
import xml.etree.ElementTree as ET
import time


In [2]:
# Harvest settings, read from the `oai_fetch_options` parameter group.
# NOTE(review): `catalog` is not defined in any visible cell; presumably it is
# injected by the notebook session (e.g. a Kedro kernel) - confirm.
base_url, context, metadata_format, set_id = (
    catalog.load(f'params:oai_fetch_options.{option}')
    for option in ('base_url', 'context', 'metadata_format', 'set_id')
)

# Execution mode; 'dev' caps the number of pages the extractor requests.
env = 'dev'


In [3]:
def get_oai_records(base_url, timeout=30.0):
    """Perform one GET request and then pause politely before returning.

    After the request finishes (successfully or not) the function sleeps for
    twice the time the request took, with a minimum of one second, so that
    consecutive calls do not hammer the remote server.

    Args:
        base_url: URL to request. The caller in this notebook passes the fully
            built ListRecords URL, not just the repository base URL.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled connection would block the harvest forever.

    Returns:
        The ``requests.Response`` when the status code is 200; ``None`` for
        any other status code or when the request itself fails (connection
        error, timeout, invalid URL, ...).
    """
    # Monotonic clock: the elapsed time cannot go negative or jump if the
    # system wall clock is adjusted while the request is in flight.
    start_time = time.monotonic()
    try:
        response = requests.get(base_url, timeout=timeout)
    except requests.RequestException as exc:
        # Report the failure the same way as a bad status code (None) so the
        # caller's existing error branch handles it instead of crashing.
        print(f"Error: {exc}")
        response = None
    elapsed_time = time.monotonic() - start_time

    # Wait twice the request duration, but never less than 1 second.
    delay = max(2 * elapsed_time, 1.0)
    print(f"Sleeping for {delay:.2f} seconds")
    time.sleep(delay)

    if response is None:
        return None
    if response.status_code == 200:
        return response

    print(f"Error: {response.status_code}")
    return None


In [4]:
def oai_extract_item_by_set(
    base_url: str,
    context: str,
    set_id: str,
    metadata_format: str,
    env: str,
    iteration_limit: int = 2,
    page_size: int = 100,
) -> "tuple[pd.DataFrame, pd.DataFrame]":
    """Harvest the Dublin Core records of one OAI set into a DataFrame.

    Pages are requested one after another through ``get_oai_records`` until a
    request fails, a page cannot be parsed, a page contains no ``record``
    elements, or (only when ``env == 'dev'``) ``iteration_limit`` pages have
    been requested. Records that have no ``metadata`` element are skipped.

    NOTE(review): the ``resumptionToken`` is synthesized here as
    ``{metadata_format}///{set_id}/{offset}`` instead of being read from the
    previous response. That relies on the server accepting this token layout
    and paging in steps of ``page_size`` - confirm against the target server.

    Args:
        base_url: Base URL of the OAI endpoint (without the context segment).
        context: Path segment appended after ``base_url``.
        set_id: Set identifier; also stored in the ``col_id`` column.
        metadata_format: Metadata prefix embedded in the resumption token.
        env: Execution mode; ``'dev'`` enables the page limit.
        iteration_limit: Maximum number of pages requested in dev mode.
        page_size: Offset increment between consecutive pages.

    Returns:
        A tuple ``(df, df_head)`` where ``df`` holds every harvested record
        and ``df_head`` is ``df.head(100)``. Both always expose the same
        columns, even when nothing was harvested.
    """
    ns = {
        'oai': 'http://www.openarchives.org/OAI/2.0/',
        'dc': 'http://purl.org/dc/elements/1.1/'
    }
    # Output column -> Dublin Core element, for fields that may repeat.
    multi_valued = {
        'creators': 'creator',
        'types': 'type',
        'identifiers': 'identifier',
        'languages': 'language',
        'subjects': 'subject',
        'publishers': 'publisher',
        'relations': 'relation',
        'rights': 'rights',
    }
    # Fixed schema so an empty harvest still yields the expected columns.
    columns = ['item_id', 'col_id', 'title', 'date', *multi_valued]

    def _text_or_none(node):
        """Return the text of an optional element, or None when it is absent."""
        return node.text if node is not None else None

    records = []
    resumption_token = 0
    iteration_count = 0

    while True:
        if env == 'dev' and iteration_count >= iteration_limit:
            break

        params = f'/{context}?verb=ListRecords&resumptionToken={metadata_format}///{set_id}/{resumption_token}'
        url = base_url + params

        print(f"Consultando: {url}")

        response = get_oai_records(url)

        resumption_token += page_size
        iteration_count += 1

        if response is None or not response.ok:
            print(f"Error al consultar: {url}")
            break

        try:
            root = ET.fromstring(response.text)
        except ET.ParseError as exc:
            # Keep what has been harvested so far instead of losing it all
            # to a single malformed page.
            print(f"Error al parsear la respuesta de {url}: {exc}")
            break

        record_nodes = root.findall('.//oai:record', ns)

        if not record_nodes:
            print("No se encontraron más registros.")
            break

        for record in record_nodes:
            metadata = record.find('.//oai:metadata', ns)
            if metadata is None:
                continue

            row = {
                'item_id': _text_or_none(record.find('.//oai:identifier', ns)),
                'col_id': set_id,
                # Single-valued fields: only the first occurrence is kept.
                'title': _text_or_none(metadata.find('.//dc:title', ns)),
                'date': _text_or_none(metadata.find('.//dc:date', ns)),
            }
            # Multi-valued fields: every occurrence, in document order.
            for column, tag in multi_valued.items():
                row[column] = [e.text for e in metadata.findall(f'.//dc:{tag}', ns)]

            records.append(row)

    df = pd.DataFrame(records, columns=columns)

    return df, df.head(100)


In [5]:
# Run the harvest: `df` receives every record collected, `df_dev` the second
# frame returned by the extractor (its first 100 rows).
df, df_dev = oai_extract_item_by_set(
    base_url=base_url,
    context=context,
    set_id=set_id,
    metadata_format=metadata_format,
    env=env,
)

Consultando: https://ri.conicet.gov.ar/oai/request?verb=ListRecords&resumptionToken=oai_dc///col_11336_109892/0
Sleeping for 1.00 seconds
Consultando: https://ri.conicet.gov.ar/oai/request?verb=ListRecords&resumptionToken=oai_dc///col_11336_109892/100
Sleeping for 1.00 seconds


In [6]:
# Display the complete harvested frame (the notebook truncates long output).
df

Unnamed: 0,item_id,col_id,title,date,creators,types,identifiers,languages,subjects,publishers,relations,rights
0,oai:ri.conicet.gov.ar:11336/181613,col_11336_109892,Hydrophilization of magnetic nanoparticles wit...,2021-12,"[Lavorato, Gabriel Carlos, Azcárate, Julio Cés...","[info:eu-repo/semantics/article, info:ar-repo/...","[http://hdl.handle.net/11336/181613, Lavorato,...",[eng],"[FE3O4 NANOPARTICLES, INTERPARTICLE MAGNETIC I...",[Elsevier Science],[info:eu-repo/semantics/altIdentifier/url/http...,"[info:eu-repo/semantics/restrictedAccess, http..."
1,oai:ri.conicet.gov.ar:11336/242335,col_11336_109892,Structure of Zn x Fe3− x O4 nanoparticles stud...,2024-07,"[Lohr, Javier Hernán, Tobia, Dina, Torres, T. ...","[info:eu-repo/semantics/article, info:ar-repo/...","[http://hdl.handle.net/11336/242335, Lohr, Jav...",[eng],"[Zn ferrita, Hyperthermia, Magnetism, Neutron ...",[American Institute of Physics],[info:eu-repo/semantics/altIdentifier/url/http...,"[info:eu-repo/semantics/openAccess, https://cr..."
2,oai:ri.conicet.gov.ar:11336/255685,col_11336_109892,Programa de Acompañamiento del Sueño en la Inf...,2024-02,"[Leive, Lorena, Melfi, Daniela, Lipovetzky, Jo...","[info:eu-repo/semantics/article, info:ar-repo/...","[http://hdl.handle.net/11336/255685, Leive, Lo...",[eng],"[Sleep, Neurodevelopmental disorders, Autism, ...",[Sociedad Argentina de Pediatría],[info:eu-repo/semantics/altIdentifier/url/http...,"[info:eu-repo/semantics/openAccess, https://cr..."
3,oai:ri.conicet.gov.ar:11336/224725,col_11336_109892,Flexible NbTiN thin films for superconducting ...,2023-03,"[Rezinovsky Nieto, S. J., Hofer, Juan Andres, ...","[info:eu-repo/semantics/article, info:ar-repo/...","[http://hdl.handle.net/11336/224725, Rezinovsk...",[eng],"[Flexible substrates, Reactive sputtering, Sup...",[Elsevier Science],[info:eu-repo/semantics/altIdentifier/url/http...,"[info:eu-repo/semantics/restrictedAccess, http..."
4,oai:ri.conicet.gov.ar:11336/123737,col_11336_109892,Self-calibrated double luminescent thermometer...,2019-04,"[Brites, Carlos D.S., Martínez, Eduardo David,...","[info:eu-repo/semantics/article, info:ar-repo/...","[http://hdl.handle.net/11336/123737, Brites, C...",[eng],"[DOUBLE THERMOMETERS, LUMINESCENCE, POLYMER NA...",[Frontiers Media S.A.],[info:eu-repo/semantics/altIdentifier/doi/10.3...,"[info:eu-repo/semantics/openAccess, https://cr..."
...,...,...,...,...,...,...,...,...,...,...,...,...
191,oai:ri.conicet.gov.ar:11336/203654,col_11336_109892,Comparison of the (photo)catalytic efficiency ...,2022-03,"[Donadelli, Jorge Andrés, Rivas Aiello, Maria ...","[info:eu-repo/semantics/article, info:ar-repo/...","[http://hdl.handle.net/11336/203654, Donadelli...",[eng],"[Silver iron oxide nanocomposites, Laser ablat...",[Springer],[info:eu-repo/semantics/altIdentifier/url/http...,"[info:eu-repo/semantics/restrictedAccess, http..."
192,oai:ri.conicet.gov.ar:11336/146772,col_11336_109892,Modeling the Magnetic-Hyperthermia Response of...,2020-07,"[Valdés, Daniela Paola, Lima, Enio Junior, Zys...","[info:eu-repo/semantics/article, info:ar-repo/...","[http://hdl.handle.net/11336/146772, Valdés, D...",[eng],"[MAGNETIC NANOPARTICLES, MAGNETIC FLUID HYPERT...",[American Physical Society],[info:eu-repo/semantics/altIdentifier/url/http...,"[info:eu-repo/semantics/openAccess, https://cr..."
193,oai:ri.conicet.gov.ar:11336/215948,col_11336_109892,Giant optomechanical coupling and dephasing pr...,2022-12,"[Sesin, Pablo Ezequiel, Kuznetsov, A. S., Roza...","[info:eu-repo/semantics/article, info:ar-repo/...","[http://hdl.handle.net/11336/215948, Sesin, Pa...",[eng],"[Optomecánica en Cavidades, https://purl.org/b...",[Cornell University],[info:eu-repo/semantics/altIdentifier/doi/10.4...,"[info:eu-repo/semantics/openAccess, https://cr..."
194,oai:ri.conicet.gov.ar:11336/234148,col_11336_109892,Chemical compatibility of solid oxide fuel cel...,2024-04,"[Tagarelli, V. E., Vega Castillo, Jesus Eduard...","[info:eu-repo/semantics/article, info:ar-repo/...","[http://hdl.handle.net/11336/234148, Tagarelli...",[eng],"[CHEMICAL COMPATIBILITY, EIS, RUDDLESDEN POPPE...",[Wiley VCH Verlag],[info:eu-repo/semantics/altIdentifier/url/http...,"[info:eu-repo/semantics/restrictedAccess, http..."


In [7]:
# Display the reduced frame returned alongside `df` by the extractor.
df_dev

Unnamed: 0,item_id,col_id,title,date,creators,types,identifiers,languages,subjects,publishers,relations,rights
0,oai:ri.conicet.gov.ar:11336/181613,col_11336_109892,Hydrophilization of magnetic nanoparticles wit...,2021-12,"[Lavorato, Gabriel Carlos, Azcárate, Julio Cés...","[info:eu-repo/semantics/article, info:ar-repo/...","[http://hdl.handle.net/11336/181613, Lavorato,...",[eng],"[FE3O4 NANOPARTICLES, INTERPARTICLE MAGNETIC I...",[Elsevier Science],[info:eu-repo/semantics/altIdentifier/url/http...,"[info:eu-repo/semantics/restrictedAccess, http..."
1,oai:ri.conicet.gov.ar:11336/242335,col_11336_109892,Structure of Zn x Fe3− x O4 nanoparticles stud...,2024-07,"[Lohr, Javier Hernán, Tobia, Dina, Torres, T. ...","[info:eu-repo/semantics/article, info:ar-repo/...","[http://hdl.handle.net/11336/242335, Lohr, Jav...",[eng],"[Zn ferrita, Hyperthermia, Magnetism, Neutron ...",[American Institute of Physics],[info:eu-repo/semantics/altIdentifier/url/http...,"[info:eu-repo/semantics/openAccess, https://cr..."
2,oai:ri.conicet.gov.ar:11336/255685,col_11336_109892,Programa de Acompañamiento del Sueño en la Inf...,2024-02,"[Leive, Lorena, Melfi, Daniela, Lipovetzky, Jo...","[info:eu-repo/semantics/article, info:ar-repo/...","[http://hdl.handle.net/11336/255685, Leive, Lo...",[eng],"[Sleep, Neurodevelopmental disorders, Autism, ...",[Sociedad Argentina de Pediatría],[info:eu-repo/semantics/altIdentifier/url/http...,"[info:eu-repo/semantics/openAccess, https://cr..."
3,oai:ri.conicet.gov.ar:11336/224725,col_11336_109892,Flexible NbTiN thin films for superconducting ...,2023-03,"[Rezinovsky Nieto, S. J., Hofer, Juan Andres, ...","[info:eu-repo/semantics/article, info:ar-repo/...","[http://hdl.handle.net/11336/224725, Rezinovsk...",[eng],"[Flexible substrates, Reactive sputtering, Sup...",[Elsevier Science],[info:eu-repo/semantics/altIdentifier/url/http...,"[info:eu-repo/semantics/restrictedAccess, http..."
4,oai:ri.conicet.gov.ar:11336/123737,col_11336_109892,Self-calibrated double luminescent thermometer...,2019-04,"[Brites, Carlos D.S., Martínez, Eduardo David,...","[info:eu-repo/semantics/article, info:ar-repo/...","[http://hdl.handle.net/11336/123737, Brites, C...",[eng],"[DOUBLE THERMOMETERS, LUMINESCENCE, POLYMER NA...",[Frontiers Media S.A.],[info:eu-repo/semantics/altIdentifier/doi/10.3...,"[info:eu-repo/semantics/openAccess, https://cr..."
...,...,...,...,...,...,...,...,...,...,...,...,...
95,oai:ri.conicet.gov.ar:11336/228516,col_11336_109892,Annealing effects on photoresist films' mechan...,2023-10,"[Avellaneda, Manuel, Boasso, Andrés, Sirena, M...","[info:eu-repo/semantics/article, info:ar-repo/...","[http://hdl.handle.net/11336/228516, Avellaned...",[eng],"[ATOMIC FORCE MICROSCOPY, DEPTH-SENSING NANOIN...",[Elsevier],[info:eu-repo/semantics/altIdentifier/url/http...,"[info:eu-repo/semantics/embargoedAccess, https..."
96,oai:ri.conicet.gov.ar:11336/218757,col_11336_109892,A kinetic study of La0.75Sr0.25Cr0.5Mn0.5O3-δ ...,2023-10,"[Montenegro Hernández, Alejandra, Chanquia, Co...","[info:eu-repo/semantics/article, info:ar-repo/...","[http://hdl.handle.net/11336/218757, Montenegr...",[eng],"[CHROMITE, EIS, MANGANITE, NANO-STRUCTURED, S-...",[Elsevier],[info:eu-repo/semantics/altIdentifier/url/http...,"[info:eu-repo/semantics/openAccess, https://cr..."
97,oai:ri.conicet.gov.ar:11336/126868,col_11336_109892,Tuning LiBH4 for hydrogen storage: Destabiliza...,2020-01,"[Puszkiel, Julián Atilio, Gasnier, Aurelien, A...","[info:eu-repo/semantics/article, info:ar-repo/...","[http://hdl.handle.net/11336/126868, Puszkiel,...",[eng],"[ADDITIVE, BOROHYDRIDES, DESTABILIZATION, HYDR...",[Multidisciplinary Digital Publishing Institute],[info:eu-repo/semantics/altIdentifier/url/http...,"[info:eu-repo/semantics/openAccess, https://cr..."
98,oai:ri.conicet.gov.ar:11336/123909,col_11336_109892,Running condition and material response fretti...,2019-07,"[Soria, Sergio Raul, Tolley, Alfredo Juan, Yaw...","[info:eu-repo/semantics/article, info:ar-repo/...","[http://hdl.handle.net/11336/123909, Soria, Se...",[eng],"[AISI 304L, FRETTING MAPS, INCOLOY 800, STEAM ...",[Elsevier],[info:eu-repo/semantics/altIdentifier/doi/10.1...,"[info:eu-repo/semantics/restrictedAccess, http..."
