In [1]:
import pandas as pd
from datetime import date


In [5]:
# Load the raw item-by-set dataset via the data catalog (the '#parquet' suffix
# in the key suggests a parquet-backed entry).
# NOTE(review): `catalog` is not defined in any cell visible here -- presumably
# injected by the notebook environment (e.g. a Kedro kernel); confirm before
# relying on Restart & Run All.
df_item_raw = catalog.load('raw/oai/item_by_set#parquet')


In [6]:
# Display the raw frame as loaded, before any landing transformation.
df_item_raw

In [4]:
# Helper to safely get nth element from lists or tuples

def get_nth(x, n):
    """Return ``x[n]`` when it is available, otherwise ``None``.

    ``None`` is returned when ``x`` is ``None``, when ``len(x) <= n``, or when
    either the length check or the indexing raises any ``Exception`` (for
    example ``x`` has no length). This is a deliberate best-effort lookup:
    errors are swallowed rather than propagated.
    """
    # Guard clause: nothing to index into.
    if x is None:
        return None
    try:
        return x[n] if len(x) > n else None
    except Exception:
        # Unsized or otherwise non-indexable value: treat it as "no element".
        return None

# Helper to explode a column into its own dataframe

def explode_column(df_raw, col, new_col_name):
    """Build a long-format frame with one row per element of a list-like column.

    Parameters
    ----------
    df_raw : pandas.DataFrame
        Must contain a ``handle`` column and the column named by ``col``.
    col : str
        Name of the multivalued column to explode.
    new_col_name : str
        Name given to the exploded column in the result.

    Returns
    -------
    pandas.DataFrame
        Columns ``handle``, ``new_col_name`` and ``load_datetime``. Rows whose
        ``col`` value is null are dropped before exploding. The source row
        index is kept, so it repeats for rows that held several values.
        ``load_datetime`` is filled with ``date.today()`` (a date, not a
        timestamp). ``df_raw`` itself is not modified.
    """
    return (
        df_raw[['handle', col]]
        .dropna(subset=[col])
        .explode(col)
        .rename(columns={col: new_col_name})
        .assign(load_datetime=date.today())
    )

In [None]:

def land_oai_item_by_set(df_item_raw):
    """Split the raw OAI item frame into an item table plus five child tables.

    Parameters
    ----------
    df_item_raw : pandas.DataFrame
        Must provide the columns read below: ``item_id``, ``col_id``,
        ``title``, ``date``, ``identifiers``, ``types``, ``rights``,
        ``creators``, ``languages``, ``subjects``, ``publishers`` and
        ``relations``. The input is not modified; work happens on a copy.

    Returns
    -------
    tuple of pandas.DataFrame
        ``(df_item, df_item_creator, df_item_language, df_item_subject,
        df_item_relation, df_item_publisher)``. Note that the relation table
        comes before the publisher table. Every table carries a
        ``load_datetime`` column holding ``date.today()``.
    """
    df = df_item_raw.copy()

    def nth_of(column, n):
        # Positional pick from a list-like column; get_nth yields None for
        # missing, too-short or non-indexable values instead of raising.
        return df[column].apply(lambda values: get_nth(values, n))

    # The handle is the first identifier. Routing it through get_nth keeps
    # this lookup consistent with the types/rights lookups below, so a
    # non-sized value (e.g. NaN) yields None rather than a TypeError.
    df['handle'] = nth_of('identifiers', 0)

    # Main item table: scalar fields plus positional picks from the
    # multivalued 'types' and 'rights' columns.
    df_item = pd.DataFrame({
        'item_id':           df['item_id'],
        'handle':            df['handle'],
        'col_id':            df['col_id'],
        'title':             df['title'],
        'date_issued':       df['date'],
        'type_openaire':     nth_of('types', 0),
        'type_snrd':         nth_of('types', 1),
        'version':           nth_of('types', 2),
        'access_right':      nth_of('rights', 0),
        'license_condition': nth_of('rights', 1),
    })
    df_item['load_datetime'] = date.today()

    # Child tables: one row per (handle, value) for each multivalued field.
    df_item_creator   = explode_column(df, 'creators',   'creator')
    df_item_language  = explode_column(df, 'languages',  'language_iso')
    df_item_subject   = explode_column(df, 'subjects',   'subject')
    df_item_publisher = explode_column(df, 'publishers', 'publisher')
    df_item_relation  = explode_column(df, 'relations',  'relation')

    # Strip the altIdentifier URL prefix (literal match, not a regex).
    # Only the 'url/' prefix is removed; other altIdentifier prefixes
    # (e.g. 'doi/') are left untouched.
    df_item_relation['relation'] = df_item_relation['relation'].str.replace(
        'info:eu-repo/semantics/altIdentifier/url/', '', regex=False
    )

    return (
        df_item,
        df_item_creator,
        df_item_language,
        df_item_subject,
        df_item_relation,
        df_item_publisher,
    )


In [6]:
# Land the raw frame into the item table and its child tables. The unpacking
# order must match the function's return order: relation precedes publisher.
df_item, df_item_creator, df_item_language, df_item_subject, df_item_relation, df_item_publisher = land_oai_item_by_set(df_item_raw)


In [7]:
# Inspect the main item table (one row per row of the raw frame).
df_item

Unnamed: 0,item_oai_id,handle,col_id,title,date_issued,type_openaire,type_snrd,version,access_right,license_condition,load_datetime
0,oai:ri.conicet.gov.ar:11336/181613,http://hdl.handle.net/11336/181613,col_11336_109892,Hydrophilization of magnetic nanoparticles wit...,2021-12,info:eu-repo/semantics/article,info:ar-repo/semantics/artículo,info:eu-repo/semantics/publishedVersion,info:eu-repo/semantics/restrictedAccess,https://creativecommons.org/licenses/by-nc-nd/...,2025-05-30
1,oai:ri.conicet.gov.ar:11336/242335,http://hdl.handle.net/11336/242335,col_11336_109892,Structure of Zn x Fe3− x O4 nanoparticles stud...,2024-07,info:eu-repo/semantics/article,info:ar-repo/semantics/artículo,info:eu-repo/semantics/publishedVersion,info:eu-repo/semantics/openAccess,https://creativecommons.org/licenses/by-nc-sa/...,2025-05-30
2,oai:ri.conicet.gov.ar:11336/255685,http://hdl.handle.net/11336/255685,col_11336_109892,Programa de Acompañamiento del Sueño en la Inf...,2024-02,info:eu-repo/semantics/article,info:ar-repo/semantics/artículo,info:eu-repo/semantics/publishedVersion,info:eu-repo/semantics/openAccess,https://creativecommons.org/licenses/by-nc-nd/...,2025-05-30
3,oai:ri.conicet.gov.ar:11336/224725,http://hdl.handle.net/11336/224725,col_11336_109892,Flexible NbTiN thin films for superconducting ...,2023-03,info:eu-repo/semantics/article,info:ar-repo/semantics/artículo,info:eu-repo/semantics/publishedVersion,info:eu-repo/semantics/restrictedAccess,https://creativecommons.org/licenses/by-nc-nd/...,2025-05-30
4,oai:ri.conicet.gov.ar:11336/123737,http://hdl.handle.net/11336/123737,col_11336_109892,Self-calibrated double luminescent thermometer...,2019-04,info:eu-repo/semantics/article,info:ar-repo/semantics/artículo,info:eu-repo/semantics/publishedVersion,info:eu-repo/semantics/openAccess,https://creativecommons.org/licenses/by/2.5/ar/,2025-05-30
...,...,...,...,...,...,...,...,...,...,...,...
350,oai:ri.conicet.gov.ar:11336/216872,http://hdl.handle.net/11336/216872,col_11336_109892,Low-temperature thermal expansion of the topol...,2022-06,info:eu-repo/semantics/article,info:ar-repo/semantics/artículo,info:eu-repo/semantics/publishedVersion,info:eu-repo/semantics/restrictedAccess,https://creativecommons.org/licenses/by-nc-sa/...,2025-05-30
351,oai:ri.conicet.gov.ar:11336/216757,http://hdl.handle.net/11336/216757,col_11336_109892,Response of the chiral soliton lattice to spin...,2022-09,info:eu-repo/semantics/article,info:ar-repo/semantics/artículo,info:eu-repo/semantics/publishedVersion,info:eu-repo/semantics/openAccess,https://creativecommons.org/licenses/by-nc-sa/...,2025-05-30
352,oai:ri.conicet.gov.ar:11336/123896,http://hdl.handle.net/11336/123896,col_11336_109892,Study of BaCe0.4Zr0.4Y0.2O3-δ/BaCe0.8Pr0.2O3-δ...,2020-02,info:eu-repo/semantics/article,info:ar-repo/semantics/artículo,info:eu-repo/semantics/publishedVersion,info:eu-repo/semantics/restrictedAccess,https://creativecommons.org/licenses/by-nc-sa/...,2025-05-30
353,oai:ri.conicet.gov.ar:11336/181755,http://hdl.handle.net/11336/181755,col_11336_109892,Iron phthalocyanine on Au(111) is a “non-Landa...,2021-10,info:eu-repo/semantics/article,info:ar-repo/semantics/artículo,info:eu-repo/semantics/publishedVersion,info:eu-repo/semantics/openAccess,https://creativecommons.org/licenses/by/2.5/ar/,2025-05-30


In [8]:
# Inspect the creator table (exploded from 'creators'; index repeats per source row).
df_item_creator

Unnamed: 0,handle,creator,load_datetime
0,http://hdl.handle.net/11336/181613,"Lavorato, Gabriel Carlos",2025-05-30
0,http://hdl.handle.net/11336/181613,"Azcárate, Julio César",2025-05-30
0,http://hdl.handle.net/11336/181613,"Rivas Aiello, Maria Belen",2025-05-30
0,http://hdl.handle.net/11336/181613,"Orozco Henao, Juan Manuel",2025-05-30
0,http://hdl.handle.net/11336/181613,"Mendoza Zélis, Pedro",2025-05-30
...,...,...,...
354,http://hdl.handle.net/11336/216310,"Sánchez, F. A.",2025-05-30
354,http://hdl.handle.net/11336/216310,"Sofo Haro, Miguel Francisco",2025-05-30
354,http://hdl.handle.net/11336/216310,"Longhino, Juan Manuel",2025-05-30
354,http://hdl.handle.net/11336/216310,"Gomez Berisso, Mariano",2025-05-30


In [9]:
# Inspect the language table (exploded from 'languages').
df_item_language

Unnamed: 0,handle,language_iso,load_datetime
0,http://hdl.handle.net/11336/181613,eng,2025-05-30
1,http://hdl.handle.net/11336/242335,eng,2025-05-30
2,http://hdl.handle.net/11336/255685,eng,2025-05-30
3,http://hdl.handle.net/11336/224725,eng,2025-05-30
4,http://hdl.handle.net/11336/123737,eng,2025-05-30
...,...,...,...
350,http://hdl.handle.net/11336/216872,eng,2025-05-30
351,http://hdl.handle.net/11336/216757,eng,2025-05-30
352,http://hdl.handle.net/11336/123896,eng,2025-05-30
353,http://hdl.handle.net/11336/181755,eng,2025-05-30


In [10]:
# Inspect the subject table (exploded from 'subjects').
df_item_subject

Unnamed: 0,handle,subject,load_datetime
0,http://hdl.handle.net/11336/181613,FE3O4 NANOPARTICLES,2025-05-30
0,http://hdl.handle.net/11336/181613,INTERPARTICLE MAGNETIC INTERACTIONS,2025-05-30
0,http://hdl.handle.net/11336/181613,OLEATE CAPPING DENSITY,2025-05-30
0,http://hdl.handle.net/11336/181613,POLY(MALEIC ANHYDRIDE ALT-1-OCTADECENE),2025-05-30
0,http://hdl.handle.net/11336/181613,POLYMER-NANOPARTICLE ASSEMBLIES,2025-05-30
...,...,...,...
353,http://hdl.handle.net/11336/181755,https://purl.org/becyt/ford/1,2025-05-30
354,http://hdl.handle.net/11336/216310,INSPECTION WITH NEUTRONS,2025-05-30
354,http://hdl.handle.net/11336/216310,NEUTRON RADIOGRAPHY,2025-05-30
354,http://hdl.handle.net/11336/216310,https://purl.org/becyt/ford/1.3,2025-05-30


In [11]:
# Inspect the relation table (exploded from 'relations', with the
# altIdentifier 'url/' prefix stripped).
df_item_relation

Unnamed: 0,handle,relation,load_datetime
0,http://hdl.handle.net/11336/181613,https://www.sciencedirect.com/science/article/...,2025-05-30
0,http://hdl.handle.net/11336/181613,info:eu-repo/semantics/altIdentifier/doi/10.10...,2025-05-30
1,http://hdl.handle.net/11336/242335,https://pubs.aip.org/jap/article/136/4/043905/...,2025-05-30
1,http://hdl.handle.net/11336/242335,info:eu-repo/semantics/altIdentifier/doi/10.10...,2025-05-30
2,http://hdl.handle.net/11336/255685,https://www.sap.org.ar/docs/publicaciones/arch...,2025-05-30
...,...,...,...
352,http://hdl.handle.net/11336/123896,info:eu-repo/semantics/altIdentifier/doi/10.10...,2025-05-30
353,http://hdl.handle.net/11336/181755,https://www.nature.com/articles/s41467-021-263...,2025-05-30
353,http://hdl.handle.net/11336/181755,info:eu-repo/semantics/altIdentifier/doi/10.10...,2025-05-30
354,http://hdl.handle.net/11336/216310,https://doi.org/10.1088/1748-0221/17/02/P02004,2025-05-30


In [12]:
# Inspect the publisher table (exploded from 'publishers').
df_item_publisher

Unnamed: 0,handle,publisher,load_datetime
0,http://hdl.handle.net/11336/181613,Elsevier Science,2025-05-30
1,http://hdl.handle.net/11336/242335,American Institute of Physics,2025-05-30
2,http://hdl.handle.net/11336/255685,Sociedad Argentina de Pediatría,2025-05-30
3,http://hdl.handle.net/11336/224725,Elsevier Science,2025-05-30
4,http://hdl.handle.net/11336/123737,Frontiers Media S.A.,2025-05-30
...,...,...,...
350,http://hdl.handle.net/11336/216872,Elsevier Science,2025-05-30
351,http://hdl.handle.net/11336/216757,American Physical Society,2025-05-30
352,http://hdl.handle.net/11336/123896,Pergamon-Elsevier Science Ltd,2025-05-30
353,http://hdl.handle.net/11336/181755,Springer,2025-05-30
