In [1]:
from datetime import date
import ast
import pandas as pd

# Render every column when a DataFrame is displayed (None lifts the column limit).
pd.set_option('display.max_columns', None)
# NOTE(review): `catalog` is not defined in this notebook; presumably it is a
# data catalog injected into the kernel session (e.g. by Kedro) — TODO confirm.
df = catalog.load('raw/openaire/researchproduct')

## Paso 0: Seleccionar columnas con identificador y 'creator'

In [2]:
# Keep only the product identifier and the raw 'creator' payload, then let
# pandas pick the best nullable dtype for each of the two columns.
columns_of_interest = ['dri:objIdentifier', 'creator']
df_researchproduct = df[columns_of_interest].convert_dtypes()

In [3]:
# Preview the first five rows of the two selected columns.
df_researchproduct.head(5)

Unnamed: 0,dri:objIdentifier,creator
0,doi_dedup___::2577d684a839dc78be3e1307914cdfd5,"[{'@rank': '1', '@name': 'Diana', '@surname': ..."
1,doi_________::01bdfeebe25730711be433245a4426e7,"[{'@rank': '1', '@name': 'María S.', '@surname..."
2,doi_dedup___::fccba469c4dbc14c5cec34e69ab21625,"[{'@rank': '1', '#text': 'Nancy Esther Quarant..."
3,doi_dedup___::000d1effb7dd214508cfa2db791e3814,"[{'@rank': '1', '@name': 'Federico', '@surname..."
4,doi_dedup___::387324dfb1abac93013eccfd545780c1,"{'@rank': '1', '#text': 'Clara Tapia'}"


## Paso 1: Asegurarse de que 'creator' sea un diccionario o lista

In [4]:
def _creator_as_list(raw_creator):
    """Return a 'creator' cell as a list.

    String cells are parsed as Python literals with ``ast.literal_eval``;
    any other value is taken as-is. A value that is not already a list is
    wrapped in a one-element list.
    """
    if isinstance(raw_creator, str):
        parsed = ast.literal_eval(raw_creator)
    else:
        parsed = raw_creator
    return parsed if isinstance(parsed, list) else [parsed]


df_researchproduct['creator'] = df_researchproduct['creator'].apply(_creator_as_list)

In [5]:
# Display the frame to check that every 'creator' cell is now a list.
df_researchproduct

Unnamed: 0,dri:objIdentifier,creator
0,doi_dedup___::2577d684a839dc78be3e1307914cdfd5,"[{'@rank': '1', '@name': 'Diana', '@surname': ..."
1,doi_________::01bdfeebe25730711be433245a4426e7,"[{'@rank': '1', '@name': 'María S.', '@surname..."
2,doi_dedup___::fccba469c4dbc14c5cec34e69ab21625,"[{'@rank': '1', '#text': 'Nancy Esther Quarant..."
3,doi_dedup___::000d1effb7dd214508cfa2db791e3814,"[{'@rank': '1', '@name': 'Federico', '@surname..."
4,doi_dedup___::387324dfb1abac93013eccfd545780c1,"[{'@rank': '1', '#text': 'Clara Tapia'}]"
...,...,...
3870,doi_dedup___::a901f8fcb7a8e4fd764da3fdbf42c064,"[{'@rank': '1', '@name': 'María Esther Ferná..."
3871,doi_dedup___::7d4824281cdafde3676552da1141877a,"[{'@rank': '1', '@name': 'Rosa María', '@surn..."
3872,doi_dedup___::acf6451bd9b42e6b944f84d81fe47232,"[{'@rank': '1', '@name': 'Jason', '@surname': ..."
3873,doi_dedup___::42f7f1f2a1e586ca7bb4942987b54b71,"[{'@rank': '1', '#text': 'Guillermo Daniel Man..."


## Paso 2: Expandir (explode) la columna 'creator' y reiniciar el índice

In [6]:
# Give each creator its own row (the product identifier repeats per creator),
# then replace the repeated index labels with a fresh 0..n-1 index.
one_creator_per_row = df_researchproduct.explode('creator')
df_researchproduct = one_creator_per_row.reset_index(drop=True)

In [7]:
# Display the exploded frame: one row per creator entry.
df_researchproduct

Unnamed: 0,dri:objIdentifier,creator
0,doi_dedup___::2577d684a839dc78be3e1307914cdfd5,"{'@rank': '1', '@name': 'Diana', '@surname': '..."
1,doi_dedup___::2577d684a839dc78be3e1307914cdfd5,"{'@rank': '2', '@name': 'Angelo', '@surname': ..."
2,doi_dedup___::2577d684a839dc78be3e1307914cdfd5,"{'@rank': '3', '@name': 'Angel Ricardo', '@sur..."
3,doi_________::01bdfeebe25730711be433245a4426e7,"{'@rank': '1', '@name': 'María S.', '@surname'..."
4,doi_________::01bdfeebe25730711be433245a4426e7,"{'@rank': '2', '@name': 'Juan J.', '@surname':..."
...,...,...
18572,doi_dedup___::42f7f1f2a1e586ca7bb4942987b54b71,"{'@rank': '5', '@orcid': '0000-0002-6677-2471'..."
18573,doi_dedup___::10fb361856b54e85f1dd2a296b90d1ad,"{'@rank': '1', '@surname': 'Ana María Vargas D..."
18574,doi_dedup___::10fb361856b54e85f1dd2a296b90d1ad,"{'@rank': '2', '@surname': 'Stella Maris Martí..."
18575,doi_dedup___::10fb361856b54e85f1dd2a296b90d1ad,"{'@rank': '3', '@surname': 'Roxana Mariani', '..."


## Paso 3: Normalizar la columna 'creator' en nuevas columnas

In [8]:
# Flatten the per-row creator records into a table with one column per key.
creator_records = df_researchproduct["creator"]
creator_expanded = pd.json_normalize(creator_records)

In [9]:
# Display the flattened creator attributes.
creator_expanded

Unnamed: 0,@rank,@name,@surname,#text,@orcid,@orcid_pending
0,1,Diana,Monteoliva,Diana Monteoliva,,
1,2,Angelo,Plastino,Angelo Plastino,0000-0001-5934-2783,
2,3,Angel Ricardo,Plastino,Angel Ricardo Plastino,,
3,1,María S.,Leguizamón‐Aparicio,María S. Leguizamón‐Aparicio,,
4,2,Juan J.,Musci,Juan J. Musci,,
...,...,...,...,...,...,...
18572,5,,,Diego Bautista Genovese,0000-0002-6677-2471,
18573,1,,Ana María Vargas Díaz,Ana María Vargas Díaz,,
18574,2,,Stella Maris Martín,Stella Maris Martín,,
18575,3,,Roxana Mariani,Roxana Mariani,,0000-0002-0066-6692


## Paso 4: Concatenar df_researchproduct con creator_expanded, asegurando que los índices estén alineados

In [10]:
# Attach the flattened creator columns to their product identifier.
# pd.concat(axis=1) aligns rows on index labels and silently pads any
# mismatch with missing values, so check the alignment explicitly first.
if not df_researchproduct.index.equals(creator_expanded.index):
    raise ValueError(
        "df_researchproduct and creator_expanded do not share the same index; "
        "they cannot be concatenated column-wise without misaligning rows."
    )

# Combine side by side, then drop the raw 'creator' column. drop() returns a
# new frame (no inplace=True), so this cell rebuilds its result from its
# inputs every time it runs.
df_researchproduct2creator = pd.concat(
    [df_researchproduct, creator_expanded],
    axis=1,
).drop(columns='creator')

In [11]:
# Display the combined frame: product identifier plus the creator attribute columns.
df_researchproduct2creator

Unnamed: 0,dri:objIdentifier,@rank,@name,@surname,#text,@orcid,@orcid_pending
0,doi_dedup___::2577d684a839dc78be3e1307914cdfd5,1,Diana,Monteoliva,Diana Monteoliva,,
1,doi_dedup___::2577d684a839dc78be3e1307914cdfd5,2,Angelo,Plastino,Angelo Plastino,0000-0001-5934-2783,
2,doi_dedup___::2577d684a839dc78be3e1307914cdfd5,3,Angel Ricardo,Plastino,Angel Ricardo Plastino,,
3,doi_________::01bdfeebe25730711be433245a4426e7,1,María S.,Leguizamón‐Aparicio,María S. Leguizamón‐Aparicio,,
4,doi_________::01bdfeebe25730711be433245a4426e7,2,Juan J.,Musci,Juan J. Musci,,
...,...,...,...,...,...,...,...
18572,doi_dedup___::42f7f1f2a1e586ca7bb4942987b54b71,5,,,Diego Bautista Genovese,0000-0002-6677-2471,
18573,doi_dedup___::10fb361856b54e85f1dd2a296b90d1ad,1,,Ana María Vargas Díaz,Ana María Vargas Díaz,,
18574,doi_dedup___::10fb361856b54e85f1dd2a296b90d1ad,2,,Stella Maris Martín,Stella Maris Martín,,
18575,doi_dedup___::10fb361856b54e85f1dd2a296b90d1ad,3,,Roxana Mariani,Roxana Mariani,,0000-0002-0066-6692


## Paso 5: Agregar la columna load_datetime

In [12]:
# Stamp every row with the day this notebook was run.
# NOTE(review): despite the column name, date.today() has no time component —
# confirm that a plain date is what downstream consumers expect.
df_researchproduct2creator = df_researchproduct2creator.assign(
    load_datetime=date.today()
)

In [13]:
# Display the final frame, now including the load_datetime column.
df_researchproduct2creator

Unnamed: 0,dri:objIdentifier,@rank,@name,@surname,#text,@orcid,@orcid_pending,load_datetime
0,doi_dedup___::2577d684a839dc78be3e1307914cdfd5,1,Diana,Monteoliva,Diana Monteoliva,,,2024-11-28
1,doi_dedup___::2577d684a839dc78be3e1307914cdfd5,2,Angelo,Plastino,Angelo Plastino,0000-0001-5934-2783,,2024-11-28
2,doi_dedup___::2577d684a839dc78be3e1307914cdfd5,3,Angel Ricardo,Plastino,Angel Ricardo Plastino,,,2024-11-28
3,doi_________::01bdfeebe25730711be433245a4426e7,1,María S.,Leguizamón‐Aparicio,María S. Leguizamón‐Aparicio,,,2024-11-28
4,doi_________::01bdfeebe25730711be433245a4426e7,2,Juan J.,Musci,Juan J. Musci,,,2024-11-28
...,...,...,...,...,...,...,...,...
18572,doi_dedup___::42f7f1f2a1e586ca7bb4942987b54b71,5,,,Diego Bautista Genovese,0000-0002-6677-2471,,2024-11-28
18573,doi_dedup___::10fb361856b54e85f1dd2a296b90d1ad,1,,Ana María Vargas Díaz,Ana María Vargas Díaz,,,2024-11-28
18574,doi_dedup___::10fb361856b54e85f1dd2a296b90d1ad,2,,Stella Maris Martín,Stella Maris Martín,,,2024-11-28
18575,doi_dedup___::10fb361856b54e85f1dd2a296b90d1ad,3,,Roxana Mariani,Roxana Mariani,,0000-0002-0066-6692,2024-11-28
