In [41]:
pip install pyxlsb



In [42]:
import requests
import pandas as pd
from io import BytesIO

*Leemos los datasets*

In [43]:
url = "https://github.com/Convergent-Sequence/Korea/raw/main/Data/OECD%20(version%201).xlsb"
response = requests.get(url)

if response.status_code == 200:
    excel_data = BytesIO(response.content)
    df_oecd1 = pd.read_excel(excel_data, engine='pyxlsb', sheet_name="OECD.DCD",index_col='Recipient')
    df_oecd2 = pd.read_excel(excel_data, engine='pyxlsb', sheet_name="OECD",index_col=0)
    df_koica = pd.read_excel(excel_data, engine='pyxlsb', sheet_name="KOICA",index_col=0)
    df_war = pd.read_excel(excel_data, engine='pyxlsb', sheet_name="Korean war",index_col=0)
    df_tlc = pd.read_excel(excel_data, engine='pyxlsb', sheet_name="TLC",index_col=0)
    df_gdp = pd.read_excel(excel_data, engine='pyxlsb', sheet_name="GDP",index_col=0)
else:
    print("Error al descargar el archivo")

### oecd1

In [44]:
df_oecd1

Unnamed: 0_level_0,Donor,SECTOR,Sector,TIME_PERIOD,Time period,OBS_VALUE,BASE_PER
Recipient,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Croatia,Korea,998,Unallocated / unspecified,2002,2002,2.612561e+07,2022
Croatia,Korea,1000,All sectors,2002,2002,2.612561e+07,2022
Kenya,Korea,998,Unallocated / unspecified,2002,2002,3.217740e-01,2022
Kenya,Korea,1000,All sectors,2002,2002,3.217740e-01,2022
Nicaragua,Korea,998,Unallocated / unspecified,2002,2002,2.340877e+06,2022
...,...,...,...,...,...,...,...
Lao Peopleâ€™s Democratic Republic,Korea,160,Other social infrastructure and services,2023,2023,1.510723e+06,2022
Lao Peopleâ€™s Democratic Republic,Korea,200,Economic infrastructure and services,2023,2023,6.482256e+06,2022
Lao Peopleâ€™s Democratic Republic,Korea,210,Transport and storage,2023,2023,2.812419e+06,2022
Lao Peopleâ€™s Democratic Republic,Korea,220,Communications,2023,2023,2.110840e+05,2022


In [45]:
df_oecd1.drop(columns=['Donor','SECTOR','TIME_PERIOD','Time period','BASE_PER'], axis=1, inplace=True)
df_oecd1

Unnamed: 0_level_0,Sector,OBS_VALUE
Recipient,Unnamed: 1_level_1,Unnamed: 2_level_1
Croatia,Unallocated / unspecified,2.612561e+07
Croatia,All sectors,2.612561e+07
Kenya,Unallocated / unspecified,3.217740e-01
Kenya,All sectors,3.217740e-01
Nicaragua,Unallocated / unspecified,2.340877e+06
...,...,...
Lao Peopleâ€™s Democratic Republic,Other social infrastructure and services,1.510723e+06
Lao Peopleâ€™s Democratic Republic,Economic infrastructure and services,6.482256e+06
Lao Peopleâ€™s Democratic Republic,Transport and storage,2.812419e+06
Lao Peopleâ€™s Democratic Republic,Communications,2.110840e+05


In [46]:
df_oecd1['Sector'].unique()

array(['Unallocated / unspecified', 'All sectors',
       'Social infrastructure and services', 'Health',
       'Water supply & sanitation', 'Government and civil society',
       'Economic infrastructure and services', 'Communications',
       'Production sectors', 'Agriculture, forestry, fishing',
       'Industry, mining, construction', 'Trade policies and regulations',
       'Humanitarian aid', 'Education', 'Transport and storage',
       'Multi-sector / Cross-cutting', 'General environment protection',
       'Energy', 'Other social infrastructure and services',
       'Other multisector', 'Banking and financial services',
       'Business and other services', 'Tourism',
       'Commodity aid / General programme assistance',
       'Population policies/Programmes & reproductive health',
       'Action relating to debt', 'Administrative costs of donors'],
      dtype=object)

In [47]:
df_oecd1 = df_oecd1[df_oecd1['Sector'].isin(['Social infrastructure and services', 'Economic infrastructure and services',
                                             'Production sectors','Multi-sector / Cross-cutting'])]
df_oecd1

Unnamed: 0_level_0,Sector,OBS_VALUE
Recipient,Unnamed: 1_level_1,Unnamed: 2_level_1
Nicaragua,Social infrastructure and services,5.461726e+06
Nicaragua,Economic infrastructure and services,7.339000e-03
Nicaragua,Production sectors,1.739800e-02
Panama,Social infrastructure and services,2.926160e-01
Panama,Economic infrastructure and services,2.298600e-02
...,...,...
Indonesia,Economic infrastructure and services,1.335312e+07
Indonesia,Production sectors,1.633287e+07
Indonesia,Multi-sector / Cross-cutting,5.379229e+06
Lao Peopleâ€™s Democratic Republic,Social infrastructure and services,3.997658e+07


In [48]:
# Paso 1: Crear columna binaria de presencia (sin perder OBS_VALUE)
df_oecd1['presente'] = 1

# Paso 2: Resetear índice para usar 'Recipient' como columna
df_oecd1.reset_index(inplace=True)

# Paso 3: Crear variables binarias por sector
df_bin = df_oecd1.pivot_table(
    index='Recipient',
    columns='Sector',
    values='presente',
    aggfunc='max',
    fill_value=0
)

# Paso 4: Calcular promedio de OBS_VALUE por país
df_prom = df_oecd1.groupby('Recipient')['OBS_VALUE'].mean().to_frame(name='OBS_VALUE_mean')

# Paso 5: Juntar binarios + promedio
df_oecd1 = df_prom.join(df_bin)

# (Opcional) Reordenar columnas
cols = ['OBS_VALUE_mean'] + [col for col in df_oecd1.columns if col != 'OBS_VALUE_mean']
df_oecd1 = df_oecd1[cols]

# Paso 6: Limpiar el nombre del eje de columnas (por estética)
df_oecd1.columns.name = None

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_oecd1['presente'] = 1


In [49]:
df_oecd1.rename(columns={'OBS_VALUE_mean': 'Obs_mean'}, inplace=True)
df_oecd1

Unnamed: 0_level_0,Obs_mean,Economic infrastructure and services,Multi-sector / Cross-cutting,Production sectors,Social infrastructure and services
Recipient,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Afghanistan,7.872309e+06,1,1,1,1
Albania,9.881099e+05,1,1,1,1
Algeria,4.560489e+05,1,1,1,1
Angola,3.885601e+06,1,1,1,1
Antigua and Barbuda,8.200050e-02,1,1,0,1
...,...,...,...,...,...
Viet Nam,2.855539e+07,1,1,1,1
Western Africa unspecified,5.621264e+05,0,0,1,1
Yemen,4.454313e+05,1,1,1,1
Zambia,6.462298e+04,1,1,1,1


####oecd2

In [50]:
df_oecd2

Unnamed: 0,2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,...,2016,2017,2018,2019,2020,2021,2022,2023,2024,Unnamed: 24
Afghanistan,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,N
Albania,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,N
Algeria,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,N
Angola,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,N
Antigua and Barbuda,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,N
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Viet Nam,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,N
Western Africa unspecified,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,N
Yemen,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,N
Zambia,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,N


In [51]:
years = [i for i in range(2002,2025)]
df_oecd2.drop(columns=years, axis=1, inplace=True)
df_oecd2

Unnamed: 0,Unnamed: 24
Afghanistan,N
Albania,N
Algeria,N
Angola,N
Antigua and Barbuda,N
...,...
Viet Nam,N
Western Africa unspecified,N
Yemen,N
Zambia,N


In [52]:
df_oecd2['Unnamed: 24'] = df_oecd2['Unnamed: 24'].map({'Y': 1, 'N': 0})
df_oecd2.rename(columns={'Unnamed: 24': 'OECD'}, inplace=True)
df_oecd2

Unnamed: 0,OECD
Afghanistan,0
Albania,0
Algeria,0
Angola,0
Antigua and Barbuda,0
...,...
Viet Nam,0
Western Africa unspecified,0
Yemen,0
Zambia,0


###Koica

In [53]:
df_koica

Unnamed: 0,KOICA
Afghanistan,0
Albania,0
Algeria,0
Angola,0
Antigua and Barbuda,0
...,...
Viet Nam,1
Western Africa unspecified,0
Yemen,0
Zambia,0


###War

In [54]:
df_war

Unnamed: 0,Guerra de Corea
Afghanistan,0
Albania,0
Algeria,0
Angola,0
Antigua and Barbuda,0
...,...
Viet Nam,0
Western Africa unspecified,0
Yemen,0
Zambia,0


###gpd

In [55]:
df_gdp

Unnamed: 0_level_0,2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,...,2015,2016,2017,2018,2019,2020,2021,2022,2023,Unnamed: 23
Country Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Afghanistan,3825701438.999633,4520946818.545814,5224896718.67782,6203256538.709669,6971758282.293506,9747886187.393929,1.010930e+10,1.241615e+10,1.585667e+10,1.780510e+10,...,19134221644.732494,18116572395.077213,18753456497.815865,18053222687.412624,18799444490.112782,19955929052.149597,14259995441.075907,14497243872.133739,17233051620.111732,..
Albania,4348068242.195123,5611496257.142313,7184685781.518761,8052077248.146379,8896075004.635162,10677324852.882233,1.288135e+10,1.204421e+10,1.192693e+10,1.289076e+10,...,11386853113.0189,11861199830.83956,13019726211.736889,15379509891.719603,15585105131.064415,15241458744.868845,18032010563.755798,19017242585.780338,23547179830.441257,..
Algeria,61516103406.168777,73482264190.924545,91913680985.170822,107046618669.707001,123084258693.009735,142482739809.849274,1.803838e+11,1.503173e+11,1.777851e+11,2.183319e+11,...,187493855609.344635,180763839522.150818,189880896903.073303,194554483655.52774,193459662090.676819,164873415325.201477,186231205262.082367,225638456572.142761,247626161016.414459,..
Angola,15285592370.373981,17812704586.43969,23552057679.499435,36970900883.810905,52381025141.426811,65266415494.258392,8.853867e+10,7.030720e+10,8.379947e+10,1.117897e+11,...,90496420506.595657,52761617225.925285,73690154990.731232,79450688259.366364,70897962732.02774,48501561203.568634,66505129987.723549,104399746853.401413,84824654481.72493,..
Antigua and Barbuda,897988888.888889,947955555.555555,1026181481.481481,1143714814.814815,1303548148.148148,1487300000,1.557541e+09,1.386444e+09,1.298256e+09,1.281337e+09,...,1437755555.555555,1489692592.592592,1531151851.851851,1661529629.62963,1725351851.851852,1410796296.296296,1601366666.666667,1867733333.333333,2033085185.185185,..
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Viet Nam,35064105500.83445,39552513231.916878,45427854693.255432,57633255738.199158,66371664817.043625,77414425532.245163,9.913030e+10,1.060147e+11,1.472012e+11,1.725950e+11,...,239258328381.741394,257096001177.981842,281353605986.903442,310106478394.658875,334365270496.667053,346615738537.796326,366474752771.009338,410324028883.325256,429716969043.571716,..
"Venezuela, RB",92893587733.654922,83620628582.108154,112451400424.963898,145513489651.872223,183477522123.893829,230364012575.687012,3.159534e+11,3.297876e+11,3.931924e+11,3.164822e+11,...,..,..,..,..,..,..,..,..,..,..
Zambia,4193850445.426323,4901869764.059568,6221110219.455416,8331870169.149771,12756858899.281174,14056957976.264833,1.791086e+10,1.532834e+10,2.026556e+10,2.345952e+10,...,21251216798.776245,20958412538.309345,25873601260.835304,26311507273.673538,23308667781.225754,18137764930.65097,22096416932.008896,29163782140.485832,27577956471.243988,..
"Yemen, Rep.",10693430511.466354,11777532662.049891,13867634371.477274,16731566717.188839,19063143369.857979,21650528674.093262,2.691086e+10,2.513028e+10,3.090675e+10,3.272642e+10,...,42444489522.230309,31317824943.207253,26842228828.562885,21606160783.984451,..,..,..,..,..,..


In [56]:
df_gdp = df_gdp.replace(',', '', regex=True).apply(pd.to_numeric, errors='coerce')
df_gdp

Unnamed: 0_level_0,2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,...,2015,2016,2017,2018,2019,2020,2021,2022,2023,Unnamed: 23
Country Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Afghanistan,3.825701e+09,4.520947e+09,5.224897e+09,6.203257e+09,6.971758e+09,9.747886e+09,1.010930e+10,1.241615e+10,1.585667e+10,1.780510e+10,...,1.913422e+10,1.811657e+10,1.875346e+10,1.805322e+10,1.879944e+10,1.995593e+10,1.426000e+10,1.449724e+10,1.723305e+10,
Albania,4.348068e+09,5.611496e+09,7.184686e+09,8.052077e+09,8.896075e+09,1.067732e+10,1.288135e+10,1.204421e+10,1.192693e+10,1.289076e+10,...,1.138685e+10,1.186120e+10,1.301973e+10,1.537951e+10,1.558511e+10,1.524146e+10,1.803201e+10,1.901724e+10,2.354718e+10,
Algeria,6.151610e+10,7.348226e+10,9.191368e+10,1.070466e+11,1.230843e+11,1.424827e+11,1.803838e+11,1.503173e+11,1.777851e+11,2.183319e+11,...,1.874939e+11,1.807638e+11,1.898809e+11,1.945545e+11,1.934597e+11,1.648734e+11,1.862312e+11,2.256385e+11,2.476262e+11,
Angola,1.528559e+10,1.781270e+10,2.355206e+10,3.697090e+10,5.238103e+10,6.526642e+10,8.853867e+10,7.030720e+10,8.379947e+10,1.117897e+11,...,9.049642e+10,5.276162e+10,7.369015e+10,7.945069e+10,7.089796e+10,4.850156e+10,6.650513e+10,1.043997e+11,8.482465e+10,
Antigua and Barbuda,8.979889e+08,9.479556e+08,1.026181e+09,1.143715e+09,1.303548e+09,1.487300e+09,1.557541e+09,1.386444e+09,1.298256e+09,1.281337e+09,...,1.437756e+09,1.489693e+09,1.531152e+09,1.661530e+09,1.725352e+09,1.410796e+09,1.601367e+09,1.867733e+09,2.033085e+09,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Viet Nam,3.506411e+10,3.955251e+10,4.542785e+10,5.763326e+10,6.637166e+10,7.741443e+10,9.913030e+10,1.060147e+11,1.472012e+11,1.725950e+11,...,2.392583e+11,2.570960e+11,2.813536e+11,3.101065e+11,3.343653e+11,3.466157e+11,3.664748e+11,4.103240e+11,4.297170e+11,
"Venezuela, RB",9.289359e+10,8.362063e+10,1.124514e+11,1.455135e+11,1.834775e+11,2.303640e+11,3.159534e+11,3.297876e+11,3.931924e+11,3.164822e+11,...,,,,,,,,,,
Zambia,4.193850e+09,4.901870e+09,6.221110e+09,8.331870e+09,1.275686e+10,1.405696e+10,1.791086e+10,1.532834e+10,2.026556e+10,2.345952e+10,...,2.125122e+10,2.095841e+10,2.587360e+10,2.631151e+10,2.330867e+10,1.813776e+10,2.209642e+10,2.916378e+10,2.757796e+10,
"Yemen, Rep.",1.069343e+10,1.177753e+10,1.386763e+10,1.673157e+10,1.906314e+10,2.165053e+10,2.691086e+10,2.513028e+10,3.090675e+10,3.272642e+10,...,4.244449e+10,3.131782e+10,2.684223e+10,2.160616e+10,,,,,,


In [57]:
df_gpdy = df_gdp.copy()
df_gpdy = df_gpdy.T.fillna(df_gpdy.mean(axis=1)).T
df_gpdy.drop(columns=['Unnamed: 23'], axis=1, inplace=True)
df_gpdy.columns = [f"gdp_{col}" for col in df_gpdy.columns]
df_gpdy

Unnamed: 0_level_0,gdp_2002,gdp_2003,gdp_2004,gdp_2005,gdp_2006,gdp_2007,gdp_2008,gdp_2009,gdp_2010,gdp_2011,...,gdp_2014,gdp_2015,gdp_2016,gdp_2017,gdp_2018,gdp_2019,gdp_2020,gdp_2021,gdp_2022,gdp_2023
Country Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Afghanistan,3.825701e+09,4.520947e+09,5.224897e+09,6.203257e+09,6.971758e+09,9.747886e+09,1.010930e+10,1.241615e+10,1.585667e+10,1.780510e+10,...,2.049713e+10,1.913422e+10,1.811657e+10,1.875346e+10,1.805322e+10,1.879944e+10,1.995593e+10,1.426000e+10,1.449724e+10,1.723305e+10
Albania,4.348068e+09,5.611496e+09,7.184686e+09,8.052077e+09,8.896075e+09,1.067732e+10,1.288135e+10,1.204421e+10,1.192693e+10,1.289076e+10,...,1.322815e+10,1.138685e+10,1.186120e+10,1.301973e+10,1.537951e+10,1.558511e+10,1.524146e+10,1.803201e+10,1.901724e+10,2.354718e+10
Algeria,6.151610e+10,7.348226e+10,9.191368e+10,1.070466e+11,1.230843e+11,1.424827e+11,1.803838e+11,1.503173e+11,1.777851e+11,2.183319e+11,...,2.389427e+11,1.874939e+11,1.807638e+11,1.898809e+11,1.945545e+11,1.934597e+11,1.648734e+11,1.862312e+11,2.256385e+11,2.476262e+11
Angola,1.528559e+10,1.781270e+10,2.355206e+10,3.697090e+10,5.238103e+10,6.526642e+10,8.853867e+10,7.030720e+10,8.379947e+10,1.117897e+11,...,1.359668e+11,9.049642e+10,5.276162e+10,7.369015e+10,7.945069e+10,7.089796e+10,4.850156e+10,6.650513e+10,1.043997e+11,8.482465e+10
Antigua and Barbuda,8.979889e+08,9.479556e+08,1.026181e+09,1.143715e+09,1.303548e+09,1.487300e+09,1.557541e+09,1.386444e+09,1.298256e+09,1.281337e+09,...,1.378830e+09,1.437756e+09,1.489693e+09,1.531152e+09,1.661530e+09,1.725352e+09,1.410796e+09,1.601367e+09,1.867733e+09,2.033085e+09
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Viet Nam,3.506411e+10,3.955251e+10,4.542785e+10,5.763326e+10,6.637166e+10,7.741443e+10,9.913030e+10,1.060147e+11,1.472012e+11,1.725950e+11,...,2.334515e+11,2.392583e+11,2.570960e+11,2.813536e+11,3.101065e+11,3.343653e+11,3.466157e+11,3.664748e+11,4.103240e+11,4.297170e+11
"Venezuela, RB",9.289359e+10,8.362063e+10,1.124514e+11,1.455135e+11,1.834775e+11,2.303640e+11,3.159534e+11,3.297876e+11,3.931924e+11,3.164822e+11,...,4.823593e+11,2.644913e+11,2.644913e+11,2.644913e+11,2.644913e+11,2.644913e+11,2.644913e+11,2.644913e+11,2.644913e+11,2.644913e+11
Zambia,4.193850e+09,4.901870e+09,6.221110e+09,8.331870e+09,1.275686e+10,1.405696e+10,1.791086e+10,1.532834e+10,2.026556e+10,2.345952e+10,...,2.714102e+10,2.125122e+10,2.095841e+10,2.587360e+10,2.631151e+10,2.330867e+10,1.813776e+10,2.209642e+10,2.916378e+10,2.757796e+10
"Yemen, Rep.",1.069343e+10,1.177753e+10,1.386763e+10,1.673157e+10,1.906314e+10,2.165053e+10,2.691086e+10,2.513028e+10,3.090675e+10,3.272642e+10,...,4.322859e+10,4.244449e+10,3.131782e+10,2.684223e+10,2.160616e+10,2.651259e+10,2.651259e+10,2.651259e+10,2.651259e+10,2.651259e+10


In [58]:
df_gdp = df_gdp.T.fillna(df_gdp.mean(axis=1)).T
df_gdp["PIB_promedio"] = df_gdp.mean(axis=1)
df_gdp = df_gdp[["PIB_promedio"]]
df_gdp

Unnamed: 0_level_0,PIB_promedio
Country Name,Unnamed: 1_level_1
Afghanistan,1.418344e+10
Albania,1.254125e+10
Algeria,1.723933e+11
Angola,7.425412e+10
Antigua and Barbuda,1.414550e+09
...,...
Viet Nam,2.029303e+11
"Venezuela, RB",2.644913e+11
Zambia,1.921761e+10
"Yemen, Rep.",2.651259e+10


###tlc

In [59]:
df_tlc

Unnamed: 0,2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,...,2017,2018,2019,2020,2021,2022,2023,2024,Unnamed: 24,Unnamed: 25
Afghanistan,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,N,
Albania,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,N,
Algeria,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,N,
Angola,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,N,
Antigua and Barbuda,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,N,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Viet Nam,0,0,0,0,0,0,0,0,0,0,...,1,1,1,1,1,1,1,1,Y,
Western Africa unspecified,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,N,
Yemen,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,N,
Zambia,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,N,


In [60]:
years.append('Unnamed: 25')
df_tlc.drop(columns=years, axis=1, inplace=True)
df_tlc

Unnamed: 0,Unnamed: 24
Afghanistan,N
Albania,N
Algeria,N
Angola,N
Antigua and Barbuda,N
...,...
Viet Nam,Y
Western Africa unspecified,N
Yemen,N
Zambia,N


In [61]:
df_tlc['Unnamed: 24'] = df_tlc['Unnamed: 24'].map({'Y': 1, 'N': 0})
df_tlc.rename(columns={'Unnamed: 24': 'TLC'}, inplace=True)
df_tlc

Unnamed: 0,TLC
Afghanistan,0
Albania,0
Algeria,0
Angola,0
Antigua and Barbuda,0
...,...
Viet Nam,1
Western Africa unspecified,0
Yemen,0
Zambia,0


###Balanza

In [62]:
#balanza
url = "https://github.com/Convergent-Sequence/Korea/raw/main/Data/balanza%20comercial.xlsx"

response = requests.get(url)

if response.status_code == 200:
    # Read the Excel file directly from the downloaded content
    excel_data = BytesIO(response.content)
    df_balanza = pd.read_excel(excel_data,header = 1,index_col=0)
    #print(df.head()
else:
    print(f"Error al descargar el archivo: {response.status_code}")

df_balanza

Unnamed: 0_level_0,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020,2021,2022,2023,2024
Country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
China,23266890,20902581,18959979,14461935,32457567,45264679,47754035,53540193,62799338,55255585,46903546,37470718,44258294,55678600,28993635,23684806,24297635,1213075,-18039656,-6831506
United States of America,10711927,9523768,8490973,7944724,8642287,9403020,11606713,15154442,20566116,25065337,25914535,23361524,17953247,13973827,11523722,16671515,22698415,28042635,44590331,55956995
Viet Nam,2737611,3002620,4368466,5767732,4779507,6321258,8380676,10226726,13917278,14344157,17970521,20155559,31573023,28996860,27106237,27964319,32763363,34238963,27537753,29880641
"Hong Kong, China",13488003,16877230,16512139,17548549,18173753,23348124,28652393,30546846,25832071,25525900,28921457,31165326,37236352,44001845,30138583,29121253,35225633,25773581,23361682,32766518
"Taipei, Chinese",2813381,3708124,3060578,819.104,-350.273,1183419,3512376,802.898,1071118,-586.983,-4646915,-4181417,-3187122,4057223,-58.015,-1373227,809.71,-2076354,-4191874,3738439
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Saint Pierre and Miquelon,2,0,-1,0,0,0,0,0,0,0,0,0,-2,0,0,0,-1,,,
Western Sahara,0,-21,0,0,0,227,-89,1,0,-1,6,8,76.646,0,0,6,-1,150.284,,
Serbia and Montenegro,15.661,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,,,,
Mayotte,0,76,144,164,109,110,157,156,324,0,0,0,0,0,0,0,,,,


In [63]:
df_balanza = df_balanza.replace(',', '', regex=True).apply(pd.to_numeric, errors='coerce')
df_balanza

Unnamed: 0_level_0,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020,2021,2022,2023,2024
Country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
China,2.326689e+07,2.090258e+07,1.895998e+07,1.446194e+07,3.245757e+07,4.526468e+07,47754035.0,5.354019e+07,62799338.0,5.525558e+07,46903546.0,37470718.0,4.425829e+07,55678600.0,2.899364e+07,23684806.0,24297635.00,1.213075e+06,-18039656.0,-6831506.0
United States of America,1.071193e+07,9.523768e+06,8.490973e+06,7.944724e+06,8.642287e+06,9.403020e+06,11606713.0,1.515444e+07,20566116.0,2.506534e+07,25914535.0,23361524.0,1.795325e+07,13973827.0,1.152372e+07,16671515.0,22698415.00,2.804264e+07,44590331.0,55956995.0
Viet Nam,2.737611e+06,3.002620e+06,4.368466e+06,5.767732e+06,4.779507e+06,6.321258e+06,8380676.0,1.022673e+07,13917278.0,1.434416e+07,17970521.0,20155559.0,3.157302e+07,28996860.0,2.710624e+07,27964319.0,32763363.00,3.423896e+07,27537753.0,29880641.0
"Hong Kong, China",1.348800e+07,1.687723e+07,1.651214e+07,1.754855e+07,1.817375e+07,2.334812e+07,28652393.0,3.054685e+07,25832071.0,2.552590e+07,28921457.0,31165326.0,3.723635e+07,44001845.0,3.013858e+07,29121253.0,35225633.00,2.577358e+07,23361682.0,32766518.0
"Taipei, Chinese",2.813381e+06,3.708124e+06,3.060578e+06,8.191040e+02,-3.502730e+02,1.183419e+06,3512376.0,8.028980e+02,1071118.0,-5.869830e+02,-4646915.0,-4181417.0,-3.187122e+06,4057223.0,-5.801500e+01,-1373227.0,809.71,-2.076354e+06,-4191874.0,3738439.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Saint Pierre and Miquelon,2.000000e+00,0.000000e+00,-1.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.0,0.000000e+00,0.0,0.000000e+00,0.0,0.0,-2.000000e+00,0.0,0.000000e+00,0.0,-1.00,,,
Western Sahara,0.000000e+00,-2.100000e+01,0.000000e+00,0.000000e+00,0.000000e+00,2.270000e+02,-89.0,1.000000e+00,0.0,-1.000000e+00,6.0,8.0,7.664600e+01,0.0,0.000000e+00,6.0,-1.00,1.502840e+02,,
Serbia and Montenegro,1.566100e+01,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.0,0.000000e+00,0.0,0.000000e+00,0.0,0.0,0.000000e+00,0.0,0.000000e+00,0.0,,,,
Mayotte,0.000000e+00,7.600000e+01,1.440000e+02,1.640000e+02,1.090000e+02,1.100000e+02,157.0,1.560000e+02,324.0,0.000000e+00,0.0,0.0,0.000000e+00,0.0,0.000000e+00,0.0,,,,


In [64]:
df_balanzay = df_balanza.copy()
df_balanzay.columns = [f"balanza_{col}" for col in df_balanzay.columns]
df_balanzay

Unnamed: 0_level_0,balanza_2005,balanza_2006,balanza_2007,balanza_2008,balanza_2009,balanza_2010,balanza_2011,balanza_2012,balanza_2013,balanza_2014,balanza_2015,balanza_2016,balanza_2017,balanza_2018,balanza_2019,balanza_2020,balanza_2021,balanza_2022,balanza_2023,balanza_2024
Country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
China,2.326689e+07,2.090258e+07,1.895998e+07,1.446194e+07,3.245757e+07,4.526468e+07,47754035.0,5.354019e+07,62799338.0,5.525558e+07,46903546.0,37470718.0,4.425829e+07,55678600.0,2.899364e+07,23684806.0,24297635.00,1.213075e+06,-18039656.0,-6831506.0
United States of America,1.071193e+07,9.523768e+06,8.490973e+06,7.944724e+06,8.642287e+06,9.403020e+06,11606713.0,1.515444e+07,20566116.0,2.506534e+07,25914535.0,23361524.0,1.795325e+07,13973827.0,1.152372e+07,16671515.0,22698415.00,2.804264e+07,44590331.0,55956995.0
Viet Nam,2.737611e+06,3.002620e+06,4.368466e+06,5.767732e+06,4.779507e+06,6.321258e+06,8380676.0,1.022673e+07,13917278.0,1.434416e+07,17970521.0,20155559.0,3.157302e+07,28996860.0,2.710624e+07,27964319.0,32763363.00,3.423896e+07,27537753.0,29880641.0
"Hong Kong, China",1.348800e+07,1.687723e+07,1.651214e+07,1.754855e+07,1.817375e+07,2.334812e+07,28652393.0,3.054685e+07,25832071.0,2.552590e+07,28921457.0,31165326.0,3.723635e+07,44001845.0,3.013858e+07,29121253.0,35225633.00,2.577358e+07,23361682.0,32766518.0
"Taipei, Chinese",2.813381e+06,3.708124e+06,3.060578e+06,8.191040e+02,-3.502730e+02,1.183419e+06,3512376.0,8.028980e+02,1071118.0,-5.869830e+02,-4646915.0,-4181417.0,-3.187122e+06,4057223.0,-5.801500e+01,-1373227.0,809.71,-2.076354e+06,-4191874.0,3738439.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Saint Pierre and Miquelon,2.000000e+00,0.000000e+00,-1.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.0,0.000000e+00,0.0,0.000000e+00,0.0,0.0,-2.000000e+00,0.0,0.000000e+00,0.0,-1.00,,,
Western Sahara,0.000000e+00,-2.100000e+01,0.000000e+00,0.000000e+00,0.000000e+00,2.270000e+02,-89.0,1.000000e+00,0.0,-1.000000e+00,6.0,8.0,7.664600e+01,0.0,0.000000e+00,6.0,-1.00,1.502840e+02,,
Serbia and Montenegro,1.566100e+01,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.0,0.000000e+00,0.0,0.000000e+00,0.0,0.0,0.000000e+00,0.0,0.000000e+00,0.0,,,,
Mayotte,0.000000e+00,7.600000e+01,1.440000e+02,1.640000e+02,1.090000e+02,1.100000e+02,157.0,1.560000e+02,324.0,0.000000e+00,0.0,0.0,0.000000e+00,0.0,0.000000e+00,0.0,,,,


In [65]:
df_balanza = df_balanza.T.fillna(df_balanza.mean(axis=1)).T
df_balanza["Balanza_promedio"] = df_balanza.mean(axis=1)
df_balanza = df_balanza[["Balanza_promedio"]]
df_balanza

Unnamed: 0_level_0,Balanza_promedio
Country,Unnamed: 1_level_1
China,3.061460e+07
United States of America,1.938980e+07
Viet Nam,1.760166e+07
"Hong Kong, China",2.671086e+07
"Taipei, Chinese",1.744593e+05
...,...
Saint Pierre and Miquelon,-1.176471e-01
Western Sahara,2.016278e+01
Serbia and Montenegro,9.788125e-01
Mayotte,7.750000e+01


###hdr

In [66]:
#hdr
url = "https://github.com/Convergent-Sequence/Korea/raw/main/Data/hdr-data.xlsx"

response = requests.get(url)

if response.status_code == 200:
    excel_data = BytesIO(response.content)
    df_hdr = pd.read_excel(excel_data)
    df_hdr.set_index('country', inplace=True)
else:
    print(f"Error al descargar el archivo: {response.status_code}")
df_hdr

Unnamed: 0_level_0,countryIsoCode,indexCode,index,dimension,indicatorCode,indicator,year,value,note
country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Afghanistan,AFG,HDI,Human Development Index,,hdi_rank,HDI Rank,2022,182,
Angola,AGO,HDI,Human Development Index,,hdi_rank,HDI Rank,2022,150,
Albania,ALB,HDI,Human Development Index,,hdi_rank,HDI Rank,2022,74,
Argentina,ARG,HDI,Human Development Index,,hdi_rank,HDI Rank,2022,48,
Armenia,ARM,HDI,Human Development Index,,hdi_rank,HDI Rank,2022,76,
...,...,...,...,...,...,...,...,...,...
Samoa,WSM,HDI,Human Development Index,,hdi_rank,HDI Rank,2022,116,
Yemen,YEM,HDI,Human Development Index,,hdi_rank,HDI Rank,2022,186,
South Africa,ZAF,HDI,Human Development Index,,hdi_rank,HDI Rank,2022,110,
Zambia,ZMB,HDI,Human Development Index,,hdi_rank,HDI Rank,2022,153,


In [67]:
df_hdr.drop(columns=['countryIsoCode','indexCode','index','dimension','indicatorCode','indicator','note','year'], axis=1, inplace=True)
df_hdr.rename(columns={'value': 'hdr_value'}, inplace=True)
df_hdr

Unnamed: 0_level_0,hdr_value
country,Unnamed: 1_level_1
Afghanistan,182
Angola,150
Albania,74
Argentina,48
Armenia,76
...,...
Samoa,116
Yemen,186
South Africa,110
Zambia,153


###ya pasó lo peor, ahora viene lo más culero, unir los datasets.

In [68]:
datasets = {
    'oecd1': df_oecd1,
    'oecd2': df_oecd2,
    'koica': df_koica,
    'war': df_war,
    'gdp': df_gdp,
    'tlc': df_tlc,
    'balanza': df_balanza,
    'hdr': df_hdr
}

for name, df in datasets.items():
    print("dataset:", name, "\n")
    print(list(df.index))


dataset: oecd1 

['Afghanistan', 'Albania', 'Algeria', 'Angola', 'Antigua and Barbuda', 'Argentina', 'Armenia', 'Azerbaijan', 'Bangladesh', 'Barbados', 'Belarus', 'Belize', 'Benin', 'Bhutan', 'Bolivia', 'Bosnia and Herzegovina', 'Botswana', 'Brazil', 'Burkina Faso', 'Burundi', 'Cabo Verde', 'Cambodia', 'Cameroon', 'Caribbean unspecified', 'Central African Republic', 'Central America unspecified', 'Central Asia unspecified', 'Chad', 'Chile', 'China (Peopleâ€™s Republic of)', 'Colombia', 'Comoros', 'Congo', 'Cook Islands', 'Costa Rica', 'Croatia', 'Cuba', 'CÃ´te dâ€™Ivoire', 'Democratic Republic of the Congo', 'Djibouti', 'Dominica', 'Dominican Republic', 'Eastern Africa unspecified', 'Ecuador', 'Egypt', 'El Salvador', 'Equatorial Guinea', 'Eritrea', 'Eswatini', 'Ethiopia', 'Europe unspecified', 'Far East Asia unspecified', 'Fiji', 'Gabon', 'Gambia', 'Georgia', 'Ghana', 'Grenada', 'Guatemala', 'Guinea', 'Guinea-Bissau', 'Guyana', 'Haiti', 'Honduras', 'India', 'Indonesia', 'Iran', 'Iraq',

In [69]:
# Obtener la intersección de todos los índices
interseccion_indices = set.intersection(*(set(df.index) for df in datasets.values()))

print("Índices comunes en todos los datasets:\n", interseccion_indices)
print(len(interseccion_indices))


Índices comunes en todos los datasets:
 {'Cameroon', 'Georgia', 'Cabo Verde', 'Tonga', 'Nepal', 'Bhutan', 'Argentina', 'Afghanistan', 'Sudan', 'India', 'Fiji', 'Saudi Arabia', 'Zimbabwe', 'Serbia', 'Brazil', 'Haiti', 'Somalia', 'Myanmar', 'Armenia', 'Algeria', 'Tajikistan', 'Viet Nam', 'Albania', 'Benin', 'Guinea', 'Guatemala', 'Malaysia', 'Thailand', 'Mali', 'Mozambique', 'Sao Tome and Principe', 'Montenegro', 'Dominican Republic', 'Nigeria', 'El Salvador', 'Grenada', 'Peru', 'Philippines', 'Belize', 'Ethiopia', 'Vanuatu', 'Nauru', 'Solomon Islands', 'South Sudan', 'Panama', 'Ecuador', 'Mauritania', 'Turkmenistan', 'Djibouti', 'Malawi', 'Jamaica', 'Mongolia', 'Madagascar', 'Namibia', 'Seychelles', 'Syrian Arab Republic', 'Gabon', 'Burundi', 'Morocco', 'Belarus', 'Chile', 'Kenya', 'Nicaragua', 'Oman', 'Burkina Faso', 'Ukraine', 'Senegal', 'Jordan', 'Chad', 'Costa Rica', 'Marshall Islands', 'Croatia', 'Iraq', 'Tuvalu', 'Cambodia', 'Lebanon', 'Ghana', 'Pakistan', 'Sierra Leone', 'Samoa',

*Creamos uno sin los años*

In [70]:
interseccion_indices = list(interseccion_indices)
datasets_filtrados = {}

for name, df in datasets.items():
    df_filtrado = df.loc[interseccion_indices]
    df_filtrado = df_filtrado.loc[~df_filtrado.index.duplicated()]
    datasets_filtrados[name] = df_filtrado.copy()


In [71]:
datasets_prefijados = {
    name: df.add_prefix(f"{name}_") for name, df in datasets_filtrados.items()
}


In [72]:
df_final = pd.concat(datasets_filtrados.values(), axis=1)
df_final

Unnamed: 0,Obs_mean,Economic infrastructure and services,Multi-sector / Cross-cutting,Production sectors,Social infrastructure and services,OECD,KOICA,Guerra de Corea,PIB_promedio,TLC,Balanza_promedio,hdr_value
Cameroon,2.056159e+06,1,1,1,1,0,0,0,3.121715e+10,0,-6.089255e+01,151
Georgia,3.035524e+04,1,1,1,1,0,0,0,1.427239e+10,0,6.643400e+01,60
Cabo Verde,2.342777e-01,1,0,1,1,0,0,0,1.748340e+09,0,3.285901e+02,131
Tonga,7.218053e-02,1,1,1,1,0,0,0,3.859794e+08,0,-7.170340e+01,98
Nepal,3.401407e+06,1,1,1,1,0,1,0,2.156057e+10,0,3.021065e+01,146
...,...,...,...,...,...,...,...,...,...,...,...,...
Indonesia,9.115240e+06,1,1,1,1,0,1,0,7.770246e+11,1,-2.604224e+06,112
Togo,6.377484e+04,1,1,1,1,0,0,0,5.523183e+09,0,2.789101e+05,163
Bosnia and Herzegovina,3.975134e+06,1,1,1,1,0,0,0,1.729656e+10,0,-3.375550e+00,80
Zambia,6.462298e+04,1,1,1,1,0,0,0,1.921761e+10,0,-2.333061e+02,153


In [75]:
df_final.to_csv("categorical_korea.csv", index=True)

*Creamos otro con los años*




In [73]:
datasets2 = {
    'oecd1': df_oecd1,
    'oecd2': df_oecd2,
    'koica': df_koica,
    'war': df_war,
    'gdp': df_gpdy,
    'tlc': df_tlc,
    'balanza': df_balanzay,
    'hdr': df_hdr
}

interseccion_indices2 = set.intersection(*(set(df.index) for df in datasets2.values()))

interseccion_indices2 = list(interseccion_indices2)
datasets_filtrados2 = {}

for name, df in datasets2.items():
    df_filtrado = df.loc[interseccion_indices]
    df_filtrado = df_filtrado.loc[~df_filtrado.index.duplicated()]
    datasets_filtrados[name] = df_filtrado.copy()

df_final2 = pd.concat(datasets_filtrados.values(), axis=1)
df_final2

Unnamed: 0,Obs_mean,Economic infrastructure and services,Multi-sector / Cross-cutting,Production sectors,Social infrastructure and services,OECD,KOICA,Guerra de Corea,gdp_2002,gdp_2003,...,balanza_2016,balanza_2017,balanza_2018,balanza_2019,balanza_2020,balanza_2021,balanza_2022,balanza_2023,balanza_2024,hdr_value
Cameroon,2.056159e+06,1,1,1,1,0,0,0,1.241725e+10,1.597032e+10,...,20.914,-31.077,44.398,-15.072,-23.656,23.520,60.190,58.906,-131.426,151
Georgia,3.035524e+04,1,1,1,1,0,0,0,3.395767e+09,3.991378e+09,...,73.525,23.016,9.603,69.726,83.486,92.024,110.586,181.046,126.208,60
Cabo Verde,2.342777e-01,1,0,1,1,0,0,0,6.205074e+08,8.132605e+08,...,1.414,1.511,6.672,3.914,930.000,3.729,3.111,1.590,3.055,131
Tonga,7.218053e-02,1,1,1,1,0,0,0,1.827643e+08,2.022466e+08,...,-249.000,-3.213,-1.736,473.000,-1.230,-1.450,-723.000,2.591,155.000,98
Nepal,3.401407e+06,1,1,1,1,0,1,0,6.050876e+09,6.330473e+09,...,23.656,32.577,32.999,45.250,57.320,81.542,32.259,25.202,39.440,146
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Indonesia,9.115240e+06,1,1,1,1,0,1,0,1.956606e+11,2.347725e+11,...,-1677603.000,-1160558.000,-2291487.000,-1165783.000,-1277196.000,-2175144.000,-5518951.000,-3005662.000,-4624983.000,112
Togo,6.377484e+04,1,1,1,1,0,0,0,2.410199e+09,2.987984e+09,...,309.013,1011828.000,1016708.000,1252772.000,763.733,433.353,2293943.000,975.432,141.727,163
Bosnia and Herzegovina,3.975134e+06,1,1,1,1,0,0,0,6.728221e+09,8.498894e+09,...,-8.850,-17.788,-18.538,-20.146,-13.468,-20.117,-37.135,-30.823,-20.276,80
Zambia,6.462298e+04,1,1,1,1,0,0,0,4.193850e+09,4.901870e+09,...,-273.884,-164.972,-81.688,-38.507,2.372,-72.215,-68.130,-38.408,-3.445,153


In [76]:
df_final2.to_csv("historical_korea.csv", index=True)

###Verificación de lectura de csv

In [79]:
url = "https://raw.githubusercontent.com/Convergent-Sequence/Korea/refs/heads/main/TransformedData/categorical_korea.csv"
df_cat = pd.read_csv(url,index_col=0)
df_cat

Unnamed: 0,Obs_mean,Economic infrastructure and services,Multi-sector / Cross-cutting,Production sectors,Social infrastructure and services,OECD,KOICA,Guerra de Corea,PIB_promedio,TLC,Balanza_promedio,hdr_value
Cameroon,2.056159e+06,1,1,1,1,0,0,0,3.121715e+10,0,-6.089255e+01,151
Georgia,3.035524e+04,1,1,1,1,0,0,0,1.427239e+10,0,6.643400e+01,60
Cabo Verde,2.342777e-01,1,0,1,1,0,0,0,1.748340e+09,0,3.285901e+02,131
Tonga,7.218053e-02,1,1,1,1,0,0,0,3.859794e+08,0,-7.170340e+01,98
Nepal,3.401407e+06,1,1,1,1,0,1,0,2.156057e+10,0,3.021065e+01,146
...,...,...,...,...,...,...,...,...,...,...,...,...
Indonesia,9.115240e+06,1,1,1,1,0,1,0,7.770246e+11,1,-2.604224e+06,112
Togo,6.377484e+04,1,1,1,1,0,0,0,5.523183e+09,0,2.789101e+05,163
Bosnia and Herzegovina,3.975134e+06,1,1,1,1,0,0,0,1.729656e+10,0,-3.375550e+00,80
Zambia,6.462298e+04,1,1,1,1,0,0,0,1.921761e+10,0,-2.333062e+02,153


In [80]:
url = "https://raw.githubusercontent.com/Convergent-Sequence/Korea/refs/heads/main/TransformedData/historical_korea.csv"
df_hist = pd.read_csv(url,index_col=0)
df_hist

Unnamed: 0,Obs_mean,Economic infrastructure and services,Multi-sector / Cross-cutting,Production sectors,Social infrastructure and services,OECD,KOICA,Guerra de Corea,gdp_2002,gdp_2003,...,balanza_2016,balanza_2017,balanza_2018,balanza_2019,balanza_2020,balanza_2021,balanza_2022,balanza_2023,balanza_2024,hdr_value
Cameroon,2.056159e+06,1,1,1,1,0,0,0,1.241725e+10,1.597032e+10,...,20.914,-31.077,44.398,-15.072,-23.656,23.520,60.190,58.906,-131.426,151
Georgia,3.035524e+04,1,1,1,1,0,0,0,3.395767e+09,3.991378e+09,...,73.525,23.016,9.603,69.726,83.486,92.024,110.586,181.046,126.208,60
Cabo Verde,2.342777e-01,1,0,1,1,0,0,0,6.205074e+08,8.132605e+08,...,1.414,1.511,6.672,3.914,930.000,3.729,3.111,1.590,3.055,131
Tonga,7.218053e-02,1,1,1,1,0,0,0,1.827643e+08,2.022466e+08,...,-249.000,-3.213,-1.736,473.000,-1.230,-1.450,-723.000,2.591,155.000,98
Nepal,3.401407e+06,1,1,1,1,0,1,0,6.050876e+09,6.330473e+09,...,23.656,32.577,32.999,45.250,57.320,81.542,32.259,25.202,39.440,146
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Indonesia,9.115240e+06,1,1,1,1,0,1,0,1.956606e+11,2.347725e+11,...,-1677603.000,-1160558.000,-2291487.000,-1165783.000,-1277196.000,-2175144.000,-5518951.000,-3005662.000,-4624983.000,112
Togo,6.377484e+04,1,1,1,1,0,0,0,2.410199e+09,2.987984e+09,...,309.013,1011828.000,1016708.000,1252772.000,763.733,433.353,2293943.000,975.432,141.727,163
Bosnia and Herzegovina,3.975134e+06,1,1,1,1,0,0,0,6.728221e+09,8.498894e+09,...,-8.850,-17.788,-18.538,-20.146,-13.468,-20.117,-37.135,-30.823,-20.276,80
Zambia,6.462298e+04,1,1,1,1,0,0,0,4.193850e+09,4.901870e+09,...,-273.884,-164.972,-81.688,-38.507,2.372,-72.215,-68.130,-38.408,-3.445,153
