In [2]:
import pandas as pd

Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


### Cargar y moldear datos generales

In [18]:
# Read ods_final.csv (path relative to the working directory) into a DataFrame.
ods_data = pd.read_csv(filepath_or_buffer="ods_final.csv")

In [19]:
# Remove the 'Country Name' and 'Indicator Code' columns together with the
# yearly columns labelled '1990' through '2004'.
# One non-mutating drop is used instead of two in-place calls: if any label is
# missing, KeyError is raised before anything is removed, so the frame is never
# left half-trimmed.
columns_to_drop = ["Country Name", "Indicator Code"] + [str(year) for year in range(1990, 2005)]
ods_data = ods_data.drop(columns=columns_to_drop)

In [20]:

# Reshape wide -> long: every column other than the two identifier columns
# becomes a (Year, Value) row; 'Year' holds the former column labels.
melted_df = ods_data.melt(
    id_vars=['Country Code', 'Indicator Name'],
    var_name='Year',
    value_name='Value',
)

# Reshape long -> wide with one column per indicator name, then move
# 'Country Code' and 'Year' out of the index into regular columns.
# pivot_table aggregates with its default function (mean) whenever the same
# (Country Code, Year, Indicator Name) combination occurs more than once.
ods_data = (
    melted_df
    .pivot_table(
        values='Value',
        index=['Country Code', 'Year'],
        columns='Indicator Name',
    )
    .reset_index()
)

ods_data

Indicator Name,Country Code,Year,Access to clean fuels and technologies for cooking (% of population),Access to electricity (% of population),Adolescent fertility rate (births per 1000 women ages 15-19),Adolescents out of school (% of lower secondary school age),Employment in agriculture (% of total employment) (modeled ILO estimate),Employment in industry (% of total employment) (modeled ILO estimate),Employment in services (% of total employment) (modeled ILO estimate),Households and NPISHs Final consumption expenditure (annual % growth),People using at least basic drinking water services (% of population),People using at least basic sanitation services (% of population),People using safely managed drinking water services (% of population),Primary completion rate total (% of relevant age group),Urban population (% of total population)
0,ABW,2005,,100.000000,36.7648,6.071430,,,,,95.897230,97.996573,,88.197968,44.875
1,ABW,2006,,100.000000,35.3134,,,,,,96.178915,97.931846,,93.186119,44.511
2,ABW,2007,,100.000000,33.8620,3.847370,,,,,96.460600,97.867118,,95.588242,44.147
3,ABW,2008,,100.000000,32.7920,3.824090,,,,,96.742284,97.802391,,95.133034,43.783
4,ABW,2009,,100.000000,31.7220,7.427830,,,,,97.023969,97.737663,,96.242577,43.421
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4167,ZWE,2016,29.8,42.561729,90.6766,2.261470,66.879997,7.05,26.070000,-10.501114,64.468964,37.529193,30.183683,99.265839,32.296
4168,ZWE,2017,29.8,44.178635,86.1350,2.356790,66.480003,6.90,26.629999,1.396143,63.996625,36.941673,29.997683,95.476372,32.237
4169,ZWE,2018,29.9,45.572647,83.2486,6.226070,66.019997,6.75,27.230000,-22.393220,63.538773,36.357160,29.827913,92.195152,32.209
4170,ZWE,2019,30.1,46.781475,80.3622,7.544310,66.190002,6.57,27.240000,,63.094957,35.774337,29.673569,88.508812,32.210


### Cargar y moldear datos de indicadores


In [23]:
# Read the composite-indices time-series CSV, decoding the file as Latin-1.
indicators_data = pd.read_csv(
    "HDR23-24_Composite_indices_complete_time_series.csv",
    encoding="latin1",
)

In [24]:
# Drop the 'hdicode' and 'region' columns, then rename 'iso3' to
# 'Country Code' so the key column has the same name as in ods_data.
indicators_data = (
    indicators_data
    .drop(columns=["hdicode", "region"])
    .rename(columns={"iso3": "Country Code"})
)

In [25]:

# Flatten the DataFrame: every column other than the two identifier columns
# becomes a (variable_year, value) row.
df_melted = indicators_data.melt(
    id_vars=['Country Code', 'country'],
    var_name='variable_year',
    value_name='value',
)

# Split each former column label at its LAST underscore into a variable name
# and a year (presumably labels look like 'mys_f_2005' -> 'mys_f', '2005';
# verify against the CSV header).
df_melted[['variable', 'Year']] = (
    df_melted['variable_year'].str.rsplit('_', n=1, expand=True)
)

# Pivot back to wide form with one column per variable, and turn the
# (Country Code, country, Year) index levels into regular columns.
indicators_data = (
    df_melted
    .pivot_table(
        values='value',
        index=['Country Code', 'country', 'Year'],
        columns='variable',
    )
    .reset_index()
)


### Juntar conjuntos de datos


In [28]:
# Inner join on the shared keys: only (Country Code, Year) pairs present in
# both frames are kept.
data = ods_data.merge(
    indicators_data,
    how='inner',
    on=['Country Code', 'Year'],
)
data

Unnamed: 0,Country Code,Year,Access to clean fuels and technologies for cooking (% of population),Access to electricity (% of population),Adolescent fertility rate (births per 1000 women ages 15-19),Adolescents out of school (% of lower secondary school age),Employment in agriculture (% of total employment) (modeled ILO estimate),Employment in industry (% of total employment) (modeled ILO estimate),Employment in services (% of total employment) (modeled ILO estimate),Households and NPISHs Final consumption expenditure (annual % growth),...,mys,mys_f,mys_m,phdi,pop_total,pr_f,pr_m,rankdiff_hdi_phdi,se_f,se_m
0,AFG,2005,12.20,25.390894,138.4284,,62.150002,11.33,26.530001,,...,1.521544,0.480141,2.509840,0.399,24.411191,25.925926,74.074074,,3.024593,12.915637
1,AFG,2006,13.85,30.718691,135.7602,,61.279999,11.79,26.930000,,...,1.595281,0.495506,2.620990,0.407,25.442944,25.925926,74.074074,,2.985100,13.786732
2,AFG,2007,15.30,36.051010,133.0920,,60.139999,12.20,27.660000,,...,1.669017,0.510870,2.732140,0.423,25.903301,25.872093,74.127907,,2.945607,14.657827
3,AFG,2008,16.70,42.400002,126.6330,,59.290001,12.48,28.230000,,...,1.742754,0.526235,2.843290,0.428,26.427199,25.872093,74.127907,,2.906114,15.528922
4,AFG,2009,18.40,46.740051,120.1740,,56.889999,13.63,29.480000,,...,1.816490,0.541599,2.954440,0.438,27.385307,25.925926,74.074074,,2.866621,16.400018
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3115,ZWE,2016,29.80,42.561729,90.6766,2.261470,66.879997,7.05,26.070000,-10.501114,...,8.425370,7.881975,9.090810,0.539,14.452704,35.142857,64.857143,,53.450329,66.209465
3116,ZWE,2017,29.80,44.178635,86.1350,2.356790,66.480003,6.90,26.629999,1.396143,...,8.461690,8.061880,8.916270,0.544,14.751101,36.151604,63.848396,,59.792019,70.783081
3117,ZWE,2018,29.90,45.572647,83.2486,6.226070,66.019997,6.75,27.230000,-22.393220,...,8.574575,8.065950,9.189455,0.556,15.052184,34.285714,65.714286,,60.982430,71.707101
3118,ZWE,2019,30.10,46.781475,80.3622,7.544310,66.190002,6.57,27.240000,,...,8.687460,8.070020,9.462640,0.553,15.354608,34.571429,65.428571,,62.196541,72.643183


In [29]:
# Show the column labels of the merged frame.
data.columns

Index(['Country Code', 'Year',
       'Access to clean fuels and technologies for cooking (% of population)',
       'Access to electricity (% of population)',
       'Adolescent fertility rate (births per 1000 women ages 15-19)',
       'Adolescents out of school (% of lower secondary school age)',
       'Employment in agriculture (% of total employment) (modeled ILO estimate)',
       'Employment in industry (% of total employment) (modeled ILO estimate)',
       'Employment in services (% of total employment) (modeled ILO estimate)',
       'Households and NPISHs Final consumption expenditure (annual % growth)',
       'People using at least basic drinking water services (% of population)',
       'People using at least basic sanitation services (% of population)',
       'People using safely managed drinking water services (% of population)',
       'Primary completion rate total (% of relevant age group)',
       'Urban population (% of total population)', 'country', 'abr',
     