Link to repo - common to all files: 'https://github.com/Derick047/PC3/tree/main/PC3-DataSets'

**Importación de Archivos**

In [64]:
import pandas as pd

In [65]:
# Raw-file URLs of the four CSV datasets; all of them live in the same
# repository folder, so only the (URL-encoded) file name differs.
_DATASETS_ROOT = 'https://raw.githubusercontent.com/Derick047/PC3/refs/heads/main/PC3-DataSets/'
education = _DATASETS_ROOT + 'Education%20expenditures.csv'
electricity = _DATASETS_ROOT + 'Electricity%20-%20installed%20generating%20capacity.csv'
energy = _DATASETS_ROOT + 'Energy%20consumption%20per%20capita.csv'
reserves = _DATASETS_ROOT + 'Reserves%20of%20foreign%20exchange%20and%20gold.csv'

In [66]:
# Download each CSV and rebind its name from URL string to DataFrame.
# All four reads complete before any name is rebound, so a failed download
# leaves every name still holding its URL and this cell can simply be re-run.
# NOTE: after a successful run the names hold DataFrames, not URLs; re-run the
# cell that defines the URLs before executing this cell again.
education, electricity, energy, reserves = [
    pd.read_csv(url) for url in (education, electricity, energy, reserves)
]

**Limpieza**

In [42]:
# Strip leading/trailing whitespace from the column labels of each raw frame
# (the labels are replaced in place on the four loaded DataFrames).
# The loop variable is deliberately not named `df`: a loop variable stays bound
# after the loop, and `df` is the name of the merged table built further down,
# so re-running this cell would otherwise silently replace that table.
for raw_frame in (education, electricity, energy, reserves):
    raw_frame.columns = raw_frame.columns.str.strip()

In [44]:
# Show the column labels of the education frame.
education.columns

Index(['name', 'slug', '% of GDP', 'date_of_information', 'ranking', 'region'], dtype='object')

In [49]:
# Show the column labels of the electricity frame.
electricity.columns

Index(['name', 'slug', 'kW', 'date_of_information', 'ranking', 'region'], dtype='object')

In [50]:
# Show the column labels of the energy frame.
energy.columns

Index(['name', 'slug', 'Btu/person', 'date_of_information', 'ranking',
       'region'],
      dtype='object')

In [51]:
# Show the column labels of the reserves frame.
reserves.columns

Index(['name', 'slug', 'value', 'date_of_information', 'ranking', 'region'], dtype='object')

In [45]:
# Education: keep only country, region and the '% of GDP' figure, and rename
# them to the column names shared by the rest of the notebook.
education_clean = (
    education
    .loc[:, ['name', 'region', '% of GDP']]
    .rename(columns={'name': 'Country', 'region': 'Region', '% of GDP': 'Education_GDP%'})
)

In [46]:
# Electricity: keep country, region and the 'kW' figure under the shared names.
# The kW column is handled as text: commas are removed before casting to float.
electricity_clean = (
    electricity
    .loc[:, ['name', 'region', 'kW']]
    .rename(columns={'name': 'Country', 'region': 'Region', 'kW': 'Electricity_kW'})
    .assign(Electricity_kW=lambda frame: frame['Electricity_kW'].str.replace(",", "").astype(float))
)

In [47]:
# Energy: keep country, region and the 'Btu/person' figure under the shared names.
# The Btu/person column is handled as text: commas are removed before casting to float.
energy_clean = (
    energy
    .loc[:, ['name', 'region', 'Btu/person']]
    .rename(columns={'name': 'Country', 'region': 'Region', 'Btu/person': 'Energy_BtuPerCapita'})
    .assign(Energy_BtuPerCapita=lambda frame: frame['Energy_BtuPerCapita'].str.replace(",", "").astype(float))
)

In [48]:
# Reserves: keep country, region and the 'value' figure under the shared names.
# The value column is handled as text: every '$' and ',' character is removed
# (regex character class) before casting to float.
reserves_clean = (
    reserves
    .loc[:, ['name', 'region', 'value']]
    .rename(columns={'name': 'Country', 'region': 'Region', 'value': 'NaturalReserves_ValueUSD'})
    .assign(
        NaturalReserves_ValueUSD=lambda frame: frame['NaturalReserves_ValueUSD']
        .str.replace("[$,]", "", regex=True)
        .astype(float)
    )
)

**Uniendo los Archivos**

In [53]:
# Build one table with a row per country by joining the four cleaned frames on
# 'Country'. The joins are inner joins (the pandas default, spelled out here),
# so only countries present in all four frames are kept. 'Region' comes from
# the education frame alone; the other frames contribute just their value column.
df = (
    education_clean
    .merge(electricity_clean[['Country', 'Electricity_kW']], how='inner', on='Country')
    .merge(energy_clean[['Country', 'Energy_BtuPerCapita']], how='inner', on='Country')
    .merge(reserves_clean[['Country', 'NaturalReserves_ValueUSD']], how='inner', on='Country')
)

**Creando Columnas de Intervalos**

In [60]:
# Split each numeric indicator into five quantile-based intervals (pd.qcut)
# and store the resulting label in a new '<column>_Interval' column of `df`.
# The number of intervals is the number of labels.
etiquetas = ['Muy bajo', 'Bajo', 'Medio', 'Alto', 'Muy alto']
for col in ('Education_GDP%', 'Electricity_kW', 'Energy_BtuPerCapita', 'NaturalReserves_ValueUSD'):
    df[f'{col}_Interval'] = pd.qcut(x=df[col], q=len(etiquetas), labels=etiquetas)

In [58]:
# Per-region summary: mean, minimum and maximum of each numeric indicator.
# The result has one row per region and two-level columns (indicator, statistic).
stats_by_region = (
    df
    .groupby('Region')[['Education_GDP%', 'Electricity_kW', 'Energy_BtuPerCapita', 'NaturalReserves_ValueUSD']]
    .agg(['mean', 'min', 'max'])
)

**Resultados**

In [59]:
# Render the merged per-country table, including the *_Interval columns.
display(df)

Unnamed: 0,Country,Region,Education_GDP%,Electricity_kW,Energy_BtuPerCapita,NaturalReserves_ValueUSD,Education_GDP%_Interval,Electricity_kW_Interval,Energy_BtuPerCapita_Interval,NaturalReserves_ValueUSD_Interval
0,Solomon Islands,Australia and Oceania,12.8,37000.0,5655000.0,6.882200e+08,Muy alto,Muy bajo,Muy bajo,Muy bajo
1,Bolivia,South America,9.8,4375000.0,29340000.0,8.710000e+07,Muy alto,Medio,Medio,Muy bajo
2,Namibia,Africa,9.6,646000.0,21734000.0,3.356000e+09,Muy alto,Bajo,Bajo,Bajo
3,Sierra Leone,Africa,9.1,149000.0,2301000.0,4.956990e+08,Muy alto,Muy bajo,Muy bajo,Muy bajo
4,Belize,Central America and the Caribbean,8.7,220000.0,30752000.0,4.980870e+08,Muy alto,Muy bajo,Medio,Muy bajo
...,...,...,...,...,...,...,...,...,...,...
171,Haiti,Central America and the Caribbean,1.4,472000.0,3486000.0,2.566000e+09,Muy bajo,Bajo,Muy bajo,Bajo
172,Papua New Guinea,East and Southeast Asia,1.4,1148000.0,8781000.0,3.901000e+09,Muy bajo,Bajo,Bajo,Medio
173,Venezuela,South America,1.3,33493000.0,54474000.0,9.794000e+09,Muy bajo,Muy alto,Medio,Medio
174,Nigeria,Africa,0.5,4094000.0,7993000.0,3.861200e+10,Muy bajo,Medio,Muy bajo,Alto


In [62]:
# Per-region statistics (mean, min and max of each numeric indicator).
display(stats_by_region)

Unnamed: 0_level_0,Education_GDP%,Education_GDP%,Education_GDP%,Electricity_kW,Electricity_kW,Electricity_kW,Energy_BtuPerCapita,Energy_BtuPerCapita,Energy_BtuPerCapita,NaturalReserves_ValueUSD,NaturalReserves_ValueUSD,NaturalReserves_ValueUSD
Unnamed: 0_level_1,mean,min,max,mean,min,max,mean,min,max,mean,min,max
Region,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2
Africa,4.268,0.3,9.6,4619820.0,29000.0,65989000.0,14122840.0,649000.0,98847000.0,6860587000.0,30450000.0,68448000000.0
Australia and Oceania,6.314286,2.2,12.8,17061000.0,34000.0,108193000.0,62216710.0,5655000.0,223158000.0,11479040000.0,396530000.0,54455000000.0
Central America and the Caribbean,4.555,1.4,8.7,1580500.0,41000.0,6581000.0,55873000.0,3486000.0,153952000.0,4561870000.0,155971000.0,23834000000.0
Central Asia,4.333333,2.8,6.2,42395780.0,3944000.0,301926000.0,103916100.0,16192000.0,261142000.0,75558220000.0,3237000000.0,597217000000.0
East and Southeast Asia,3.633333,1.4,6.3,210844600.0,277000.0,2949000000.0,118687400.0,6825000.0,643259000.0,353614200000.0,781995000.0,3265000000000.0
Europe,5.151429,3.1,7.2,40275090.0,779000.0,275658000.0,113142200.0,27407000.0,234698000.0,64158750000.0,921269000.0,822130000000.0
Middle East,4.528571,1.7,7.8,33817290.0,352000.0,119620000.0,240650900.0,14991000.0,814308000.0,94324790000.0,1328000000.0,436769000000.0
North America,5.2,4.3,6.1,500858000.0,105586000.0,1235000000.0,215870700.0,57539000.0,311599000.0,189085000000.0,117551000000.0,227760000000.0
South America,4.808333,1.3,9.8,35504080.0,259000.0,240251000.0,46414670.0,25733000.0,78496000.0,47486920000.0,87100000.0,318857000000.0
South Asia,3.85,1.9,7.0,72116120.0,432000.0,499136000.0,23506380.0,3380000.0,64082000.0,79075930000.0,673203000.0,569544000000.0


**Guardamos los Archivos**

In [63]:
# Write both result tables as CSV files, using paths relative to the working
# directory. The merged table is saved without its row index; the per-region
# summary keeps its index (the region names).
df.to_csv(path_or_buf="final_result.csv", index=False)
stats_by_region.to_csv(path_or_buf="stats_by_region.csv")