## PASO 1: Plantear la pregunta.
        ¿Puede predecirse el consumo energético de un país en función de su población y su nivel de industrialización?
## PASOS 2 y 3: Concretar y buscar los datos necesarios.
        Cargamos los ficheros .csv utilizando las librerías y funciones correspondientes.           

In [1]:
from pathlib import Path

import pandas as pd

# Directory with the input CSV files, relative to the working directory.
# Paths are built with pathlib instead of hard-coded "\\" separators so the
# notebook also runs on Linux/macOS, not only on Windows.
DATA_DIR = Path("Datos")

# Feature data files
pop_den = pd.read_csv(DATA_DIR / "population_density.csv")
urb_gro = pd.read_csv(DATA_DIR / "urban_growth.csv")
pop = pd.read_csv(DATA_DIR / "population.csv")
pop_gro = pd.read_csv(DATA_DIR / "population_growth.csv")
life_exp = pd.read_csv(DATA_DIR / "life_expectancy.csv")
co2_emi = pd.read_csv(DATA_DIR / "co2_emissions.csv")

# Check/target file
target = pd.read_csv(DATA_DIR / "energy_person_ratio.csv")

## PASOS 4 y 5: Limpieza y exploración de los datos.
        1) Transponemos las tablas utilizando la función data.melt_df().
        2) Juntamos las tablas mediante la función data.merge_all(), describiendo nuestro propio diccionario.
        3) Visualizamos la información básica de nuestra tabla global mediante las funciones .head() y .describe().

In [2]:
import data  # project-local helper module used to reshape and merge the DataFrames loaded above (see data.py for details)

# Run every feature table through data.melt_df, in the same order as they
# were loaded. The second argument is presumably the name given to the value
# column of the reshaped table -- confirm in data.py.
(
    pop_den_melt,
    urb_gro_melt,
    pop_melt,
    pop_gro_melt,
    co2_emi_melt,
    life_exp_melt,
) = (
    data.melt_df(frame, label)
    for frame, label in (
        (pop_den, "population_density"),
        (urb_gro, "urban_growth"),
        (pop, "population"),
        (pop_gro, "population_growth"),
        (co2_emi, "co2_emissions"),
        (life_exp, "life_expectancy"),
    )
)

# The check/target table goes through the same reshaping.
target_melt = data.melt_df(target, "target")

In [14]:
# Dictionary of the reshaped feature tables, passed as the feature_dict
# argument of data.merge_all().
feature_dict = {
    "pop_den_melt": pop_den_melt,
    "urb_gro_melt": urb_gro_melt,
    "pop_melt": pop_melt,
    "pop_gro_melt": pop_gro_melt,
    "co2_emi_melt": co2_emi_melt,
    "life_exp_melt": life_exp_melt,
}

# Merge every feature table on the country/year keys, then replace the
# resulting index with a fresh default one (the old index is dropped).
merge_keys = ["country", "year"]
merged_data = data.merge_all(feature_dict=feature_dict, keys=merge_keys)
merged_data = merged_data.reset_index(drop=True)

In [15]:
# Preview the first rows of the merged table; as the last expression of the
# cell, the returned DataFrame is rendered by the notebook.
merged_data.head()

Unnamed: 0,country,year,population_density,urban_growth,population,population_growth,co2_emissions,life_expectancy
0,Afghanistan,1961,14.0,0.0521,9170000,1.9,0.0536,40.0
1,Albania,1961,61.5,0.0389,1690000,3.12,1.35,63.3
2,Algeria,1961,4.76,0.0662,11300000,2.49,0.535,53.1
3,Angola,1961,4.44,0.0481,5530000,1.39,0.0821,41.1
4,Antigua and Barbuda,1961,125.0,0.000279,55000,1.59,0.867,63.8


In [16]:
# Summary statistics (count, mean, std, min, quartiles, max) of the numeric
# columns of the merged table.
merged_data.describe()

Unnamed: 0,population_density,urban_growth,population,population_growth,co2_emissions,life_expectancy
count,10501.0,10501.0,10501.0,10501.0,10501.0,10501.0
mean,132.850914,0.030194,28977460.0,1.80702,4.482479,65.505327
std,406.795876,0.030978,112053300.0,1.576097,7.341779,10.128384
min,0.632,-1.87,42200.0,-9.08,0.0,9.64
25%,17.7,0.013,1820000.0,0.748,0.385,58.3
50%,55.2,0.0277,5730000.0,1.77,1.73,68.1
75%,122.0,0.0435,17500000.0,2.7,6.27,73.2
max,8230.0,0.489,1430000000.0,17.6,101.0,85.0


## PASO 6: Preprocesamiento de datos.
        1) Generamos el objeto que realiza la normalización a través de Sklearn.
        2) Normalización de los datos.
        3) Visualización de los mismos.  

In [17]:
from sklearn.preprocessing import MinMaxScaler
# Create the scaling object. With its default settings MinMaxScaler rescales
# each column it is fitted on to the [0, 1] range.
scaler = MinMaxScaler()

In [18]:
# Work on a copy so that merged_data keeps its original, unscaled values.
scaled_data = merged_data.copy()

# Only these columns are normalised; every other column is left as it is.
columns_to_scale = ["population_density", "population", "population_growth", "life_expectancy"]
scaled_values = scaler.fit_transform(scaled_data[columns_to_scale])
scaled_data[columns_to_scale] = scaled_values

# Show the first rows of the normalised data.
scaled_data.head()

Unnamed: 0,country,year,population_density,urban_growth,population,population_growth,co2_emissions,life_expectancy
0,Afghanistan,1961,0.001624,0.0521,0.006383,0.411544,0.0536,0.402866
1,Albania,1961,0.007396,0.0389,0.001152,0.457271,1.35,0.712049
2,Algeria,1961,0.000502,0.0662,0.007873,0.433658,0.535,0.576699
3,Angola,1961,0.000463,0.0481,0.003838,0.392429,0.0821,0.417463
4,Antigua and Barbuda,1961,0.015113,0.000279,9e-06,0.399925,0.867,0.718684
