# Libraries

In [1]:
# Import libraries

import pandas as pd
import numpy as np
import re


import warnings
warnings.filterwarnings("ignore")

 # Documentation

In [None]:
# Import the source files: each raw CSV is read into its own DataFrame.
# The paths are relative, so the notebook must run from a directory that has ../data next to it.
df_health = pd.read_csv('../data/HNP_StatsData Alfonso-Copy1.csv')
df_seaice = pd.read_csv('../data/seaice.csv')
df_temp = pd.read_csv('../data/GlobalLandTemperaturesByCountry.csv')
df_temp_change = pd.read_csv('../data/Temperatura_cambio.csv')
df_fires = pd.read_csv('../data/fires.csv')
df_iso = pd.read_csv('../data/iso.csv')
df_agri = pd.read_csv('../data/Agricultura.csv')
df_emissions_origen = pd.read_csv('../data/Emisiones_origen.csv')
df_emisiones = pd.read_csv('../data/owid-co2-data-Copy1.csv')

# Cleaning df_health

In [None]:
# Select every row of df_health whose 'Indicator Name' is one of the
# indicators of interest: one DataFrame for population, one for health.
population_indicators = [
    'Urban population (% of total population)',
    'Urban population growth (annual %)',
    'Rural population (% of total population)',
    'Rural population growth (annual %)',
    'Population growth (annual %)',
    'Population, total',
]
health_indicators = [
    'Treatment for hypertension (% of adults ages 30-79 with hypertension)',
    'Total alcohol consumption per capita (liters of pure alcohol, projected estimates, 15+ years of age)',
    'Suicide mortality rate (per 100,000 population)',
    'People using at least basic drinking water services (% of population)',
    'People using safely managed sanitation services (% of population)',
    'Number of surgical procedures (per 100,000 population)',
    'Mortality rate attributed to unsafe water, unsafe sanitation and lack of hygiene (per 100,000 population)',
    'Mortality rate attributed to household and ambient air pollution (per 100,000 population)',
    'Human capital index (HCI) (scale 0-1)',
    'External health expenditure (% of current health expenditure)',
    'Current health expenditure (% of GDP)',
    'Current health expenditure per capita (current US$)',
    'Domestic general government health expenditure (% of GDP)',
    'Prevalence of overweight (% of adults)',
]

indicator_names = df_health['Indicator Name']
df_population = df_health[indicator_names.isin(population_indicators)]
df_health_indicators = df_health[indicator_names.isin(health_indicators)]


In [None]:
# Drop the columns that are not needed from both indicator tables.
unwanted_columns = ['Unnamed: 66', 'Indicator Code']
df_population = df_population.drop(columns=unwanted_columns)
df_health_indicators = df_health_indicators.drop(columns=unwanted_columns)

In [None]:
# Preview the first three rows of the population table after dropping columns.
df_population.head(3)

In [None]:
# Preview the first three rows of the health-indicator table after dropping columns.
df_health_indicators.head(3)

In [None]:
# Reshape both tables from wide to long. Every column that is not an
# identifier is unpivoted into 'variable'/'value' pairs, and 'variable'
# (presumably one entry per year column) is renamed to 'Year'.
id_columns = ['Country Name', 'Country Code', 'Indicator Name']
year_label = {'variable': 'Year'}

df_population = df_population.melt(id_vars=id_columns).rename(columns=year_label)
df_health_indicators = df_health_indicators.melt(id_vars=id_columns).rename(columns=year_label)

In [None]:
# Preview the population table in its long (melted) form.
df_population.head()

In [None]:
# Preview the health-indicator table in its long (melted) form.
df_health_indicators.head()

#### Population and health indicators data

In [None]:
# Save the cleaned population table. index=False is not passed, so the
# DataFrame index is written as the first, unnamed column of the CSV.
df_population.to_csv('../data/df_population.csv')

In [None]:
# Save the cleaned health-indicator table. index=False is not passed, so the
# DataFrame index is written as the first, unnamed column of the CSV.
df_health_indicators.to_csv('../data/df_health_indicators.csv')

# Cleaning df_seaice

In [None]:
# Preview the raw sea-ice table before cleaning.
df_seaice.head()

In [None]:
# Drop the columns that add no value to the analysis.
# The names are matched exactly, including their leading spaces.
seaice_unused_columns = ['    Missing', ' Day', ' Month', ' Source Data']
df_seaice = df_seaice.drop(columns=seaice_unused_columns)

In [None]:
# Rename the space-padded '     Extent' column to plain 'Extent'.
df_seaice = df_seaice.rename({'     Extent': 'Extent'}, axis='columns')


In [None]:
# Average 'Extent' per year so the table holds a single record per year,
# with the averaged column exposed as 'Ice Extent'.
mean_year = df_seaice.groupby('Year')['Extent'].mean()
# reset_index turns the 'Year' index back into a regular column.
df_mean_year_ice = mean_year.reset_index().rename(columns={'Extent': 'Ice Extent'})

In [None]:
# Preview the per-year mean ice extent.
df_mean_year_ice.head()


#### Seaice Mean

In [None]:
# Save the per-year mean ice extent (the DataFrame index is written too).
# NOTE(review): unlike the df_population / df_health_indicators exports, this
# path has no '.csv' extension — confirm what downstream readers expect
# before renaming the file.
df_mean_year_ice.to_csv('../data/df_Deshielo_Media')

# Cleaning df_temp

In [None]:
# Preview the raw temperature-by-country table.
df_temp.head()

In [None]:
# Discard the records dated before 1960. The year is taken from the first
# four characters of the 'dt' string.
from_1960_onwards = df_temp['dt'].str[:4].astype(int) >= 1960
# .copy() makes the filtered frame independent of the original, so the column
# assignments done on df_temp in later cells do not trigger pandas'
# chained-assignment warning (which this notebook silences globally).
df_temp = df_temp[from_1960_onwards].copy()

In [None]:
# Preview the temperature table after removing the records before 1960.
df_temp.head()

In [None]:
# Parse 'dt' as datetime, store its year in a new 'Year' column and
# remove 'dt', which is no longer needed afterwards.
parsed_dates = pd.to_datetime(df_temp['dt'])
df_temp = df_temp.assign(Year=parsed_dates.dt.year).drop(columns='dt')



In [None]:
# Reorder the columns so that 'Year' and 'Country' come first.
temp_column_order = ['Year', 'Country', 'AverageTemperature', 'AverageTemperatureUncertainty']
df_temp = df_temp.reindex(columns=temp_column_order)

# Preview the reordered table.
df_temp.head()

In [None]:
# Group by year and country and take the mean of both measurements, so each
# (Year, Country) pair ends up as one row. Named aggregation gives the
# result columns their final snake-case names directly.
df_temp = (
    df_temp.groupby(['Year', 'Country'])
    .agg(
        Average_Temperature=('AverageTemperature', 'mean'),
        Average_Temperature_Uncertainty=('AverageTemperatureUncertainty', 'mean'),
    )
    .reset_index()
)


In [None]:
# Preview the per-year, per-country temperature means.
df_temp.head()

In [None]:
# Save the per-year, per-country temperature means (the index is written too).
# NOTE(review): this path has no '.csv' extension, unlike most other exports
# in this notebook — confirm what downstream readers expect before renaming.
df_temp.to_csv('../data/df_temp_media')

# Cleaning df_temp_change

In [None]:
# Preview the raw temperature-change table.
df_temp_change.head()

In [None]:
# Drop the columns that are not of interest.
temp_change_unused = [
    'Unit',
    'Months',
    'Domain',
    'Domain Code',
    'Area Code (M49)',
    'Element Code',
    'Months Code',
    'Year Code',
    'Flag',
]
df_temp_change = df_temp_change.drop(columns=temp_change_unused)

In [None]:
# Preview the temperature-change table after dropping columns.
df_temp_change.head()

In [None]:
# Reorder the columns; any column not listed here is discarded by reindex.
temp_change_order = ['Year', 'Area', 'Element', 'Value', 'Flag Description']
df_temp_change = df_temp_change.reindex(columns=temp_change_order)

In [None]:
# Rename the columns: 'Area' becomes 'Country', 'Flag Description' becomes 'Description'.
temp_change_new_names = {
    'Area': 'Country',
    'Flag Description': 'Description',
}
df_temp_change = df_temp_change.rename(temp_change_new_names, axis='columns')

In [None]:
# Append the unit label to each value, turning 'Value' into text.
# Missing values are kept as NaN: astype(str) alone would turn them into
# the literal text "nan ºC", which then looks like real data in the export.
temp_change_values = df_temp_change["Value"]
df_temp_change["Value"] = (temp_change_values.astype(str) + " ºC").where(temp_change_values.notna())



In [None]:
# Preview the cleaned temperature-change table.
df_temp_change.head()

In [None]:
# Save the cleaned temperature-change table. index=False is not passed, so
# the DataFrame index is written as the first, unnamed column of the CSV.
df_temp_change.to_csv('../data/df_temp_change.csv')

# Cleaning df_fires

In [None]:
# Preview the raw fires table.
df_fires.head()

In [None]:
# Drop the descriptive and code columns that are not used afterwards.
fires_unused_columns = [
    'Description',
    'Element',
    'Domain',
    'Area Code (M49)',
    'Element Code',
    'Source',
    'Unit',
    'Flag',
    'Item Code',
    'Year Code',
    'Source Code',
    'Note',
    'Domain Code',
]
df_fires = df_fires.drop(columns=fires_unused_columns)

In [None]:
# Append the unit label to each value, turning 'Value' into text.
# Missing values are kept as NaN: astype(str) alone would turn them into
# the literal text "nan Ha", which then looks like real data.
fires_values = df_fires["Value"]
df_fires["Value"] = (fires_values.astype(str) + " Ha").where(fires_values.notna())

In [None]:
# Preview the fires table after labelling the values.
df_fires.head()

In [None]:
# Rename 'Area' to 'Country', then keep only Year, Country and Value in that
# order; reindex discards every other remaining column.
fires_final_columns = ['Year', 'Country', 'Value']
df_fires = df_fires.rename({'Area': 'Country'}, axis='columns')
df_fires = df_fires.reindex(columns=fires_final_columns)

In [None]:
# Preview the final fires table.
df_fires.head()

In [None]:
# List the distinct values of the 'Description' column.
# NOTE(review): 'Description' is dropped from df_fires in an earlier cell and
# is not among the columns kept by the reindex to ['Year', 'Country', 'Value'],
# so running the notebook top to bottom raises KeyError here — confirm whether
# this cell was meant to run before the drop or should be removed.
df_fires['Description'].unique()