In [None]:
import pandas as pd
import sqlalchemy
import psycopg2
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import sql_functions_sp as sfsp
import python_functions_sp as pf

import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
warnings.filterwarnings("ignore")

### Import data from Postgres ###

In [None]:
# Load part 1 of the HFA data from Postgres.
schema = 'capstone_health_education'
table = 'HFA_data_p1'

# The table name is double-quoted so Postgres keeps its mixed-case spelling.
sql_query = f'SELECT * FROM {schema}."{table}";'
# The SQL helper module is imported as `sfsp`; the alias `sf` is never defined
# in this notebook, so calling it raised NameError on a fresh kernel.
HFA_data_p1 = sfsp.get_dataframe(sql_query)

In [None]:
# Load part 2 of the HFA data from Postgres.
schema = 'capstone_health_education'
table = 'HFA_data_p2'

# The table name is double-quoted so Postgres keeps its mixed-case spelling.
sql_query2 = f'SELECT * FROM {schema}."{table}";'
# The SQL helper module is imported as `sfsp`; the alias `sf` is never defined
# in this notebook, so calling it raised NameError on a fresh kernel.
HFA_data_p2 = sfsp.get_dataframe(sql_query2)

In [None]:
# Load part 3 of the HFA data from Postgres.
schema = 'capstone_health_education'
table = 'HFA_data_p3'

# The table name is double-quoted so Postgres keeps its mixed-case spelling.
sql_query3 = f'SELECT * FROM {schema}."{table}";'
# The SQL helper module is imported as `sfsp`; the alias `sf` is never defined
# in this notebook, so calling it raised NameError on a fresh kernel.
HFA_data_p3 = sfsp.get_dataframe(sql_query3)

In [None]:
# Collect the three partial frames so a single helper call can process them together.
dataframes = [HFA_data_p1, HFA_data_p2, HFA_data_p3]

# NOTE(review): presumably renames the columns of every frame in place to lower
# snake_case (the result is not assigned to anything) — confirm in python_functions_sp.
pf.columns_lower_snake_case_2(dataframes)

In [None]:
HFA_data_p1.columns

In [None]:
HFA_data_p2.columns

In [None]:
HFA_data_p3.shape

In [None]:
# Stack the three partial HFA tables row-wise into one frame.
# Each part keeps its own index labels (no ignore_index), so labels may repeat.
frames = [
    HFA_data_p1,
    HFA_data_p2,
    HFA_data_p3,
]
HFA_df = pd.concat(objs=frames, axis=0)

In [None]:
HFA_df.shape

In [None]:
HFA_df.head()

In [None]:
HFA_df['place_residence'].info()

#### Reducing the dataset ####

In [None]:
# Identifier columns first, then one string-named column per year, 2000 through 2022.
columns_to_keep = ['measure_code', 'sex', 'country_region'] + [
    str(year) for year in range(2000, 2023)
]

# Keep only the identifier and year columns listed in columns_to_keep.
# Note the two names differ only by case: `HFA_df` is the full concatenated
# frame, `hfa_df` is the reduced one.
hfa_df = HFA_df[columns_to_keep]

In [None]:
hfa_df

In [None]:
# Measure-code columns that get converted to numeric values.
list_of_objects = [
    'HFA_1', 'HFA_2', 'HFA_3',
    'HFA_10', 'HFA_11', 'HFA_12', 'HFA_13', 'HFA_14', 'HFA_15',
    'HFA_26', 'HFA_27', 'HFA_28', 'HFA_29', 'HFA_30',
    'HFA_32', 'HFA_33',
    'HFA_35', 'HFA_36', 'HFA_37', 'HFA_38', 'HFA_39', 'HFA_40', 'HFA_41',
    'HFA_43', 'HFA_44', 'HFA_45',
    'HFA_357', 'HFA_391',
    'HFA_617', 'HFA_618', 'HFA_636', 'HFA_634', 'HFA_635',
]

# Coerce the listed measure columns to numeric; values that cannot be parsed become NaN.
# NOTE(review): `hfa_eduHealth` is first assigned further down, after the pivot step,
# so on a fresh kernel (Restart & Run All) this line raises NameError. This cell
# presumably belongs after the pivot / reset_index cells — confirm and move it there.
hfa_eduHealth[list_of_objects] = hfa_eduHealth[list_of_objects].apply(pd.to_numeric, errors='coerce')

#### Reducing measure codes ####

In [None]:
# Measure codes retained for the analysis; rows with any other code are dropped.
measure_codes_to_keep = [
    'HFA_1', 'HFA_2', 'HFA_3',
    'HFA_10', 'HFA_11', 'HFA_12', 'HFA_13', 'HFA_14', 'HFA_15',
    'HFA_26', 'HFA_27', 'HFA_28', 'HFA_29', 'HFA_30',
    'HFA_32', 'HFA_33',
    'HFA_35', 'HFA_36', 'HFA_37', 'HFA_38', 'HFA_39', 'HFA_40', 'HFA_41',
    'HFA_43', 'HFA_44', 'HFA_45',
    'HFA_357', 'HFA_391',
    'HFA_617', 'HFA_618', 'HFA_636', 'HFA_634', 'HFA_635',
]


# Row filter: keep only the records whose measure code is on the keep-list.
is_kept_measure = hfa_df['measure_code'].isin(measure_codes_to_keep)
hfa_filtered = hfa_df.loc[is_kept_measure]

In [None]:
hfa_filtered.shape

In [None]:
hfa_filtered.sample(15)

In [None]:
hfa_filtered.info()

### Naming the measure codes ###

In [None]:
# Fetch the measure-code -> label lookup from the metadata sheet.
schema = 'capstone_health_education'
table = 'HFA_metadata_sh4'

# Only rows whose "Measure labels" value starts with 'HFA' are selected.
# NOTE(review): `%%` is presumably an escaped literal `%` for the DB driver's
# parameter formatting — confirm against sql_functions_sp.get_dataframe.
sql_query = f'SELECT "Measure labels", "Unnamed: 1" FROM {schema}."{table}" WHERE "Measure labels" LIKE \'HFA%%\';'
# The SQL helper module is imported as `sfsp`; the alias `sf` is never defined
# in this notebook, so calling it raised NameError on a fresh kernel.
measure_names = sfsp.get_dataframe(sql_query)

In [None]:
measure_names

In [None]:
# Align the lookup's column names with the main frame so the two can be merged on measure_code.
measure_names = measure_names.rename(
    columns={
        'Measure labels': 'measure_code',
        'Unnamed: 1': 'measure_label',
    }
)

In [None]:
# Attach the human-readable label to every row; the left join keeps rows without a match.
hfa_filtered = hfa_filtered.merge(measure_names, how='left', on='measure_code')

In [None]:
hfa_filtered

#### Giving full names to countries and regions ####

In [None]:
# Fetch the country metadata sheet (codes and names).
schema = 'capstone_health_education'
table = 'HFA_metadata_sh5'

# The table name is double-quoted so Postgres keeps its mixed-case spelling.
sql_query = f'SELECT * FROM {schema}."{table}";'
# The SQL helper module is imported as `sfsp`; the alias `sf` is never defined
# in this notebook, so calling it raised NameError on a fresh kernel.
country_names = sfsp.get_dataframe(sql_query)

In [None]:
# Discard the alternative code and short-name columns of the country lookup.
country_names = country_names.drop(
    columns=['ISO 2', 'ISO 3', 'WHO code', 'Short name']
)

In [None]:
# Use the main frame's key name and a generic 'name' column for the full country name.
country_names = country_names.rename(
    columns={
        'Code': 'country_region',
        'Full name': 'name',
    }
)

In [None]:
# Fetch the region metadata sheet (codes and names).
schema = 'capstone_health_education'
table = 'HFA_metadata_sh6'

# The table name is double-quoted so Postgres keeps its mixed-case spelling.
sql_query = f'SELECT * FROM {schema}."{table}";'
# The SQL helper module is imported as `sfsp`; the alias `sf` is never defined
# in this notebook, so calling it raised NameError on a fresh kernel.
region_names = sfsp.get_dataframe(sql_query)

In [None]:
# For regions the short name is kept, so the full-name column is removed.
region_names = region_names.drop(columns=['Full name'])

In [None]:
# Give the region lookup the same two column names as the country lookup.
region_names = region_names.rename(
    columns={
        'Code': 'country_region',
        'Short name': 'name',
    }
)

In [None]:
# One combined code -> name lookup: countries first, then regions, renumbered from 0.
cat = pd.concat(
    objs=[country_names, region_names],
    ignore_index=True,
)

In [None]:
# Attach the display name for each country/region code; the left join keeps unmatched codes.
hfa_filtered = hfa_filtered.merge(cat, how='left', on='country_region').copy()

In [None]:
hfa_filtered

### Melt and pivot the data frame ###

In [None]:
# Wide -> long: the per-year columns (2000 through 2022) collapse into a
# 'years' / 'value' pair, one row per id combination and year.
hfa_melted = hfa_filtered.melt(
    id_vars=['sex', 'country_region', 'name', 'measure_code', 'measure_label'],
    value_vars=[str(year) for year in range(2000, 2023)],
    var_name='years',
    value_name='value',
)

In [None]:
hfa_melted

In [None]:
# Long -> wide again, this time with one column per measure code and one row
# per (name, years, sex) combination.
# NOTE(review): pivot raises if a (name, years, sex, measure_code) combination
# occurs more than once — verify the upstream merges cannot duplicate rows.
hfa_melted_pivoted = hfa_melted.pivot(
    index=['name', 'years', 'sex'],
    columns='measure_code',
    values='value',
)

In [None]:
# Plain assignment: `hfa_eduHealth` is a second name for the same object as
# `hfa_melted_pivoted`, not a copy, so the in-place operations applied to
# `hfa_eduHealth` in later cells also change `hfa_melted_pivoted`.
hfa_eduHealth = hfa_melted_pivoted

In [None]:
hfa_eduHealth

In [None]:
# Turn the pivot's index levels (name, years, sex) back into ordinary columns.
# In-place, so re-running this cell on the same object is not idempotent.
hfa_eduHealth.reset_index(inplace=True)

In [None]:
hfa_eduHealth

In [None]:
hfa_eduHealth.columns.name=None

In [None]:
hfa_eduHealth

In [None]:
hfa_eduHealth['sex']

In [None]:
hfa_eduHealth.info()

#### Categorizing continents and regions ####

In [None]:
# The 'name' column is renamed to 'country'; the continent/region cells below read it under that name.
hfa_eduHealth.rename(columns={'name': 'country'}, inplace=True)
# Display the renamed frame as the cell output.
hfa_eduHealth

In [None]:
import pandas as pd
import numpy as np
import python_functions_sp as pfsp
import warnings
warnings.filterwarnings("ignore")
import sqlalchemy
import psycopg2
import sql_functions_sp as sfsp

In [None]:
# Derive a 'continent' value for every row from its 'country' value.
# NOTE(review): assign_continent presumably returns 'Other' for names it does
# not recognise (a later cell filters on that value) — confirm in python_functions_sp.
hfa_eduHealth['continent'] = hfa_eduHealth['country'].apply(pfsp.assign_continent)

In [None]:
# Derive a 'continent_region' value for every row from its 'country' value.
# NOTE(review): assign_europe_region presumably returns 'Other' for names it does
# not recognise (a later cell filters on that value) — confirm in python_functions_sp.
hfa_eduHealth['continent_region'] = hfa_eduHealth['country'].apply(pfsp.assign_europe_region)

In [None]:
# Which country names ended up with continent 'Other'?
hfa_eduHealth.loc[hfa_eduHealth['continent'] == 'Other', 'country'].unique()

In [None]:
# Which country names ended up with continent_region 'Other'?
hfa_eduHealth.loc[hfa_eduHealth['continent_region'] == 'Other', 'country'].unique()

In [None]:
pfsp.check_values(hfa_eduHealth)

In [None]:
sfsp.push_to_cloud(hfa_eduHealth, 'hfa_eduHealth')

In [None]:
hfa_eduHealth['HFA_36']

In [None]:
# Read the table written by the push cell above back from Postgres; this
# replaces the in-memory `hfa_eduHealth` with whatever the database returns.
schema = 'capstone_health_education'
table = 'hfa_eduHealth'

# The table name is double-quoted so Postgres keeps its mixed-case spelling.
sql_query = f'SELECT * FROM {schema}."{table}";'
hfa_eduHealth= sfsp.get_dataframe(sql_query)

In [None]:
hfa_eduHealth.info()

In [None]:
# Singular column name for the year dimension.
hfa_eduHealth = hfa_eduHealth.rename(columns={'years': 'year'})

In [None]:
#hfa_eduHealth['year'] = hfa_eduHealth['year'].apply(pd.to_numeric, errors='coerce')

In [None]:
hfa_eduHealth.info()

In [None]:
sfsp.push_to_cloud(hfa_eduHealth, 'hfa_eduHealth')

In [None]:
# schema = 'capstone_health_education'
# table = 'hfa_eduHealth'

# sql_query = f'SELECT * FROM {schema}."{table}";'
# hfa_eduHealth= sfsp.get_dataframe(sql_query)

In [None]:
Gini = hfa_eduHealth['HFA_617']

In [None]:
# Values of `Gini` for the rows belonging to Turkmenistan.
# `Gini` is the single column hfa_eduHealth['HFA_617'] (a Series), so it has no
# 'country' key and `Gini['country']` raised KeyError. The row mask is built from
# the parent frame instead; both share the same index, so the mask lines up.
Gini_t = Gini[hfa_eduHealth['country'] == 'Turkmenistan']