In [1]:
import pandas as pd
import numpy as np
import python_functions_sp as pfsp
import warnings
warnings.filterwarnings("ignore")
import sqlalchemy
import psycopg2
import sql_functions_sp as sfsp

In [2]:
# Database schema that holds the project tables.
schema = 'capstone_health_education'

# Read the whole filtered HEPA table through the project's SQL helper.
hepa_continents = sfsp.get_dataframe(
    f'SELECT * FROM {schema}.hepa_filtered_final'
)

In [3]:
# Show the freshly loaded table; the rendered output elides the middle rows.
hepa_continents

Unnamed: 0,measure code,country,year,value
0,Recommendations on physical activity,AUT,2015,1
1,Recommendations on physical activity,AUT,2018,1
2,Recommendations on physical activity,AUT,2021,1
3,Recommendations on physical activity,BEL,2015,1
4,Recommendations on physical activity,BEL,2018,1
...,...,...,...,...
1881,Awareness campaign on physical activity,SVN,2018,1
1882,Awareness campaign on physical activity,SVN,2021,1
1883,Awareness campaign on physical activity,SWE,2015,0
1884,Awareness campaign on physical activity,SWE,2018,1


Converting the country code to the country name:

In [4]:
# The column holds three-letter codes (e.g. 'AUT'), so label it as a code column.
hepa_continents.rename({'country': 'country_code'}, axis='columns', inplace=True)
hepa_continents

Unnamed: 0,measure code,country_code,year,value
0,Recommendations on physical activity,AUT,2015,1
1,Recommendations on physical activity,AUT,2018,1
2,Recommendations on physical activity,AUT,2021,1
3,Recommendations on physical activity,BEL,2015,1
4,Recommendations on physical activity,BEL,2018,1
...,...,...,...,...
1881,Awareness campaign on physical activity,SVN,2018,1
1882,Awareness campaign on physical activity,SVN,2021,1
1883,Awareness campaign on physical activity,SWE,2015,0
1884,Awareness campaign on physical activity,SWE,2018,1


Add a column with the country name using the `convert` method of `country_converter`:

In [5]:
import country_converter as coco

# The module-level coco.convert() builds a new CountryConverter (re-loading its data
# file) on every call, so calling it once per row is needlessly slow. Create a single
# converter and translate each distinct code only once, then map the result back.
converter = coco.CountryConverter()
code_to_name = {
    code: converter.convert(names=code, to='name_short', not_found=None)
    for code in hepa_continents['country_code'].unique()
}

# not_found=None keeps the input value for codes the converter does not recognise,
# so every code present in the column has an entry in the lookup.
hepa_continents['country_name'] = hepa_continents['country_code'].map(code_to_name)

Checking if it worked:

In [6]:
# Distinct codes and the names derived from them, each in order of first appearance,
# so the two printed arrays can be compared position by position.
unique_country_codes = hepa_continents['country_code'].unique()
unique_country_names = hepa_continents['country_name'].unique()

# sep="\n" places each array on the line below its heading.
print("unique country-codes:", unique_country_codes, sep="\n")
print("\nunique country-names:", unique_country_names, sep="\n")


unique country-codes:
['AUT' 'BEL' 'BGR' 'CYP' 'CZE' 'DEU' 'DNK' 'ESP' 'EST' 'FIN' 'FRA' 'GBR'
 'GRC' 'HRV' 'HUN' 'IRL' 'ITA' 'LTU' 'LUX' 'LVA' 'MLT' 'NLD' 'POL' 'PRT'
 'ROU' 'SVK' 'SVN' 'SWE']

unique country-names:
['Austria' 'Belgium' 'Bulgaria' 'Cyprus' 'Czechia' 'Germany' 'Denmark'
 'Spain' 'Estonia' 'Finland' 'France' 'United Kingdom' 'Greece' 'Croatia'
 'Hungary' 'Ireland' 'Italy' 'Lithuania' 'Luxembourg' 'Latvia' 'Malta'
 'Netherlands' 'Poland' 'Portugal' 'Romania' 'Slovakia' 'Slovenia'
 'Sweden']


Delete unneeded column and rename:

In [7]:
# The code column is no longer needed once the readable name column exists.
hepa_continents.drop(columns='country_code', inplace=True)

In [8]:
# Give the name column the plain 'country' label.
hepa_continents.rename({'country_name': 'country'}, axis='columns', inplace=True)

In [9]:
# Spot-check ten random rows; no random_state is set, so the rows shown differ per run.
hepa_continents.sample(10)

Unnamed: 0,measure code,year,value,country
1664,Community interventions for older adults,2015,1,Estonia
1469,Infrastructures for leisure-time physical acti...,2021,0,Slovakia
483,Sports for All,2015,1,Slovakia
1387,Level of cycling and walking,2021,0,Slovakia
848,Counselling on physical activity,2018,1,Finland
520,Sports Club for Health,2018,1,Finland
698,Special target groups,2021,1,Hungary
614,Access for socially disadvantaged groups,2015,1,Hungary
553,Sports Club for Health,2015,0,Netherlands
1081,Physical activity promotion in schools,2015,0,Germany


## Splitting the countries into the continents
- North America
- South America
- Europe (split into 4 regions for further analysis)
- Africa
- Asia
- Pacific
- Middle East

Assigning the continent to each country in a newly created column:

In [10]:
# Look up the continent for every country name with the project helper.
hepa_continents['continent'] = hepa_continents['country'].map(pfsp.assign_continent)

Assigning one of the four European regions to each country in another newly created column:

In [11]:
# Look up the European sub-region for every country name with the project helper.
hepa_continents['continent_region'] = hepa_continents['country'].map(pfsp.assign_europe_region)

In [12]:
# List the current column order before rearranging it.
hepa_continents.columns

Index(['measure code', 'year', 'value', 'country', 'continent',
       'continent_region'],
      dtype='object')

Changing the order of the columns:

In [13]:
# Target layout: measure first, then the geographic columns, then year and value.
columns = [
    'measure code',
    'country',
    'continent',
    'continent_region',
    'year',
    'value',
]

# Select every row with the columns in the new order.
hepa_continents = hepa_continents.loc[:, columns]

Checking the result:

In [14]:
# Spot-check ten random rows of the reordered frame; unseeded, so the rows vary per run.
hepa_continents.sample(10)

Unnamed: 0,measure code,country,continent,continent_region,year,value
1878,Awareness campaign on physical activity,Slovakia,Europe,East Europe,2018,1
1340,Level of cycling and walking,Finland,Europe,North Europe,2018,1
1357,Level of cycling and walking,Ireland,Europe,North Europe,2021,1
338,Funding for HEPA promotion,Bulgaria,Europe,East Europe,2021,1
829,Counselling on physical activity,Cyprus,Europe,South Europe,2015,0
1091,Physical activity promotion in schools,Estonia,Europe,North Europe,2018,1
1677,Community interventions for older adults,Croatia,Europe,South Europe,2015,0
235,Levels of physical activity in children and ad...,Portugal,Europe,South Europe,2021,1
536,Sports Club for Health,Ireland,Europe,North Europe,2018,1
275,Coordination mechanism on HEPA promotion,Finland,Europe,North Europe,2015,1


---

Check if some countries couldn't be assigned to a continent:

In [15]:
# Countries whose continent is 'Other'; an empty array means none were left unassigned.
hepa_continents.loc[hepa_continents['continent'] == 'Other', 'country'].unique()

array([], dtype=object)

In [16]:
# Countries whose region is 'Other'; an empty array means none were left unassigned.
hepa_continents.loc[hepa_continents['continent_region'] == 'Other', 'country'].unique()

array([], dtype=object)

---

Check if the newly created columns match:

In [17]:
# Project helper that validates the frame and reports the outcome as text.
# NOTE(review): presumably it cross-checks 'continent' against 'continent_region';
# the exact rule lives in python_functions_sp — confirm there.
pfsp.check_values(hepa_continents)

All values match.


---

# Writing the table into the database

In [18]:
# Uploading is opt-in so that a plain "Run All" never rewrites the database table.
# Set PUSH_TO_CLOUD = True and re-run this cell to write `hepa_continents` to the database.
PUSH_TO_CLOUD = False

if PUSH_TO_CLOUD:
    sfsp.push_to_cloud(hepa_continents, 'hepa_continents')

In [20]:
hepa_continents = hepa_continents

%store hepa_continents

Stored 'hepa_continents' (DataFrame)
