In [1]:
import pandas as pd
import numpy as np
import python_functions_sp as pfsp
import warnings
warnings.filterwarnings("ignore")
import sqlalchemy
import psycopg2
import sql_functions_sp as sfsp

In [2]:
# Disabled alternative data source: read the three PISA tables from the database.
# The frames are restored from the IPython store in the following cell instead.
# NOTE(review): these lines bind PISA_m / PISA_r / PISA_sc, while later cells
# read pisa_m / pisa_r / pisa_sc -- align the names before re-enabling.
# schema = 'capstone_health_education'

# PISA_m = sfsp.get_dataframe(f'SELECT * FROM {schema}.pisa_m')
# PISA_r = sfsp.get_dataframe(f'SELECT * FROM {schema}.pisa_r')
# PISA_sc = sfsp.get_dataframe(f'SELECT * FROM {schema}.pisa_sc')

In [3]:
%store -r pisa_m
%store -r pisa_r
%store -r pisa_sc

## Splitting the countries into continents
- North America
- South America
- Europe (split into four regions for further analysis)
- Africa
- Asia
- Pacific
- Middle East

Creating copies of the restored tables with meaningful names:

In [6]:
# Work on independent copies so the restored source frames stay untouched.
PISA_m_continents, PISA_r_continents, PISA_sc_continents = (
    source.copy() for source in (pisa_m, pisa_r, pisa_sc)
)

In [7]:
# Rename 'jurisdiction' to 'country' in all three frames.
# The renamed frame is rebound to the same name instead of using inplace=True:
# rename() then returns a new frame and the cell stays chain-friendly and
# re-runnable (a missing 'jurisdiction' key is ignored by default).
country_rename = {'jurisdiction': 'country'}

PISA_m_continents = PISA_m_continents.rename(columns=country_rename)
PISA_r_continents = PISA_r_continents.rename(columns=country_rename)
PISA_sc_continents = PISA_sc_continents.rename(columns=country_rename)

Check that the rename worked:

In [8]:
# Preview the first five rows to confirm the 'country' column is present.
PISA_m_continents.head(n=5)

Unnamed: 0,year,country,average,standard_error
2,2022,Australia,487.08,1.78
3,2022,Austria,487.27,2.34
4,2022,Belgium,489.49,2.2
5,2022,Canada,496.95,1.56
6,2022,Chile,411.7,2.08


In [9]:
# Preview the first five rows to confirm the 'country' column is present.
PISA_r_continents.head(n=5)

Unnamed: 0,year,country,average,standard_error
2,2022,Australia,498.05,2.01
3,2022,Austria,480.41,2.67
4,2022,Belgium,478.85,2.52
5,2022,Canada,507.13,1.97
6,2022,Chile,447.98,2.63


In [10]:
# Preview the first five rows to confirm the 'country' column is present.
PISA_sc_continents.head(n=5)

Unnamed: 0,year,country,average,standard_error
2,2022,Australia,507.0,1.93
3,2022,Austria,491.27,2.65
4,2022,Belgium,490.58,2.48
5,2022,Canada,515.02,1.93
6,2022,Chile,443.54,2.47


---

Assigning a continent to each country in a newly created column:

In [11]:
# Add a 'continent' column to every frame by passing each country name
# through the shared helper pfsp.assign_continent.
for frame in (PISA_m_continents, PISA_r_continents, PISA_sc_continents):
    frame['continent'] = frame['country'].apply(pfsp.assign_continent)

Assigning one of the four European regions (North, East, South, West) to each European country in another newly created column; non-European countries keep their continent name:

In [12]:
# Add a 'continent_region' column to every frame by passing each country name
# through the shared helper pfsp.assign_europe_region.
for frame in (PISA_m_continents, PISA_r_continents, PISA_sc_continents):
    frame['continent_region'] = frame['country'].apply(pfsp.assign_europe_region)

Changing the order of the columns:

In [13]:
# Target column order, with the location columns first and the scores last.
columns = [
    'country',
    'continent',
    'continent_region',
    'year',
    'average',
    'standard_error',
]

# Select the columns in that order; a missing column raises a KeyError.
PISA_m_continents = PISA_m_continents.loc[:, columns]
PISA_r_continents = PISA_r_continents.loc[:, columns]
PISA_sc_continents = PISA_sc_continents.loc[:, columns]

Checking the result:

In [14]:
# Spot-check ten random rows. The fixed random_state makes the preview show
# the same rows on every Restart & Run All.
PISA_m_continents.sample(10, random_state=0)

Unnamed: 0,country,continent,continent_region,year,average,standard_error
47,Bosnia and Herzegovina,Europe,South Europe,2022,,
461,Macao (China),Pacific,Pacific,2009,525.28,0.92
185,Serbia,Europe,South Europe,2018,448.28,3.16
528,United Kingdom,Europe,North Europe,2006,495.44,2.14
209,France,Europe,West Europe,2015,492.92,2.1
366,Malta,Europe,South Europe,2012,,
37,Türkiye,Asia,Asia,2022,453.15,1.59
774,Singapore,Pacific,Pacific,2000,,
148,B-S-J-G (China),Pacific,Pacific,2018,,
35,Sweden,Europe,North Europe,2022,481.77,2.06


In [15]:
# Spot-check ten random rows. The fixed random_state makes the preview show
# the same rows on every Restart & Run All.
PISA_r_continents.sample(10, random_state=0)

Unnamed: 0,country,continent,continent_region,year,average,standard_error
201,Canada,North America,North America,2015,526.67,2.3
29,Norway,Europe,North Europe,2022,476.52,2.54
20,Italy,Europe,South Europe,2022,481.6,2.68
212,Hungary,Europe,East Europe,2015,469.52,2.66
268,Malta,Europe,South Europe,2015,446.67,1.78
99,International Average (OECD),Other,Other,2018,485.49,0.41
475,Qatar,Middle East,Middle East,2009,371.72,0.76
72,Malta,Europe,South Europe,2022,445.3,1.9
214,Ireland,Europe,North Europe,2015,520.81,2.47
695,Czech Republic,Europe,East Europe,2000,491.58,2.37


In [16]:
# Spot-check ten random rows. The fixed random_state makes the preview show
# the same rows on every Restart & Run All.
PISA_sc_continents.sample(10, random_state=0)

Unnamed: 0,country,continent,continent_region,year,average,standard_error
653,Kazakhstan,Asia,Asia,2003,,
203,Colombia,South America,South America,2015,415.73,2.36
16,Hungary,Europe,East Europe,2022,485.89,2.71
687,International Average (OECD),Other,Other,2000,,
97,Viet Nam,Pacific,Pacific,2022,472.38,3.59
217,Japan,Pacific,Pacific,2015,538.39,2.97
269,Moldova,Europe,East Europe,2015,428.0,1.97
309,Greece,Europe,South Europe,2012,466.72,3.12
666,Palestinian Authority,Asia,Asia,2003,,
168,Malaysia,Pacific,Pacific,2018,437.62,2.71


Check if some countries couldn't be assigned to a continent:

In [17]:
# Distinct country labels whose 'continent' value is 'Other'.
PISA_m_continents.loc[PISA_m_continents['continent'] == 'Other', 'country'].unique()

array(['Selected countries and jurisdictions',
       'International Average (OECD)'], dtype=object)

In [18]:
# Distinct country labels whose 'continent_region' value is 'Other'.
PISA_m_continents.loc[PISA_m_continents['continent_region'] == 'Other', 'country'].unique()

array(['Selected countries and jurisdictions',
       'International Average (OECD)'], dtype=object)

In [19]:
# Distinct country labels whose 'continent' value is 'Other'.
PISA_r_continents.loc[PISA_r_continents['continent'] == 'Other', 'country'].unique()

array(['Selected countries and jurisdictions',
       'International Average (OECD)'], dtype=object)

In [20]:
# Distinct country labels whose 'continent_region' value is 'Other'.
PISA_r_continents.loc[PISA_r_continents['continent_region'] == 'Other', 'country'].unique()

array(['Selected countries and jurisdictions',
       'International Average (OECD)'], dtype=object)

In [21]:
# Distinct country labels whose 'continent' value is 'Other'.
PISA_sc_continents.loc[PISA_sc_continents['continent'] == 'Other', 'country'].unique()

array(['Selected countries and jurisdictions',
       'International Average (OECD)'], dtype=object)

In [22]:
# Distinct country labels whose 'continent_region' value is 'Other'.
PISA_sc_continents.loc[PISA_sc_continents['continent_region'] == 'Other', 'country'].unique()

array(['Selected countries and jurisdictions',
       'International Average (OECD)'], dtype=object)

---

Check if the newly created columns match:

In [23]:
# Run the shared consistency check on each frame in turn; the helper prints
# its own verdict per frame.
for frame in (PISA_m_continents, PISA_r_continents, PISA_sc_continents):
    pfsp.check_values(frame)

All values match.
All values match.
All values match.


---

# Writing the tables into the database and the IPython store

In [24]:
# Database upload is currently disabled; the frames are persisted with %store
# in the following cell instead.
# NOTE(review): push_to_cloud presumably writes the frame to the table named by
# its second argument -- confirm in sql_functions_sp before re-enabling.
# sfsp.push_to_cloud(PISA_m_continents, 'pisa_m_continents')
# sfsp.push_to_cloud(PISA_r_continents, 'pisa_r_continents')
# sfsp.push_to_cloud(PISA_sc_continents, 'pisa_sc_continents')

In [28]:
pisa_m_continents = PISA_m_continents
pisa_r_continents = PISA_r_continents
pisa_sc_continents = PISA_sc_continents

%store pisa_m_continents
%store pisa_r_continents
%store pisa_sc_continents

Stored 'pisa_m_continents' (DataFrame)
Stored 'pisa_r_continents' (DataFrame)
Stored 'pisa_sc_continents' (DataFrame)


---
---

In [25]:
# Final spot-check of ten random rows. The random_state is fixed so the
# preview is reproducible; the seed differs from the earlier preview so
# other rows are shown.
PISA_m_continents.sample(10, random_state=1)

Unnamed: 0,country,continent,continent_region,year,average,standard_error
404,Finland,Europe,North Europe,2009,540.5,2.17
484,Ukraine,Europe,East Europe,2009,,
779,Ukraine (18 of 27 Regions),Europe,East Europe,2000,,
773,Serbia,Europe,South Europe,2000,,
31,Portugal,Europe,South Europe,2022,471.91,2.35
440,Brazil,South America,South America,2009,385.81,2.39
729,Argentina,South America,South America,2000,,
631,Argentina,South America,South America,2003,,
561,Malaysia (2015),Pacific,Pacific,2006,,
654,Kazakhstan (2015),Asia,Asia,2003,,


In [26]:
# Final spot-check of ten random rows. The random_state is fixed so the
# preview is reproducible; the seed differs from the earlier preview so
# other rows are shown.
PISA_r_continents.sample(10, random_state=1)

Unnamed: 0,country,continent,continent_region,year,average,standard_error
604,Hungary,Europe,East Europe,2003,481.87,2.47
619,Portugal,Europe,South Europe,2003,477.57,3.73
23,Latvia,Europe,North Europe,2022,474.57,2.46
564,Mongolia,Asia,Asia,2006,,
694,Costa Rica,South America,South America,2000,,
500,Denmark,Europe,North Europe,2006,494.48,3.18
168,Malaysia,Pacific,Pacific,2018,414.98,2.87
774,Singapore,Pacific,Pacific,2000,,
165,Kosovo,Europe,South Europe,2018,353.07,1.14
260,Jordan,Asia,Asia,2015,408.1,2.93


In [27]:
# Final spot-check of ten random rows. The random_state is fixed so the
# preview is reproducible; the seed differs from the earlier preview so
# other rows are shown.
PISA_sc_continents.sample(10, random_state=1)

Unnamed: 0,country,continent,continent_region,year,average,standard_error
77,North Macedonia,Europe,South Europe,2022,379.88,0.93
769,Qatar,Middle East,Middle East,2000,,
756,Malaysia,Pacific,Pacific,2000,,
595,Colombia,South America,South America,2003,,
316,Korea,Pacific,Pacific,2012,537.79,3.66
328,Spain,Europe,South Europe,2012,496.45,1.83
306,Finland,Europe,North Europe,2012,545.44,2.2
333,United States,North America,North America,2012,497.41,3.78
144,Belarus,Europe,East Europe,2018,471.26,2.45
586,Uzbekistan,Asia,Asia,2006,,
