## Create the PM2.5 Database Table

In [1]:
# Dependencies.
import pandas as pd

In [2]:
# Load the PM2.5-per-cubic-metre CSV, discard the 'Unnamed: 0' and 'Code'
# columns, and shorten the measurement column's name to 'PM2.5'.
pm_df = (
    pd.read_csv('../resources/pm2.5_per_cubic_metre.csv')
    .drop(columns=['Unnamed: 0', 'Code'])
    .rename(columns={'pm_2.5_per_cubic_metre': 'PM2.5'})
)
pm_df.head()

Unnamed: 0,Country,Year,PM2.5
0,Afghanistan,2010,52.49585
1,Afghanistan,2011,57.09972
2,Afghanistan,2012,55.46611
3,Afghanistan,2013,59.62277
4,Afghanistan,2014,62.72192


In [3]:
# Index the table by country, then swap selected country labels for the
# spellings listed below (presumably those used by MAIN_table.csv — verify).
# NOTE(review): 'Serbia' -> 'Kosovo' relabels one country as another rather
# than correcting a spelling — confirm this is intended.
country_aliases = {
    'Chinese Taipei': 'Taiwan',
    'Serbia': 'Kosovo',
    'North Macedonia': 'Macedonia',
    'Slovak Republic': 'Slovakia',
    'Korea': 'South Korea',
    'Viet Nam': 'Vietnam',
}
pm_df = pm_df.set_index('Country').rename(index=country_aliases)
pm_df.head()

Unnamed: 0_level_0,Year,PM2.5
Country,Unnamed: 1_level_1,Unnamed: 2_level_1
Afghanistan,2010,52.49585
Afghanistan,2011,57.09972
Afghanistan,2012,55.46611
Afghanistan,2013,59.62277
Afghanistan,2014,62.72192


In [4]:
# Restrict the PM2.5 rows to the countries that appear in MAIN_table.csv
# (the original note says this should be 100 countries — not checked here).
main_index = pd.read_csv('MAIN_table.csv', index_col='Country').index
countries = main_index.unique().tolist()
in_main_table = pm_df.index.isin(countries)
pm_df_1 = pm_df.loc[in_main_table]
pm_df_1

Unnamed: 0_level_0,Year,PM2.5
Country,Unnamed: 1_level_1,Unnamed: 2_level_1
Afghanistan,2010,52.49585
Afghanistan,2011,57.09972
Afghanistan,2012,55.46611
Afghanistan,2013,59.62277
Afghanistan,2014,62.72192
...,...,...
Zimbabwe,2015,25.98430
Zimbabwe,2016,25.39554
Zimbabwe,2017,22.61333
Zimbabwe,2018,22.10827


In [5]:
# Move 'Country' back to a column, re-index on the (Country, Year) pair,
# and save the long-format table.
pm_df_1 = pm_df_1.reset_index().set_index(['Country', 'Year'])
pm_df_1.to_csv('pm25_table.csv')
pm_df_1.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,PM2.5
Country,Year,Unnamed: 2_level_1
Afghanistan,2010,52.49585
Afghanistan,2011,57.09972
Afghanistan,2012,55.46611
Afghanistan,2013,59.62277
Afghanistan,2014,62.72192


In [6]:
# Pivot the long table to wide form: one row per country, one column per year.
unstack_df = pm_df_1['PM2.5'].unstack()
# Clear the axis name left on the columns by unstacking the 'Year' level.
unstack_df.columns.name = None
unstack_df.to_csv('country_index/pm25_table.csv')
unstack_df.head()

Unnamed: 0_level_0,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019
Country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
Afghanistan,52.49585,57.09972,55.46611,59.62277,62.72192,61.41157,57.96634,54.03117,53.43221,52.99812
Albania,21.55726,23.31385,21.45644,19.93915,19.81741,19.42523,17.69079,18.8694,18.65998,18.45981
Argentina,14.05373,14.52667,13.68543,15.06927,14.1147,14.93674,14.45197,13.95186,13.97303,13.84671
Armenia,36.20056,40.79481,41.39294,39.19509,38.75478,38.11764,34.87167,35.55658,34.63884,34.24106
Australia,6.78718,6.71166,7.00126,6.96483,6.87749,6.84756,6.82232,6.67271,6.7411,6.7487
