## Fill in Missing Countries' Greenhouse Gas Data

Some countries are present in the greenhouse gas dataset but under different names than in the main dataset. This notebook renames them to match the main dataset so that their missing greenhouse gas data is filled in.

In [1]:
# Dependencies.
import pandas as pd

In [2]:
# Load the cleaned greenhouse gas data, indexed by its 'Country Name' column.
gg_df = pd.read_csv(
    'resources/greenhouse_gasses_clean.csv',
    index_col='Country Name',
)
gg_df

Unnamed: 0_level_0,Date,Value
Country Name,Unnamed: 1_level_1,Unnamed: 2_level_1
Afghanistan,2010,44910.0
Afghanistan,2011,58650.0
Afghanistan,2012,66750.0
Afghanistan,2013,74800.0
Afghanistan,2014,84620.0
...,...,...
Zimbabwe,2016,29120.0
Zimbabwe,2017,28800.0
Zimbabwe,2018,31380.0
Zimbabwe,2019,


In [3]:
# Distinct country names in the greenhouse gas data, in order of first appearance.
gg_countries = gg_df.index.drop_duplicates().tolist()

In [4]:
# Load the main dataset and collect its distinct country names,
# in order of first appearance.
df = pd.read_csv('MAIN_country_happ_temp_water.csv')
country_column = df['Country']
countries = country_column.drop_duplicates().tolist()

In [5]:
# Countries from the main dataset whose name does not occur in the
# greenhouse gas data; order follows the main dataset.
missing_countries = [name for name in countries if name not in gg_countries]
missing_countries

['Egypt',
 'Kyrgyzstan',
 'Macedonia',
 'Russia',
 'Slovakia',
 'South Korea',
 'Taiwan',
 'Venezuela',
 'Yemen']

In [6]:
# Map the country names used in the greenhouse gas data (keys) to the names
# used in the main dataset (values) so both sides share the same merge key.
# NOTE(review): 'Taiwan' is also reported as missing but has no counterpart in
# this mapping, so it stays unmatched — confirm it is absent from the source data.
gg_name_fixes = {
    'Egypt, Arab Rep.': 'Egypt',
    'Kyrgyz Republic': 'Kyrgyzstan',
    'North Macedonia': 'Macedonia',
    'Russian Federation': 'Russia',
    'Slovak Republic': 'Slovakia',
    'Korea, Rep.': 'South Korea',
    'Venezuela, RB': 'Venezuela',
    'Yemen, Rep.': 'Yemen',
}

# Rebind rather than mutate with inplace=True: the frame object displayed by
# the load cell is left untouched, and re-running this cell is still safe
# because labels that are no longer present are simply ignored by rename().
gg_df = gg_df.rename(index=gg_name_fixes)

In [7]:
# Merge the datasets - code from Najeeb.
# Left join keeps every row of the main dataset; rows are matched on
# (Country, Year) against (greenhouse gas index label, Date).
merged_df = df.merge(
    gg_df,
    how='left',
    left_on=['Country', 'Year'],
    right_on=[gg_df.index, 'Date'],
)

# 'Date' duplicates 'Year' after the join, so it is dropped.
clean_df = merged_df.drop(columns='Date')

# Give the emissions column a descriptive name.
emissions_label = {"Value": "Greenhouse Gas Emissions"}
final_df = clean_df.rename(columns=emissions_label)
final_df

Unnamed: 0,Country,Year,Life Ladder,Temperature,Clean Water,Greenhouse Gas Emissions
0,Afghanistan,2010,4.758,14.629,48.28708,44910.0
1,Afghanistan,2011,3.832,16.487,50.82785,58650.0
2,Afghanistan,2012,3.783,14.373,53.40352,66750.0
3,Afghanistan,2013,3.572,16.156,56.01404,74800.0
4,Afghanistan,2014,3.131,15.647,58.65937,84620.0
...,...,...,...,...,...,...
1095,Zimbabwe,2016,3.735,22.488,64.46896,29120.0
1096,Zimbabwe,2017,3.638,22.735,63.99662,28800.0
1097,Zimbabwe,2018,3.616,22.550,63.53877,31380.0
1098,Zimbabwe,2019,2.694,22.338,63.09496,


In [8]:
# Write the merged dataset to a CSV file.
# NOTE(review): to_csv() is called with its defaults, which also write the row
# index as an unnamed first column — confirm downstream readers expect that.
output_path = 'MAIN_happ_temp_water_gg.csv'
final_df.to_csv(output_path)