## Add Clean Water Data to Reordered Dataset

Using the cleaned dataset from Suleiman's branch, we add the water data to the reordered dataset.

In [1]:
# Dependencies.
import pandas as pd
import numpy as np

In [2]:
# Load the cleaned water data with the country name as the row index,
# discarding the 'Unnamed: 0' column in the same chain.
water_df = (
    pd.read_csv('resources/Cleaned_Water_Data.csv', index_col='COUNTRY (DISPLAY)')
    .drop(columns='Unnamed: 0')
)
water_df.head()

Unnamed: 0_level_0,YEAR (DISPLAY),REGION (DISPLAY),COUNTRY (CODE),Display Value,Numeric
COUNTRY (DISPLAY),Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Afghanistan,2000,Eastern Mediterranean,AFG,21.0,21.39
Afghanistan,2000,Eastern Mediterranean,AFG,28.0,28.17142
Afghanistan,2000,Eastern Mediterranean,AFG,52.0,52.10572
Afghanistan,2001,Eastern Mediterranean,AFG,21.0,21.39
Afghanistan,2001,Eastern Mediterranean,AFG,28.0,28.19937


In [3]:
# Map the water dataset's country names onto the spellings used in the MAIN data.
# NOTE(review): the 'Serbia' rows are relabelled as 'Kosovo', so after this cell
# no row is indexed under 'Serbia' any more -- confirm this proxy is intended.
country_renames = {
    'Bolivia (Plurinational State of)': 'Bolivia',
    'North Macedonia': 'Macedonia',
    'Republic of Korea': 'South Korea',
    'Republic of Moldova': 'Moldova',
    'Russian Federation': 'Russia',
    'Serbia': 'Kosovo',
    'United Kingdom of Great Britain and Northern Ireland': 'United Kingdom',
    'United Republic of Tanzania': 'Tanzania',
    'United States of America': 'United States',
    'Venezuela (Bolivarian Republic of)': 'Venezuela',
    'Viet Nam': 'Vietnam',
}
water_df = water_df.rename(index=country_renames)

In [4]:
# Load the reordered table, using the (Country, Year) pair as the row index.
df = pd.read_csv(
    'MAIN2_countries_happ_temps_reorder.csv',
    index_col=['Country', 'Year'],
)
df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Life Ladder,Temperature
Country,Year,Unnamed: 2_level_1,Unnamed: 3_level_1
Afghanistan,2010,4.758,14.629
Afghanistan,2011,3.832,16.487
Afghanistan,2012,3.783,14.373
Afghanistan,2013,3.572,16.156
Afghanistan,2014,3.131,15.647


In [5]:
# Years and countries to look up in the water data.
# NOTE(review): the year list runs 2010-2019 and then 2021; 2020 is skipped --
# presumably to mirror the years present in the main dataset; confirm.
years = [*range(2010, 2020), 2021]
countries = (
    pd.read_csv('MAIN_countries_happiness_temps.csv', index_col='Country name')
    .index
    .tolist()
)

In [6]:
# Store the water values.

# Positions (within a country/year's readings) to try, in order of preference.
# NOTE(review): each country/year appears to carry up to three 'Numeric'
# readings and position 1 is preferred -- presumably the overall figure;
# confirm against the cleaning step that produced the CSV.
VALUE_PREFERENCE = (1, 2, 0)


def _is_usable(value):
    """Return True when ``value`` can be converted with ``int()``.

    This is the same probe the lookup has always used to reject missing
    readings: ``int()`` raises ValueError for NaN, OverflowError for
    infinities and TypeError for None. Only those exceptions are caught, so
    unrelated failures are no longer hidden.
    """
    try:
        int(value)
    except (TypeError, ValueError, OverflowError):
        return False
    return True


def _pick_water_value(vals, preference=VALUE_PREFERENCE):
    """Return the first usable entry of ``vals`` tried in ``preference`` order.

    Parameters:
        vals: list of readings for one country/year (may be empty).
        preference: positions in ``vals`` to try, most preferred first.

    Returns:
        The chosen reading, or None when no candidate position exists or
        holds a usable value.
    """
    for pos in preference:
        if pos < len(vals) and _is_usable(vals[pos]):
            return vals[pos]
    return None


# Index the readings once by (country, year), keeping the file's row order,
# instead of re-scanning the whole frame for every pair in the loop below.
numeric_by_country_year = {}
for name, yr, reading in zip(water_df.index, water_df['YEAR (DISPLAY)'], water_df['Numeric']):
    numeric_by_country_year.setdefault((name, yr), []).append(reading)

water_values = []

# Loop through each country and year and pull data.
for country in countries:
    for year in years:
        # Only the first three readings are considered.
        vals = numeric_by_country_year.get((country, year), [])[:3]
        val = _pick_water_value(vals)

        if val is None:
            # Nothing usable: record a gap, and report it unless the year is
            # 2021, for which gaps are not reported.
            water_values.append(np.nan)
            if year != 2021:
                print(f'COUNTRY ISSUE: {country}, {year}')
        else:
            # Save value.
            water_values.append(val)

water_values[:12]

COUNTRY ISSUE: Taiwan, 2010
COUNTRY ISSUE: Taiwan, 2011
COUNTRY ISSUE: Taiwan, 2012
COUNTRY ISSUE: Taiwan, 2013
COUNTRY ISSUE: Taiwan, 2014
COUNTRY ISSUE: Taiwan, 2015
COUNTRY ISSUE: Taiwan, 2016
COUNTRY ISSUE: Taiwan, 2017
COUNTRY ISSUE: Taiwan, 2018
COUNTRY ISSUE: Taiwan, 2019


[48.28708,
 50.82785,
 53.40351999999999,
 56.01404,
 58.65937,
 61.339780000000005,
 64.05488000000001,
 66.8087,
 69.60193000000001,
 72.43422,
 nan,
 91.37948]

In [7]:
# Attach the collected values as the 'Clean Water' column.
# The list is matched to the rows purely by position, so it must follow the
# same country/year order as df.
df = df.assign(**{'Clean Water': water_values})

In [8]:
# Preview the first five rows, now including the 'Clean Water' column.
df.head(5)

Unnamed: 0_level_0,Unnamed: 1_level_0,Life Ladder,Temperature,Clean Water
Country,Year,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Afghanistan,2010,4.758,14.629,48.28708
Afghanistan,2011,3.832,16.487,50.82785
Afghanistan,2012,3.783,14.373,53.40352
Afghanistan,2013,3.572,16.156,56.01404
Afghanistan,2014,3.131,15.647,58.65937


In [9]:
# Write the combined table to disk (the row index is written as well).
output_path = 'M_country_happ_temp_water.csv'
df.to_csv(output_path)