# Clean the source data and export it to a single .csv for use in the R Shiny app

In [4]:
import pandas as pd 
import numpy as np
import matplotlib.pyplot as plt
import plotly.express as px
import statsmodels.api as sm
import statsmodels.formula.api as smf 

In [5]:
# Load every raw input table in one pass; files are read in the order listed.
happiness, fish, sugar, continents, disorders = (
    pd.read_csv(csv_path)
    for csv_path in (
        '../data/hapiscore_whr.csv',               # happiness score
        '../data/fisfod_cons_pc.csv',              # fish consumption per capita
        '../data/sugar_per_person_g_per_day.csv',  # g per day
        '../data/continents.csv',
        '../data/depressive_anxiety_IHME.csv',     # depressive and anxiety disorders
    )
)

In [6]:
# Lookup used to harmonise country names across the source datasets.
# Keys are the spellings found in the raw files; values are the single
# canonical name used everywhere downstream. No value is itself a key, so
# applying the mapping more than once gives the same result.
#
# Distinct territories must never share a canonical name: doing so creates
# duplicate (country, year) keys and multiplies rows when the tables are joined.
# For that reason American Samoa is intentionally NOT mapped to 'Samoa' (they
# are different countries), and the French and Dutch parts of Saint Martin /
# Sint Maarten keep separate names.
name_changes = {
    # --- East Asia ---
    'Hong Kong SAR, China': 'Hong Kong',
    'Macao SAR, China': 'Macao',
    'Korea, Rep.': 'South Korea',
    'Korea, South': 'South Korea',
    'Republic of Korea': 'South Korea',
    'Korea, Dem. Rep.': 'North Korea',
    "Korea, Dem. People's Rep.": 'North Korea',
    "Democratic People's Republic of Korea": 'North Korea',
    'Korea, North': 'North Korea',

    # --- Europe / Central Asia ---
    'Czechia': 'Czech Republic',
    'Slovak Republic': 'Slovakia',
    'Kyrgyz Republic': 'Kyrgyzstan',
    'Moldova, Republic of': 'Moldova',
    'Republic of Moldova': 'Moldova',
    'UK': 'United Kingdom',
    'Russian Federation': 'Russia',
    'Turkey': 'Turkiye',

    # --- Americas / Caribbean ---
    'USA': 'United States',
    'United States of America': 'United States',
    'Bahamas, The': 'Bahamas',
    'The Bahamas': 'Bahamas',
    'Turks and Caicos Islands': 'Turks and Caicos',
    'Venezuela, RB': 'Venezuela',
    'Venezuela (Bolivarian Republic of)': 'Venezuela',
    'Bolivia (Plurinational State of)': 'Bolivia',
    'Curacao': 'Curaçao',  # non-ASCII on purpose; keep this file saved as UTF-8
    'Sint Maarten (Dutch part)': 'Sint Maarten',
    'St. Martin (French part)': 'Saint Martin',  # separate territory from Sint Maarten

    # --- Africa ---
    'Tanzania, United Republic of': 'Tanzania',
    'United Republic of Tanzania': 'Tanzania',
    'Congo, Dem. Rep.': 'Democratic Republic of the Congo',
    'Congo (Democratic Republic of the)': 'Democratic Republic of the Congo',
    'Dem. Rep. Congo': 'Democratic Republic of the Congo',
    'Congo, Rep.': 'Congo',
    "Cote d'Ivoire": 'Ivory Coast',
    'Gambia, The': 'Gambia',
    'The Gambia': 'Gambia',
    'Swaziland': 'Eswatini',
    'Egypt, Arab Rep.': 'Egypt',

    # --- Middle East ---
    'Iran, Islamic Rep.': 'Iran',
    'Iran (Islamic Republic of)': 'Iran',
    'Syrian Arab Republic': 'Syria',
    'Yemen, Rep.': 'Yemen',

    # --- South-East Asia / Pacific ---
    'Lao PDR': 'Laos',
    "Lao People's Democratic Republic": 'Laos',
    'Brunei Darussalam': 'Brunei',
    'Viet Nam': 'Vietnam',
    'Burma (Myanmar)': 'Myanmar',
    'Micronesia, Fed. Sts.': 'Micronesia',
}

In [7]:
def _wide_to_long(wide, value_name, renames):
    """Melt a wide per-country table into long format.

    Parameters
    ----------
    wide : pandas.DataFrame
        Table with a ``country`` column; every other column is melted into a
        (``year``, value) pair. The other column labels are assumed to be
        year numbers that ``pd.to_numeric`` can parse -- TODO confirm for any
        newly added dataset.
    value_name : str
        Name given to the melted value column.
    renames : dict
        Mapping of raw country spellings to canonical names; applied to the
        ``country`` column only.

    Returns
    -------
    pandas.DataFrame
        Columns ``country``, ``year`` (numeric) and ``value_name``.
    """
    return (
        wide
        .melt(id_vars=['country'], var_name='year', value_name=value_name)
        .assign(
            year=lambda df: pd.to_numeric(df['year']),
            country=lambda df: df['country'].replace(renames),
        )
    )


# Reshape the three wide tables from one-column-per-year to one-row-per-(country, year).
happiness_long = _wide_to_long(happiness, 'happiness_score', name_changes)
fish_long = _wide_to_long(fish, 'pcp_fish_consumption', name_changes)
sugar_long = _wide_to_long(sugar, 'sugar_per_cap', name_changes)

# The disorders table is already long: keep the needed columns, align their
# names with the other tables, and harmonise the country names. The rename
# mapping is applied to `country` only -- a frame-wide replace would also
# rewrite any matching value in the `disorder` column.
disorders_long = (
    disorders[['location', 'cause', 'year', 'val']]
    .rename(columns={'location': 'country', 'cause': 'disorder', 'val': 'pct_new_per_pop'})
    .assign(
        year=lambda df: pd.to_numeric(df['year']),
        country=lambda df: df['country'].replace(name_changes),
    )
)


In [8]:
# Inner-join the long tables on (country, year). After each join, any row
# that still contains a missing value is dropped.
happy_fish = (
    happiness_long
    .merge(fish_long, how='inner', on=['country', 'year'])
    .dropna()
)
happy_sugar_fish = (
    happy_fish
    .merge(sugar_long, how='inner', on=['country', 'year'])
    .dropna()
)

