# This notebook builds a table of indicator units and definitions (Better Life Index and World Happiness data) and exports it to CSV

In [1]:
import pandas as pd

### Extract units for life indicators from Better Life Index (OECD) data

In [87]:
# Read the Better Life Index (OECD) CSV and preview its first three rows.
bl_df = pd.read_csv(filepath_or_buffer='betterlifeindex_final.csv')
bl_df.head(n=3)

Unnamed: 0.1,Unnamed: 0,Country Code,Country,Indicator Code,Indicator,Measure Code,Measure,Inequality Code,Inequality,Unit Code,Unit,PowerCode Code,PowerCode,Value,CountryName
0,0,AUS,Australia,JE_LMIS,Labour market insecurity,L,Value,TOT,Total,PC,Percentage,0,Units,5.4,Australia
1,1,AUT,Austria,JE_LMIS,Labour market insecurity,L,Value,TOT,Total,PC,Percentage,0,Units,3.5,Austria
2,2,BEL,Belgium,JE_LMIS,Labour market insecurity,L,Value,TOT,Total,PC,Percentage,0,Units,3.7,Belgium


In [88]:
# Build the indicator -> unit lookup: keep each distinct (Indicator, Unit)
# pair once and renumber the rows from 0. reset_index(drop=True) discards
# the old row labels directly, so no temporary 'index' column is created
# and no in-place drop is needed afterwards.
units_df = (
    bl_df[['Indicator', 'Unit']]
    .drop_duplicates()
    .reset_index(drop=True)
)
units_df

Unnamed: 0,Indicator,Unit
0,Labour market insecurity,Percentage
1,Stakeholder engagement for developing regulations,Average score
2,Dwellings without basic facilities,Percentage
3,Housing expenditure,Percentage
4,Feeling safe walking alone at night,Percentage
5,Rooms per person,Ratio
6,Household net adjusted disposable income,US Dollar
7,Household net wealth,US Dollar
8,Employment rate,Percentage
9,Long-term unemployment rate,Percentage


In [89]:
# Map each indicator name to a one-sentence definition.
# The keys are looked up by the values of units_df['Indicator'] when the
# 'Definition' column is built, so they must match those names exactly.
# 'Suicide rate' and 'SuicideRate' are both present with the same definition.
life_ind_dict = {'Labour market insecurity':'The % of earnings loss of previous earnings.',
                 'Stakeholder engagement for developing regulations':'An average composite score that measures the quality of stakeholder engagements in law adoptions.',
                 'Dwellings without basic facilities':'The % of people living in dwelling without indoor flushing toilet for the sole use of their households.',
                 'Housing expenditure':'The % of gross adjusted household income spent on house maintenance and furnishings.',
                 'Feeling safe walking alone at night':'The % of people who feel safe walking alone at night in the area they live in.',
                 'Rooms per person':'The number of bedrooms divided by the number of persons living in the dwelling.',
                 'Household net adjusted disposable income':'The maximum amount of income that can be spent without reducing assets or increase its liabilities.',
                 'Household net wealth':'The total wealth including financial/non-financial assets and net of liabilities.',
                 'Employment rate':'The % of employed persons aged 15 and 64 over the population of the same age.',
                 'Long-term unemployment rate':'The % of people who have been unemployed for at least one year in the labor force.',
                 'Personal earnings':'The average annual wages per full-time employee in the economy.',
                 'Quality of support network':'The % of people surveyed who believe they have the support of relatives/friends when in trouble.',
                 # typo fix: was "holdering"
                 'Educational attainment':'The % of adults aged 25 to 64 holding at least an upper secondary degree.',
                 'Student skills':'The average score of students in reading, math, and science assessed by the OECD PISA program.',
                 'Years in education':'The average number of years of education between 5 - 39 years old.',
                 'Air pollution':'The population weighted average of annual concentrations of particulate matters less than PM2.5 in the air.',
                 'Water quality':'The % of people surveyed who are satisfied with the water quality they receive in the household.',
                 'Voter turnout':'The % of people who hold valid votes during election.',
                 'Life expectancy':'The average number of years people could expect to live.',
                 'Self-reported health':'The % of people at least 15 years old who responded to have good health.',
                 'Life satisfaction':"The average score of the people's evaluation of life as a whole on a scale of 0-10.",
                 'Homicide rate':'The rate of deaths due to assault per 100,000 population.',
                 'Employees working very long hours':'The % of employees whose usual work hours per week is >50 hours.',
                 'Time devoted to leisure and personal care':'The number of hours spent per day on leisure and personal care.',
                 'death_rate_perc':'The rate of all deaths per 100,000 population.',
                 'Suicide rate':'The rate of suicide per 100,000 population.',
                 'SuicideRate':'The rate of suicide per 100,000 population.'
                 }

# Spot-check one entry.
life_ind_dict['Labour market insecurity']

'The % of earnings loss of previous earnings.'

In [90]:
# add 'death_rate_perc', 'Suicide rate' and 'SuicideRate' to units table
# (both spellings of the suicide-rate indicator are registered)
death_rate_dict = {'Indicator':'death_rate_perc',
                   'Unit':'Ratio'}

suicide_rate_dict = {'Indicator':'Suicide rate',
                     'Unit':'Ratio'}

suicideRate_dict = {'Indicator':'SuicideRate',
                     'Unit':'Ratio'}

# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0.
# pd.concat adds the three rows in one step, in the same order, and
# ignore_index=True renumbers the result from 0 as append did.
units_df = pd.concat(
    [units_df,
     pd.DataFrame([death_rate_dict, suicide_rate_dict, suicideRate_dict])],
    ignore_index=True,
)
units_df

Unnamed: 0,Indicator,Unit
0,Labour market insecurity,Percentage
1,Stakeholder engagement for developing regulations,Average score
2,Dwellings without basic facilities,Percentage
3,Housing expenditure,Percentage
4,Feeling safe walking alone at night,Percentage
5,Rooms per person,Ratio
6,Household net adjusted disposable income,US Dollar
7,Household net wealth,US Dollar
8,Employment rate,Percentage
9,Long-term unemployment rate,Percentage


### Extract units from the World Happiness data

In [91]:
# Read the World Happiness panel CSV and preview its first three rows.
wh_df = pd.read_csv(filepath_or_buffer='HappinessDataPanelWHR2021C2.csv')
wh_df.head(n=3)

Unnamed: 0,Country name,year,Life Ladder,Log GDP per capita,Social support,Healthy life expectancy at birth,Freedom to make life choices,Generosity,Perceptions of corruption,Positive affect,Negative affect
0,Afghanistan,2008,3.724,7.37,0.451,50.8,0.718,0.168,0.882,0.518,0.258
1,Afghanistan,2009,4.402,7.54,0.552,51.2,0.679,0.19,0.85,0.584,0.237
2,Afghanistan,2010,4.758,7.647,0.539,51.6,0.6,0.121,0.707,0.618,0.275


In [92]:
# List the column labels of the World Happiness frame.
wh_df.columns

Index(['Country name', 'year', 'Life Ladder', 'Log GDP per capita',
       'Social support', 'Healthy life expectancy at birth',
       'Freedom to make life choices', 'Generosity',
       'Perceptions of corruption', 'Positive affect', 'Negative affect'],
      dtype='object')

In [93]:
# add world happiness indicators to units df

wh1_dict = {'Indicator':'Life Ladder', 'Unit':'Unit Index'}
wh2_dict = {'Indicator':'Log GDP per capita', 'Unit':'Log Unit'}
wh3_dict = {'Indicator':'Social support', 'Unit':'Percentage'}
wh4_dict = {'Indicator':'Healthy life expectancy at birth', 'Unit':'Years'}
wh5_dict = {'Indicator':'Freedom to make life choices', 'Unit':'Percentage'}
wh6_dict = {'Indicator':'Generosity', 'Unit':'Unit Index'}
wh7_dict = {'Indicator':'Perceptions of corruption', 'Unit':'Unit Index'}
wh8_dict = {'Indicator':'Positive affect', 'Unit':'Frequency'}
wh9_dict = {'Indicator':'Negative affect', 'Unit':'Frequency'}
# NOTE(review): 'Population' is not among the wh_df columns listed by
# wh_df.columns; presumably it comes from another dataset - confirm.
wh10_dict = {'Indicator':'Population', 'Unit':'Integer Unit'}

wh_dict_list = [wh1_dict,wh2_dict,wh3_dict,wh4_dict,wh5_dict,wh6_dict,wh7_dict,wh8_dict,wh9_dict,wh10_dict]

# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0, and
# appending one row per loop iteration re-copies the frame every time.
# Build the new rows as a single frame and concatenate once; row order
# and the renumbered 0..n-1 index match the old loop.
units_df = pd.concat([units_df, pd.DataFrame(wh_dict_list)], ignore_index=True)

In [94]:
# Register the definitions of the World Happiness indicators (and
# 'Population') in the shared lookup with a single bulk update.
life_ind_dict.update({
    'Life Ladder': 'This is the same as happiness index.',
    'Log GDP per capita': 'The log of GDP per capita.',
    'Social support': 'The % of people who can get support from relatives/friends.',
    'Healthy life expectancy at birth': 'The average number of years people could expect to live.',
    'Freedom to make life choices': 'The % of people who responded they are free to make their own life choices.',
    'Generosity': 'The residual of regressing the national average of people who have donated money to a charity in the past month.',
    'Perceptions of corruption': 'The average of binary responses to the question of whether there is corruption in the government.',
    'Positive affect': 'The average frequency of happiness, laughter, and enjoyment on the previous day.',
    'Negative affect': 'The average frequency of worry, sadness, and anger on the previous day.',
    'Population': 'The estimated number of people in a nation.',
})

life_ind_dict

{'Labour market insecurity': 'The % of earnings loss of previous earnings.',
 'Stakeholder engagement for developing regulations': 'An average composite score that measures the quality of stakeholder engagements in law adoptions.',
 'Dwellings without basic facilities': 'The % of people living in dwelling without indoor flushing toilet for the sole use of their households.',
 'Housing expenditure': 'The % of gross adjusted household income spent on house maintenance and furnishings.',
 'Feeling safe walking alone at night': 'The % of people who feel safe walking alone at night in the area they live in.',
 'Rooms per person': 'The number of bedrooms divided by the number of persons living in the dwelling.',
 'Household net adjusted disposable income': 'The maximum amount of income that can be spent without reducing assets or increase its liabilities.',
 'Household net wealth': 'The total wealth including financial/non-financial assets and net of liabilities.',
 'Employment rate': 'The %

### Add the definitions to the units dataframe

In [95]:
# Attach a 'Definition' column by looking up every indicator name in
# life_ind_dict; an indicator without an entry raises KeyError.
units_df['Definition'] = list(map(life_ind_dict.__getitem__, units_df['Indicator']))

In [96]:
# Display the units table, which now carries the 'Definition' column.
units_df

Unnamed: 0,Indicator,Unit,Definition
0,Labour market insecurity,Percentage,The % of earnings loss of previous earnings.
1,Stakeholder engagement for developing regulations,Average score,An average composite score that measures the q...
2,Dwellings without basic facilities,Percentage,The % of people living in dwelling without ind...
3,Housing expenditure,Percentage,The % of gross adjusted household income spent...
4,Feeling safe walking alone at night,Percentage,The % of people who feel safe walking alone at...
5,Rooms per person,Ratio,The number of bedrooms divided by the number o...
6,Household net adjusted disposable income,US Dollar,The maximum amount of income that can be spent...
7,Household net wealth,US Dollar,The total wealth including financial/non-finan...
8,Employment rate,Percentage,The % of employed persons aged 15 and 64 over ...
9,Long-term unemployment rate,Percentage,The % of people who have been unemployed for a...


In [97]:
# Write the units table to CSV, leaving out the row index.
units_df.to_csv(path_or_buf='cleaned_data/indicator_units.csv', index=False)