In [5]:
import pandas as pd
import re

# load the survey data; the cells below read its Q6, Age_groups, MRK_Age,
# happiness, anxiety and lifeSat columns
df = pd.read_csv("../data/data.csv")

## Useful resource: A 10,000-mile Happiness Walk Identifies Wellbeing Domains of the USA
https://gnhusa.org/wp-content/uploads/2022/03/Happiness-Walk-Report.pdf

# Q6: "what makes you happy?"

In [6]:
def evalFreeResponse(freeResponseAnswer, arrayOfWords):
    """
    Flag whether a free-response answer mentions any of the given words.

    Matching is done against the lower-cased answer, and a word only counts
    when it is delimited by the start/end of the answer or by a non-word
    character, so 'son' does not match inside 'person'. Entries may be
    multi-word phrases such as 'my children'.

    Parameters
    ----------
    freeResponseAnswer : str
        The respondent's answer. Anything that is not a string (e.g. the NaN
        pandas produces for an empty CSV cell, or None) is treated as
        containing none of the words.
    arrayOfWords : iterable of str
        Lower-case words or phrases to look for. They are matched literally;
        regex metacharacters inside them carry no special meaning.

    Returns
    -------
    int
        1 if at least one word is found, otherwise 0.
    """
    # missing answers arrive as float NaN / None, which have no .lower()
    if not isinstance(freeResponseAnswer, str):
        return 0

    # lower-case once instead of once per keyword
    answer = freeResponseAnswer.lower()
    for word in arrayOfWords:
        # re.escape keeps entries such as 'a.b' or '(alone)' from being parsed as regex syntax
        search = r'(?:^|\W){}(?:$|\W)'.format(re.escape(word))
        if re.search(search, answer):
            return 1
    return 0

## family and religion
### Two factors associated with higher happiness

In [7]:
# keyword lists, one per happiness 'domain', used to bucket the Q6 free responses
family = [
    'family', 'husband', 'wife',
    'daughter', 'son',
    'granddaughter', 'grandson', 'grandchildren',
    'my children',
]
religion = [
    'god', 'lord', 'church', 'prayer', 'bible',
    'jesus', 'faith', 'christian', 'temple', 'religion',
]

# one indicator column per domain: 1 when the answer mentions any keyword, else 0
df['family'] = df['Q6'].apply(lambda answer: evalFreeResponse(answer, family))
df['religion'] = df['Q6'].apply(lambda answer: evalFreeResponse(answer, religion))

In [8]:
# percentage of respondents whose Q6 answer was flagged for each domain
print(df['family'].eq(1).mean() * 100)  # ~45% of respondents mentioned family
print(df['religion'].eq(1).mean() * 100)  # ~4.5% mentioned religion

45.06
4.54


In [9]:
# older age groups are more likely to mention family, religion
# (the indicator columns are selected before averaging: on pandas >= 2.0,
# .mean() over the whole frame raises a TypeError on text columns such as Q6)
df.groupby('Age_groups')[['family', 'religion']].mean() * 100

Unnamed: 0_level_0,family,religion
Age_groups,Unnamed: 1_level_1,Unnamed: 2_level_1
45+,48.441511,6.343968
<45,41.00308,2.375715


In [10]:
# older age groups are more likely to mention family, religion
# (the indicator columns are selected before averaging: on pandas >= 2.0,
# .mean() over the whole frame raises a TypeError on text columns such as Q6)
df.groupby('MRK_Age')[['family', 'religion']].mean() * 100

Unnamed: 0_level_0,family,religion
MRK_Age,Unnamed: 1_level_1,Unnamed: 2_level_1
18-24,31.967213,3.442623
25-34,38.760632,0.972053
35-44,49.761905,2.97619
45-54,48.327566,5.074971
55-64,45.764576,4.510451
65+,51.104101,9.253417


In [11]:
# those who mention family in Q6 are happier than those who didn't
# (the outcome columns are selected before averaging: on pandas >= 2.0,
# .mean() over the whole frame raises a TypeError on text columns such as Q6)
df.groupby('family')[['happiness', 'anxiety', 'lifeSat']].mean()

Unnamed: 0_level_0,happiness,anxiety,lifeSat
family,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,6.23735,4.443757,6.126684
1,7.229916,3.964936,7.05992


In [12]:
# those who mention religion in Q6 are happier and less anxious than those who didn't
# (the outcome columns are selected before averaging: on pandas >= 2.0,
# .mean() over the whole frame raises a TypeError on text columns such as Q6)
df.groupby('religion')[['happiness', 'anxiety', 'lifeSat']].mean()

Unnamed: 0_level_0,happiness,anxiety,lifeSat
religion,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,6.618898,4.276556,6.488372
1,8.066079,3.207048,7.784141


## financials
### A factor associated with lower happiness

In [13]:
# keywords that place a Q6 free response in the 'money' domain
money = [
    'money', 'financially', 'finances',
    'debt', 'bills', 'economy',
]

# indicator column: 1 when the answer mentions any money keyword, else 0
df['money'] = df['Q6'].apply(lambda answer: evalFreeResponse(answer, money))

In [14]:
# percentage of respondents whose Q6 answer was flagged for money
print(df['money'].eq(1).mean() * 100)  # ~6% of respondents mentioned money

6.18


In [15]:
# younger age group more likely to mention money
# (the indicator column is selected before averaging: on pandas >= 2.0,
# .mean() over the whole frame raises a TypeError on text columns such as Q6)
df.groupby('Age_groups')[['money']].mean() * 100

Unnamed: 0_level_0,money
Age_groups,Unnamed: 1_level_1
45+,4.620462
<45,8.051034


In [16]:
# younger age group more likely to mention money
# (the indicator column is selected before averaging: on pandas >= 2.0,
# .mean() over the whole frame raises a TypeError on text columns such as Q6)
df.groupby('MRK_Age')[['money']].mean() * 100

Unnamed: 0_level_0,money
MRK_Age,Unnamed: 1_level_1
18-24,7.868852
25-34,7.654921
35-44,8.571429
45-54,6.459054
55-64,4.40044
65+,3.154574


In [17]:
# those who mention money in Q6 are less happy and more anxious than those who didn't
# (the outcome columns are selected before averaging: on pandas >= 2.0,
# .mean() over the whole frame raises a TypeError on text columns such as Q6)
df.groupby('money')[['happiness', 'anxiety', 'lifeSat']].mean()

Unnamed: 0_level_0,happiness,anxiety,lifeSat
money,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,6.765508,4.169473,6.61991
1,5.456311,5.116505,5.443366


## Health
### Weak evidence of an impact on happiness, possibly because 'health' and 'healthy' can appear in negative statements, e.g. 'not being as healthy as I once was' or something along those lines.

In [18]:
# keywords that place a Q6 free response in the 'health' domain
health = [
    'health', 'healthy',
    'exercise', 'exercising', 'working out', 'gym',
    'being active', 'hiking', 'biking',
]

# indicator column: 1 when the answer mentions any health keyword, else 0
df['health'] = df['Q6'].apply(lambda answer: evalFreeResponse(answer, health))

In [19]:
# percentage of respondents whose Q6 answer was flagged for health
print(df['health'].eq(1).mean() * 100)  # ~6% of respondents mentioned health

6.32


In [20]:
# older age group more likely to mention health / exercise
# (the indicator column is selected before averaging: on pandas >= 2.0,
# .mean() over the whole frame raises a TypeError on text columns such as Q6)
df.groupby('Age_groups')[['health']].mean() * 100

Unnamed: 0_level_0,health
Age_groups,Unnamed: 1_level_1
45+,7.73744
<45,4.619446


In [21]:
# those who mention health in Q6 report slightly higher happiness and life satisfaction
# and lower anxiety than those who didn't; the gap is smaller than for the other
# domains and no significance test is run here
# (the outcome columns are selected before averaging: on pandas >= 2.0,
# .mean() over the whole frame raises a TypeError on text columns such as Q6)
df.groupby('health')[['happiness', 'anxiety', 'lifeSat']].mean()

Unnamed: 0_level_0,happiness,anxiety,lifeSat
health,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,6.662895,4.260248,6.527754
1,7.006329,3.75,6.835443
