# Python for GNHUSA Happiness Report
## Author: Oliver Gladfelter
## Date: October 2022

In [1]:
import pandas as pd

## Remove text from scale responses

In [2]:
df = pd.read_csv("../data/data.csv")

# Scale answers are stored as labels such as '10 - completely'; the score is
# the first space-separated token. Each entry below is
# (new numeric column, raw survey column, numeric type to convert to).
scale_columns = [
    ('lifeSat', 'Q5_a__1', int),
    ('worthwhile', 'Q5_b__1', float),
    ('happiness', 'Q5_c__1', int),
    ('anxiety', 'Q5_d__1', int),
]
for new_name, raw_name, to_number in scale_columns:
    # Bind the converter as a default so each lambda keeps its own type.
    df[new_name] = df[raw_name].apply(
        lambda label, cast=to_number: cast(label.split(' ')[0])
    )

# Write the frame, now including the numeric columns, back to the same file.
df.to_csv('../data/data.csv', index=False)

In [13]:
# Reload the survey data from disk; the file was rewritten above with the
# numeric lifeSat / worthwhile / happiness / anxiety columns added.
df = pd.read_csv("../data/data.csv")

In [21]:
# Print the standard deviation, then the mean, of each score column.
# The statistic is computed on the single column rather than on the whole
# frame: df.std() / df.mean() would try to reduce the text columns as well
# (State, the raw Q5 labels), which raises TypeError on pandas >= 2.0 and
# recomputes every column's statistic on each call.
for column in ['lifeSat', 'happiness', 'anxiety']:
    print(df[column].std())
    print(df[column].mean())

2.316386256447251
6.5472
2.4943069485191227
6.6846
2.977681869821506
4.228


## Compute life satisfaction averages for each age

In [53]:
# Mean life satisfaction score for every distinct value of Q2 (the age
# column, per the section heading), saved as its own dataset with the
# columns Q2 and lifeSat.
mean_life_sat_by_age = df.groupby('Q2')['lifeSat'].mean()
age_and_happiness = mean_life_sat_by_age.reset_index()
age_and_happiness.to_csv('../data/age_and_happiness.csv', index=False)

## State averages

In [44]:
# For each measure, print its name followed by the per-state averages sorted
# from highest to lowest (at most 50 rows), then a blank separator line.
for measure in ('lifeSat', 'worthwhile', 'happiness', 'anxiety'):
    print(measure)
    state_means = df.groupby('State')[[measure]].mean()
    ordered_means = state_means.sort_values(measure, ascending=False)
    print(ordered_means.head(50))
    print()

lifeSat
                lifeSat
State                  
Delaware           7.12
Georgia            7.11
South Carolina     7.09
Florida            7.01
South Dakota       6.92
Wisconsin          6.85
New York           6.83
North Dakota       6.83
Nebraska           6.83
North Carolina     6.81
California         6.79
Alabama            6.78
New Jersey         6.77
Illinois           6.76
Arizona            6.71
Michigan           6.67
Tennessee          6.66
Montana            6.65
Virginia           6.62
Maryland           6.62
Kentucky           6.62
Missouri           6.61
Nevada             6.59
Louisiana          6.57
Wyoming            6.52
Indiana            6.51
New Mexico         6.51
Iowa               6.49
Utah               6.45
Idaho              6.45
Mississippi        6.44
Ohio               6.43
Minnesota          6.43
Colorado           6.42
Alaska             6.40
Texas              6.40
Pennsylvania       6.38
Massachusetts      6.37
Vermont            6.37
New Hamp

In [21]:
# Per-state averages of the four score columns, one row per state with
# 'State' as a regular column.
# The columns are selected *before* averaging: df.groupby('State').mean()
# would first try to average every column, including the text ones, which
# raises TypeError on pandas >= 2.0 and does needless work on older versions.
stateData = df.groupby('State')[['lifeSat','worthwhile','happiness','anxiety']].mean().reset_index()

In [None]:
# Rank the states on each measure. Rank 1 goes to the state with the highest
# average and ranks count up from there; this is also true for anxiety, so
# anxietyRank 1 is the state with the highest anxiety average.
#
# The ranks are written with a single index-aligned column assignment instead
# of stateData[col][i] = ... inside a loop: that chained assignment emits
# SettingWithCopyWarning and does not modify stateData at all when pandas
# Copy-on-Write is enabled.
rank_columns = {
    'lifeSat': 'lifeSatRank',
    'worthwhile': 'worthwhileRank',
    'happiness': 'happyRank',
    'anxiety': 'anxietyRank',
}
for measure, rank_column in rank_columns.items():
    # Row labels ordered from the highest to the lowest average.
    ordered_labels = stateData.sort_values(measure, ascending=False).index
    # The Series is aligned on the row labels, so every row receives its
    # 1-based position in the sorted order.
    stateData[rank_column] = pd.Series(
        range(1, len(ordered_labels) + 1), index=ordered_labels
    )

In [42]:
# Save the per-state averages and ranks; the row index is not written.
stateData.to_csv("../data/stateData.csv", index=False)

In [43]:
# Show the state table ordered from highest to lowest average life
# satisfaction (a bare expression, so the notebook renders it as cell output).
stateData.sort_values('lifeSat', ascending = False)

Unnamed: 0,State,lifeSat,worthwhile,happiness,anxiety,lifeSatRank,worthwhileRank,happyRank,anxietyRank
7,Delaware,7.12,7.28,7.23,3.68,1,6,2,48
9,Georgia,7.11,7.54,7.43,4.62,2,1,1,7
39,South Carolina,7.09,7.5,7.14,4.13,3,2,3,32
8,Florida,7.01,7.44,6.88,4.06,4,3,18,37
40,South Dakota,6.92,7.21,6.88,3.94,5,10,17,43
48,Wisconsin,6.85,7.3,7.0,4.01,6,5,8,41
31,New York,6.83,7.23,6.79,4.14,7,8,21,31
33,North Dakota,6.83,6.94,6.68,4.08,8,23,27,36
26,Nebraska,6.83,7.24,7.07,3.92,9,7,7,44
32,North Carolina,6.81,7.03,7.07,3.69,10,17,6,47


## Standard deviation and mean for life satisfaction by state

In [18]:
# Spread and centre of the per-state life satisfaction averages.
states = pd.read_csv('../data/stateData.csv')
# Reduce only the lifeSat column: states.std() / states.mean() would also try
# to reduce the text 'State' column, which raises TypeError on pandas >= 2.0.
print(states['lifeSat'].std())
print(states['lifeSat'].mean())

0.3349587284265216
6.524270300000003


## Aggregate contributions data and assign titles to each contributor

In [32]:
def rank(value):
    """Return the contributor title earned by a donation total of ``value``.

    The title is that of the highest tier whose minimum amount ``value``
    reaches; totals below every tier (and values that compare false against
    all of them, such as NaN) get ``'Other'``.
    """
    # (minimum amount, title), ordered from the highest tier down.
    tiers = (
        (1000, 'Happiness Pioneer'),
        (500, 'Happiness Champion'),
        (250, 'Happiness Innovator'),
        (100, 'Happiness Activist'),
    )
    for minimum, title in tiers:
        if value >= minimum:
            return title
    return 'Other'
    
# donation data
# Total each contributor's donations and label them with a title from rank().
donations = pd.read_csv("../data/contributions.csv")
donations = donations[['Name', 'Amount', 'Email']]
# Strip '$' signs and commas so the amounts parse as floats. The pattern is a
# raw string: in a plain literal '\$' is an invalid escape sequence, which
# Python 3.12+ reports as a SyntaxWarning.
donations['Amount'] = donations['Amount'].replace(r'[\$,]', '', regex=True).astype(float)
# One row per (Name, Email) pair with the summed amount, largest total first.
# NOTE(review): groupby drops rows whose Name or Email is missing by default;
# confirm that contributors without an email are not expected in the output.
donations = donations.groupby(['Name', 'Email']).sum().reset_index().sort_values('Amount', ascending=False)
donations['title'] = donations['Amount'].apply(rank)
donations = donations[['Name','title']]
# NOTE(review): this overwrites the input file with only Name and title, so
# the cell cannot be re-run without restoring the original contributions.csv.
# The file is written once; the second identical to_csv call was redundant.
donations.to_csv("../data/contributions.csv", index=False)