# Background-Dependent Wellbeing Research

## Data Processing

In [2]:
import pandas as pd
import numpy as np

In [3]:
# Load the raw wellbeing data (per the file name, its columns were renamed beforehand)
df = pd.read_csv('Human_Wellbeing_Raw_Data_Columns_Renamed.csv')

In [4]:
# List the column names exactly as they appear in the loaded CSV
df.columns

Index(['Timestamp', 'AGE', 'GENDER', 'CULTURE', 'RELIGION', 'ECO1', 'ECO2',
       'ECO3', 'ECO4', 'ECO5', 'SOC1', 'SOC2', 'SOC3', 'SOC4', 'SOC5', 'SOC51',
       'ENV1', 'ENV2', 'ENV3', 'ENV4', 'ENV5', 'POL1', 'POL2', 'POL3', 'POL4',
       'POL5', 'TEC1', 'TEC2', 'TEC3', 'TEC4', 'TEC5', 'TEC6', 'ECO_RANK',
       'SOC_RANK', 'ENV_RANK', 'POL_RANK', 'TEC_RANK'],
      dtype='object')

In [5]:
# Preview the first rows of the raw data before any cleaning
df.head()

Unnamed: 0,Timestamp,AGE,GENDER,CULTURE,RELIGION,ECO1,ECO2,ECO3,ECO4,ECO5,...,TEC2,TEC3,TEC4,TEC5,TEC6,ECO_RANK,SOC_RANK,ENV_RANK,POL_RANK,TEC_RANK
0,11/5/2022 20:55:53,15 - 16,Male,Chinese,Atheist (Non-Religious),3,In education (not paid for by employer) even i...,"$0 – $18,200",Yes,$130001 - $200000,...,9,10,"Communication, Share information, Entertainment",Public transport,No,2,1,5,4,3
1,11/5/2022 20:56:17,15 - 16,Male,Australian,Mormon,10,In education (not paid for by employer) even i...,"$0 – $18,200",Yes,$200001 - $500000,...,9,9,"Communication, Share information, Entertainmen...",Private car,Yes,4,3,1,5,2
2,11/5/2022 20:56:27,15 - 16,Male,Australian,Christianity,8,In education (not paid for by employer) even i...,"$0 – $18,200",Yes,$200001 - $500000,...,9,8,"Communication, Share information, Entertainmen...",Private car,Yes,1,4,5,3,2
3,11/5/2022 21:06:23,15 - 16,Male,Chinese,Catholic,8,In education (not paid for by employer) even i...,"$0 – $18,200",Yes,"$80,001 - $130,000",...,7,7,"Communication, Entertainment",Public transport,No,4,3,2,1,5
4,11/5/2022 21:08:32,15 - 16,Male,Chinese,Atheist (Non-Religious),4,"Unemployed, wanting a job but not actively loo...","$0 – $18,200",No,"$130,001 - $200,000",...,3,8,Share information,Public transport,Yes,4,3,1,5,2


In [6]:
# Normalised column names: lower-case, no surrounding whitespace.
cols = [name.lower().strip() for name in df.columns]

# Build a new frame rather than mutating in place, and tolerate an
# already-removed 'timestamp' column, so re-running this cell is harmless
# (the previous in-place drop raised KeyError on a second run).
df = (
    df.rename(columns=lambda name: name.lower().strip())
      .drop(columns='timestamp', errors='ignore')
)

# Exclude rows whose age is recorded as the literal string '71'.
# NOTE(review): presumably an out-of-bracket response -- confirm against the raw data.
df = df[df['age'] != '71']

In [7]:
# Check the cleaned frame: lower-case column names and no timestamp column
df.head()

Unnamed: 0,age,gender,culture,religion,eco1,eco2,eco3,eco4,eco5,soc1,...,tec2,tec3,tec4,tec5,tec6,eco_rank,soc_rank,env_rank,pol_rank,tec_rank
0,15 - 16,Male,Chinese,Atheist (Non-Religious),3,In education (not paid for by employer) even i...,"$0 – $18,200",Yes,$130001 - $200000,9,...,9,10,"Communication, Share information, Entertainment",Public transport,No,2,1,5,4,3
1,15 - 16,Male,Australian,Mormon,10,In education (not paid for by employer) even i...,"$0 – $18,200",Yes,$200001 - $500000,7,...,9,9,"Communication, Share information, Entertainmen...",Private car,Yes,4,3,1,5,2
2,15 - 16,Male,Australian,Christianity,8,In education (not paid for by employer) even i...,"$0 – $18,200",Yes,$200001 - $500000,9,...,9,8,"Communication, Share information, Entertainmen...",Private car,Yes,1,4,5,3,2
3,15 - 16,Male,Chinese,Catholic,8,In education (not paid for by employer) even i...,"$0 – $18,200",Yes,"$80,001 - $130,000",5,...,7,7,"Communication, Entertainment",Public transport,No,4,3,2,1,5
4,15 - 16,Male,Chinese,Atheist (Non-Religious),4,"Unemployed, wanting a job but not actively loo...","$0 – $18,200",No,"$130,001 - $200,000",3,...,3,8,Share information,Public transport,Yes,4,3,1,5,2


## Finding Trends and Plotting Charts

In [8]:
import matplotlib.pyplot as plt
import seaborn as sns

### Setting up different groups

In [9]:
# Respondents in the "15 - 16" age bracket
youngdf = df.loc[df['age'].eq("15 - 16")]
print(len(youngdf))
youngdf.head(n=1)

27


Unnamed: 0,age,gender,culture,religion,eco1,eco2,eco3,eco4,eco5,soc1,...,tec2,tec3,tec4,tec5,tec6,eco_rank,soc_rank,env_rank,pol_rank,tec_rank
0,15 - 16,Male,Chinese,Atheist (Non-Religious),3,In education (not paid for by employer) even i...,"$0 – $18,200",Yes,$130001 - $200000,9,...,9,10,"Communication, Share information, Entertainment",Public transport,No,2,1,5,4,3


In [10]:
# Respondents in the "30 - 60" age bracket
olddf = df.loc[df['age'].eq('30 - 60')]
print(len(olddf))
olddf.head(n=1)

20


Unnamed: 0,age,gender,culture,religion,eco1,eco2,eco3,eco4,eco5,soc1,...,tec2,tec3,tec4,tec5,tec6,eco_rank,soc_rank,env_rank,pol_rank,tec_rank
28,30 - 60,Female,Chinese,Atheist (Non-Religious),9,"In paid work (or away temporarily) (employee, ...","$45,001 – $120,000",Yes,"$130,001 - $200,000",10,...,7,5,Entertainment,Public transport,Yes,2,4,1,3,5


In [11]:
# Respondents whose culture is recorded as Australian
ausdf = df.loc[df['culture'].eq('Australian')]
print(len(ausdf))
ausdf.head(n=1)

22


Unnamed: 0,age,gender,culture,religion,eco1,eco2,eco3,eco4,eco5,soc1,...,tec2,tec3,tec4,tec5,tec6,eco_rank,soc_rank,env_rank,pol_rank,tec_rank
1,15 - 16,Male,Australian,Mormon,10,In education (not paid for by employer) even i...,"$0 – $18,200",Yes,$200001 - $500000,7,...,9,9,"Communication, Share information, Entertainmen...",Private car,Yes,4,3,1,5,2


In [12]:
# Respondents whose culture is recorded as Chinese
chidf = df.loc[df['culture'].eq('Chinese')]
print(len(chidf))
chidf.head(n=1)

13


Unnamed: 0,age,gender,culture,religion,eco1,eco2,eco3,eco4,eco5,soc1,...,tec2,tec3,tec4,tec5,tec6,eco_rank,soc_rank,env_rank,pol_rank,tec_rank
0,15 - 16,Male,Chinese,Atheist (Non-Religious),3,In education (not paid for by employer) even i...,"$0 – $18,200",Yes,$130001 - $200000,9,...,9,10,"Communication, Share information, Entertainment",Public transport,No,2,1,5,4,3


In [13]:
# Respondents whose gender is recorded as Male
mdf = df.loc[df['gender'].eq('Male')]
print(len(mdf))
mdf.head(n=1)

27


Unnamed: 0,age,gender,culture,religion,eco1,eco2,eco3,eco4,eco5,soc1,...,tec2,tec3,tec4,tec5,tec6,eco_rank,soc_rank,env_rank,pol_rank,tec_rank
0,15 - 16,Male,Chinese,Atheist (Non-Religious),3,In education (not paid for by employer) even i...,"$0 – $18,200",Yes,$130001 - $200000,9,...,9,10,"Communication, Share information, Entertainment",Public transport,No,2,1,5,4,3


In [14]:
# Respondents whose gender is recorded as Female
fdf = df.loc[df['gender'].eq('Female')]
print(len(fdf))
fdf.head(n=1)

19


Unnamed: 0,age,gender,culture,religion,eco1,eco2,eco3,eco4,eco5,soc1,...,tec2,tec3,tec4,tec5,tec6,eco_rank,soc_rank,env_rank,pol_rank,tec_rank
8,15 - 16,Female,Chinese,Atheist (Non-Religious),5,In education (not paid for by employer) even i...,"$0 – $18,200",No,Below $20000,9,...,5,8,Entertainment,Private car,Yes,1,2,5,4,3


In [15]:
# Respondents whose religion is recorded as 'Christianity'
# (exact match only; other labels such as 'Catholic' are not included)
chrdf = df.loc[df['religion'].eq('Christianity')]
print(len(chrdf))
chrdf.head(n=1)

23


Unnamed: 0,age,gender,culture,religion,eco1,eco2,eco3,eco4,eco5,soc1,...,tec2,tec3,tec4,tec5,tec6,eco_rank,soc_rank,env_rank,pol_rank,tec_rank
2,15 - 16,Male,Australian,Christianity,8,In education (not paid for by employer) even i...,"$0 – $18,200",Yes,$200001 - $500000,9,...,9,8,"Communication, Share information, Entertainmen...",Private car,Yes,1,4,5,3,2


In [16]:
# Respondents whose religion is recorded as 'Atheist (Non-Religious)'
athdf = df.loc[df['religion'].eq('Atheist (Non-Religious)')]
print(len(athdf))
athdf.head(n=1)

12


Unnamed: 0,age,gender,culture,religion,eco1,eco2,eco3,eco4,eco5,soc1,...,tec2,tec3,tec4,tec5,tec6,eco_rank,soc_rank,env_rank,pol_rank,tec_rank
0,15 - 16,Male,Chinese,Atheist (Non-Religious),3,In education (not paid for by employer) even i...,"$0 – $18,200",Yes,$130001 - $200000,9,...,9,10,"Communication, Share information, Entertainment",Public transport,No,2,1,5,4,3


### Finding Trends in Each Group's Rankings

In [17]:
# Names of the five *_rank columns analysed in this section
rank_cols = [prefix + '_rank' for prefix in ('eco', 'soc', 'env', 'pol', 'tec')]

In [18]:
def rank_df_to_np(df, rank_cols):
    """Split the ranking columns of ``df`` into one NumPy array per column.

    Also prints the number of rows (responses) in ``df``.

    Args:
        df: DataFrame containing the ranking columns.
        rank_cols: Names of the columns to extract, in the order the arrays
            should be returned. A single string is treated as one column
            name. ``None`` falls back to the five default ``*_rank`` columns.

    Returns:
        A tuple with one array per requested column, in the same order as
        ``rank_cols`` (five arrays for the default column list).

    Raises:
        KeyError: If a requested column is missing from ``df``.
    """
    if rank_cols is None:
        rank_cols = ['eco_rank', 'soc_rank', 'env_rank', 'pol_rank', 'tec_rank']
    elif isinstance(rank_cols, str):
        rank_cols = [rank_cols]
    columns = list(rank_cols)

    # Select up front so a missing column fails before anything is printed.
    ranks = df[columns]
    print(ranks.shape[0], 'responses found...')
    return tuple(ranks[name].to_numpy() for name in columns)

def array_to_mode(array):
    """Print the most frequent value(s) of ``array`` and their count.

    When several values tie for the highest count, all of them are listed
    (in the sorted order returned by ``np.unique``). Empty input prints a
    short notice instead of raising.

    Args:
        array: Array-like of values to tally.

    Returns:
        None; the result is only printed.
    """
    if np.size(array) == 0:
        # Taking the max of an empty counts array would raise ValueError.
        print("The mode for", array, 'is undefined (no values)')
        return None

    vals, counts = np.unique(array, return_counts=True)
    top_count = counts.max()
    # Boolean mask keeps every value that reaches the top count (ties included).
    modes = vals[counts == top_count].tolist()
    print("The mode for", array, 'is', modes, 'for', top_count, 'times')
    return None

def df_to_stats(df, unique, col):
    """Print how often each value in ``unique`` occurs in column ``col`` of ``df``.

    Turns a specific column of a dataframe into frequency data, printed one
    line per candidate value and followed by a blank separator.

    Args:
        df: DataFrame holding the column to tally.
        unique: Iterable of candidate values to count; values that never
            occur are reported with a count of 0.
        col: Name of the column to tally.

    Returns:
        None; the counts are only printed.
    """
    column_values = df[col].to_numpy()

    for record in unique:
        count = np.count_nonzero(column_values == record)
        print("The number of", record, "is", count)
    # '\n' plus print's own newline leaves two blank lines after the table.
    print('\n')

In [19]:
# Modal rank of each *_rank column for the 15 - 16 group
A, B, C, D, E = rank_df_to_np(youngdf, rank_cols)
for ranks in (A, B, C, D, E):
    array_to_mode(ranks)

27 responses found...
The mode for [2 4 1 4 4 2 1 1 1 2 2 4 2 4 3 3 2 3 4 4 4 3 3 5 2 3 3] is [4] for 8 times
The mode for [1 3 4 3 3 1 2 2 2 1 4 5 1 1 1 2 3 1 1 2 5 4 4 3 5 1 4] is [1] for 9 times
The mode for [5 1 5 2 1 4 3 4 5 5 3 2 4 2 2 1 4 2 2 5 2 1 5 1 4 4 2] is [2] for 8 times
The mode for [4 5 3 1 5 5 5 3 4 4 1 1 5 5 4 4 5 5 5 3 1 5 2 2 1 5 5] is [5] for 12 times
The mode for [3 2 2 5 2 3 4 5 3 3 5 3 3 3 5 5 1 4 3 1 3 2 1 4 3 2 1] is [3] for 10 times


In [20]:
# Modal rank of each *_rank column for the 30 - 60 group
A, B, C, D, E = rank_df_to_np(olddf, rank_cols)
for ranks in (A, B, C, D, E):
    array_to_mode(ranks)

20 responses found...
The mode for [2 1 1 5 2 4 2 3 4 3 1 1 3 3 2 3 3 1 3 3] is [3] for 8 times
The mode for [4 4 2 3 5 1 1 5 1 1 2 4 1 1 1 5 1 2 1 1] is [1] for 10 times
The mode for [1 2 3 1 4 2 4 4 2 4 4 2 2 2 4 2 4 4 2 4] is [4] for 9 times
The mode for [3 5 4 4 3 3 3 2 3 2 3 5 5 5 5 1 2 3 4 5] is [3] for 7 times
The mode for [5 3 5 2 1 5 5 1 5 5 5 3 4 4 3 4 5 5 5 2] is [5] for 10 times


In [21]:
# Modal rank of each *_rank column for Australian respondents
A, B, C, D, E = rank_df_to_np(ausdf, rank_cols)
for ranks in (A, B, C, D, E):
    array_to_mode(ranks)

22 responses found...
The mode for [4 1 4 2 4 3 3 4 4 2 1 2 4 3 3 1 3 3 2 3 3 3] is [3] for 9 times
The mode for [3 4 5 1 1 1 2 1 5 5 2 5 1 5 1 2 1 1 1 5 1 1] is [1] for 11 times
The mode for [1 5 2 4 2 2 1 2 2 4 3 4 2 4 4 4 2 2 4 2 4 4] is [2, 4] for 9 times
The mode for [5 3 1 5 5 4 4 5 1 1 4 3 3 2 2 3 5 5 5 1 2 5] is [5] for 8 times
The mode for [2 2 3 3 3 5 5 3 3 3 5 1 5 1 5 5 4 4 3 4 5 2] is [3, 5] for 7 times


In [22]:
# Modal rank of each *_rank column for Chinese respondents
A, B, C, D, E = rank_df_to_np(chidf, rank_cols)
for ranks in (A, B, C, D, E):
    array_to_mode(ranks)

13 responses found...
The mode for [2 4 4 1 1 1 2 3 2 1 5 2 1] is [1] for 5 times
The mode for [1 3 3 2 2 2 3 4 4 4 3 1 4] is [3, 4] for 4 times
The mode for [5 2 1 3 4 5 4 5 1 2 1 4 2] is [1, 2, 4, 5] for 3 times
The mode for [4 1 5 5 3 4 5 2 3 5 4 3 5] is [5] for 5 times
The mode for [3 5 2 4 5 3 1 1 5 3 2 5 3] is [3, 5] for 4 times


In [23]:
# Modal rank of each *_rank column for male respondents
A, B, C, D, E = rank_df_to_np(mdf, rank_cols)
for ranks in (A, B, C, D, E):
    array_to_mode(ranks)

27 responses found...
The mode for [2 4 1 4 4 1 1 2 2 4 2 2 3 4 4 3 3 5 2 3 3 1 1 3 3 1 3] is [3] for 8 times
The mode for [1 3 4 3 3 2 2 1 4 5 1 3 1 2 5 4 4 3 5 1 4 4 4 1 1 2 1] is [1] for 8 times
The mode for [5 1 5 2 1 3 4 5 3 2 4 4 2 5 2 1 5 1 4 4 2 2 2 2 4 4 2] is [2] for 9 times
The mode for [4 5 3 1 5 5 3 4 1 1 5 5 5 3 1 5 2 2 1 5 5 5 5 5 2 3 4] is [5] for 12 times
The mode for [3 2 2 5 2 4 5 3 5 3 3 1 4 1 3 2 1 4 3 2 1 3 3 4 5 5 5] is [3] for 8 times


In [24]:
# Modal rank of each *_rank column for female respondents
A, B, C, D, E = rank_df_to_np(fdf, rank_cols)
for ranks in (A, B, C, D, E):
    array_to_mode(ranks)

19 responses found...
The mode for [1 4 3 3 4 2 1 5 2 4 2 3 4 3 1 3 2 3 3] is [3] for 7 times
The mode for [2 1 1 2 1 4 2 3 5 1 1 5 1 1 2 1 1 5 1] is [1] for 10 times
The mode for [5 2 2 1 2 1 3 1 4 2 4 4 2 4 4 2 4 2 4] is [2, 4] for 7 times
The mode for [4 5 4 4 5 3 4 4 3 3 3 2 3 2 3 5 5 1 5] is [3] for 6 times
The mode for [3 3 5 5 3 5 5 2 1 5 5 1 5 5 5 4 3 4 2] is [5] for 9 times


In [25]:
# Modal rank of each *_rank column for respondents recorded as 'Christianity'
A, B, C, D, E = rank_df_to_np(chrdf, rank_cols)
for ranks in (A, B, C, D, E):
    array_to_mode(ranks)

23 responses found...
The mode for [1 2 2 4 3 3 2 4 5 3 2 4 2 3 4 3 1 3 3 2 3 3 3] is [3] for 10 times
The mode for [4 4 1 1 1 2 3 1 3 1 5 1 1 5 1 1 2 1 1 1 5 1 1] is [1] for 14 times
The mode for [5 3 4 2 2 1 4 2 1 4 4 2 4 4 2 4 4 2 2 4 2 4 4] is [4] for 11 times
The mode for [3 1 5 5 4 4 5 5 2 5 3 3 3 2 3 2 3 5 5 5 1 2 5] is [5] for 9 times
The mode for [2 5 3 3 5 5 1 3 4 2 1 5 5 1 5 5 5 4 4 3 4 5 2] is [5] for 9 times


In [26]:
# Modal rank of each *_rank column for atheist (non-religious) respondents
A, B, C, D, E = rank_df_to_np(athdf, rank_cols)
for ranks in (A, B, C, D, E):
    array_to_mode(ranks)

12 responses found...
The mode for [2 4 1 1 4 4 3 2 2 1 5 1] is [1] for 4 times
The mode for [1 3 2 2 5 5 4 5 4 4 3 4] is [4] for 4 times
The mode for [5 1 3 5 2 2 5 4 1 2 1 2] is [2] for 4 times
The mode for [4 5 5 4 1 1 2 1 3 5 4 5] is [5] for 4 times
The mode for [3 2 4 3 3 3 1 3 5 3 2 3] is [3] for 7 times


### Graphing

In [27]:
import matplotlib.pyplot as plt
import seaborn as sns

In [28]:
# Columns of interest for the frequency tables:
#   eco1, soc1, env1-5, pol2-3, tec1-3

# Every integer from 0 through 10, used as the candidate values to count
scale0 = list(range(11))

In [29]:
# eco1 response counts (values 0-10) for the 15 - 16 group
df_to_stats(df=youngdf, unique=scale0, col='eco1')

The number of 0 is 0
The number of 1 is 1
The number of 2 is 0
The number of 3 is 4
The number of 4 is 2
The number of 5 is 4
The number of 6 is 2
The number of 7 is 6
The number of 8 is 6
The number of 9 is 0
The number of 10 is 2




In [30]:
# eco1 response counts (values 0-10) for the 30 - 60 group
df_to_stats(df=olddf, unique=scale0, col='eco1')

The number of 0 is 0
The number of 1 is 0
The number of 2 is 0
The number of 3 is 1
The number of 4 is 4
The number of 5 is 4
The number of 6 is 2
The number of 7 is 6
The number of 8 is 2
The number of 9 is 1
The number of 10 is 0




In [32]:
# soc1 response counts (values 0-10) for the 15 - 16 group
df_to_stats(df=youngdf, unique=scale0, col='soc1')

The number of 0 is 2
The number of 1 is 0
The number of 2 is 0
The number of 3 is 1
The number of 4 is 1
The number of 5 is 3
The number of 6 is 4
The number of 7 is 7
The number of 8 is 2
The number of 9 is 5
The number of 10 is 2




In [34]:
# soc1 response counts (values 0-10) for the 30 - 60 group
df_to_stats(df=olddf, unique=scale0, col='soc1')

The number of 0 is 0
The number of 1 is 0
The number of 2 is 0
The number of 3 is 0
The number of 4 is 1
The number of 5 is 0
The number of 6 is 1
The number of 7 is 5
The number of 8 is 5
The number of 9 is 7
The number of 10 is 1




In [37]:
# env1 response counts (values 0-10) for the 15 - 16 group
df_to_stats(df=youngdf, unique=scale0, col='env1')

The number of 0 is 1
The number of 1 is 2
The number of 2 is 2
The number of 3 is 1
The number of 4 is 1
The number of 5 is 5
The number of 6 is 5
The number of 7 is 4
The number of 8 is 3
The number of 9 is 1
The number of 10 is 2




In [39]:
# env1 response counts (values 0-10) for the 30 - 60 group
df_to_stats(df=olddf, unique=scale0, col='env1')

The number of 0 is 0
The number of 1 is 0
The number of 2 is 1
The number of 3 is 0
The number of 4 is 0
The number of 5 is 0
The number of 6 is 1
The number of 7 is 8
The number of 8 is 4
The number of 9 is 1
The number of 10 is 5




In [41]:
# env2 response counts (values 0-10) for the 15 - 16 group
df_to_stats(df=youngdf, unique=scale0, col='env2')

The number of 0 is 0
The number of 1 is 4
The number of 2 is 3
The number of 3 is 5
The number of 4 is 2
The number of 5 is 3
The number of 6 is 5
The number of 7 is 2
The number of 8 is 3
The number of 9 is 0
The number of 10 is 0




In [43]:
# env2 response counts (values 0-10) for the 30 - 60 group
df_to_stats(df=olddf, unique=scale0, col='env2')

The number of 0 is 0
The number of 1 is 0
The number of 2 is 2
The number of 3 is 2
The number of 4 is 2
The number of 5 is 1
The number of 6 is 4
The number of 7 is 4
The number of 8 is 4
The number of 9 is 1
The number of 10 is 0




In [46]:
# env5 response counts (values 0-10) for the 15 - 16 group
df_to_stats(df=youngdf, unique=scale0, col='env5')

The number of 0 is 0
The number of 1 is 0
The number of 2 is 0
The number of 3 is 0
The number of 4 is 1
The number of 5 is 0
The number of 6 is 0
The number of 7 is 1
The number of 8 is 11
The number of 9 is 7
The number of 10 is 7




In [48]:
# env5 response counts (values 0-10) for the 30 - 60 group
df_to_stats(df=olddf, unique=scale0, col='env5')

The number of 0 is 0
The number of 1 is 0
The number of 2 is 0
The number of 3 is 0
The number of 4 is 0
The number of 5 is 0
The number of 6 is 2
The number of 7 is 2
The number of 8 is 5
The number of 9 is 7
The number of 10 is 4




In [49]:
# pol2 response counts (values 0-10) for the 15 - 16 group
df_to_stats(df=youngdf, unique=scale0, col='pol2')

The number of 0 is 1
The number of 1 is 0
The number of 2 is 0
The number of 3 is 1
The number of 4 is 2
The number of 5 is 6
The number of 6 is 0
The number of 7 is 4
The number of 8 is 3
The number of 9 is 6
The number of 10 is 4




In [51]:
# pol2 response counts (values 0-10) for the 30 - 60 group
df_to_stats(df=olddf, unique=scale0, col='pol2')

The number of 0 is 0
The number of 1 is 0
The number of 2 is 0
The number of 3 is 1
The number of 4 is 0
The number of 5 is 0
The number of 6 is 0
The number of 7 is 0
The number of 8 is 2
The number of 9 is 2
The number of 10 is 15




In [52]:
# tec1 response counts (values 0-10) for the 15 - 16 group
df_to_stats(df=youngdf, unique=scale0, col='tec1')

The number of 0 is 0
The number of 1 is 1
The number of 2 is 0
The number of 3 is 0
The number of 4 is 1
The number of 5 is 1
The number of 6 is 3
The number of 7 is 1
The number of 8 is 6
The number of 9 is 10
The number of 10 is 4




### The data above was used to make histograms, line graphs and pie charts in Google Sheets. The full research findings can be found in the paper:
### "Wellbeing Indicator: A Background-Dependent Approach"