In [1]:
import os
import pandas as pd

In [2]:
# Location of the survey CSV, relative to the notebook's working directory.
file = "./Impact_of_Remote_Work_on_Mental_Health.csv"

# Read the CSV into a DataFrame, then preview its first five rows.
mentalHealthDf = pd.read_csv(filepath_or_buffer=file)
mentalHealthDf.head(n=5)

Unnamed: 0,Employee_ID,Age,Gender,Job_Role,Industry,Years_of_Experience,Work_Location,Hours_Worked_Per_Week,Number_of_Virtual_Meetings,Work_Life_Balance_Rating,Stress_Level,Mental_Health_Condition,Access_to_Mental_Health_Resources,Productivity_Change,Social_Isolation_Rating,Satisfaction_with_Remote_Work,Company_Support_for_Remote_Work,Physical_Activity,Sleep_Quality,Region
0,EMP0001,32,Non-binary,HR,Healthcare,13,Hybrid,47,7,2,Medium,Depression,No,Decrease,1,Unsatisfied,1,Weekly,Good,Europe
1,EMP0002,40,Female,Data Scientist,IT,3,Remote,52,4,1,Medium,Anxiety,No,Increase,3,Satisfied,2,Weekly,Good,Asia
2,EMP0003,59,Non-binary,Software Engineer,Education,22,Hybrid,46,11,5,Medium,Anxiety,No,No Change,4,Unsatisfied,5,,Poor,North America
3,EMP0004,27,Male,Software Engineer,Finance,20,Onsite,32,8,4,High,Depression,Yes,Increase,3,Unsatisfied,3,,Poor,Europe
4,EMP0005,49,Male,Sales,Consulting,32,Onsite,35,12,2,High,,Yes,Decrease,3,Unsatisfied,3,Weekly,Average,North America


In [3]:
# Show the column labels of the loaded frame to confirm what was read from the CSV.
mentalHealthDf.columns

Index(['Employee_ID', 'Age', 'Gender', 'Job_Role', 'Industry',
       'Years_of_Experience', 'Work_Location', 'Hours_Worked_Per_Week',
       'Number_of_Virtual_Meetings', 'Work_Life_Balance_Rating',
       'Stress_Level', 'Mental_Health_Condition',
       'Access_to_Mental_Health_Resources', 'Productivity_Change',
       'Social_Isolation_Rating', 'Satisfaction_with_Remote_Work',
       'Company_Support_for_Remote_Work', 'Physical_Activity', 'Sleep_Quality',
       'Region'],
      dtype='object')

In [4]:
#group ages and add column to dataframe
# Decade-wide bin edges 20, 30, ..., 70. With right=False each bin includes its
# lower edge and excludes its upper edge, so an age of 40 falls in '40-49'.
bins = list(range(20, 71, 10))
# One label per bin, spelled as inclusive whole-year ranges ('20-29', ...).
labels = [f'{lower}-{upper - 1}' for lower, upper in zip(bins[:-1], bins[1:])]

mentalHealthDf['ageGroup'] = pd.cut(
    mentalHealthDf['Age'],
    bins=bins,
    labels=labels,
    right=False,
)

mentalHealthDf

Unnamed: 0,Employee_ID,Age,Gender,Job_Role,Industry,Years_of_Experience,Work_Location,Hours_Worked_Per_Week,Number_of_Virtual_Meetings,Work_Life_Balance_Rating,...,Mental_Health_Condition,Access_to_Mental_Health_Resources,Productivity_Change,Social_Isolation_Rating,Satisfaction_with_Remote_Work,Company_Support_for_Remote_Work,Physical_Activity,Sleep_Quality,Region,ageGroup
0,EMP0001,32,Non-binary,HR,Healthcare,13,Hybrid,47,7,2,...,Depression,No,Decrease,1,Unsatisfied,1,Weekly,Good,Europe,30-39
1,EMP0002,40,Female,Data Scientist,IT,3,Remote,52,4,1,...,Anxiety,No,Increase,3,Satisfied,2,Weekly,Good,Asia,40-49
2,EMP0003,59,Non-binary,Software Engineer,Education,22,Hybrid,46,11,5,...,Anxiety,No,No Change,4,Unsatisfied,5,,Poor,North America,50-59
3,EMP0004,27,Male,Software Engineer,Finance,20,Onsite,32,8,4,...,Depression,Yes,Increase,3,Unsatisfied,3,,Poor,Europe,20-29
4,EMP0005,49,Male,Sales,Consulting,32,Onsite,35,12,2,...,,Yes,Decrease,3,Unsatisfied,3,Weekly,Average,North America,40-49
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4995,EMP4996,32,Male,Sales,Consulting,4,Onsite,24,2,5,...,Burnout,Yes,Decrease,4,Neutral,1,Weekly,Average,Asia,30-39
4996,EMP4997,39,Female,Sales,Healthcare,27,Onsite,48,15,1,...,Depression,Yes,Decrease,1,Satisfied,1,,Average,Africa,30-39
4997,EMP4998,42,Female,Sales,Healthcare,21,Hybrid,34,1,4,...,Burnout,No,Increase,3,Satisfied,1,Daily,Poor,Oceania,40-49
4998,EMP4999,27,Female,Sales,Healthcare,26,Remote,58,0,5,...,,Yes,Increase,3,Unsatisfied,4,Daily,Average,Asia,20-29


In [5]:
#setup variables for mental health conditions and concatenate
# One indicator column per distinct value of Mental_Health_Condition.
mentalHealthbyCond = pd.get_dummies(mentalHealthDf['Mental_Health_Condition'])

# This cell rebinds mentalHealthDf to itself plus the indicator columns, so a
# second run would otherwise append a duplicate set of them. Dropping any
# indicator columns left over from a previous run first keeps the cell
# idempotent; errors='ignore' makes the drop a no-op on the first run.
mentalHealthDf = pd.concat(
    [
        mentalHealthDf.drop(columns=mentalHealthbyCond.columns, errors='ignore'),
        mentalHealthbyCond,
    ],
    axis=1,
)

In [6]:
#setup separate dataframes
# One frame per work arrangement, each holding only the rows whose
# Work_Location matches that arrangement exactly.
hybrid_df, onsite_df, remote_df = (
    mentalHealthDf.loc[mentalHealthDf['Work_Location'].eq(location)]
    for location in ('Hybrid', 'Onsite', 'Remote')
)

In [7]:
#group each dataframe by agegroup
# 'ageGroup' is a categorical column (built with pd.cut). Recent pandas emits a
# FutureWarning when groupby is given a categorical key without `observed`,
# because the default is changing. Passing observed=False states the current
# behaviour explicitly (every age bucket stays in the result, even one with no
# rows) and silences the warning.
# sum(numeric_only=True) totals the numeric columns per bucket; reset_index()
# turns 'ageGroup' back into an ordinary column.
hybrid_grouped = hybrid_df.groupby('ageGroup', observed=False).sum(numeric_only=True).reset_index()
onsite_grouped = onsite_df.groupby('ageGroup', observed=False).sum(numeric_only=True).reset_index()
remote_grouped = remote_df.groupby('ageGroup', observed=False).sum(numeric_only=True).reset_index()

  hybrid_grouped = hybrid_df.groupby('ageGroup').sum(numeric_only=True).reset_index()
  onsite_grouped = onsite_df.groupby('ageGroup').sum(numeric_only=True).reset_index()
  remote_grouped = remote_df.groupby('ageGroup').sum(numeric_only=True).reset_index()


In [8]:
#reduce columns and review each df
# Keep only the age bucket and the three per-condition totals from each
# grouped frame.
hybrid_result, onsite_result, remote_result = (
    grouped.loc[:, ['ageGroup', 'Anxiety', 'Burnout', 'Depression']]
    for grouped in (hybrid_grouped, onsite_grouped, remote_grouped)
)

In [9]:
# Print a caption, then display the hybrid per-age-group condition totals as
# the cell's output.
print("Hybrid Work Location DataFrame:")
hybrid_result

Hybrid Work Location DataFrame:


Unnamed: 0,ageGroup,Anxiety,Burnout,Depression
0,20-29,95,88,93
1,30-39,107,81,107
2,40-49,106,111,105
3,50-59,107,110,104
4,60-69,13,10,12


In [10]:
# Print a caption, then display the onsite per-age-group condition totals as
# the cell's output.
print("Onsite Work Location DataFrame:")
onsite_result

Onsite Work Location DataFrame:


Unnamed: 0,ageGroup,Anxiety,Burnout,Depression
0,20-29,83,93,79
1,30-39,87,106,111
2,40-49,116,121,115
3,50-59,114,107,94
4,60-69,7,15,13


In [11]:
# Print a caption, then display the remote per-age-group condition totals as
# the cell's output.
print("Remote Work Location DataFrame:")
remote_result

Remote Work Location DataFrame:


Unnamed: 0,ageGroup,Anxiety,Burnout,Depression
0,20-29,95,91,75
1,30-39,105,116,114
2,40-49,125,114,104
3,50-59,105,109,116
4,60-69,13,8,4
