In [1]:
import os
import pandas as pd

file = "./Impact_of_Remote_Work_on_Mental_Health.csv"

mentalHealthDf = pd.read_csv(file)
mentalHealthDf.head()

bins = [20, 30, 40, 50, 60, 70]
labels = ['20-29', '30-39', '40-49', '50-59', '60-69']

mentalHealthDf['hoursWorkedGrouped'] = pd.cut(mentalHealthDf['Hours_Worked_Per_Week'], bins=bins, labels=labels, right = False)

mentalHealthDf['Mental_Health_Condition'] = mentalHealthDf['Mental_Health_Condition'].fillna('Healthy')

In [2]:
#setup variables for mental health conditions and concatenate
mentalHealthbyCond = pd.get_dummies(mentalHealthDf['Mental_Health_Condition'])

mentalHealthDf = pd.concat([mentalHealthDf, mentalHealthbyCond], axis=1)

In [3]:
#setup separate dataframes
hybrid_df = mentalHealthDf[mentalHealthDf['Work_Location'] == 'Hybrid']
onsite_df = mentalHealthDf[mentalHealthDf['Work_Location'] == 'Onsite']
remote_df = mentalHealthDf[mentalHealthDf['Work_Location'] == 'Remote']

In [4]:
#group each dataframe by hours worked
hybrid_grouped = hybrid_df.groupby('hoursWorkedGrouped').sum(numeric_only=True).reset_index()
onsite_grouped = onsite_df.groupby('hoursWorkedGrouped').sum(numeric_only=True).reset_index()
remote_grouped = remote_df.groupby('hoursWorkedGrouped').sum(numeric_only=True).reset_index()

  hybrid_grouped = hybrid_df.groupby('hoursWorkedGrouped').sum(numeric_only=True).reset_index()
  onsite_grouped = onsite_df.groupby('hoursWorkedGrouped').sum(numeric_only=True).reset_index()
  remote_grouped = remote_df.groupby('hoursWorkedGrouped').sum(numeric_only=True).reset_index()


In [5]:
hybrid_grouped['Total'] = hybrid_grouped[['Anxiety', 'Burnout', 'Depression', 'Healthy']].sum(axis=1)
onsite_grouped['Total'] = onsite_grouped[['Anxiety', 'Burnout', 'Depression', 'Healthy']].sum(axis=1)
remote_grouped['Total'] = remote_grouped[['Anxiety', 'Burnout', 'Depression', 'Healthy']].sum(axis=1)

In [6]:
#reduce columns and review each df
hybrid_result = hybrid_grouped[['hoursWorkedGrouped', 'Anxiety', 'Burnout', 'Depression', 'Healthy', 'Total']]
onsite_result = onsite_grouped[['hoursWorkedGrouped', 'Anxiety', 'Burnout', 'Depression', 'Healthy','Total']]
remote_result = remote_grouped[['hoursWorkedGrouped', 'Anxiety', 'Burnout', 'Depression', 'Healthy','Total']]

In [7]:
print("Hybrid Work Location DataFrame:")
hybrid_result

Hybrid Work Location DataFrame:


Unnamed: 0,hoursWorkedGrouped,Anxiety,Burnout,Depression,Healthy,Total
0,20-29,101,98,119,121,439
1,30-39,108,89,94,79,370
2,40-49,121,112,95,94,422
3,50-59,92,92,94,103,381
4,60-69,6,9,19,3,37


In [8]:
print("Onsite Work Location DataFrame:")
onsite_result

Onsite Work Location DataFrame:


Unnamed: 0,hoursWorkedGrouped,Anxiety,Burnout,Depression,Healthy,Total
0,20-29,106,108,98,87,399
1,30-39,123,104,95,95,417
2,40-49,81,101,107,111,400
3,50-59,88,115,104,68,375
4,60-69,9,14,8,15,46


In [9]:
print("Remote Work Location DataFrame:")
remote_result

Remote Work Location DataFrame:


Unnamed: 0,hoursWorkedGrouped,Anxiety,Burnout,Depression,Healthy,Total
0,20-29,106,116,112,111,445
1,30-39,120,109,100,97,426
2,40-49,102,89,98,113,402
3,50-59,109,116,95,92,412
4,60-69,6,8,8,7,29


In [10]:
hybrid_result['Unhealthy'] = hybrid_result[['Anxiety', 'Burnout', 'Depression']].sum(axis=1)

remote_result['Unhealthy'] = remote_result[['Anxiety', 'Burnout', 'Depression']].sum(axis=1)

onsite_result['Unhealthy'] = onsite_result[['Anxiety', 'Burnout', 'Depression']].sum(axis=1)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  hybrid_result['Unhealthy'] = hybrid_result[['Anxiety', 'Burnout', 'Depression']].sum(axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  remote_result['Unhealthy'] = remote_result[['Anxiety', 'Burnout', 'Depression']].sum(axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  onsite_result['Unhe

In [11]:
hybrid_result = hybrid_result.drop(columns=['Anxiety', 'Burnout', 'Depression'])

remote_result = remote_result.drop(columns=['Anxiety', 'Burnout', 'Depression'])

onsite_result = onsite_result.drop(columns=['Anxiety', 'Burnout', 'Depression'])

In [12]:
onsite_result = onsite_result[['hoursWorkedGrouped', 'Healthy', 'Unhealthy', 'Total']]

remote_result = remote_result[['hoursWorkedGrouped', 'Healthy', 'Unhealthy', 'Total']]

hybrid_result = hybrid_result[['hoursWorkedGrouped', 'Healthy', 'Unhealthy', 'Total']]

In [13]:
onsite_result

Unnamed: 0,hoursWorkedGrouped,Healthy,Unhealthy,Total
0,20-29,87,312,399
1,30-39,95,322,417
2,40-49,111,289,400
3,50-59,68,307,375
4,60-69,15,31,46


In [14]:
hybrid_result

Unnamed: 0,hoursWorkedGrouped,Healthy,Unhealthy,Total
0,20-29,121,318,439
1,30-39,79,291,370
2,40-49,94,328,422
3,50-59,103,278,381
4,60-69,3,34,37


In [15]:
remote_result

Unnamed: 0,hoursWorkedGrouped,Healthy,Unhealthy,Total
0,20-29,111,334,445
1,30-39,97,329,426
2,40-49,113,289,402
3,50-59,92,320,412
4,60-69,7,22,29
