# Do people with mental health issues have disadvantages in education and work?

In [8]:
# import library and dataset

import pandas as pd

df = pd.read_csv('./data/stack-overflow-developer-survey-2021/2021 Stack Overflow Survey Responses.csv', encoding="ISO-8859-1")

df.head()

Unnamed: 0,ResponseId,MainBranch,Employment,Country,US_State,UK_Country,EdLevel,Age1stCode,LearnCode,YearsCode,...,Age,Gender,Trans,Sexuality,Ethnicity,Accessibility,MentalHealth,SurveyLength,SurveyEase,ConvertedCompYearly
0,1,I am a developer by profession,"Independent contractor, freelancer, or self-em...",Slovakia,,,"Secondary school (e.g. American high school, G...",18 - 24 years,Coding Bootcamp;Other online resources (ex: vi...,,...,25-34 years old,Man,No,Straight / Heterosexual,White or of European descent,None of the above,None of the above,Appropriate in length,Easy,62268.0
1,2,I am a student who is learning to code,"Student, full-time",Netherlands,,,"Bachelorâs degree (B.A., B.S., B.Eng., etc.)",11 - 17 years,"Other online resources (ex: videos, blogs, etc...",7.0,...,18-24 years old,Man,No,Straight / Heterosexual,White or of European descent,None of the above,None of the above,Appropriate in length,Easy,
2,3,"I am not primarily a developer, but I write co...","Student, full-time",Russian Federation,,,"Bachelorâs degree (B.A., B.S., B.Eng., etc.)",11 - 17 years,"Other online resources (ex: videos, blogs, etc...",,...,18-24 years old,Man,No,Prefer not to say,Prefer not to say,None of the above,None of the above,Appropriate in length,Easy,
3,4,I am a developer by profession,Employed full-time,Austria,,,"Masterâs degree (M.A., M.S., M.Eng., MBA, etc.)",11 - 17 years,,,...,35-44 years old,Man,No,Straight / Heterosexual,White or of European descent,I am deaf / hard of hearing,,Appropriate in length,Neither easy nor difficult,
4,5,I am a developer by profession,"Independent contractor, freelancer, or self-em...",United Kingdom of Great Britain and Northern I...,,England,"Masterâs degree (M.A., M.S., M.Eng., MBA, etc.)",5 - 10 years,Friend or family member,17.0,...,25-34 years old,Man,No,,White or of European descent,None of the above,,Appropriate in length,Easy,


In [9]:
# display all given unique answers regarding mental health
# multiple answers were possible

df.MentalHealth.unique()

array(['None of the above', nan,
       'I have a concentration and/or memory disorder (e.g. ADHD)',
       'Prefer not to say',
       'I have a mood or emotional disorder (e.g. depression, bipolar disorder);I have an anxiety disorder',
       "I have a concentration and/or memory disorder (e.g. ADHD);I have autism / an autism spectrum disorder (e.g. Asperger's);Or, in your own words:",
       'I have an anxiety disorder',
       'I have a concentration and/or memory disorder (e.g. ADHD);I have a mood or emotional disorder (e.g. depression, bipolar disorder);I have an anxiety disorder',
       "I have autism / an autism spectrum disorder (e.g. Asperger's)",
       "I have a mood or emotional disorder (e.g. depression, bipolar disorder);I have an anxiety disorder;I have autism / an autism spectrum disorder (e.g. Asperger's)",
       'I have a mood or emotional disorder (e.g. depression, bipolar disorder)',
       "I have a concentration and/or memory disorder (e.g. ADHD);I have autism 

In [10]:
# divide dataset in survey participants with and without limitation

df_temp = df.dropna(subset=['MentalHealth']) # filter out Null values
df_temp = df_temp[~df_temp.MentalHealth.isin(['Or, in your own words:', 'Prefer not to say'])] # filter out values we cannot work with

# df_mh = df_temp[df_temp['MentalHealth'].str.contains('autism')]
# df_not_mh = df_temp[~df_temp['MentalHealth'].str.contains('autism')]

df_mh = df_temp[df_temp.MentalHealth != 'None of the above'] # Dataset with participants with bad mental health
df_not_mh = df_temp[df_temp.MentalHealth == 'None of the above'] # Dataset with participants with good mental health

In [11]:
print('Number Mentally Battered:', str(df_mh.shape[0])+',', str(round(df_mh.shape[0]/(df_mh.shape[0]+df_not_mh.shape[0]),2))+'%,', '\nNumber Not Mentally Battered:', str(df_not_mh.shape[0])+',', str(round(df_not_mh.shape[0]/(df_mh.shape[0]+df_not_mh.shape[0]),2))+'%')

Number Mentally Battered: 15479, 0.22%, 
Number Not Mentally Battered: 56459, 0.78%


In [14]:
# calculate the mean compensation for both groups 

mean_mh = round(df_mh.ConvertedCompYearly.mean(), 2)
mean_not_mh = round(df_not_mh.ConvertedCompYearly.mean(), 2)
 
print('Mean Salary Mentally Battered:', mean_mh, '\nMean Salary Not Mentally Battered:', mean_not_mh, '\nDifference:', round(mean_mh-mean_not_mh, 2))

Mean Salary Mentally Battered: 141374.86 
Mean Salary Not Mentally Battered: 110175.55 
Difference: 31199.31


In [13]:
# visualize the differences in degree of education of both groups

def clean(df):
    # calculates the distribution of the degrees of education in the dataset
    ed_dist = df['EdLevel'].value_counts(normalize=True).reset_index()
    ed_dist.rename(columns={'index': 'EdLevel', 'EdLevel': 'count'}, inplace=True)
    ed_dist.set_index('EdLevel', inplace=True)
    return ed_dist

mean_mh_perc = clean(df_mh) # get the distribution of the degrees of education of participants mentally battered
mean_not_mh_perc = clean(df_not_mh) # get the distribution of the degrees of education of participants not mentally battered

comp_df = pd.merge(mean_mh_perc, mean_not_mh_perc, left_index=True, right_index=True) # merge both datasets
comp_df.columns = ['Mentally Limited', 'Not Mentally Limited'] # rename columns
comp_df['Diff_Ed_Vals'] = comp_df['Mentally Limited'] - comp_df['Not Mentally Limited'] # calculate the differences
comp_df.style.bar(subset=['Diff_Ed_Vals'], align='mid', color=['#d65f5f', '#5fba7d']) # plot the columns and the differences

Unnamed: 0_level_0,Mentally Limited,Not Mentally Limited,Diff_Ed_Vals
EdLevel,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
"Bachelorâs degree (B.A., B.S., B.Eng., etc.)",0.401528,0.433777,-0.032248
Some college/university study without earning a degree,0.178615,0.114689,0.063927
"Masterâs degree (M.A., M.S., M.Eng., MBA, etc.)",0.146428,0.228685,-0.082257
"Secondary school (e.g. American high school, German Realschule or Gymnasium, etc.)",0.139563,0.108586,0.030977
"Associate degree (A.A., A.S., etc.)",0.040218,0.023541,0.016677
Primary/elementary school,0.035231,0.026876,0.008355
"Other doctoral degree (Ph.D., Ed.D., etc.)",0.022732,0.03328,-0.010548
Something else,0.022602,0.015274,0.007328
"Professional degree (JD, MD, etc.)",0.013082,0.015292,-0.00221
