# Do not heterosexual people have disadvantages in education and work?

In [15]:
# import library and dataset

import pandas as pd

df = pd.read_csv('./data/stack-overflow-developer-survey-2021/2021 Stack Overflow Survey Responses.csv', encoding="ISO-8859-1")

df.head()

Unnamed: 0,ResponseId,MainBranch,Employment,Country,US_State,UK_Country,EdLevel,Age1stCode,LearnCode,YearsCode,...,Age,Gender,Trans,Sexuality,Ethnicity,Accessibility,MentalHealth,SurveyLength,SurveyEase,ConvertedCompYearly
0,1,I am a developer by profession,"Independent contractor, freelancer, or self-em...",Slovakia,,,"Secondary school (e.g. American high school, G...",18 - 24 years,Coding Bootcamp;Other online resources (ex: vi...,,...,25-34 years old,Man,No,Straight / Heterosexual,White or of European descent,None of the above,None of the above,Appropriate in length,Easy,62268.0
1,2,I am a student who is learning to code,"Student, full-time",Netherlands,,,"Bachelorâs degree (B.A., B.S., B.Eng., etc.)",11 - 17 years,"Other online resources (ex: videos, blogs, etc...",7.0,...,18-24 years old,Man,No,Straight / Heterosexual,White or of European descent,None of the above,None of the above,Appropriate in length,Easy,
2,3,"I am not primarily a developer, but I write co...","Student, full-time",Russian Federation,,,"Bachelorâs degree (B.A., B.S., B.Eng., etc.)",11 - 17 years,"Other online resources (ex: videos, blogs, etc...",,...,18-24 years old,Man,No,Prefer not to say,Prefer not to say,None of the above,None of the above,Appropriate in length,Easy,
3,4,I am a developer by profession,Employed full-time,Austria,,,"Masterâs degree (M.A., M.S., M.Eng., MBA, etc.)",11 - 17 years,,,...,35-44 years old,Man,No,Straight / Heterosexual,White or of European descent,I am deaf / hard of hearing,,Appropriate in length,Neither easy nor difficult,
4,5,I am a developer by profession,"Independent contractor, freelancer, or self-em...",United Kingdom of Great Britain and Northern I...,,England,"Masterâs degree (M.A., M.S., M.Eng., MBA, etc.)",5 - 10 years,Friend or family member,17.0,...,25-34 years old,Man,No,,White or of European descent,None of the above,,Appropriate in length,Easy,


In [16]:
# display all given unique answers regarding sexuality
# multiple answers were possible

df.Sexuality.unique()

array(['Straight / Heterosexual', 'Prefer not to say', nan, 'Bisexual',
       'Straight / Heterosexual;Bisexual', 'Prefer to self-describe:',
       'Straight / Heterosexual;Bisexual;Gay or Lesbian;Queer',
       'Gay or Lesbian',
       'Straight / Heterosexual;Prefer to self-describe:',
       'Bisexual;Queer', 'Queer', 'Prefer to self-describe:;Queer',
       'Bisexual;Prefer to self-describe:', 'Bisexual;Gay or Lesbian',
       'Gay or Lesbian;Queer',
       'Straight / Heterosexual;Bisexual;Gay or Lesbian',
       'Straight / Heterosexual;Queer',
       'Straight / Heterosexual;Bisexual;Prefer to self-describe:;Gay or Lesbian;Queer',
       'Bisexual;Gay or Lesbian;Queer',
       'Bisexual;Prefer to self-describe:;Queer',
       'Straight / Heterosexual;Bisexual;Prefer to self-describe:',
       'Straight / Heterosexual;Gay or Lesbian',
       'Bisexual;Prefer to self-describe:;Gay or Lesbian;Queer',
       'Straight / Heterosexual;Bisexual;Queer',
       'Prefer to self-describe

In [17]:
# divide dataset in survey participants Heterosexual and not Heterosexual

df_temp = df.dropna(subset=['Sexuality']) # filter out Null values
df_temp = df_temp[~df_temp.Sexuality.isin(['Or, in your own words:', 'Prefer not to say'])] # filter out values we cannot work with

df_not_str = df_temp[df_temp.Sexuality != 'Straight / Heterosexual'] # Dataset with not Heterosexual participants
df_str = df_temp[df_temp.Sexuality == 'Straight / Heterosexual'] # Dataset with Heterosexual participants

In [18]:
print('Number Not Straight / Heterosexual Participants', str(df_not_str.shape[0])+',', str(round(df_not_str.shape[0]/(df_not_str.shape[0]+df_str.shape[0]),2))+'%,', '\nNumber Straight / Heterosexual Participants', str(df_str.shape[0])+',', str(round(df_str.shape[0]/(df_not_str.shape[0]+df_str.shape[0]),2))+'%')

Number Not Straight / Heterosexual Participants 7489, 0.11%, 
Number Straight / Heterosexual Participants 61094, 0.89%


In [19]:
# calculate the mean compensation for both groups 

mean_not_str = round(df_not_str.ConvertedCompYearly.mean(), 2)
mean_str = round(df_str.ConvertedCompYearly.mean(), 2)
 
print('Mean Salary Not Straight / Heterosexual Participants:', mean_not_str, '\nMean Salary Straight / Heterosexual Participants:', mean_str, '\nDifference:', round(mean_not_str-mean_str, 2))

Mean Salary Not Straight / Heterosexual Participants: 138403.13 
Mean Salary Straight / Heterosexual Participants: 115701.71 
Difference: 22701.42


In [20]:
# visualize the differences in degree of education of both groups

def clean(df):
    # calculates the distribution of the degrees of education in the dataset
    ed_dist = df['EdLevel'].value_counts(normalize=True).reset_index()
    ed_dist.rename(columns={'index': 'EdLevel', 'EdLevel': 'count'}, inplace=True)
    ed_dist.set_index('EdLevel', inplace=True)
    return ed_dist

mean_not_str_perc = clean(df_not_str) # get the distribution of the degrees of education of Not Straight / Heterosexual participants
mean_str_perc = clean(df_str) # get the distribution of the degrees of education of Straight / Heterosexual participants

comp_df = pd.merge(mean_not_str_perc, mean_str_perc, left_index=True, right_index=True) # merge both datasets
comp_df.columns = ['Not Straight / Heterosexual', 'Straight / Heterosexual'] # rename columns
comp_df['Diff_Ed_Vals'] = comp_df['Not Straight / Heterosexual'] - comp_df['Straight / Heterosexual'] # calculate the differences
comp_df.style.bar(subset=['Diff_Ed_Vals'], align='mid', color=['#d65f5f', '#5fba7d']) # plot the columns and the differences

Unnamed: 0_level_0,Not Straight / Heterosexual,Straight / Heterosexual,Diff_Ed_Vals
EdLevel,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
"Bachelorâs degree (B.A., B.S., B.Eng., etc.)",0.371536,0.435967,-0.064431
Some college/university study without earning a degree,0.166421,0.128361,0.03806
"Masterâs degree (M.A., M.S., M.Eng., MBA, etc.)",0.1612,0.217148,-0.055948
"Secondary school (e.g. American high school, German Realschule or Gymnasium, etc.)",0.152095,0.108566,0.04353
Primary/elementary school,0.046726,0.021303,0.025423
"Associate degree (A.A., A.S., etc.)",0.034007,0.027596,0.006411
"Other doctoral degree (Ph.D., Ed.D., etc.)",0.027982,0.031447,-0.003465
Something else,0.027179,0.014503,0.012676
"Professional degree (JD, MD, etc.)",0.012853,0.015109,-0.002256
