# Do trans people have disadvantages in education and work?

In [18]:
# import library and dataset

import pandas as pd

df = pd.read_csv('./data/stack-overflow-developer-survey-2021/2021 Stack Overflow Survey Responses.csv', encoding="ISO-8859-1")

df.head()

Unnamed: 0,ResponseId,MainBranch,Employment,Country,US_State,UK_Country,EdLevel,Age1stCode,LearnCode,YearsCode,...,Age,Gender,Trans,Sexuality,Ethnicity,Accessibility,MentalHealth,SurveyLength,SurveyEase,ConvertedCompYearly
0,1,I am a developer by profession,"Independent contractor, freelancer, or self-em...",Slovakia,,,"Secondary school (e.g. American high school, G...",18 - 24 years,Coding Bootcamp;Other online resources (ex: vi...,,...,25-34 years old,Man,No,Straight / Heterosexual,White or of European descent,None of the above,None of the above,Appropriate in length,Easy,62268.0
1,2,I am a student who is learning to code,"Student, full-time",Netherlands,,,"Bachelorâs degree (B.A., B.S., B.Eng., etc.)",11 - 17 years,"Other online resources (ex: videos, blogs, etc...",7.0,...,18-24 years old,Man,No,Straight / Heterosexual,White or of European descent,None of the above,None of the above,Appropriate in length,Easy,
2,3,"I am not primarily a developer, but I write co...","Student, full-time",Russian Federation,,,"Bachelorâs degree (B.A., B.S., B.Eng., etc.)",11 - 17 years,"Other online resources (ex: videos, blogs, etc...",,...,18-24 years old,Man,No,Prefer not to say,Prefer not to say,None of the above,None of the above,Appropriate in length,Easy,
3,4,I am a developer by profession,Employed full-time,Austria,,,"Masterâs degree (M.A., M.S., M.Eng., MBA, etc.)",11 - 17 years,,,...,35-44 years old,Man,No,Straight / Heterosexual,White or of European descent,I am deaf / hard of hearing,,Appropriate in length,Neither easy nor difficult,
4,5,I am a developer by profession,"Independent contractor, freelancer, or self-em...",United Kingdom of Great Britain and Northern I...,,England,"Masterâs degree (M.A., M.S., M.Eng., MBA, etc.)",5 - 10 years,Friend or family member,17.0,...,25-34 years old,Man,No,,White or of European descent,None of the above,,Appropriate in length,Easy,


In [19]:
# display all given unique answers regarding being trans

df.Trans.unique()

array(['No', 'Prefer not to say', 'Yes', nan, 'Or, in your own words:'],
      dtype=object)

In [20]:
# divide dataset in trans and not trans survey participants

df_temp = df.dropna(subset=['Trans']) # filter out Null values
df_temp = df_temp[~df_temp.Trans.isin(['Or, in your own words:', 'Prefer not to say'])] # filter out values we cannot work with

df_Trans = df_temp[df_temp.Trans == 'Yes'] # Dataset with trans participants
df_not_Trans = df_temp[df_temp.Trans == 'No'] # Dataset without trans participants

In [21]:
print('Number Trans Participants:', str(df_Trans.shape[0])+',', str(round(df_Trans.shape[0]/(df_Trans.shape[0]+df_not_Trans.shape[0]),2))+'%,', '\nNumber Not Trans Participants:', str(df_not_Trans.shape[0])+',', str(round(df_not_Trans.shape[0]/(df_Trans.shape[0]+df_not_Trans.shape[0]),2))+'%')

Number Trans Participants: 1035, 0.01%, 
Number Not Trans Participants: 77275, 0.99%


In [22]:
# calculate the mean compensation for both groups 

mean_Trans = round(df_Trans.ConvertedCompYearly.mean(), 2)
mean_not_Trans = round(df_not_Trans.ConvertedCompYearly.mean(), 2)
 
print('Mean Salary Trans Participants:', mean_Trans, '\nMean Salary Not Trans Participants:', mean_not_Trans, '\nDifference:', round(mean_Trans-mean_not_Trans, 2))

Mean Salary Trans Participants: 310826.22 
Mean Salary Not Trans Participants: 115441.12 
Difference: 195385.1


In [23]:
# visualize the differences in degree of education of both groups

def clean(df):
    # calculates the distribution of the degrees of education in the dataset
    mh = df['EdLevel'].value_counts(normalize=True).reset_index()
    mh.rename(columns={'index': 'EdLevel', 'EdLevel': 'count'}, inplace=True)
    mh.set_index('EdLevel', inplace=True)
    return mh

mean_Trans_perc = clean(df_Trans) # get the distribution of the degrees of education of trans participants
mean_not_Trans_perc = clean(df_not_Trans) # get the distribution of the degrees of education of not trans participants

comp_df = pd.merge(mean_Trans_perc, mean_not_Trans_perc, left_index=True, right_index=True) # merge both datasets
comp_df.columns = ['Trans', 'Not Trans'] # rename columns
comp_df['Diff_Ed_Vals'] = comp_df['Trans'] - comp_df['Not Trans'] # calculate the differences
comp_df.style.bar(subset=['Diff_Ed_Vals'], align='mid', color=['#d65f5f', '#5fba7d']) # plot the columns and the differences

Unnamed: 0_level_0,Trans,Not Trans,Diff_Ed_Vals
EdLevel,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
"Bachelorâs degree (B.A., B.S., B.Eng., etc.)",0.310311,0.42944,-0.119129
"Secondary school (e.g. American high school, German Realschule or Gymnasium, etc.)",0.182879,0.114384,0.068495
Some college/university study without earning a degree,0.167315,0.127869,0.039446
"Masterâs degree (M.A., M.S., M.Eng., MBA, etc.)",0.120623,0.211368,-0.090746
Primary/elementary school,0.099222,0.027786,0.071436
"Associate degree (A.A., A.S., etc.)",0.046693,0.026878,0.019815
Something else,0.035019,0.016881,0.018138
"Other doctoral degree (Ph.D., Ed.D., etc.)",0.020428,0.03056,-0.010132
"Professional degree (JD, MD, etc.)",0.01751,0.014833,0.002677
