## Job Satisfaction for dvelopers:

Stackoverflow, one of the largest online communities for developers, has been conducting surveys on developers every year since 2010, targeting a variety of questions. In this article, I will focus on job satisfaction for developers in the developer survey of 2018 and try to extract insights into this matter. The data for this was taken from Kaggle datasets.

In [1]:
import pandas as pd
import itertools
import numpy as np
import pandas as pd
import plotly.offline as py
from plotly.offline import init_notebook_mode, iplot
import plotly.graph_objs as go
from plotly import tools
init_notebook_mode(connected=True)  
from plotly.tools import FigureFactory as ff
import pycountry
import random
import squarify
from collections import Counter
import warnings


In [10]:
# Auxilary functions
def remove_coma(val):
    value = val.replace(",","")
    return value

def get_merged_catColumns(df,col1,col2,colcounter,col1sorter = None, col2sorter = None):
    
    tempdf = df.copy()
    if col1sorter:
        tempdf[col1] = tempdf[col1].astype("category")
        tempdf[col1] = tempdf[col1].cat.set_categories(col1sorter)
        tempdf = tempdf.sort_values(by=col1,ascending=False)
    if col2sorter:
        tempdf[col2] = tempdf[col2].astype("category")
        tempdf[col2] = tempdf[col2].cat.set_categories(col2sorter)
        tempdf = tempdf.sort_values(by=col2,ascending=False)
    
    gp = tempdf.groupby([col1,col2])[colcounter].agg(["count"]).reset_index()
    g2 = tempdf[tempdf[col2].notnull()].groupby(col1)[colcounter].agg(["count"]).reset_index()
    merged = pd.merge(gp,g2,on=col1, how = "left")
    merged["percentage"] =  (merged["count_x"]/merged["count_y"]) * 100
    gp = merged.groupby([col1,col2])["percentage"].sum()
    return gp


def random_colors(number_of_colors):
    color = ["#"+''.join([random.choice('0123456789ABCDEF') for j in range(6)])
                 for i in range(number_of_colors)]
    return color



def graph(datacolumns,type_of_graph, top = 0, title = None):
    data_frame = datacolumns.value_counts()
    layout = go.Layout()
    
    if type_of_graph == 'gpbarv':
        data = [go.Bar(
                x=datacolumns.index,
                y=datacolumns.values,
                marker=dict(color=random_colors(10), line=dict(color='rgb(8,48,107)',width=1.5,)),
                opacity = 0.6
        )]  

    elif type_of_graph == 'group':
        data = []
        marker = dict(colors = random_colors(20)) 
        layout = go.Layout(barmode='group')
        for g in datacolumns.index.levels[1]:
            gforsats = datacolumns.xs(g, level=1, drop_level=False)
            data.append(go.Bar(
                x= datacolumns.index.levels[0],
                y= gforsats.values,
                name = g))
            
    elif type_of_graph == 'stack':
        data = []
        marker = dict(colors = random_colors(20)) 
        layout = go.Layout(barmode='stack')
        for g in datacolumns.index.levels[1]:
            gforsats = datacolumns.xs(g, level=1, drop_level=False)
            data.append(go.Bar(
                x= datacolumns.index.levels[0],
                y= gforsats.values,
                name = g))    

    elif type_of_graph == 'pie':
        data = [go.Pie(
            labels = data_frame.index,
            values = data_frame.values,
            marker = dict(colors = random_colors(20)),
            textfont = dict(size = 20)
        )]
        
    layout.title = title  
    fig = go.Figure(data = data, layout = layout)
    py.iplot(fig)
    
def bubble_chart(col):
    data = get_list(df[col].dropna())
    data = data[:10]
    data = data.reindex(index=data.index[::-1])

    size = np.array(data[1]*0.001)
    size
    trace0 = go.Scatter(
        x=data[0],
        y=data[1],
        mode='markers',
        marker=dict(color = random_colors(10),size= size)
    )

    data = [trace0]
    py.iplot(data)

In [3]:
surveydf = pd.read_csv("survey_results_public.csv", low_memory=False)
surveydf["JobSatisfaction"].head(10)

0                   Extremely satisfied
1               Moderately dissatisfied
2                  Moderately satisfied
3    Neither satisfied nor dissatisfied
4                    Slightly satisfied
5                  Moderately satisfied
6                    Slightly satisfied
7                    Slightly satisfied
8                  Moderately satisfied
9                                   NaN
Name: JobSatisfaction, dtype: object

In [21]:
graph(surveydf["JobSatisfaction"],"pie", title="Percentage of respondents by their levels of job satisfaction")

In [11]:
satsorter = ['Extremely dissatisfied','Moderately dissatisfied','Slightly dissatisfied','Neither satisfied nor dissatisfied',
             'Slightly satisfied','Moderately satisfied','Extremely satisfied']

agesorter = ['Under 18 years old','18 - 24 years old','25 - 34 years old', '35 - 44 years old','45 - 54 years old', '55 - 64 years old',
             '65 years or older']

lastjobsorter = ["I've never had a job",'Less than a year ago','Between 1 and 2 years ago', 
                 'Between 2 and 4 years ago', 'More than 4 years ago']

companysizesorter = ['Fewer than 10 employees','10 to 19 employees','20 to 99 employees','100 to 499 employees', 
                     '500 to 999 employees', '1,000 to 4,999 employees','5,000 to 9,999 employees',
                     '10,000 or more employees']

merged = get_merged_catColumns(surveydf, "Age", "JobSatisfaction", "Respondent", col1sorter = agesorter, col2sorter=satsorter)
graph(merged,"group", title = "Job Satisfaction for different age groups")

merged = get_merged_catColumns(surveydf, "Employment", "JobSatisfaction", "Respondent", col1sorter = None, col2sorter=satsorter)
graph(merged,"group", title = "Job Satisfaction for different types of employment")

merged = get_merged_catColumns(surveydf, "LastNewJob", "JobSatisfaction", "Respondent", col1sorter = lastjobsorter, col2sorter=satsorter)
graph(merged,"group")

merged = get_merged_catColumns(surveydf, "CompanySize", "JobSatisfaction", "Respondent", col1sorter = companysizesorter, col2sorter=satsorter)
graph(merged,"group", title = "Job Satisfaction per company size")

merged = get_merged_catColumns(surveydf, "EthicsChoice", "JobSatisfaction", "Respondent", col1sorter = None, col2sorter=satsorter)
graph(merged,"group")


In [12]:
gp = surveydf.groupby("JobSatisfaction")["ConvertedSalary"].median()
graph(gp,"gpbarv",title = "Median salary by levels of job satisfaction")

In [13]:
Jobsat_Encoding = { 'Extremely dissatisfied' : 1,
                    'Moderately dissatisfied' : 5,
                    'Slightly dissatisfied':8,
                    'Neither satisfied nor dissatisfied':11,
                    'Slightly satisfied':14,
                    'Moderately satisfied' : 17,
                    'Extremely satisfied' : 21,
                     }

countries = surveydf[surveydf["JobSatisfaction"].notnull()][['Country','JobSatisfaction']]
countries["JobSatisfaction"] = countries["JobSatisfaction"].apply(lambda x: Jobsat_Encoding[x])
countries = countries.groupby("Country")["JobSatisfaction"].agg(['mean']).reset_index()


countries.loc[2]['code'] = ''
for i,country in enumerate(countries['Country']):
    user_input = country
    mapping = {country.name: country.alpha_3 for country in pycountry.countries}
    countries.set_value(i, 'code', mapping.get(user_input))
data = [ dict(
        type = 'choropleth',
        locations = countries['code'],
        z = countries['mean'],
        text = countries['Country'],
        autocolorscale = True,
        reversescale = True,
        marker = dict(
            line = dict (
                color = 'rgb(180,180,180)',
                width = 0.5
            ) ),
        colorbar = dict(
            autotick = False,
            title = 'Job Satisfaction Degree'),
      ) ]

layout = dict(
    title = 'Average Satisfaction Degree',
    geo = dict(
        showframe = False,
        showcoastlines = False,
        projection = dict(
            type = 'Mercator'
        )
    )
)

fig = dict( data=data, layout=layout )
py.iplot( fig, validate=False)



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


set_value is deprecated and will be removed in a future release. Please use .at[] or .iat[] accessors instead



In [14]:
cc = countries.sort_values(by="mean",ascending = False).reset_index()
cc

Unnamed: 0,index,Country,mean,code
0,94,Marshall Islands,21.000000,MHL
1,149,Tajikistan,19.666667,TJK
2,26,Burundi,17.000000,BDI
3,128,San Marino,17.000000,SMR
4,143,Suriname,17.000000,SUR
5,4,Angola,17.000000,AGO
6,136,Solomon Islands,17.000000,SLB
7,132,Sierra Leone,17.000000,SLE
8,50,Fiji,16.666667,FJI
9,3,Andorra,16.333333,AND


In [15]:
Jobsat_Encoding = {'Extremely dissatisfied' : 1,
                    'Moderately dissatisfied' : 5,
                    'Slightly dissatisfied':8,
                    'Neither satisfied nor dissatisfied':11,
                    'Slightly satisfied':14,
                    'Moderately satisfied' : 17,
                    'Extremely satisfied' : 21,
                     }
yearcCoding_Encoding = {'3-5 years':2, '30 or more years':11, '24-26 years':9, '18-20 years':7,
       '6-8 years':3, '9-11 years':4, '0-2 years':1, '15-17 years':6,
       '12-14 years':5, '21-23 years':8, '27-29 years':10 }

skipMealsDict = {'Never':0, '3 - 4 times per week':3.5, '1 - 2 times per week':1.5,
       'Daily or almost every day':7}

companySizedict = {'20 to 99 employees':3, '10,000 or more employees':15000,
       '100 to 499 employees':300, '10 to 19 employees':15,
       '500 to 999 employees':750, '1,000 to 4,999 employees':3000,
       '5,000 to 9,999 employees':7500, 'Fewer than 10 employees':5}

layout = go.Layout(
                    scene = dict(
                    xaxis = dict(
                        ticktext= list(yearcCoding_Encoding.keys()),
                        tickvals= list(yearcCoding_Encoding.values()),
                        ticks='outside'),
                    yaxis = dict(
                        ticktext= list(Jobsat_Encoding.keys()),
                        tickvals= list(Jobsat_Encoding.values()),
                        ticks='outside')
                    ),
                    margin=dict(
                    r=15, l=15,
                    b=10, t=10)
                )


df = surveydf[(surveydf["CurrencySymbol"] == "USD") & (surveydf["ConvertedSalary"] <800000)][["ConvertedSalary","YearsCoding","JobSatisfaction","CareerSatisfaction","CompanySize"]].dropna()
df["jsNumber"] = df["JobSatisfaction"].apply(lambda x: Jobsat_Encoding[x])


trace1 = go.Scatter3d(
    x=df["YearsCoding"].values,
    y=df["jsNumber"].values,
    z=df["ConvertedSalary"].values,
    mode='markers',
    marker=dict(
        size=4,
        color=df["jsNumber"].values * 2,                # set color to an array/list of desired values
        colorscale='Viridis',   # choose a colorscale
        opacity=0.8
    )
)

fig2 = go.Figure(data=[trace1], layout=layout)
py.iplot(fig2)

In [16]:
import itertools 
devtypes = surveydf[surveydf['PlatformWorkedWith'].notnull()]['PlatformWorkedWith'].apply(lambda x: x.split(";"))
devtypeslist = list(set(itertools.chain(*devtypes)))

for t in devtypeslist:
    print(t)
    graph(surveydf[(surveydf["FrameworkWorkedWith"].notnull()) & (surveydf["FrameworkWorkedWith"].str.contains(t))]["JobSatisfaction"],"pie")

In [18]:
import plotly.figure_factory as ff

satisfaction_levels = ['Extremely dissatisfied' ,
                    'Moderately dissatisfied',
                    'Slightly dissatisfied',
                    'Neither satisfied nor dissatisfied',
                    'Slightly satisfied',
                    'Moderately satisfied',
                    'Extremely satisfied'
     ]


tups = dict({})
for j in satisfaction_levels:
    for c in satisfaction_levels:
        tups[(j,c)] = df[(df["JobSatisfaction"] == j) & (df["CareerSatisfaction"] == c)]["JobSatisfaction"].count()
        
zcolors=[list(tups.values())[(i*7):(i+1)*7] for i in range(0,7)]
fig = ff.create_annotated_heatmap(showscale=True, z = (zcolors),x=list(satisfaction_levels),y=list(satisfaction_levels), colorscale='Viridis')

fig.layout.xaxis = dict(
        title='Career Satisfaction',
        automargin = True
    )
fig.layout.yaxis = dict(
        title='Job Satisfaction',
        automargin = True
    )
fig.layout.title = "Heatmap for the relationship between job and career satisfaction"
py.iplot(fig)