In [1]:
# Importing Libraries

import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import seaborn as sb
import plotly.offline as po
import plotly.graph_objs as go
po.init_notebook_mode(connected=True)

import warnings
warnings.filterwarnings('ignore')

# Loading Data
mulchrep = pd.read_csv('/datasets/multipleChoiceResponses.csv', encoding='ISO-8859-1')
mulchrep.head()

Unnamed: 0,GenderSelect,Country,Age,EmploymentStatus,StudentStatus,LearningDataScience,CodeWriter,CareerSwitcher,CurrentJobTitleSelect,TitleFit,...,JobFactorExperienceLevel,JobFactorDepartment,JobFactorTitle,JobFactorCompanyFunding,JobFactorImpact,JobFactorRemote,JobFactorIndustry,JobFactorLeaderReputation,JobFactorDiversity,JobFactorPublishingOpportunity
0,"Non-binary, genderqueer, or gender non-conforming",,,Employed full-time,,,Yes,,DBA/Database Engineer,Fine,...,,,,,,,,,,
1,Female,United States,30.0,"Not employed, but looking for work",,,,,,,...,,,,,,,,Somewhat important,,
2,Male,Canada,28.0,"Not employed, but looking for work",,,,,,,...,Very Important,Very Important,Very Important,Very Important,Very Important,Very Important,Very Important,Very Important,Very Important,Very Important
3,Male,United States,56.0,"Independent contractor, freelancer, or self-em...",,,Yes,,Operations Research Practitioner,Poorly,...,,,,,,,,,,
4,Male,Taiwan,38.0,Employed full-time,,,Yes,,Computer Scientist,Fine,...,,,,,,,,,,


In [2]:
colors = ['aqua', 'lightgrey', 'lightgreen', '#D0F9B1', 'khaki', 'grey']
mulchrep['GenderSelect'] = np.where(mulchrep['GenderSelect'] == 'Non-binary, genderqueer, or gender non-conforming', 'Non-binary',
                                    mulchrep['GenderSelect'])

gender = mulchrep['GenderSelect'].value_counts()
label = gender.index
size = gender.values

trace = go.Pie(
    labels=label,
    values=size,
    marker=dict(colors=colors)
)

data = [trace]
layout = go.Layout(title = 'Gender Distribution')

fig = go.Figure(data = data, layout=layout)
po.iplot(fig)

**Top 5 countries**
1. USA - 4197 participants
2. India - 2704 participants
3. Russia - 578 participants
4. United Kingdom - 535  participants
5. China - 471 participants

In [5]:

df = pd.DataFrame(mulchrep.Country.value_counts())
df['country'] = df.index
df.columns = ['number', 'country']
df = df.reset_index().drop('index', axis=1)

data = [dict(
    type = 'choropleth',
    locations = df['country'],
    locationmode = 'country names',
    z = df['number'],
    text = df['country'],
    colorscale = [[
        0,"rgb(5, 10, 172)"],[0.35,"rgb(40, 60, 190)"],
        [0.5,"rgb(70, 100, 245)"],[0.6,"rgb(90, 120, 245)"],
        [0.7,"rgb(106, 137, 247)"],[1,"rgb(220, 220, 220)"]],
    autocolorscale = False,
    reversescale = True,
    marker = dict(
        line = dict(
            color = 'rgb(180,180,180)',
            width = 0.5
        )
    ),
    colorbar = dict(
        autotick = False,
        tickprefix = '$',
        title = 'Survey Respondents'
    )
)]

layout = dict(
    title = 'The Nationality of Respondents',
    geo = dict(
        showframe = False,
        showcoastlines = True,
        projection = dict(
            type = 'Mercator'
        )
    )
)

fig = dict(data=data, layout=layout)
po.iplot(fig, validate=False)

**Age Distribution in 2017 (left) vs 2018 (right):**

* 18-21: **7.2% - 12.7%**
* 22-24: **14.9% - 21.5%**
* 25-29: **25.9% - 25.8%**
* 30-34: **18.5% - 15.8%**
* 35-39: **12.6% - 9.4%**
* 40-44: **7.7% - 5.7%**
* 44+: **12.9% - 8.4%**

In [6]:
mulchrep = mulchrep[(mulchrep['Age'] < 75) & (mulchrep['Age'] > 18)]
mulchrep

Unnamed: 0,GenderSelect,Country,Age,EmploymentStatus,StudentStatus,LearningDataScience,CodeWriter,CareerSwitcher,CurrentJobTitleSelect,TitleFit,...,JobFactorExperienceLevel,JobFactorDepartment,JobFactorTitle,JobFactorCompanyFunding,JobFactorImpact,JobFactorRemote,JobFactorIndustry,JobFactorLeaderReputation,JobFactorDiversity,JobFactorPublishingOpportunity
1,Female,United States,30.0,"Not employed, but looking for work",,,,,,,...,,,,,,,,Somewhat important,,
2,Male,Canada,28.0,"Not employed, but looking for work",,,,,,,...,Very Important,Very Important,Very Important,Very Important,Very Important,Very Important,Very Important,Very Important,Very Important,Very Important
3,Male,United States,56.0,"Independent contractor, freelancer, or self-em...",,,Yes,,Operations Research Practitioner,Poorly,...,,,,,,,,,,
4,Male,Taiwan,38.0,Employed full-time,,,Yes,,Computer Scientist,Fine,...,,,,,,,,,,
5,Male,Brazil,46.0,Employed full-time,,,Yes,,Data Scientist,Fine,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
16710,Male,People 's Republic of China,23.0,"Not employed, but looking for work",,,,,,,...,,,,,,,,,,
16711,Female,Other,24.0,"Not employed, but looking for work",,,,,,,...,,,,,,,,,,
16712,Male,Indonesia,25.0,Employed full-time,,,Yes,,Programmer,Fine,...,,,,,,,,,,
16713,Female,Taiwan,25.0,Employed part-time,,,No,Yes,,,...,,,,,,,,,,
