# **Analysing Gender Pay Gap**

# *Importing Libraries*

In [1]:
import numpy as np # linear algebra
import pandas as pd
import plotly.graph_objs as go

%matplotlib inline
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.offline as py
import seaborn as sns

import pandas_profiling

import random
# Apply the "fivethirtyeight" style sheet to all matplotlib figures drawn later.
plt.style.use("fivethirtyeight")

import warnings
# Silence every warning for the rest of the session.
# NOTE(review): this also hides deprecation warnings from the libraries used
# below — confirm that is intended.
warnings.filterwarnings('ignore')

import os
# Print the full path of every file found under /kaggle/input.
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# *Loading Dataset*

In [2]:
# Location of the Glassdoor CSV inside the Kaggle input directory.
DATA_PATH = '/kaggle/input/glassdoor-analyze-gender-pay-gap/Glassdoor Gender Pay Gap.csv'

# Read the CSV into the DataFrame used by every cell below.
df = pd.read_csv(DATA_PATH)

# *Basic Data Analysis*

In [3]:
# Render the automated profiling report for the loaded frame.
# `profile_report` is not a built-in pandas method; it relies on the
# `pandas_profiling` import in the first cell.
df.profile_report()

In [4]:
# Preview the first rows of the dataset (pandas shows 5 by default).
df.head()

In [5]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns.
df.describe()

In [6]:
# Column names, dtypes, non-null counts and memory usage.
df.info()

In [7]:
# Derived column: total compensation = base pay + bonus (element-wise).
# It is added after the profiling/describe cells, so their output does not
# include it.
df['TotalPay'] = df['BasePay'].add(df['Bonus'])

In [8]:
# Number of rows for each distinct job title, most frequent first.
print('Job Titles: ')
df['JobTitle'].value_counts()

In [9]:
# Number of rows for each distinct education level, most frequent first.
print('Education Level: ')
df['Education'].value_counts()

# *Visualisation*

In [10]:
# Number of dataset rows per gender. `size()` counts rows directly, so the
# result does not depend on which column is inspected (a per-column `count()`
# skips nulls and would under-count if that column had missing values).
gender = df.groupby('Gender').size()

fig = go.Figure(data=[go.Bar(
    x=gender.index,
    y=gender.values,
    # Bar labels: `textposition` has no effect unless `text` is supplied.
    text=gender.values,
    textposition='auto',
    width=0.3,
    # Thin black outline around each bar.
    marker=dict(line=dict(width=1, color="black")),
)])

fig.update_layout(yaxis=dict(title=''), width=500, height=500,
                  title='No of Male and Female Job Entries on the Dataset',
                  xaxis=dict(title='Gender'))
fig.show()

In [11]:
# Row count for every (department, gender) pair, in long form for plotly express.
gender_dept = (
    df.groupby(['Dept', 'Gender'])
      .size()
      .reset_index(name='counts')
)

# Grouped bars: one cluster per department, one bar per gender.
fig = px.bar(
    gender_dept,
    x='Dept',
    y='counts',
    color='Gender',
    barmode='group',
    title='Count Gender per Department',
)
fig.show()

In [12]:
# Per-department row count of each gender. Only the two columns that are
# needed are one-hot encoded and summed; summing the whole frame would also
# aggregate every unrelated column (on pandas >= 2.0 `sum()` no longer drops
# string columns and concatenates them instead).
title = pd.get_dummies(df[['Dept', 'Gender']], columns=['Gender']).groupby('Dept').sum()

# Two donut charts side by side: female share on the left part of the
# canvas, male share on the right part.
female = go.Pie(labels=title.index, values=title['Gender_Female'], name="Female",
                hole=0.5, domain={'x': [0, 0.46]})
male = go.Pie(labels=title.index, values=title['Gender_Male'], name="Male",
              hole=0.5, domain={'x': [0.52, 1]})

# The annotations place the gender name inside each donut hole.
layout = dict(title='Department Distribution', font=dict(size=14), legend=dict(orientation="h"),
              annotations=[dict(x=0.2, y=0.5, text='Female', showarrow=False, font=dict(size=20)),
                           dict(x=0.8, y=0.5, text='Male', showarrow=False, font=dict(size=20))])

# Build a real Figure and render it with `show()`, like the other plotting
# cells in this notebook, instead of passing a raw dict to the offline API.
fig = go.Figure(data=[female, male], layout=layout)
fig.show()

In [13]:
# Row count for every (job title, gender) pair, in long form for plotly express.
gender_job = (
    df.groupby(['JobTitle', 'Gender'])
      .size()
      .reset_index(name='counts')
)

# Grouped bars: one cluster per job title, one bar per gender.
fig = px.bar(
    gender_job,
    x='JobTitle',
    y='counts',
    color='Gender',
    barmode='group',
    title='Count Gender per JobTitle',
)
fig.show()

In [14]:
# Per-job-title row count of each gender. Only the two columns that are
# needed are one-hot encoded and summed; summing the whole frame would also
# aggregate every unrelated column (on pandas >= 2.0 `sum()` no longer drops
# string columns and concatenates them instead).
title = pd.get_dummies(df[['JobTitle', 'Gender']], columns=['Gender']).groupby('JobTitle').sum()

# Two donut charts side by side: female share on the left part of the
# canvas, male share on the right part.
female = go.Pie(labels=title.index, values=title['Gender_Female'], name="Female",
                hole=0.5, domain={'x': [0, 0.46]})
male = go.Pie(labels=title.index, values=title['Gender_Male'], name="Male",
              hole=0.5, domain={'x': [0.52, 1]})

# The annotations place the gender name inside each donut hole.
layout = dict(title='Job Title Distribution', font=dict(size=14), legend=dict(orientation="h"),
              annotations=[dict(x=0.2, y=0.5, text='Female', showarrow=False, font=dict(size=20)),
                           dict(x=0.8, y=0.5, text='Male', showarrow=False, font=dict(size=20))])

# Build a real Figure and render it with `show()`, like the other plotting
# cells in this notebook, instead of passing a raw dict to the offline API.
fig = go.Figure(data=[female, male], layout=layout)
fig.show()