In [41]:
# Employee Attrition Analysis
# Attrition --> employees leaving the company
# Typically tracked on a quarterly or yearly basis
# Employee attrition analysis is a type of behavioural analysis where we study the behaviour and characteristics of
# the employees who left the organization and compare their characteristics with the current employees to find the
# employees who may leave the organization soon.

# A high rate of employee attrition can be expensive for any company in terms of recruitment and training costs,
# loss of productivity, and reduced employee morale. By identifying the causes of attrition, a company
# can take measures to reduce attrition and retain valuable employees.

In [42]:
import pandas as pd

In [43]:
# Read the attrition dataset from the CSV file (path is relative to the working directory).
df = pd.read_csv(filepath_or_buffer="Attrition - Attrition.csv")

In [44]:
# Preview the first two rows of the loaded frame.
df.head(2)

Unnamed: 0,Age,Attrition,BusinessTravel,DailyRate,Department,DistanceFromHome,Education,EducationField,EmployeeCount,EmployeeNumber,...,RelationshipSatisfaction,StandardHours,StockOptionLevel,TotalWorkingYears,TrainingTimesLastYear,WorkLifeBalance,YearsAtCompany,YearsInCurrentRole,YearsSinceLastPromotion,YearsWithCurrManager
0,41,Yes,Travel_Rarely,1102,Sales,1,2,Life Sciences,1,1,...,1,80,0,8,0,1,6,4,0,5
1,49,No,Travel_Frequently,279,Research & Development,8,1,Life Sciences,1,2,...,4,80,1,10,3,3,10,7,1,7


In [45]:
# List the column labels of the loaded frame.
df.columns

Index(['Age', 'Attrition', 'BusinessTravel', 'DailyRate', 'Department',
       'DistanceFromHome', 'Education', 'EducationField', 'EmployeeCount',
       'EmployeeNumber', 'EnvironmentSatisfaction', 'Gender', 'HourlyRate',
       'JobInvolvement', 'JobLevel', 'JobRole', 'JobSatisfaction',
       'MaritalStatus', 'MonthlyIncome', 'MonthlyRate', 'NumCompaniesWorked',
       'Over18', 'OverTime', 'PercentSalaryHike', 'PerformanceRating',
       'RelationshipSatisfaction', 'StandardHours', 'StockOptionLevel',
       'TotalWorkingYears', 'TrainingTimesLastYear', 'WorkLifeBalance',
       'YearsAtCompany', 'YearsInCurrentRole', 'YearsSinceLastPromotion',
       'YearsWithCurrManager'],
      dtype='object')

In [46]:
# Dimensions of the frame as (rows, columns).
df.shape

(1470, 35)

In [47]:
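# Uncomment the next line to install plotly if it is not already available in the kernel's environment:
# %pip install plotly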

In [48]:
import plotly.graph_objects as go
import plotly.io as pio
# Set plotly's default figure template to "plotly_white".
pio.templates.default="plotly_white"

In [49]:
# Filter the data to show only "Yes" values in the "Attrition" column

In [50]:
# Keep only the rows whose "Attrition" value equals "Yes".
attr_df = df.loc[df["Attrition"] == "Yes"]

In [51]:
# Display the filtered frame (rows with Attrition == "Yes").
attr_df

Unnamed: 0,Age,Attrition,BusinessTravel,DailyRate,Department,DistanceFromHome,Education,EducationField,EmployeeCount,EmployeeNumber,...,RelationshipSatisfaction,StandardHours,StockOptionLevel,TotalWorkingYears,TrainingTimesLastYear,WorkLifeBalance,YearsAtCompany,YearsInCurrentRole,YearsSinceLastPromotion,YearsWithCurrManager
0,41,Yes,Travel_Rarely,1102,Sales,1,2,Life Sciences,1,1,...,1,80,0,8,0,1,6,4,0,5
2,37,Yes,Travel_Rarely,1373,Research & Development,2,2,Other,1,4,...,2,80,0,7,3,3,0,0,0,0
14,28,Yes,Travel_Rarely,103,Research & Development,24,3,Life Sciences,1,19,...,2,80,0,6,4,3,4,2,0,3
21,36,Yes,Travel_Rarely,1218,Sales,9,4,Life Sciences,1,27,...,2,80,0,10,4,3,5,3,0,3
24,34,Yes,Travel_Rarely,699,Research & Development,6,1,Medical,1,31,...,3,80,0,8,2,3,4,2,1,3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1438,23,Yes,Travel_Frequently,638,Sales,9,3,Marketing,1,2023,...,1,80,1,1,3,2,1,0,1,0
1442,29,Yes,Travel_Rarely,1092,Research & Development,1,4,Medical,1,2027,...,2,80,3,4,3,4,2,2,2,2
1444,56,Yes,Travel_Rarely,310,Research & Development,7,2,Technical Degree,1,2032,...,4,80,1,14,4,1,10,9,9,8
1452,50,Yes,Travel_Frequently,878,Sales,1,4,Life Sciences,1,2044,...,4,80,2,12,3,3,6,3,0,1


In [52]:
# Department Wise Attrition

In [53]:
# Count the attrition rows in each department; the group sizes land in a "count" column.
atr_by_dpt = (
    attr_df
    .groupby("Department")
    .size()
    .reset_index(name="count")
)

In [54]:
# Show the per-department attrition counts.
atr_by_dpt

Unnamed: 0,Department,count
0,Human Resources,12
1,Research & Development,133
2,Sales,92


In [66]:
# Build a donut chart (a pie with a hole) of attrition counts per department.
# NOTE(review): only two colors are listed while the department table has three rows;
# confirm how plotly colors the remaining slice.
fig = go.Figure(
    data=[
        go.Pie(
            labels=atr_by_dpt["Department"],
            values=atr_by_dpt["count"],
            hole=0.4,
            marker={"colors": ["red", "green"]},
            textposition="inside",
        )
    ]
)
# Set the chart title
fig.update_layout(title="Employee Attrition by Department")
# Render the chart
fig.show()

In [56]:
# Attrition by EducationField

In [57]:
# Count the attrition rows in each education field; the group sizes land in a "count" column.
atr_by_edu = (
    attr_df
    .groupby("EducationField")
    .size()
    .reset_index(name="count")
)

In [58]:
# Show the per-education-field attrition counts.
atr_by_edu

Unnamed: 0,EducationField,count
0,Human Resources,7
1,Life Sciences,89
2,Marketing,35
3,Medical,63
4,Other,11
5,Technical Degree,32


In [59]:
# Build a donut chart (a pie with a hole) of attrition counts per education field.
# NOTE(review): only two colors are listed while the education-field table has six rows;
# confirm how plotly colors the remaining slices.
fig = go.Figure(
    data=[
        go.Pie(
            labels=atr_by_edu["EducationField"],
            values=atr_by_edu["count"],
            hole=0.4,
            marker={"colors": ["red", "green"]},
            textposition="inside",
        )
    ]
)
# Set the chart title
fig.update_layout(title="Employee Attrition by EducationField")
# Render the chart
fig.show()

In [60]:
# Attrition by MaritalStatus


In [61]:
# Peek at the first four attrition rows before grouping by marital status.
attr_df.head(4)

Unnamed: 0,Age,Attrition,BusinessTravel,DailyRate,Department,DistanceFromHome,Education,EducationField,EmployeeCount,EmployeeNumber,...,RelationshipSatisfaction,StandardHours,StockOptionLevel,TotalWorkingYears,TrainingTimesLastYear,WorkLifeBalance,YearsAtCompany,YearsInCurrentRole,YearsSinceLastPromotion,YearsWithCurrManager
0,41,Yes,Travel_Rarely,1102,Sales,1,2,Life Sciences,1,1,...,1,80,0,8,0,1,6,4,0,5
2,37,Yes,Travel_Rarely,1373,Research & Development,2,2,Other,1,4,...,2,80,0,7,3,3,0,0,0,0
14,28,Yes,Travel_Rarely,103,Research & Development,24,3,Life Sciences,1,19,...,2,80,0,6,4,3,4,2,0,3
21,36,Yes,Travel_Rarely,1218,Sales,9,4,Life Sciences,1,27,...,2,80,0,10,4,3,5,3,0,3


In [62]:
# Count the attrition rows for each marital status; the group sizes land in a "count" column.
attr_df_MS = (
    attr_df
    .groupby("MaritalStatus")
    .size()
    .reset_index(name="count")
)

In [63]:
# Create a donut chart (a pie with a hole) of attrition counts per marital status
fig = go.Figure(data=[go.Pie(
    labels=attr_df_MS["MaritalStatus"],
    values=attr_df_MS["count"],
    hole=0.4,
    marker=dict(colors=["red","green"]),
    textposition="inside"
)])
# Update the layout; the title names the column actually plotted here
# (it previously said "Department", copied from the department chart).
fig.update_layout(title="Employee Attrition by MaritalStatus")
# Show the chart
fig.show()