# ***Deep Learning***

In [None]:
import numpy as np
import pandas as pd


# ***Employee Attrition Analysis***

***Employee Attrition Analysis is a type of behavioural analysis where we study the behavious and characterstics of the employees who left the organization and compare their charactestics with the current employees to find the employees who may leave the organization soon.***

In [None]:

df=pd.read_csv("/content/Attrition.csv")
df.head(3)


Unnamed: 0,Age,Attrition,BusinessTravel,DailyRate,Department,DistanceFromHome,Education,EducationField,EmployeeCount,EmployeeNumber,...,RelationshipSatisfaction,StandardHours,StockOptionLevel,TotalWorkingYears,TrainingTimesLastYear,WorkLifeBalance,YearsAtCompany,YearsInCurrentRole,YearsSinceLastPromotion,YearsWithCurrManager
0,41,Yes,Travel_Rarely,1102,Sales,1,2,Life Sciences,1,1,...,1,80,0,8,0,1,6,4,0,5
1,49,No,Travel_Frequently,279,Research & Development,8,1,Life Sciences,1,2,...,4,80,1,10,3,3,10,7,1,7
2,37,Yes,Travel_Rarely,1373,Research & Development,2,2,Other,1,4,...,2,80,0,7,3,3,0,0,0,0


In [None]:
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.graph_objects as go
import plotly.io as pio
pio.templates.default = "plotly_white"


In [None]:
df.isnull().sum()

Age                         0
Attrition                   0
BusinessTravel              0
DailyRate                   0
Department                  0
DistanceFromHome            0
Education                   0
EducationField              0
EmployeeCount               0
EmployeeNumber              0
EnvironmentSatisfaction     0
Gender                      0
HourlyRate                  0
JobInvolvement              0
JobLevel                    0
JobRole                     0
JobSatisfaction             0
MaritalStatus               0
MonthlyIncome               0
MonthlyRate                 0
NumCompaniesWorked          0
Over18                      0
OverTime                    0
PercentSalaryHike           0
PerformanceRating           0
RelationshipSatisfaction    0
StandardHours               0
StockOptionLevel            0
TotalWorkingYears           0
TrainingTimesLastYear       0
WorkLifeBalance             0
YearsAtCompany              0
YearsInCurrentRole          0
YearsSince

In [None]:
# Check percentage of Attrition by department


In [None]:
# Filter the data to show only "Yes" values in "Attririon" Column.

In [None]:
attr_df = df[df["Attrition"] == "Yes"]

In [None]:
# Calculate the attrition by department

In [None]:
attrition_by_dpt = attr_df.groupby(['Department']).size().reset_index(name='Count')
attrition_by_dpt

Unnamed: 0,Department,Count
0,Human Resources,12
1,Research & Development,133
2,Sales,92


In [None]:
# Create a Donut chart
fig = go.Figure(data=[go.Pie(
                               labels=attrition_by_dpt['Department'],
                               values=attrition_by_dpt['Count'],
                               hole = 0.4,
                               marker = dict(colors=['#3CAEA3', '#F6D55C']),
                               textposition='inside')])


# Update the Layout
fig.update_layout(title='Attrition by Department', font = dict(size=16),
                  legend=dict(orientation="h",yanchor="bottom",y=1.02,xanchor="right",x=1))

# Show the plot
fig.show()

In [None]:


attrition_by_dpt = attr_df.groupby(['EducationField']).size().reset_index(name='Count')
attrition_by_dpt

Unnamed: 0,EducationField,Count
0,Human Resources,7
1,Life Sciences,89
2,Marketing,35
3,Medical,63
4,Other,11
5,Technical Degree,32


In [None]:
# Create a Donut chart
fig = go.Figure(data=[go.Pie(
                               labels=attrition_by_dpt['EducationField'],
                               values=attrition_by_dpt['Count'],
                               hole = 0.4,
                               marker = dict(colors=['pink', '#F6D55C']),
                               textposition='inside')])


# Update the Layout
fig.update_layout(title='Attrition by EducationField', font = dict(size=16),
                  legend=dict(orientation="h",yanchor="bottom",y=1.02,xanchor="right",x=1))

# Show the plot
fig.show()

***We can see that the employees with Life Sciences as an education field have a high attrition rate. Now let's have a look at the percentage of attrition by number of years at the company***

In [None]:
attrition_by_dpt = attr_df.groupby(['YearsAtCompany']).size().reset_index(name='Count')
attrition_by_dpt



Unnamed: 0,YearsAtCompany,Count
0,0,16
1,1,59
2,2,27
3,3,20
4,4,19
5,5,21
6,6,9
7,7,11
8,8,9
9,9,8


In [None]:
attrition_by_dpt = attr_df.groupby(['YearsSinceLastPromotion']).size().reset_index(name='Count')
attrition_by_dpt

Unnamed: 0,YearsSinceLastPromotion,Count
0,0,110
1,1,49
2,2,27
3,3,9
4,4,5
5,5,2
6,6,6
7,7,16
8,9,4
9,10,1


***We can see that the employees who don't get promotions leave the organization more compared to the employees who got promotion.Now let's have a look at the percentage of the attrition of gender***

In [None]:
attrition_by_dpt = attr_df.groupby(['Gender']).size().reset_index(name='Count')
attrition_by_dpt

Unnamed: 0,Gender,Count
0,Female,87
1,Male,150


In [None]:
# Create a Donut chart
fig = go.Figure(data=[go.Pie(
                               labels=attrition_by_dpt['Gender'],
                               values=attrition_by_dpt['Count'],
                               hole = 0.4,
                               marker = dict(colors=['blue', '#F6D55C']),
                               textposition='inside')])


# Update the Layout
fig.update_layout(title='Attrition by Gender', font = dict(size=16),
                  legend=dict(orientation="h",yanchor="bottom",y=1.02,xanchor="right",x=1))

# Show the plot
fig.show()

**Men have a high attrition rate compared to women. Now let's have a look at the attrition by analyzing the relationship between monthly income and the age of the employees:**

In [None]:
import plotly.express as px

In [None]:
fig = px.scatter(df, x="Age", y="MonthlyIncome", color="Attrition", trendline="ols")

fig.update_layout(title='Age vs Monthly income by Attrition', font = dict(size=16))
fig.show()

**We can see that as the age of the person increases, monthly income increases. We can also see a high rate of attrition among the employees with low monthly income**

In [None]:
!pip install tensorflow



In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf

In [None]:
df=pd.read_csv("/content/Attrition.csv")
df.head(3)

Unnamed: 0,Age,Attrition,BusinessTravel,DailyRate,Department,DistanceFromHome,Education,EducationField,EmployeeCount,EmployeeNumber,...,RelationshipSatisfaction,StandardHours,StockOptionLevel,TotalWorkingYears,TrainingTimesLastYear,WorkLifeBalance,YearsAtCompany,YearsInCurrentRole,YearsSinceLastPromotion,YearsWithCurrManager
0,41,Yes,Travel_Rarely,1102,Sales,1,2,Life Sciences,1,1,...,1,80,0,8,0,1,6,4,0,5
1,49,No,Travel_Frequently,279,Research & Development,8,1,Life Sciences,1,2,...,4,80,1,10,3,3,10,7,1,7
2,37,Yes,Travel_Rarely,1373,Research & Development,2,2,Other,1,4,...,2,80,0,7,3,3,0,0,0,0


In [None]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import accuracy_score

In [None]:
df.columns

Index(['Age', 'Attrition', 'BusinessTravel', 'DailyRate', 'Department',
       'DistanceFromHome', 'Education', 'EducationField', 'EmployeeCount',
       'EmployeeNumber', 'EnvironmentSatisfaction', 'Gender', 'HourlyRate',
       'JobInvolvement', 'JobLevel', 'JobRole', 'JobSatisfaction',
       'MaritalStatus', 'MonthlyIncome', 'MonthlyRate', 'NumCompaniesWorked',
       'Over18', 'OverTime', 'PercentSalaryHike', 'PerformanceRating',
       'RelationshipSatisfaction', 'StandardHours', 'StockOptionLevel',
       'TotalWorkingYears', 'TrainingTimesLastYear', 'WorkLifeBalance',
       'YearsAtCompany', 'YearsInCurrentRole', 'YearsSinceLastPromotion',
       'YearsWithCurrManager'],
      dtype='object')

In [None]:
df = df.dropna()

In [None]:
x = df.drop('Attrition', axis=1)
y = df['Attrition']

In [None]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1470 entries, 0 to 1469
Data columns (total 35 columns):
 #   Column                    Non-Null Count  Dtype 
---  ------                    --------------  ----- 
 0   Age                       1470 non-null   int64 
 1   Attrition                 1470 non-null   object
 2   BusinessTravel            1470 non-null   object
 3   DailyRate                 1470 non-null   int64 
 4   Department                1470 non-null   object
 5   DistanceFromHome          1470 non-null   int64 
 6   Education                 1470 non-null   int64 
 7   EducationField            1470 non-null   object
 8   EmployeeCount             1470 non-null   int64 
 9   EmployeeNumber            1470 non-null   int64 
 10  EnvironmentSatisfaction   1470 non-null   int64 
 11  Gender                    1470 non-null   object
 12  HourlyRate                1470 non-null   int64 
 13  JobInvolvement            1470 non-null   int64 
 14  JobLevel                

In [None]:
categoraical_features = ['BusinessTravel', 'Department', 'EducationField', 'Gender', 'JobRole', 'MaritalStatus', 'Over18', 'OverTime']

In [None]:
x_encoded = pd.get_dummies(x, columns=categoraical_features, drop_first=True)

In [None]:
x_encoded

Unnamed: 0,Age,DailyRate,DistanceFromHome,Education,EmployeeCount,EmployeeNumber,EnvironmentSatisfaction,HourlyRate,JobInvolvement,JobLevel,...,JobRole_Laboratory Technician,JobRole_Manager,JobRole_Manufacturing Director,JobRole_Research Director,JobRole_Research Scientist,JobRole_Sales Executive,JobRole_Sales Representative,MaritalStatus_Married,MaritalStatus_Single,OverTime_Yes
0,41,1102,1,2,1,1,2,94,3,2,...,False,False,False,False,False,True,False,False,True,True
1,49,279,8,1,1,2,3,61,2,2,...,False,False,False,False,True,False,False,True,False,False
2,37,1373,2,2,1,4,4,92,2,1,...,True,False,False,False,False,False,False,False,True,True
3,33,1392,3,4,1,5,4,56,3,1,...,False,False,False,False,True,False,False,True,False,True
4,27,591,2,1,1,7,1,40,3,1,...,True,False,False,False,False,False,False,True,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1465,36,884,23,2,1,2061,3,41,4,2,...,True,False,False,False,False,False,False,True,False,False
1466,39,613,6,1,1,2062,4,42,2,3,...,False,False,False,False,False,False,False,True,False,False
1467,27,155,4,3,1,2064,2,87,4,2,...,False,False,True,False,False,False,False,True,False,True
1468,49,1023,2,3,1,2065,4,63,2,2,...,False,False,False,False,False,True,False,True,False,False


In [None]:
# encode target data
lb = LabelEncoder()

In [None]:
y_encoded = lb.fit_transform(y)
y_encoded

array([1, 0, 1, ..., 0, 0, 0])

In [None]:
x_train, x_test, y_train, y_test = train_test_split(x_encoded, y_encoded, test_size=0.2, random_state=42)

In [None]:
x_train.head()

Unnamed: 0,Age,DailyRate,DistanceFromHome,Education,EmployeeCount,EmployeeNumber,EnvironmentSatisfaction,HourlyRate,JobInvolvement,JobLevel,...,JobRole_Laboratory Technician,JobRole_Manager,JobRole_Manufacturing Director,JobRole_Research Director,JobRole_Research Scientist,JobRole_Sales Executive,JobRole_Sales Representative,MaritalStatus_Married,MaritalStatus_Single,OverTime_Yes
1097,24,350,21,2,1,1551,3,57,2,1,...,True,False,False,False,False,False,False,False,False,False
727,18,287,5,2,1,1012,2,73,3,1,...,False,False,False,False,True,False,False,False,True,False
254,29,1247,20,2,1,349,4,45,3,2,...,False,False,False,False,False,True,False,False,False,False
1175,39,492,12,3,1,1654,4,66,3,2,...,False,False,True,False,False,False,False,True,False,False
1341,31,311,20,3,1,1881,2,89,3,2,...,True,False,False,False,False,False,False,False,False,False


In [None]:
sc = StandardScaler()

In [None]:
x_train_scaled = sc.fit_transform(x_train)
x_test_scaled = sc.transform(x_test)

***Now We will create architecture of our model***

In [None]:
model = tf.keras.Sequential([
    tf.keras.layers.Dense(units = 128, activation='relu', input_shape=(x_train.shape[1],)),
    tf.keras.layers.Dropout(0.3),
    tf.keras.layers.Dense(units = 64, activation='relu'),
    tf.keras.layers.Dropout(0.3),
    tf.keras.layers.Dense(units = 1, activation='sigmoid')
])

In [None]:
# Compile the model


In [None]:
loss_fn = tf.keras.losses.BinaryCrossentropy()
metrics = ['accuracy']

In [None]:
learing_rate= 0.001
momentum =0.9
optimizer = tf.keras.optimizers.SGD(learning_rate=learing_rate, momentum=momentum , nesterov=True)

In [None]:
model.compile(optimizer = optimizer, loss = loss_fn, metrics = metrics)

In [None]:
# Train the model

In [None]:
model.fit(x_train_scaled, y_train , epochs = 50 ,
          batch_size = 8 ,
          validation_split = 0.1)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.src.callbacks.History at 0x78b71bf7af50>

In [None]:
loss , accuracy = model.evaluate(x_test_scaled, y_test)



In [None]:
print(f'Test Loss: {loss:.4f}')
print(f'Test Accuracy: {accuracy:.4f}')


Test Loss: 0.3454
Test Accuracy: 0.8878
