## Part 1: Preprocessing

In [1]:
# Import our dependencies
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import pandas as pd
import numpy as np
from tensorflow.keras.models import Model
from tensorflow.keras import layers

#  Import and read the attrition data
attrition_df = pd.read_csv('https://static.bc-edx.com/ai/ail-v-1-0/m19/lms/datasets/attrition.csv')
attrition_df.head()

Unnamed: 0,Age,Attrition,BusinessTravel,Department,DistanceFromHome,Education,EducationField,EnvironmentSatisfaction,HourlyRate,JobInvolvement,...,PerformanceRating,RelationshipSatisfaction,StockOptionLevel,TotalWorkingYears,TrainingTimesLastYear,WorkLifeBalance,YearsAtCompany,YearsInCurrentRole,YearsSinceLastPromotion,YearsWithCurrManager
0,41,Yes,Travel_Rarely,Sales,1,2,Life Sciences,2,94,3,...,3,1,0,8,0,1,6,4,0,5
1,49,No,Travel_Frequently,Research & Development,8,1,Life Sciences,3,61,2,...,4,4,1,10,3,3,10,7,1,7
2,37,Yes,Travel_Rarely,Research & Development,2,2,Other,4,92,2,...,3,2,0,7,3,3,0,0,0,0
3,33,No,Travel_Frequently,Research & Development,3,4,Life Sciences,4,56,3,...,3,3,0,8,3,3,8,7,3,0
4,27,No,Travel_Rarely,Research & Development,2,1,Medical,1,40,3,...,3,4,1,6,3,3,2,2,2,2


In [2]:
# Determine the number of unique values in each column.
attrition_df.nunique()

Age                         43
Attrition                    2
BusinessTravel               3
Department                   3
DistanceFromHome            29
Education                    5
EducationField               6
EnvironmentSatisfaction      4
HourlyRate                  71
JobInvolvement               4
JobLevel                     5
JobRole                      9
JobSatisfaction              4
MaritalStatus                3
NumCompaniesWorked          10
OverTime                     2
PercentSalaryHike           15
PerformanceRating            2
RelationshipSatisfaction     4
StockOptionLevel             4
TotalWorkingYears           40
TrainingTimesLastYear        7
WorkLifeBalance              4
YearsAtCompany              37
YearsInCurrentRole          19
YearsSinceLastPromotion     16
YearsWithCurrManager        18
dtype: int64

In [3]:
# Create y_df with the Attrition and Department columns
y_df = attrition_df[['Attrition', 'Department']]


In [4]:
# Create a list of at least 10 column names to use as X data
feature_cols = ['Education', 'Age', 'DistanceFromHome', 'JobSatisfaction', 'OverTime', 
                'StockOptionLevel', 'WorkLifeBalance', 'YearsAtCompany', 
                'YearsSinceLastPromotion', 'NumCompaniesWorked']


# Create X_df using your selected columns
X_df = attrition_df[feature_cols]

# Show the data types for X_df
data_types = X_df.dtypes
print(data_types)


Education                   int64
Age                         int64
DistanceFromHome            int64
JobSatisfaction             int64
OverTime                   object
StockOptionLevel            int64
WorkLifeBalance             int64
YearsAtCompany              int64
YearsSinceLastPromotion     int64
NumCompaniesWorked          int64
dtype: object


In [5]:
# Split the data into training and testing sets
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X_df, y_df, test_size=0.2, random_state=42)


In [6]:
# Convert your X data to numeric data types however you see fit
# Add new code cells as necessary


In [32]:
# Convert 'OverTime' to numeric
# Convert 'OverTime' to numeric
attrition_df['OverTime'] = attrition_df['OverTime'].map({'Yes': 1, 'No': 0})
print(attrition_df['OverTime'].value_counts())

OverTime
0    1054
1     416
Name: count, dtype: int64


In [8]:
# Create a StandardScaler
scaler = StandardScaler()

# Fit the StandardScaler to the training data
scaler.fit(X_train)

# Scale the training and testing data
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)



In [19]:
# Create a OneHotEncoder for the Department column
from sklearn.preprocessing import OneHotEncoder
dept_encoder = OneHotEncoder()

# Fit the encoder to the training data
dept_encoder.fit(y_train[['Department']])

# Create two new variables by applying the encoder
# to the training and testing data
dept_train = dept_encoder.transform(y_train[['Department']]).toarray()
dept_test = dept_encoder.transform(y_test[['Department']]).toarray()



In [20]:
# Create a OneHotEncoder for the Attrition column
attr_encoder = OneHotEncoder()

# Fit the encoder to the training data
attr_encoder.fit(y_train[['Attrition']])

# Create two new variables by applying the encoder to the training and testing data
attr_train = attr_encoder.transform(y_train[['Attrition']]).toarray()
attr_test = attr_encoder.transform(y_test[['Attrition']]).toarray()

## Create, Compile, and Train the Model

In [21]:
# Find the number of columns in the X training data
n_cols = X_train.shape[1]

# Create the input layer
input = layers.Input(shape=(n_cols,))

# Create the shared layers
shared1 = layers.Dense(64, activation='relu')(input)
shared2 = layers.Dense(128, activation='relu')(shared1)

In [23]:
# Create the output layer
# Create a branch for Department with a hidden layer and an output layer
dept_hidden = layers.Dense(32, activation='relu')(shared2)
dept_output = layers.Dense(3, activation='softmax', name='department_output')(dept_hidden)



In [22]:
# Create a branch for Attrition
# with a hidden layer and an output layer

# Create the hidden layer


# Create the output layer
# Create a branch for Attrition with a hidden layer and an output layer
attr_hidden = layers.Dense(32, activation='relu')(shared2)
attr_output = layers.Dense(2, activation='sigmoid', name='attrition_output')(attr_hidden)


In [26]:
# Create the model
model = Model(inputs=input, outputs=[dept_output, attr_output])

# Compile the model
model.compile(optimizer='adam',
              loss={'department_output': 'categorical_crossentropy',
                    'attrition_output': 'categorical_crossentropy'},
              metrics={'department_output': 'accuracy',
                       'attrition_output': 'accuracy'})
# Summarize the model
model.summary()


In [27]:
# Train the model
model.fit(X_train_scaled, [dept_train, attr_train], epochs=100)


Epoch 1/100
[1m37/37[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - attrition_output_accuracy: 0.6868 - department_output_accuracy: 0.5404 - loss: 1.5398
Epoch 2/100
[1m37/37[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - attrition_output_accuracy: 0.8403 - department_output_accuracy: 0.6645 - loss: 1.1711 
Epoch 3/100
[1m37/37[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - attrition_output_accuracy: 0.8754 - department_output_accuracy: 0.6345 - loss: 1.1368 
Epoch 4/100
[1m37/37[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - attrition_output_accuracy: 0.8522 - department_output_accuracy: 0.6495 - loss: 1.1075 
Epoch 5/100
[1m37/37[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - attrition_output_accuracy: 0.8628 - department_output_accuracy: 0.6442 - loss: 1.1050 
Epoch 6/100
[1m37/37[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - attrition_output_accuracy: 0.8659 - departme

[1m37/37[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - attrition_output_accuracy: 0.9830 - department_output_accuracy: 0.9181 - loss: 0.3148 
Epoch 48/100
[1m37/37[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - attrition_output_accuracy: 0.9921 - department_output_accuracy: 0.9068 - loss: 0.3190 
Epoch 49/100
[1m37/37[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - attrition_output_accuracy: 0.9887 - department_output_accuracy: 0.9247 - loss: 0.2894
Epoch 50/100
[1m37/37[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - attrition_output_accuracy: 0.9918 - department_output_accuracy: 0.9255 - loss: 0.2686
Epoch 51/100
[1m37/37[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - attrition_output_accuracy: 0.9951 - department_output_accuracy: 0.9354 - loss: 0.2497
Epoch 52/100
[1m37/37[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - attrition_output_accuracy: 0.9921 - department_output

[1m37/37[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - attrition_output_accuracy: 0.9987 - department_output_accuracy: 0.9997 - loss: 0.0359 
Epoch 94/100
[1m37/37[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - attrition_output_accuracy: 1.0000 - department_output_accuracy: 0.9982 - loss: 0.0296 
Epoch 95/100
[1m37/37[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - attrition_output_accuracy: 1.0000 - department_output_accuracy: 0.9979 - loss: 0.0348 
Epoch 96/100
[1m37/37[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - attrition_output_accuracy: 1.0000 - department_output_accuracy: 0.9976 - loss: 0.0313 
Epoch 97/100
[1m37/37[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - attrition_output_accuracy: 1.0000 - department_output_accuracy: 0.9962 - loss: 0.0257
Epoch 98/100
[1m37/37[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - attrition_output_accuracy: 1.0000 - department_outp

<keras.src.callbacks.history.History at 0x1e6814e1050>

In [16]:
# Evaluate the model with the testing data
model.evaluate(X_test_scaled, [dept_test, attr_test])

In [29]:
# Print the accuracy for both department and attrition
print("Department predictions accuracy:", model.evaluate(X_test_scaled, [dept_test, attr_test])[1])
print("Attrition predictions accuracy:", model.evaluate(X_test_scaled, [dept_test, attr_test])[2])

[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - attrition_output_accuracy: 0.7867 - department_output_accuracy: 0.5509 - loss: 4.7897 
Department predictions accuracy: 0.795918345451355
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - attrition_output_accuracy: 0.7867 - department_output_accuracy: 0.5509 - loss: 4.7897 
Attrition predictions accuracy: 0.5782312750816345


# Summary

In the provided space below, briefly answer the following questions.

1. Is accuracy the best metric to use on this data? Why or why not?

2. What activation functions did you choose for your output layers, and why?

3. Can you name a few ways that this model might be improved?

YOUR ANSWERS HERE

1. If the model is trying to predict things like department and attrition, accuracy can be a good measuring stick when the groups are about the same size. But if one group is way bigger or smaller than the others, accuracy might not give you the full picture.

You also have to think about what it means for the business. Like with attrition, saying an employee will stay when they actually quit could cost more than the other way around. So in cases like that, it's better to look at other metrics that focus on the smaller attrition group to really see how well the model performs.

2.For the department output layer, I chose the softmax activation function because it is commonly used for multi-class classification tasks. Softmax outputs a probability distribution over the classes, ensuring that the sum of the probabilities adds up to 1, making it suitable for predicting one department out of multiple possible departments.
For the attrition output layer, I chose the sigmoid activation function because it is commonly used for binary classification tasks. Sigmoid maps the input values to a range between 0 and 1, representing the probability of the positive class (attrition). A threshold is then applied to make the final classification decision, making it suitable for predicting whether an employee will leave the company or not.

3. To improve this multi-output model, one could experiment with different architectures, perform feature engineering, tune hyperparameters, use ensemble methods or advanced models like CNNs or RNNs, and collect more diverse training data.