## Part 1: Preprocessing

In [47]:
# Import our dependencies
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import pandas as pd
import numpy as np
from tensorflow.keras.models import Model
from tensorflow.keras import layers

#  Import and read the attrition data
attrition_df = pd.read_csv('https://static.bc-edx.com/ai/ail-v-1-0/m19/lms/datasets/attrition.csv')
attrition_df.head()

Unnamed: 0,Age,Attrition,BusinessTravel,Department,DistanceFromHome,Education,EducationField,EnvironmentSatisfaction,HourlyRate,JobInvolvement,...,PerformanceRating,RelationshipSatisfaction,StockOptionLevel,TotalWorkingYears,TrainingTimesLastYear,WorkLifeBalance,YearsAtCompany,YearsInCurrentRole,YearsSinceLastPromotion,YearsWithCurrManager
0,41,Yes,Travel_Rarely,Sales,1,2,Life Sciences,2,94,3,...,3,1,0,8,0,1,6,4,0,5
1,49,No,Travel_Frequently,Research & Development,8,1,Life Sciences,3,61,2,...,4,4,1,10,3,3,10,7,1,7
2,37,Yes,Travel_Rarely,Research & Development,2,2,Other,4,92,2,...,3,2,0,7,3,3,0,0,0,0
3,33,No,Travel_Frequently,Research & Development,3,4,Life Sciences,4,56,3,...,3,3,0,8,3,3,8,7,3,0
4,27,No,Travel_Rarely,Research & Development,2,1,Medical,1,40,3,...,3,4,1,6,3,3,2,2,2,2


In [48]:
# Determine the number of unique values in each column
attrition_df.nunique()

Unnamed: 0,0
Age,43
Attrition,2
BusinessTravel,3
Department,3
DistanceFromHome,29
Education,5
EducationField,6
EnvironmentSatisfaction,4
HourlyRate,71
JobInvolvement,4


In [49]:
attrition_df.info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1470 entries, 0 to 1469
Data columns (total 27 columns):
 #   Column                    Non-Null Count  Dtype 
---  ------                    --------------  ----- 
 0   Age                       1470 non-null   int64 
 1   Attrition                 1470 non-null   object
 2   BusinessTravel            1470 non-null   object
 3   Department                1470 non-null   object
 4   DistanceFromHome          1470 non-null   int64 
 5   Education                 1470 non-null   int64 
 6   EducationField            1470 non-null   object
 7   EnvironmentSatisfaction   1470 non-null   int64 
 8   HourlyRate                1470 non-null   int64 
 9   JobInvolvement            1470 non-null   int64 
 10  JobLevel                  1470 non-null   int64 
 11  JobRole                   1470 non-null   object
 12  JobSatisfaction           1470 non-null   int64 
 13  MaritalStatus             1470 non-null   object
 14  NumCompaniesWorked      

In [50]:
# Create y_df with the Attrition and Department columns
y_df = attrition_df[['Attrition', 'Department']]
y_df.head()


Unnamed: 0,Attrition,Department
0,Yes,Sales
1,No,Research & Development
2,Yes,Research & Development
3,No,Research & Development
4,No,Research & Development


In [51]:
# Create a list of at least 10 column names to use as X data
column_names = ['Age', 'OverTime', 'DistanceFromHome', 'Education', 'EnvironmentSatisfaction', 'HourlyRate', 'JobInvolvement', 'JobLevel', 'JobSatisfaction', 'YearsSinceLastPromotion']



# Create X_df using your selected columns
X_df = attrition_df[column_names]

X_df.head()


# Show the data types for X_df
X_df.dtypes


Unnamed: 0,0
Age,int64
OverTime,object
DistanceFromHome,int64
Education,int64
EnvironmentSatisfaction,int64
HourlyRate,int64
JobInvolvement,int64
JobLevel,int64
JobSatisfaction,int64
YearsSinceLastPromotion,int64


In [52]:
# Split the data into training and testing sets
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(X_df, y_df, random_state=78)


In [53]:
# Convert your X data to numeric data types however you see fit
X_df = pd.get_dummies(X_df,dtype="int")
X_df.head()
# Add new code cells as necessary



Unnamed: 0,Age,DistanceFromHome,Education,EnvironmentSatisfaction,HourlyRate,JobInvolvement,JobLevel,JobSatisfaction,YearsSinceLastPromotion,OverTime_No,OverTime_Yes
0,41,1,2,2,94,3,2,4,0,0,1
1,49,8,1,3,61,2,2,2,1,1,0
2,37,2,2,4,92,2,1,3,0,0,1
3,33,3,4,4,56,3,1,3,3,0,1
4,27,2,1,1,40,3,1,2,2,1,0


In [54]:
X_df.dtypes

Unnamed: 0,0
Age,int64
DistanceFromHome,int64
Education,int64
EnvironmentSatisfaction,int64
HourlyRate,int64
JobInvolvement,int64
JobLevel,int64
JobSatisfaction,int64
YearsSinceLastPromotion,int64
OverTime_No,int64


In [55]:
# Create a StandardScaler
from sklearn.preprocessing import StandardScaler


# Fit the StandardScaler to the training data
X_df = StandardScaler().fit_transform(X_df)




# Scale the training and testing data
X_train, X_test, y_train, y_test = train_test_split(X_df, y_df, random_state=78)



In [56]:
from sklearn.preprocessing import OneHotEncoder

# Create a OneHotEncoder for the Department column
enc = OneHotEncoder(sparse_output=False, handle_unknown='ignore')


# Fit the encoder to the training data
enc.fit(y_train[['Department', 'Attrition']])


# Create two new variables by applying the encoder
# to the training and testing data
y_train_encoded = enc.transform(y_train[['Department', 'Attrition']])
y_test_encoded = enc.transform(y_test[['Department', 'Attrition']])


In [57]:
# Create a OneHotEncoder for the Attrition column
attrition_encoder = OneHotEncoder(sparse_output=False)


# Fit the encoder to the training data
y_train_attrition = attrition_encoder.fit(y_train[['Attrition']])




# Create two new variables by applying the encoder
# to the training and testing data
y_test_attrition = attrition_encoder.transform(y_test[['Attrition']])
y_train_attrition = attrition_encoder.transform(y_train[['Attrition']])

In [58]:
# Create a OneHotEncoder for the Attrition column
department_encoder = OneHotEncoder(sparse_output=False)


# Fit the encoder to the training data
y_train_department = department_encoder.fit(y_train[['Department']])




# Create two new variables by applying the encoder
# to the training and testing data
y_test_department = department_encoder.transform(y_test[['Department']])
y_train_department = department_encoder.transform(y_train[['Department']])

## Part 2: Create, Compile, and Train the Model

In [59]:
# Find the number of columns in the X training data.
input_nodes=X_train.shape[1]


# Create the input layer
input_layer = layers.Input(shape=(input_nodes,))


# Create at least two shared layers
shared_layer1 = layers.Dense(64, activation='relu')(input_layer)
shared_layer2 = layers.Dense(32, activation='relu')(shared_layer1)


In [60]:
y_train_department.shape

(1102, 3)

In [61]:
# Create a branch for Department
# with a hidden layer and an output layer
department_branch = layers.Dense(32, activation='relu')(shared_layer2)
department_output = layers.Dense(y_train_department.shape[1], activation='sigmoid')(department_branch)





# Create the hidden layer
hidden_layer = layers.Dense(64, activation='relu')(shared_layer2)


# Create the output layer
department_output = layers.Dense(y_train_department.shape[1], activation='sigmoid')(department_branch)


In [62]:
# Create a branch for Attrition
# with a hidden layer and an output layer
attrition_branch = layers.Dense(32, activation='relu')(shared_layer2)
attrition_output = layers.Dense(2, activation='softmax')(attrition_branch)

# Create the hidden layer
hidden_layer = layers.Dense(64, activation='relu')(shared_layer2)



In [66]:
# Create the model
model = Model(inputs=input_layer, outputs=[department_output, attrition_output])


# Compile the model
model.compile(optimizer='adam',
              loss={'dense_20': 'categorical_crossentropy',
                     'dense_22': 'binary_crossentropy'},
             metrics={'dense_20': 'accuracy',
                      'dense_22': 'accuracy'})


# Summarize the model
model.summary()


In [67]:
#Train the model
model.fit(X_train, {'dense_20': y_train_department, 'dense_22': y_train_attrition},
          epochs=100,
          batch_size=32,
          validation_split=0.2)



Epoch 1/100
[1m28/28[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 19ms/step - dense_20_accuracy: 0.4422 - dense_20_loss: 1.0540 - dense_22_accuracy: 0.7758 - dense_22_loss: 0.6451 - loss: 1.6997 - val_dense_20_accuracy: 0.6606 - val_dense_20_loss: 0.8406 - val_dense_22_accuracy: 0.8235 - val_dense_22_loss: 0.5331 - val_loss: 1.3736
Epoch 2/100
[1m28/28[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - dense_20_accuracy: 0.6857 - dense_20_loss: 0.7940 - dense_22_accuracy: 0.8131 - dense_22_loss: 0.5218 - loss: 1.3157 - val_dense_20_accuracy: 0.6606 - val_dense_20_loss: 0.7564 - val_dense_22_accuracy: 0.8235 - val_dense_22_loss: 0.4479 - val_loss: 1.2050
Epoch 3/100
[1m28/28[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - dense_20_accuracy: 0.6398 - dense_20_loss: 0.7872 - dense_22_accuracy: 0.8298 - dense_22_loss: 0.4515 - loss: 1.2387 - val_dense_20_accuracy: 0.6606 - val_dense_20_loss: 0.7440 - val_dense_22_accuracy: 0.8235 - val_dense_22_lo

<keras.src.callbacks.history.History at 0x7dedb2165490>

In [69]:
# Evaluate the model with the testing data
model_loss, department_loss, attrition_loss, department_accuracy, attrition_accuracy = model.evaluate(
    X_test, {'dense_20': y_test_encoded[:, 0:3], 'dense_22': y_test_encoded[:, 3:]}, verbose=2
)


12/12 - 0s - 6ms/step - dense_20_accuracy: 0.5326 - dense_20_loss: 2.0625 - dense_22_accuracy: 0.8098 - dense_22_loss: 0.5874 - loss: 2.6616


In [70]:
# Print the accuracy for both department and attrition
print(f"Loss: {model_loss}, Department Loss: {department_loss}, Attrition Loss: {attrition_loss}")
print(f"Department Accuracy: {department_accuracy}, Attrition Accuracy: {attrition_accuracy}")


Loss: 2.6616005897521973, Department Loss: 2.0624969005584717, Attrition Loss: 0.5873993039131165
Department Accuracy: 0.532608687877655, Attrition Accuracy: 0.8097826242446899


# Summary

In the provided space below, briefly answer the following questions.

1. Is accuracy the best metric to use on this data? Why or why not?

2. What activation functions did you choose for your output layers, and why?

3. Can you name a few ways that this model might be improved?

YOUR ANSWERS HERE

1. No,the dataset is uneven because there is more people that would leave the company then staying making the accuracy the wrong metric to use on this dataset.
2. I used the softmax function for the department predition because it has a multi-class issue and it has three unique departments. And Sigmoid  can be used for the department output layer because it has it own unique singular possibilities.
3. There is probably a class mismatch in the dataset, especially related to the attrition forecast, which often shows more data for employees who stay than those who leave. The model may become skewed in favor of the larger class.