## Part 1: Preprocessing

In [1]:
# Import our dependencies
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import pandas as pd
import numpy as np
from tensorflow.keras.models import Model
from tensorflow.keras import layers

#  Import and read the attrition data
attrition_df = pd.read_csv('https://static.bc-edx.com/ai/ail-v-1-0/m19/lms/datasets/attrition.csv')
attrition_df.head()

Unnamed: 0,Age,Attrition,BusinessTravel,Department,DistanceFromHome,Education,EducationField,EnvironmentSatisfaction,HourlyRate,JobInvolvement,...,PerformanceRating,RelationshipSatisfaction,StockOptionLevel,TotalWorkingYears,TrainingTimesLastYear,WorkLifeBalance,YearsAtCompany,YearsInCurrentRole,YearsSinceLastPromotion,YearsWithCurrManager
0,41,Yes,Travel_Rarely,Sales,1,2,Life Sciences,2,94,3,...,3,1,0,8,0,1,6,4,0,5
1,49,No,Travel_Frequently,Research & Development,8,1,Life Sciences,3,61,2,...,4,4,1,10,3,3,10,7,1,7
2,37,Yes,Travel_Rarely,Research & Development,2,2,Other,4,92,2,...,3,2,0,7,3,3,0,0,0,0
3,33,No,Travel_Frequently,Research & Development,3,4,Life Sciences,4,56,3,...,3,3,0,8,3,3,8,7,3,0
4,27,No,Travel_Rarely,Research & Development,2,1,Medical,1,40,3,...,3,4,1,6,3,3,2,2,2,2


In [2]:
# Determine the number of unique values in each column.
attrition_df.nunique()

Age                         43
Attrition                    2
BusinessTravel               3
Department                   3
DistanceFromHome            29
Education                    5
EducationField               6
EnvironmentSatisfaction      4
HourlyRate                  71
JobInvolvement               4
JobLevel                     5
JobRole                      9
JobSatisfaction              4
MaritalStatus                3
NumCompaniesWorked          10
OverTime                     2
PercentSalaryHike           15
PerformanceRating            2
RelationshipSatisfaction     4
StockOptionLevel             4
TotalWorkingYears           40
TrainingTimesLastYear        7
WorkLifeBalance              4
YearsAtCompany              37
YearsInCurrentRole          19
YearsSinceLastPromotion     16
YearsWithCurrManager        18
dtype: int64

In [3]:
# Create y_df with the Attrition and Department columns
y_df = attrition_df[["Attrition","Department"]]

In [4]:
# Create a list of at least 10 column names to use as X data
x_columns = ["Education","Age","DistanceFromHome","JobSatisfaction","OverTime","StockOptionLevel","WorkLifeBalance","YearsAtCompany","YearsSinceLastPromotion","NumCompaniesWorked"]


# Create X_df using your selected columns
X_df = pd.DataFrame(attrition_df, columns=x_columns)

# Show the data types for X_df
X_df.dtypes

Education                   int64
Age                         int64
DistanceFromHome            int64
JobSatisfaction             int64
OverTime                   object
StockOptionLevel            int64
WorkLifeBalance             int64
YearsAtCompany              int64
YearsSinceLastPromotion     int64
NumCompaniesWorked          int64
dtype: object

In [5]:
# Split the data into training and testing sets
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X_df, y_df, random_state=1)

In [6]:
# Convert your X data to numeric data types however you see fit
# Add new code cells as necessary
from sklearn.preprocessing import StandardScaler, LabelEncoder
X_train['OverTime'] = X_train['OverTime'].map({'Yes': 1, 'No': 0})
X_test['OverTime'] = X_test['OverTime'].map({'Yes': 1, 'No': 0})

X_train = X_train.fillna(0)         # Replace NaNs with 0s or some other value

X_test = X_test.fillna(0)         # Replace NaNs with 0s or some other value

print(X_train.shape)
print(X_test.shape)

(1102, 10)
(368, 10)


In [7]:
# Create a StandardScaler
scaler = StandardScaler().fit(X_train)

# Fit the StandardScaler to the training data
X_train_scaled = scaler.transform(X_train)

X_test_scaled = scaler.transform(X_test)

# Scale the training and testing data
X_train_scaled

array([[ 8.08431209e-02, -5.97353422e-04, -1.00933748e+00, ...,
        -1.42174403e-01, -3.50759886e-01,  5.05015829e-01],
       [ 1.07072222e+00, -1.09773647e+00, -7.61501047e-01, ...,
        -4.83145442e-01, -3.50759886e-01, -1.09305292e+00],
       [ 1.07072222e+00, -7.68594737e-01,  3.53762891e-01, ...,
        -9.94601999e-01, -6.63492406e-01, -6.93535736e-01],
       ...,
       [-9.09035982e-01,  3.28544382e-01, -3.89746401e-01, ...,
         2.41510839e+00,  1.52563523e+00, -6.93535736e-01],
       [ 8.08431209e-02,  6.57686118e-01,  8.49435752e-01, ...,
         1.73316631e+00, -3.50759886e-01,  9.04533017e-01],
       [-9.09035982e-01, -1.42687821e+00,  4.77681106e-01, ...,
        -9.94601999e-01, -6.63492406e-01, -6.93535736e-01]])

In [17]:
# Create a OneHotEncoder for the Department column
from sklearn.preprocessing import OneHotEncoder
enc = OneHotEncoder(sparse=False)

# Fit the encoder to the training data
enc.fit(y_train[['Department']])


# Create two new variables by applying the encoder
# to the training and testing data

y_train_Department_encoded = enc.transform(y_train[['Department']])

y_test_Department_encoded = enc.transform(y_test[['Department']])



In [18]:
# Create a OneHotEncoder for the Attrition column
from sklearn.preprocessing import OneHotEncoder
enc2 = OneHotEncoder(sparse=False)

# Fit the encoder to the training data
enc2.fit(y_train[['Attrition']])

# Create two new variables by applying the encoder
# to the training and testing data
y_train_Attrition_encoded = enc2.transform(y_train[['Attrition']])

y_test_Attrition_encoded = enc2.transform(y_test[['Attrition']])



## Create, Compile, and Train the Model

In [29]:
# Find the number of columns in the X training data
number_input_features = len(X_train.columns)
number_input_features

# Create the input layer
input = layers.Input(shape=(number_input_features,))
print(number_input_features)

# Create at least two shared layers
shared_layer_1 = layers.Dense(128, activation='relu')(input)
shared_layer_2 = layers.Dense(64, activation='relu')(shared_layer_1)

10


In [30]:
# Create a branch for Department
# with a hidden layer and an output layer


# Create the hidden layer
department_hidden = layers.Dense(64, activation='relu')(shared_layer_2)


# Create the output layer
department_output = layers.Dense(3, activation='softmax',name='department_output')(department_hidden)


In [31]:
# Create a branch for Attrition
# with a hidden layer and an output layer

# Create the hidden layer
Attrition_hidden = layers.Dense(64, activation='relu')(shared_layer_2)

# Create the output layer
Attrition_output = layers.Dense(2, activation='softmax',name='Attrition_output')(Attrition_hidden)

In [32]:
# Create the model

nn_model = Model(inputs=input, outputs=[department_output, Attrition_output])


# Compile the model
loss={'department_output': 'binary_crossentropy', 'Attrition_output': 'binary_crossentropy'}
metrics={'department_output': ['accuracy'], 'Attrition_output': ['accuracy']}

nn_model.compile(optimizer='adam', loss=loss, metrics=metrics)


# Summarize the model
nn_model.summary()

In [33]:
# Train the model

nn_model.fit(X_train_scaled, [y_train_Department_encoded, y_train_Attrition_encoded], epochs=100, batch_size=32)

Epoch 1/100
[1m35/35[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 3ms/step - Attrition_output_accuracy: 0.7391 - department_output_accuracy: 0.4526 - loss: 1.2197
Epoch 2/100
[1m35/35[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - Attrition_output_accuracy: 0.8429 - department_output_accuracy: 0.6509 - loss: 0.9213
Epoch 3/100
[1m35/35[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - Attrition_output_accuracy: 0.8461 - department_output_accuracy: 0.6455 - loss: 0.8756
Epoch 4/100
[1m35/35[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - Attrition_output_accuracy: 0.8508 - department_output_accuracy: 0.6408 - loss: 0.8500
Epoch 5/100
[1m35/35[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - Attrition_output_accuracy: 0.8678 - department_output_accuracy: 0.6779 - loss: 0.8042
Epoch 6/100
[1m35/35[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - Attrition_output_accuracy: 0.8669 - department_o

<keras.src.callbacks.history.History at 0x1e91053bf10>

In [35]:
# Evaluate the model with the testing data
model_accuracy = nn_model.evaluate(X_test_scaled, [y_test_Department_encoded, y_test_Attrition_encoded], verbose=2)

12/12 - 0s - 39ms/step - Attrition_output_accuracy: 0.7853 - department_output_accuracy: 0.5625 - loss: 3.6393


In [45]:

# Print the accuracy for both department and attrition
print(f"Department Accuracy: {model_accuracy[2]*100}%") 
print(f"Attrition Accuracy: {model_accuracy[1]*100}%")

Department Accuracy: 56.25%
Attrition Accuracy: 78.53260636329651%


# Summary

In the provided space below, briefly answer the following questions.

### 1. Is accuracy the best metric to use on this data? Why or why not?
- Accuracy is a good metric for Binary Classification:
Accuracy can be a good metric if the classes are balanced and the cost of false positives and false negatives is similar.
In Multi-Class Classification:
Accuracy can be useful, but other metrics like the macro-averaged F1 score might provide more insight, especially in imbalanced datasets.
With Multi-Label Classification:
Accuracy can be less informative. Instead, metrics like the subset accuracy, Hamming loss, or average precision are often used.


### 2. What activation functions did you choose for your output layers, and why?
- Softmax Activation Function
The softmax activation function is typically used in the output layer of a classification model where you need to predict probabilities of each class, especially for multi-class classification problems. 
When you have a classification problem with more than two classes, softmax in the output layer provides a clear, interpretable, and probabilistic prediction.

### 3. Can you name a few ways that this model might be improved?
- Below are some methods of improving the model 
1. Hyperparameter Tuning
Learning Rate: Adjust the learning rate to find the optimal value that allows the model to converge faster and more effectively.
Batch Size: Experiment with different batch sizes to find the best balance between convergence speed and stability.
Number of Epochs: Train for more epochs if the model hasn't converged yet, but be cautious of overfitting.
2. Data Augmentation
Augment Training Data: Use techniques like rotation, flipping, scaling, and cropping to artificially increase the size of your training dataset.
3. Early Stopping and Model Checkpointing
Early Stopping: Stop training when the model's performance on a validation set stops improving.
python
3. Transfer Learning
Pretrained Models: Use a pretrained model and fine-tune it on your dataset.
4. Cross-Validation
K-Fold Cross-Validation: Evaluate the model using k-fold cross-validation to ensure its performance is consistent across different subsets of the data.








