# Retention Prediction with TensorFlow
Reference: https://www.digitalocean.com/community/tutorials/how-to-build-a-deep-learning-model-to-predict-employee-retention-using-keras-and-tensorflow

# Load Dataset

In [1]:
import os

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
# Location of the HR CSV that the rest of the notebook works on.
DATA_URL = "https://raw.githubusercontent.com/mwitiderrick/kerasDO/master/HR_comma_sep.csv"
df = pd.read_csv(DATA_URL)

In [2]:
# Normalise two column names: lower-case "Work_accident" and rename "salary"
# to "salary_level". Rebinding `df` to the renamed copy (instead of
# inplace=True) avoids mutating the loaded frame in place across cells.
df = df.rename(columns={"Work_accident": "work_accident", "salary": "salary_level"})
df.head()

Unnamed: 0,satisfaction_level,last_evaluation,number_project,average_montly_hours,time_spend_company,work_accident,left,promotion_last_5years,department,salary_level
0,0.38,0.53,2,157,3,0,1,0,sales,low
1,0.8,0.86,5,262,6,0,1,0,sales,medium
2,0.11,0.88,7,272,4,0,1,0,sales,medium
3,0.72,0.87,5,223,5,0,1,0,sales,low
4,0.37,0.52,2,159,3,0,1,0,sales,low


In [3]:
# One-hot encode the two categorical columns. Every level gets its own
# indicator column (drop_first=False keeps the first level too).
feats = ["department", "salary_level"]
df_final = pd.get_dummies(data=df, columns=feats, drop_first=False)

In [4]:
# Inspect the distinct department labels and how many there are.
department_col = df["department"]
print(department_col.unique())
print(department_col.nunique())

['sales' 'accounting' 'hr' 'technical' 'support' 'management' 'IT'
 'product_mng' 'marketing' 'RandD']
10


In [5]:
# Show the column index after one-hot encoding: the original "department" and
# "salary_level" columns are replaced by one indicator column per level.
df_final.columns

Index(['satisfaction_level', 'last_evaluation', 'number_project',
       'average_montly_hours', 'time_spend_company', 'work_accident', 'left',
       'promotion_last_5years', 'department_IT', 'department_RandD',
       'department_accounting', 'department_hr', 'department_management',
       'department_marketing', 'department_product_mng', 'department_sales',
       'department_support', 'department_technical', 'salary_level_high',
       'salary_level_low', 'salary_level_medium'],
      dtype='object')

## Separating Train and Test Dataset

In [6]:
# Feature matrix: every column except the target "left".
# The explicit float cast gives a homogeneous numeric array even when the
# one-hot columns are emitted as bool (pandas-version dependent), which would
# otherwise make the mixed-dtype `.values` result an object array.
X = df_final.drop(columns=["left"]).to_numpy(dtype="float64")
# Target vector: 1 = employee left, 0 = stayed (as encoded in the "left" column).
y = df_final["left"].to_numpy()

# Hold out 30% of the rows for testing. The fixed random_state makes the split,
# and therefore every downstream score, reproducible across kernel restarts.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

In [7]:
# Fit the scaler on the training split only, then apply that same fitted
# transform to both splits.
# NOTE: X_train / X_test are overwritten, so re-running this cell on its own
# would scale already-scaled data.
sc = StandardScaler().fit(X_train)
X_train = sc.transform(X_train)
X_test = sc.transform(X_test)

# Train Model

In [8]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Dropout
from scikeras.wrappers import KerasClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import cross_val_score

def build_model(optimizer="adam", input_dim=20):
    """Build and compile the binary retention classifier.

    Architecture: Dense(10, relu) -> Dropout(0.1) -> Dense(1, sigmoid); both
    dense layers use the "uniform" kernel initializer.

    Args:
        optimizer: Optimizer handed to ``compile``. Defaults to ``"adam"``,
            the value that was previously hard-coded.
        input_dim: Number of input features. Defaults to 20, the number of
            columns in the one-hot encoded frame once "left" is dropped.

    Returns:
        The compiled ``Sequential`` model (binary cross-entropy loss,
        accuracy metric).
    """
    classifier = Sequential()
    # Single hidden layer; it also declares the expected input width.
    classifier.add(
        Dense(10, kernel_initializer="uniform", activation="relu", input_dim=input_dim)
    )
    classifier.add(Dropout(rate=0.1))
    # One sigmoid unit for the binary output.
    classifier.add(Dense(1, kernel_initializer="uniform", activation="sigmoid"))
    classifier.compile(
        optimizer=optimizer,
        loss="binary_crossentropy",
        metrics=["accuracy"],
    )
    return classifier

2024-01-02 13:43:04.603237: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [9]:
# Hyper-parameter grid for the search:
# 5 batch sizes x 7 epoch counts x 1 optimizer.
params = dict(
    batch_size=list(range(10, 51, 10)),
    epochs=list(range(2, 15, 2)),
    optimizer=["adam"],
)

In [10]:
# Base estimator handed to the grid search.
# `params["optimizer"]` is the list of grid candidates; the estimator itself
# takes a single optimizer, so pass the first candidate rather than the list.
classifier = KerasClassifier(
    model=build_model,
    optimizer=params["optimizer"][0],
    verbose=2,
)

In [11]:
# Exhaustive search over every combination in `params`, ranked by accuracy.
# No `cv` is given, so GridSearchCV uses its default splitting.
grid_search = GridSearchCV(
    estimator=classifier,
    param_grid=params,
    scoring="accuracy",
)

In [12]:
# Run the search on the (scaled) training split; one model is trained per
# grid candidate and CV fold.
grid_search = grid_search.fit(X=X_train, y=y_train)

Epoch 1/2
840/840 - 1s - loss: 0.4474 - accuracy: 0.8112 - 1s/epoch - 2ms/step
Epoch 2/2
840/840 - 1s - loss: 0.2657 - accuracy: 0.9018 - 772ms/epoch - 919us/step
210/210 - 0s - 188ms/epoch - 897us/step
Epoch 1/2
840/840 - 1s - loss: 0.4508 - accuracy: 0.7906 - 1s/epoch - 2ms/step
Epoch 2/2
840/840 - 1s - loss: 0.3112 - accuracy: 0.8350 - 753ms/epoch - 896us/step
210/210 - 0s - 164ms/epoch - 781us/step
Epoch 1/2
840/840 - 1s - loss: 0.4548 - accuracy: 0.7913 - 1s/epoch - 1ms/step
Epoch 2/2
840/840 - 1s - loss: 0.3006 - accuracy: 0.8372 - 770ms/epoch - 917us/step
210/210 - 0s - 167ms/epoch - 797us/step
Epoch 1/2
840/840 - 1s - loss: 0.4498 - accuracy: 0.8146 - 1s/epoch - 2ms/step
Epoch 2/2
840/840 - 1s - loss: 0.2676 - accuracy: 0.9018 - 755ms/epoch - 899us/step
210/210 - 0s - 163ms/epoch - 776us/step
Epoch 1/2
840/840 - 1s - loss: 0.4570 - accuracy: 0.7988 - 1s/epoch - 1ms/step
Epoch 2/2
840/840 - 1s - loss: 0.2796 - accuracy: 0.8943 - 795ms/epoch - 946us/step
210/210 - 0s - 202ms/epoc

In [13]:
# Winning hyper-parameter combination and its cross-validated accuracy.
best_param, best_accuracy = grid_search.best_params_, grid_search.best_score_
print(best_param)
print(best_accuracy)

{'batch_size': 10, 'epochs': 8, 'optimizer': 'adam'}
0.9505666643980127


In [14]:
# Fresh estimator configured with the tuned values; `.get` yields None for a
# key that is absent from the search result.
tuned_kwargs = {
    name: best_param.get(name)
    for name in ("batch_size", "epochs", "optimizer")
}
best_classifier = KerasClassifier(model=build_model, **tuned_kwargs)

In [15]:
# 10-fold cross-validation of the tuned estimator on the training split.
# n_jobs=-1 requests parallel fold evaluation, which is why the training logs
# of different folds interleave in the output.
accuracies = cross_val_score(best_classifier, X_train, y_train, cv=10, n_jobs=-1)

Epoch 1/8
Epoch 1/8
Epoch 1/8
Epoch 1/8
Epoch 1/8
Epoch 1/8
Epoch 1/8
Epoch 1/8
Epoch 2/8
Epoch 2/8
  5/945 [..............................] - ETA: 11s - loss: 0.3827 - accuracy: 0.7000Epoch 2/8
Epoch 2/8
Epoch 2/8
Epoch 2/8
 14/945 [..............................] - ETA: 3s - loss: 0.3252 - accuracy: 0.8786 Epoch 2/8
Epoch 3/8
Epoch 3/8
  1/945 [..............................] - ETA: 1s - loss: 0.1816 - accuracy: 0.9000Epoch 3/8
 38/945 [>.............................] - ETA: 3s - loss: 0.2243 - accuracy: 0.8842Epoch 3/8
Epoch 3/8
 72/945 [=>............................] - ETA: 3s - loss: 0.2453 - accuracy: 0.8722Epoch 3/8
Epoch 4/8
  1/945 [..............................] - ETA: 1s - loss: 0.2284 - accuracy: 0.9000Epoch 4/8
 25/945 [..............................] - ETA: 1s - loss: 0.2067 - accuracy: 0.9280Epoch 4/8
Epoch 4/8
Epoch 4/8
 19/945 [..............................] - ETA: 5s - loss: 0.3183 - accuracy: 0.9000Epoch 4/8
Epoch 4/8
Epoch 5/8
Epoch 5/8
Epoch 5/8
Epoch 5/8
Epoch 

In [17]:
# Summarise the per-fold accuracies.
mean, variance = np.mean(accuracies), np.var(accuracies)
print(f"accuracy mean: {mean}; accuracy variance: {variance}")

accuracy mean: 0.9448522402287892; accuracy variance: 0.00021555563353166542


In [18]:
# Train the tuned estimator on the full training split.
# batch_size and epochs were already set on `best_classifier` at construction,
# so they are not repeated here; routing them through fit(**kwargs) is
# redundant and, per the scikeras docs, a deprecated path.
best_classifier.fit(X_train, y_train)

Epoch 1/8
Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8


In [19]:
# Predict on the held-out split and turn the result into booleans.
# NOTE(review): predict() presumably already returns hard 0/1 labels here, so
# the 0.5 threshold only maps them to True/False; confirm against scikeras.
test_output = best_classifier.predict(X_test)
y_pred = (test_output > 0.5).astype("bool")
y_pred



array([False, False,  True, ..., False, False, False])

# Confusion Matrix

In [20]:
from sklearn.metrics import confusion_matrix
# Confusion matrix on the test split: rows are true classes, columns are
# predicted classes.
cm = confusion_matrix(y_true=y_test, y_pred=y_pred)
cm

array([[3252,  171],
       [ 132,  945]])

In [21]:
# One hand-written employee record with 20 features in training-column order.
# Assuming the order shown by df_final.columns, the two 1s mark
# department_support and salary_level_low.
new_employee = np.array([[0.26,0.7 ,3., 238., 6., 0.,0.,0.,0.,0., 0.,0.,0.,0.,0.,1.,0., 0.,1.,0.]])
# Scale with the fitted scaler, predict, then convert to a boolean.
new_pred = best_classifier.predict(sc.transform(new_employee)) > 0.5
print(new_pred)

[False]


# Save and Load model

## Save into HDF5 format

In [30]:
import pickle
import numpy as np
# Create the model folder; exist_ok=True makes re-running the cell a no-op.
# Using os.makedirs on `folder` keeps the path defined in exactly one place
# and avoids depending on a POSIX shell.
folder = "../model/hdf5/v1/"
os.makedirs(folder, exist_ok=True)

In [31]:
# Destination paths for the three artifacts.
model_json_path = f'{folder}retention_prediction_model.json'
weights_path = f'{folder}retention_prediction_weights.h5'
scaler_path = f'{folder}standard_scaler.pkl'

# The fitted Keras model wrapped by the scikeras estimator.
fitted_model = best_classifier.model_

# Architecture, serialised as JSON.
json_model = fitted_model.to_json()
with open(model_json_path, 'w') as handler:
    handler.write(json_model)

# Weights, overwriting any previous file.
fitted_model.save_weights(weights_path, overwrite=True)

# Fitted StandardScaler, so inference can reuse the training-time scaling.
with open(scaler_path, 'wb') as handler:
    pickle.dump(sc, handler)

In [32]:
# loading model
from tensorflow.keras.models import model_from_json

# Rebuild the architecture from its JSON description. The context manager
# guarantees the file handle is closed (a bare open(...).read() leaks it).
with open(f'{folder}retention_prediction_model.json') as handler:
    model = model_from_json(handler.read())
model.load_weights(f'{folder}retention_prediction_weights.h5')

# Compile the restored model with the same loss and optimizer used in training.
model.compile(loss='binary_crossentropy', optimizer='adam')

# Restore the fitted scaler.
# NOTE: pickle.load can execute arbitrary code; only load files written by
# this notebook or another trusted source.
with open(f'{folder}standard_scaler.pkl', 'rb') as handler:
    sc = pickle.load(handler)

In [33]:
# Same hand-written 20-feature record as used earlier, now scored by the
# reloaded Keras model. The raw model output is printed without thresholding.
sample = np.array([[0.26,0.7 ,3., 238., 6., 0.,0.,0.,0.,0., 0.,0.,0.,0.,0.,1.,0., 0.,1.,0.]])
model_pred = model.predict(sc.transform(sample))
print(model_pred)

[[0.20488872]]
