In [1]:
import sys
sys.path.append("../src")
import pandas as pd
import numpy as np
from tensorflow.keras import Sequential, layers, callbacks
from sklearn.model_selection import train_test_split
from functions import get_df_uniques

In [2]:
df = pd.read_csv("../data/01_raw/attrition_train.csv")

In [3]:
df["Attrition"] = df["Attrition"].apply(lambda x: 0 if x == "No" else 1)
y = df["Attrition"]
X = df.drop('Attrition', axis=1)
X = pd.get_dummies(X)

In [4]:
X.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1100 entries, 0 to 1099
Data columns (total 55 columns):
 #   Column                             Non-Null Count  Dtype
---  ------                             --------------  -----
 0   Age                                1100 non-null   int64
 1   DailyRate                          1100 non-null   int64
 2   DistanceFromHome                   1100 non-null   int64
 3   Education                          1100 non-null   int64
 4   EmployeeCount                      1100 non-null   int64
 5   EmployeeNumber                     1100 non-null   int64
 6   EnvironmentSatisfaction            1100 non-null   int64
 7   HourlyRate                         1100 non-null   int64
 8   JobInvolvement                     1100 non-null   int64
 9   JobLevel                           1100 non-null   int64
 10  JobSatisfaction                    1100 non-null   int64
 11  MonthlyIncome                      1100 non-null   int64
 12  MonthlyRate         

In [5]:
#X=X[num_cols]
#X=np.asarray(X).astype('float32')

In [6]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.15, random_state=1)

In [7]:
# model
model = Sequential([
    layers.Dense(32, activation="relu", input_shape=[X.shape[1]]),
    layers.Dropout(0.3),
    layers.BatchNormalization(),
    layers.Dense(16, activation="relu"),
    layers.Dropout(0.3),
    layers.BatchNormalization(),
    layers.Dense(8, activation="relu"),
    layers.Dropout(0.3),
    layers.BatchNormalization(),
    layers.Dense(1, activation="sigmoid")
])

In [8]:
model.compile(
    optimizer="adam",
    loss="binary_crossentropy",
    metrics=["binary_accuracy"]
)

In [9]:
early_stopping = callbacks.EarlyStopping(
    patience=10,
    min_delta=0.001,
    restore_best_weights=True,
)

In [10]:
history = model.fit(
    X_train, y_train,
    validation_data=(X_test, y_test),
    batch_size=512,
    epochs=1000,
    callbacks=[early_stopping],
    verbose=0, # hide the output because we have so many epochs
)

In [30]:
history_df = pd.DataFrame(history.history)
# Start the plot at epoch 5
#history_df.loc[5:, ['loss', 'val_loss']].plot()
#.history_df.loc[5:, ['binary_accuracy', 'val_binary_accuracy']].plot()

print(("Best Validation Loss: {:0.4f}" +\
      "\nBest Validation Accuracy: {:0.4f}")\
      .format(history_df['val_loss'].min(), 
              history_df['val_binary_accuracy'].max()))

Best Validation Loss: 0.4488
Best Validation Accuracy: 0.8424


In [15]:
import tensorflow as tf
import autokeras as ak
import kerastuner as kt

METRICS = [
      tf.keras.metrics.AUC(name='auc'),
]

# Initialize the structured data classifier.
clf = ak.StructuredDataClassifier(
    overwrite=True,
    max_trials=4,
    metrics=METRICS,
    objective = kt.Objective("auc", direction="max"),
    loss="binary_crossentropy"
)

early_stopping = callbacks.EarlyStopping(
    min_delta=0.001,  # minimium amount of change to count as an improvement
    patience=5,  # how many epochs to wait before stopping
    restore_best_weights=True,
)

clf.fit(X_train, y_train, epochs=100, callbacks=[early_stopping])

Trial 4 Complete [00h 00m 06s]
auc: 0.9754437804222107

Best auc So Far: 0.9754437804222107
Total elapsed time: 00h 00m 31s
INFO:tensorflow:Oracle triggered exit
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Ep

In [22]:
from tensorflow.keras.models import load_model

model = clf.export_model()
try:
    model.save("model_autokeras", save_format="tf")
except Exception:
    model.save("model_autokeras.h5")


loaded_model = load_model("model_autokeras", custom_objects=ak.CUSTOM_OBJECTS)

predicted_y = loaded_model.predict(tf.expand_dims(X_test, -1))
print(predicted_y)

INFO:tensorflow:Assets written to: model_autokeras/assets
[[3.48864475e-08]
 [5.02288913e-07]
 [9.99989033e-01]
 [4.82866675e-01]
 [3.31542611e-01]
 [9.63446498e-03]
 [6.04416637e-05]
 [6.26234942e-06]
 [5.75248027e-09]
 [1.81975290e-09]
 [2.82379442e-05]
 [8.85634303e-01]
 [8.14185478e-06]
 [8.71605217e-01]
 [4.61965410e-10]
 [2.61913836e-02]
 [3.12036276e-03]
 [8.85003737e-06]
 [1.89101696e-03]
 [3.78738985e-10]
 [5.91257958e-08]
 [1.73652734e-06]
 [1.19587821e-11]
 [6.54251380e-06]
 [1.31815672e-04]
 [3.59346275e-10]
 [9.99973893e-01]
 [1.90590470e-07]
 [4.72685695e-03]
 [2.72847495e-10]
 [1.08929797e-07]
 [9.99971926e-01]
 [5.43156148e-07]
 [1.42260701e-08]
 [1.27968192e-02]
 [2.95919253e-05]
 [3.33363452e-08]
 [3.10531014e-08]
 [2.31314488e-07]
 [3.12149525e-04]
 [4.45288420e-03]
 [7.89350807e-10]
 [2.23627687e-03]
 [3.57899116e-05]
 [4.98205423e-04]
 [1.56383378e-10]
 [9.55641270e-02]
 [1.10874258e-04]
 [1.39197473e-10]
 [5.74597716e-03]
 [5.05179167e-04]
 [1.65495135e-06]
 [1.01