In [1]:
import pandas as pd
import numpy as np

In [2]:
df = pd.read_csv("https://raw.githubusercontent.com/mwitiderrick/kerasDO/master/HR_comma_sep.csv")

In [3]:
df.head()
df.columns

Index(['satisfaction_level', 'last_evaluation', 'number_project',
       'average_montly_hours', 'time_spend_company', 'Work_accident', 'left',
       'promotion_last_5years', 'department', 'salary'],
      dtype='object')

In [4]:
df['salary'].unique()

array(['low', 'medium', 'high'], dtype=object)

In [5]:
feats = ['department', 'salary']
df_final = pd.get_dummies(df, columns=feats, drop_first='first')
print(df_final.head())

   satisfaction_level  last_evaluation  ...  salary_low  salary_medium
0                0.38             0.53  ...           1              0
1                0.80             0.86  ...           0              1
2                0.11             0.88  ...           0              1
3                0.72             0.87  ...           1              0
4                0.37             0.52  ...           1              0

[5 rows x 19 columns]


In [6]:
df_final.columns

Index(['satisfaction_level', 'last_evaluation', 'number_project',
       'average_montly_hours', 'time_spend_company', 'Work_accident', 'left',
       'promotion_last_5years', 'department_RandD', 'department_accounting',
       'department_hr', 'department_management', 'department_marketing',
       'department_product_mng', 'department_sales', 'department_support',
       'department_technical', 'salary_low', 'salary_medium'],
      dtype='object')

In [7]:
from sklearn.model_selection import train_test_split

In [8]:
X = df_final.drop(['left'], axis=1).values
Y = df_final['left'].values

In [9]:
print(X.shape)
print(Y.shape)

(14999, 18)
(14999,)


In [10]:
x_train_new, x_test, y_train_new, y_test = train_test_split(X, Y, test_size=0.2, random_state=2021)

In [11]:
x_train, x_val, y_train, y_val = train_test_split(x_train_new, y_train_new, test_size=0.1, random_state=2021)

In [12]:
from sklearn.preprocessing import StandardScaler

sc = StandardScaler()
x_train = sc.fit_transform(x_train)
x_test = sc.fit_transform(x_test)
x_val = sc.fit_transform(x_val)

In [13]:
print(x_train)
print(x_train.shape)
print(y_train.shape)

[[ 0.79074099  0.31148715 -0.65648126 ... -0.47441638 -0.97629713
  -0.86796835]
 [-1.62104563  0.54536105  0.96135104 ... -0.47441638 -0.97629713
   1.15211574]
 [ 0.9917232   0.83770342 -0.65648126 ... -0.47441638  1.02427834
  -0.86796835]
 ...
 [ 0.42897299 -0.09779216  0.15243489 ... -0.47441638  1.02427834
  -0.86796835]
 [-1.01809898 -0.97481927 -0.65648126 ... -0.47441638 -0.97629713
  -0.86796835]
 [ 0.67015165  0.83770342  0.15243489 ... -0.47441638 -0.97629713
   1.15211574]]
(10799, 18)
(10799,)


In [14]:
import keras
from keras.layers import Dense, Dropout
from keras.models import Sequential
from keras.callbacks import EarlyStopping

In [15]:
feed_model = Sequential()

In [16]:
feed_model.add(Dense(10, input_dim=18, activation="relu", kernel_initializer="uniform", kernel_regularizer='l2'))
feed_model.add(Dropout(0.2))

In [17]:
feed_model.add(Dense(1, activation="sigmoid", kernel_initializer="uniform"))

In [18]:
!pip install keras-visualizer
from keras_visualizer import visualizer

Collecting keras-visualizer
  Downloading keras_visualizer-2.4-py3-none-any.whl (5.4 kB)
Installing collected packages: keras-visualizer
Successfully installed keras-visualizer-2.4


In [19]:
visualizer(feed_model, format='png', view=True)

In [20]:
feed_model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 10)                190       
                                                                 
 dropout (Dropout)           (None, 10)                0         
                                                                 
 dense_1 (Dense)             (None, 1)                 11        
                                                                 
Total params: 201
Trainable params: 201
Non-trainable params: 0
_________________________________________________________________


In [21]:
feed_model.compile(optimizer= "adam", loss = "binary_crossentropy", metrics = ["accuracy"])

In [22]:
# feed_model.fit(x_train, y_train, batch_size=10, epochs=1, validation_split=0.1)

monitor_loss = EarlyStopping(monitor='loss', patience=2)

feed_model.fit(x_train, y_train, batch_size=10, epochs=1, validation_data=[x_val, y_val], callbacks=monitor_loss)



<keras.callbacks.History at 0x7f8b87faed90>

In [23]:
y_pred = feed_model.predict(x_test)

In [24]:
print(y_pred)

[[0.10104324]
 [0.5641714 ]
 [0.08855135]
 ...
 [0.01469496]
 [0.18539098]
 [0.29292646]]


In [25]:
y_pred = (y_pred>0.5)

In [26]:
y_pred

array([[False],
       [ True],
       [False],
       ...,
       [False],
       [False],
       [False]])

In [27]:
from sklearn.metrics import confusion_matrix

In [28]:
cm = confusion_matrix(y_test, y_pred)

In [29]:
cm

array([[2229,   46],
       [ 437,  288]])

In [30]:
acc = (2178+ 304)/y_test.shape[0]

In [31]:
acc

0.8273333333333334

In [32]:
# Custom model build

In [33]:
import keras
from keras.layers import Dense, Input


In [34]:
input_layer = Input(shape=(18,))
hidden_layer = Dense(10, activation='relu')(input_layer)
output_layer = Dense(1, activation='sigmoid')(hidden_layer)

feed_custom_model = keras.Model(inputs=input_layer, outputs=output_layer)

In [35]:
feed_custom_model.summary()

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 18)]              0         
                                                                 
 dense_2 (Dense)             (None, 10)                190       
                                                                 
 dense_3 (Dense)             (None, 1)                 11        
                                                                 
Total params: 201
Trainable params: 201
Non-trainable params: 0
_________________________________________________________________


In [36]:
feed_custom_model.compile(optimizer= "adam", loss = "binary_crossentropy", metrics = ["accuracy"])
feed_custom_model.fit(x_train, y_train, batch_size=10, epochs=1, validation_split=0.1)



<keras.callbacks.History at 0x7f8c7b0bc790>

In [37]:
# visualizer(feed_custom_model, filename='custom_nn_model', format='png', view=True)