In [1]:
import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

import os

# DNN

In [2]:
from keras_visualizer import visualizer 
from keras.models import Sequential
from keras.layers import Dense
from keras import metrics
from keras.callbacks import EarlyStopping
from keras.utils import np_utils
import tensorflow_addons as tfa
from keras.regularizers import l2
from keras.utils.vis_utils import plot_model

In [3]:
PATH = '/Users/zionn/Desktop/資料科學導論/hw5/'
os.getcwd()

'/Users/zionn/Desktop/資料科學導論/hw5/models'

In [4]:
PATH_TRAIN = PATH + 'churn_prediction_data/train.csv'
df_train = pd.read_csv(PATH_TRAIN)
#df_train.head()

In [5]:
cols = ['CreditScore', 'Geography', 'Gender', 'Age', 'Tenure', 'Balance', 'NumOfProducts', 'HasCrCard', 'IsActiveMember', 'EstimatedSalary', 'Exited']
data_dummy = pd.get_dummies(df_train.loc[:, cols])
df_train = pd.DataFrame(data_dummy)
df_train['HasCrCard'] = df_train['HasCrCard'].replace({0: -1})
df_train['IsActiveMember'] = df_train['IsActiveMember'].replace({0: -1})
#df_train.head()

In [6]:
train_label = df_train.loc[:, 'Exited']
df_train.drop('Exited', axis=1, inplace=True)
train_data = df_train.loc[:, 'CreditScore': 'Gender_Male']

In [7]:
from imblearn.over_sampling import RandomOverSampler, SMOTE
ros = RandomOverSampler()
smote = SMOTE()
train_data, train_label = smote.fit_resample(train_data, train_label)

In [8]:
scaler = StandardScaler()
x_train_standardized = scaler.fit_transform(train_data)
y_train = np_utils.to_categorical(train_label)

In [9]:
METRICS = [
    'accuracy', 
    metrics.Precision(name='precision', thresholds=0.7), 
    tfa.metrics.F1Score(num_classes=2)
]

number_of_features = x_train_standardized.shape[1]

2021-12-22 15:59:18.123295: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [10]:
early_stopping = EarlyStopping(monitor='precision', 
                               min_delta=0.01, 
                               patience=300, 
                               verbose=1, 
                               mode='max')

In [19]:
activation_1 = 'relu'
activation_2 = 'sigmoid'

network = Sequential()
network.add(Dense(units=9, activation=activation_1, input_dim=number_of_features))
network.add(Dense(units=30, activation=activation_1))
network.add(Dense(units=30, activation=activation_1, kernel_regularizer=l2(0.01))) #kernel_regularizer=l2(0.01)
network.add(Dense(units=30, activation=activation_1))
network.add(Dense(units=2, activation=activation_2))

network.compile(
    loss='binary_crossentropy', 
    optimizer='rmsprop', 
    metrics=METRICS)

plot_model(network, to_file='model_struc.png', show_shapes=True, show_layer_names=True)
visualizer(network, filename='model_graph', format='png', view=True)

('You must install pydot (`pip install pydot`) and install graphviz (see instructions at https://graphviz.gitlab.io/download/) ', 'for plot_model/model_to_dot to work.')


In [12]:
history = network.fit(
    x_train_standardized, 
    y_train, 
    epochs=1000, 
    verbose=2, 
    batch_size=100, # -> 6000/batch_size
    callbacks=[early_stopping]
    )

2021-12-22 15:59:19.589398: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:185] None of the MLIR Optimization Passes are enabled (registered 2)


Epoch 1/1000
128/128 - 1s - loss: 0.7697 - accuracy: 0.6883 - precision: 0.8489 - f1_score: 0.6879
Epoch 2/1000
128/128 - 0s - loss: 0.5199 - accuracy: 0.7841 - precision: 0.8692 - f1_score: 0.7839
Epoch 3/1000
128/128 - 0s - loss: 0.4389 - accuracy: 0.8116 - precision: 0.8824 - f1_score: 0.8112
Epoch 4/1000
128/128 - 0s - loss: 0.4026 - accuracy: 0.8251 - precision: 0.8914 - f1_score: 0.8246
Epoch 5/1000
128/128 - 0s - loss: 0.3774 - accuracy: 0.8353 - precision: 0.8991 - f1_score: 0.8348
Epoch 6/1000
128/128 - 0s - loss: 0.3568 - accuracy: 0.8445 - precision: 0.9041 - f1_score: 0.8441
Epoch 7/1000
128/128 - 0s - loss: 0.3397 - accuracy: 0.8546 - precision: 0.9089 - f1_score: 0.8542
Epoch 8/1000
128/128 - 0s - loss: 0.3276 - accuracy: 0.8617 - precision: 0.9110 - f1_score: 0.8613
Epoch 9/1000
128/128 - 0s - loss: 0.3184 - accuracy: 0.8653 - precision: 0.9126 - f1_score: 0.8650
Epoch 10/1000
128/128 - 0s - loss: 0.3132 - accuracy: 0.8668 - precision: 0.9139 - f1_score: 0.8665
Epoch 11/

128/128 - 0s - loss: 0.2438 - accuracy: 0.8990 - precision: 0.9371 - f1_score: 0.8988
Epoch 84/1000
128/128 - 0s - loss: 0.2447 - accuracy: 0.8989 - precision: 0.9373 - f1_score: 0.8987
Epoch 85/1000
128/128 - 0s - loss: 0.2434 - accuracy: 0.8999 - precision: 0.9377 - f1_score: 0.8997
Epoch 86/1000
128/128 - 0s - loss: 0.2423 - accuracy: 0.9001 - precision: 0.9374 - f1_score: 0.9000
Epoch 87/1000
128/128 - 0s - loss: 0.2431 - accuracy: 0.8991 - precision: 0.9371 - f1_score: 0.8989
Epoch 88/1000
128/128 - 0s - loss: 0.2435 - accuracy: 0.8999 - precision: 0.9384 - f1_score: 0.8997
Epoch 89/1000
128/128 - 0s - loss: 0.2414 - accuracy: 0.9001 - precision: 0.9378 - f1_score: 0.9000
Epoch 90/1000
128/128 - 0s - loss: 0.2408 - accuracy: 0.8997 - precision: 0.9372 - f1_score: 0.8995
Epoch 91/1000
128/128 - 0s - loss: 0.2406 - accuracy: 0.9002 - precision: 0.9377 - f1_score: 0.9000
Epoch 92/1000
128/128 - 0s - loss: 0.2417 - accuracy: 0.9015 - precision: 0.9395 - f1_score: 0.9013
Epoch 93/1000


Epoch 165/1000
128/128 - 0s - loss: 0.2310 - accuracy: 0.9040 - precision: 0.9420 - f1_score: 0.9038
Epoch 166/1000
128/128 - 0s - loss: 0.2298 - accuracy: 0.9041 - precision: 0.9408 - f1_score: 0.9040
Epoch 167/1000
128/128 - 0s - loss: 0.2308 - accuracy: 0.9046 - precision: 0.9407 - f1_score: 0.9044
Epoch 168/1000
128/128 - 0s - loss: 0.2288 - accuracy: 0.9049 - precision: 0.9425 - f1_score: 0.9048
Epoch 169/1000
128/128 - 0s - loss: 0.2303 - accuracy: 0.9037 - precision: 0.9405 - f1_score: 0.9036
Epoch 170/1000
128/128 - 0s - loss: 0.2302 - accuracy: 0.9043 - precision: 0.9418 - f1_score: 0.9041
Epoch 171/1000
128/128 - 0s - loss: 0.2299 - accuracy: 0.9036 - precision: 0.9421 - f1_score: 0.9034
Epoch 172/1000
128/128 - 0s - loss: 0.2290 - accuracy: 0.9056 - precision: 0.9419 - f1_score: 0.9055
Epoch 173/1000
128/128 - 0s - loss: 0.2303 - accuracy: 0.9069 - precision: 0.9401 - f1_score: 0.9067
Epoch 174/1000
128/128 - 0s - loss: 0.2294 - accuracy: 0.9061 - precision: 0.9416 - f1_scor

128/128 - 0s - loss: 0.2251 - accuracy: 0.9074 - precision: 0.9429 - f1_score: 0.9073
Epoch 247/1000
128/128 - 0s - loss: 0.2249 - accuracy: 0.9081 - precision: 0.9421 - f1_score: 0.9080
Epoch 248/1000
128/128 - 0s - loss: 0.2257 - accuracy: 0.9063 - precision: 0.9421 - f1_score: 0.9062
Epoch 249/1000
128/128 - 0s - loss: 0.2255 - accuracy: 0.9075 - precision: 0.9418 - f1_score: 0.9074
Epoch 250/1000
128/128 - 0s - loss: 0.2252 - accuracy: 0.9083 - precision: 0.9418 - f1_score: 0.9081
Epoch 251/1000
128/128 - 0s - loss: 0.2260 - accuracy: 0.9066 - precision: 0.9429 - f1_score: 0.9064
Epoch 252/1000
128/128 - 0s - loss: 0.2271 - accuracy: 0.9054 - precision: 0.9411 - f1_score: 0.9052
Epoch 253/1000
128/128 - 0s - loss: 0.2248 - accuracy: 0.9073 - precision: 0.9419 - f1_score: 0.9071
Epoch 254/1000
128/128 - 0s - loss: 0.2244 - accuracy: 0.9099 - precision: 0.9436 - f1_score: 0.9097
Epoch 255/1000
128/128 - 0s - loss: 0.2246 - accuracy: 0.9095 - precision: 0.9418 - f1_score: 0.9093
Epoch

Epoch 328/1000
128/128 - 0s - loss: 0.2226 - accuracy: 0.9089 - precision: 0.9424 - f1_score: 0.9088
Epoch 329/1000
128/128 - 0s - loss: 0.2230 - accuracy: 0.9092 - precision: 0.9428 - f1_score: 0.9090
Epoch 330/1000
128/128 - 0s - loss: 0.2232 - accuracy: 0.9081 - precision: 0.9433 - f1_score: 0.9080
Epoch 331/1000
128/128 - 0s - loss: 0.2232 - accuracy: 0.9081 - precision: 0.9411 - f1_score: 0.9080
Epoch 332/1000
128/128 - 0s - loss: 0.2229 - accuracy: 0.9084 - precision: 0.9427 - f1_score: 0.9082
Epoch 333/1000
128/128 - 0s - loss: 0.2230 - accuracy: 0.9073 - precision: 0.9430 - f1_score: 0.9072
Epoch 334/1000
128/128 - 0s - loss: 0.2230 - accuracy: 0.9096 - precision: 0.9415 - f1_score: 0.9095
Epoch 335/1000
128/128 - 0s - loss: 0.2230 - accuracy: 0.9077 - precision: 0.9427 - f1_score: 0.9076
Epoch 336/1000
128/128 - 0s - loss: 0.2231 - accuracy: 0.9081 - precision: 0.9428 - f1_score: 0.9080
Epoch 337/1000
128/128 - 0s - loss: 0.2235 - accuracy: 0.9089 - precision: 0.9409 - f1_scor

Epoch 410/1000
128/128 - 0s - loss: 0.2206 - accuracy: 0.9097 - precision: 0.9431 - f1_score: 0.9096
Epoch 411/1000
128/128 - 0s - loss: 0.2215 - accuracy: 0.9095 - precision: 0.9410 - f1_score: 0.9094
Epoch 412/1000
128/128 - 0s - loss: 0.2210 - accuracy: 0.9103 - precision: 0.9429 - f1_score: 0.9101
Epoch 413/1000
128/128 - 0s - loss: 0.2207 - accuracy: 0.9103 - precision: 0.9425 - f1_score: 0.9101
Epoch 414/1000
128/128 - 0s - loss: 0.2206 - accuracy: 0.9115 - precision: 0.9425 - f1_score: 0.9114
Epoch 415/1000
128/128 - 0s - loss: 0.2206 - accuracy: 0.9095 - precision: 0.9429 - f1_score: 0.9093
Epoch 416/1000
128/128 - 0s - loss: 0.2212 - accuracy: 0.9108 - precision: 0.9414 - f1_score: 0.9107
Epoch 417/1000
128/128 - 0s - loss: 0.2209 - accuracy: 0.9101 - precision: 0.9424 - f1_score: 0.9099
Epoch 418/1000
128/128 - 0s - loss: 0.2211 - accuracy: 0.9090 - precision: 0.9428 - f1_score: 0.9089
Epoch 419/1000
128/128 - 0s - loss: 0.2210 - accuracy: 0.9100 - precision: 0.9422 - f1_scor

128/128 - 0s - loss: 0.2193 - accuracy: 0.9126 - precision: 0.9420 - f1_score: 0.9125
Epoch 492/1000
128/128 - 0s - loss: 0.2193 - accuracy: 0.9114 - precision: 0.9420 - f1_score: 0.9113
Epoch 493/1000
128/128 - 0s - loss: 0.2182 - accuracy: 0.9117 - precision: 0.9443 - f1_score: 0.9115
Epoch 494/1000
128/128 - 0s - loss: 0.2192 - accuracy: 0.9109 - precision: 0.9424 - f1_score: 0.9108
Epoch 495/1000
128/128 - 0s - loss: 0.2195 - accuracy: 0.9117 - precision: 0.9426 - f1_score: 0.9115
Epoch 496/1000
128/128 - 0s - loss: 0.2193 - accuracy: 0.9115 - precision: 0.9424 - f1_score: 0.9114
Epoch 497/1000
128/128 - 0s - loss: 0.2193 - accuracy: 0.9107 - precision: 0.9435 - f1_score: 0.9106
Epoch 498/1000
128/128 - 0s - loss: 0.2176 - accuracy: 0.9117 - precision: 0.9453 - f1_score: 0.9116
Epoch 499/1000
128/128 - 0s - loss: 0.2199 - accuracy: 0.9106 - precision: 0.9425 - f1_score: 0.9104
Epoch 500/1000
128/128 - 0s - loss: 0.2187 - accuracy: 0.9108 - precision: 0.9433 - f1_score: 0.9107
Epoch

In [13]:
network_result = network.evaluate(x_train_standardized, y_train, batch_size=50, verbose=1)
for name, value in zip(network.metrics_names, network_result):
    print(name, ':', value)

loss : 0.2160847932100296
accuracy : 0.9110395908355713
precision : 0.9454306960105896
f1_score : [0.91436774 0.9074422 ]


In [14]:
PATH_TEST = PATH + 'churn_prediction_data/test.csv'
df_test = pd.read_csv(PATH_TEST)
rw = [i for i in df_test.RowNumber]

In [15]:
data_dummy = pd.get_dummies(df_test.loc[:, 'CreditScore': 'EstimatedSalary'])
df_test = pd.DataFrame(data_dummy)
df_test['HasCrCard'] = df_test['HasCrCard'].replace({0: -1})
df_test['IsActiveMember'] = df_test['IsActiveMember'].replace({0: -1})

In [16]:
x_hat_test = df_test.loc[:, 'CreditScore': 'Gender_Male']
x_hat_test_standardized = scaler.transform(x_hat_test)

In [17]:
y_hat_predictions_label = np.argmax(network.predict(x_hat_test_standardized), axis=1)
y_hat_predictions_label

array([0, 0, 0, ..., 0, 0, 0])

In [18]:
df_upload = pd.DataFrame({
    'RowNumber': rw, 
    'Exited': y_hat_predictions_label
})

df_upload.to_csv(PATH + 'churn_prediction_data/DNN_8000.csv')