In [1]:
import tensorflow as tf
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import RobustScaler
from sklearn.metrics import precision_score, recall_score, accuracy_score

from tensorflow.contrib.layers import fully_connected

%matplotlib inline

  from ._conv import register_converters as _register_converters


In [2]:
default = pd.read_csv('../data/credit_card_default.csv')
default.rename(columns=lambda x: x.lower(), inplace=True)
default.rename(columns={'pay_0':'pay_1','default payment next month':'default'}, inplace=True)
# Base values: female, other_education, not_married
default['grad_school'] = (default['education'] == 1).astype('int')
default['university'] = (default['education'] == 2).astype('int')
default['high_school'] = (default['education'] == 3).astype('int')
default.drop('education', axis=1, inplace=True)

default['male'] = (default['sex']==1).astype('int')
default.drop('sex', axis=1, inplace=True)

default['married'] = (default['marriage'] == 1).astype('int')
default.drop('marriage', axis=1, inplace=True)

# For pay_n features if >0 then it means the customer was delayed on that month
pay_features = ['pay_' + str(i) for i in range(1,7)]
for p in pay_features:
    default[p] = (default[p] > 0).astype(int)

In [3]:
target_name = 'default'
X = default.drop('default', axis=1)
feature_names = X.columns
robust_scaler = RobustScaler()
X = robust_scaler.fit_transform(X)
y = default[target_name]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=12, stratify=y)

In [4]:
n_epochs = 40
batch_size = 100 

In [6]:
X_placeholder = tf.placeholder(X_train.dtype, shape=X_train.shape)
y_placeholder = tf.placeholder(y_train.dtype, shape=y_train.shape)

dataset = tf.data.Dataset.from_tensor_slices((X_placeholder, y_placeholder))
dataset = dataset.shuffle(buffer_size=10000)
dataset = dataset.batch(batch_size)
iterator = dataset.make_initializable_iterator()
next_element = iterator.get_next()

In [7]:
n_inputs = X_train.shape[1] #25
n_hidden1 = 200
n_hidden2 = 200 
n_hidden3 = 200
n_outputs = 2

In [8]:
X = tf.placeholder(X_train.dtype, shape=[None,n_inputs])
y = tf.placeholder(y_train.dtype)

In [9]:
def DNN(X_values):
    hidden1 = fully_connected(X_values, n_hidden1, activation_fn=tf.nn.elu)
    hidden2 = fully_connected(hidden1, n_hidden2, activation_fn=tf.nn.elu)
    hidden3 = fully_connected(hidden2, n_hidden3, activation_fn=tf.nn.elu)
    logits = fully_connected(hidden3, n_outputs, activation_fn=None)
    return tf.cast(logits, dtype=tf.float32)

In [10]:
logits = DNN(X)
cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)
loss = tf.reduce_mean(cross_entropy)

In [11]:
probs = tf.nn.softmax(logits)

In [12]:
optimizer = tf.train.AdamOptimizer(learning_rate=0.001)
training_op = optimizer.minimize(loss)

In [13]:
with tf.Session() as sess:
    tf.global_variables_initializer().run()
    for epoch in range(n_epochs):
        sess.run(iterator.initializer, feed_dict={X_placeholder: X_train, y_placeholder: y_train})
        while True:
            try:
                batch_data = sess.run(next_element)
                X_batch = batch_data[0]
                y_batch = batch_data[1]
                sess.run(training_op, feed_dict={X: X_batch, y:y_batch})
            except tf.errors.OutOfRangeError:
                break
        print("Epoch: {}".format(epoch+1))
    print("Done Trainning!")
    probabilities = probs.eval(feed_dict={X: X_test})[:,1]

Epoch: 1
Epoch: 2
Epoch: 3
Epoch: 4
Epoch: 5
Epoch: 6
Epoch: 7
Epoch: 8
Epoch: 9
Epoch: 10
Epoch: 11
Epoch: 12
Epoch: 13
Epoch: 14
Epoch: 15
Epoch: 16
Epoch: 17
Epoch: 18
Epoch: 19
Epoch: 20
Epoch: 21
Epoch: 22
Epoch: 23
Epoch: 24
Epoch: 25
Epoch: 26
Epoch: 27
Epoch: 28
Epoch: 29
Epoch: 30
Epoch: 31
Epoch: 32
Epoch: 33
Epoch: 34
Epoch: 35
Epoch: 36
Epoch: 37
Epoch: 38
Epoch: 39
Epoch: 40
Done Trainning!


In [14]:
y_pred = (probabilities > 0.16).astype(int)
print('Recall: {:0.2f}'.format(100*recall_score(y_true=y_test, y_pred=y_pred)))
print('Precision: {:0.2f}'.format(100*precision_score(y_true=y_test, y_pred=y_pred)))
print('Accuracy: {:0.2f}'.format(100*accuracy_score(y_true=y_test, y_pred=y_pred)))

Recall: 80.12
Precision: 34.28
Accuracy: 61.60
