In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.metrics import average_precision_score
from tqdm import tqdm_notebook
from tqdm.notebook import tqdm

In [2]:
# Load the bank dataset from a CSV file in the working directory.
df = pd.read_csv('bank_data.csv')
# Preview the first rows to check the column layout and the raw label values.
df.head()

Unnamed: 0,age,job,marital,education,default,housing,loan,contact,month,day_of_week,...,campaign,pdays,previous,poutcome,emp.var.rate,cons.price.idx,cons.conf.idx,euribor3m,nr.employed,y
0,39,admin.,married,university.degree,unknown,no,no,cellular,jul,thu,...,3,999,0,nonexistent,1.4,93.918,-42.7,4.968,5228.1,-1
1,31,services,divorced,high.school,no,yes,no,cellular,jul,mon,...,1,999,0,nonexistent,1.4,93.918,-42.7,4.96,5228.1,-1
2,34,services,divorced,high.school,no,yes,no,cellular,may,fri,...,1,999,0,nonexistent,-1.8,92.893,-46.2,1.25,5099.1,-1
3,23,admin.,single,professional.course,no,yes,no,cellular,jul,wed,...,2,999,0,nonexistent,1.4,93.918,-42.7,4.963,5228.1,-1
4,63,housemaid,married,basic.4y,no,no,no,cellular,aug,tue,...,1,999,0,nonexistent,-2.9,92.201,-31.4,0.838,5076.2,1


In [3]:
# Keep only the integer / float columns; non-numeric columns are dropped here.
numeric_dtypes = ['int64', 'float64']
data = df.select_dtypes(include=numeric_dtypes)

# Feature frame: every numeric column except the target `y` and `pdays`.
# NOTE(review): `pdays` is presumably excluded because of its 999 placeholder
# value visible in the preview -- confirm.
feature_frame = data.drop(columns=['y', 'pdays'])
X = feature_frame.values

# Show the feature frame as the cell output.
feature_frame

Unnamed: 0,age,duration,campaign,previous,emp.var.rate,cons.price.idx,cons.conf.idx,euribor3m,nr.employed
0,39,67,3,0,1.4,93.918,-42.7,4.968,5228.1
1,31,522,1,0,1.4,93.918,-42.7,4.960,5228.1
2,34,84,1,0,-1.8,92.893,-46.2,1.250,5099.1
3,23,332,2,0,1.4,93.918,-42.7,4.963,5228.1
4,63,479,1,0,-2.9,92.201,-31.4,0.838,5076.2
...,...,...,...,...,...,...,...,...,...
9275,55,146,1,0,1.4,94.465,-41.8,4.958,5228.1
9276,31,115,1,0,-0.1,93.200,-42.0,4.474,5195.8
9277,36,874,1,0,1.4,93.918,-42.7,4.961,5228.1
9278,31,870,1,0,1.4,93.918,-42.7,4.963,5228.1


In [4]:
from sklearn.preprocessing import StandardScaler

In [5]:
# Standardise the features to zero mean / unit variance.
#
# The scaler is fitted on the first 8000 rows only (the rows that become the
# training split), so statistics of the validation rows are not used.
#
# Both fit and transform start from the raw feature matrix rather than from
# `X`: this cell overwrites `X` with the scaled values, so fitting on `X`
# would, on a second run of the cell, fit the scaler on already-scaled data
# and leave `X` effectively unscaled.
raw_features = data.drop(columns=['y', 'pdays']).values

ss = StandardScaler()
ss.fit(raw_features[:8000])
X = ss.transform(raw_features)

In [6]:
# Binary target: 1 where the raw label equals 1, otherwise 0.
y = (data['y'] == 1).astype(int).values

# Positional split: the first 8000 rows train the models, the rest validate them.
train_size = 8000
X_train, y_train = X[:train_size], y[:train_size]
X_val, y_val = X[train_size:], y[train_size:]

In [7]:
# Record the TensorFlow version; the graph code in this notebook uses the
# TF 1.x API (placeholders, sessions).
tf.__version__

'1.15.0'

In [8]:
# Sanity check: (number of training rows, number of features).
X_train.shape

(8000, 9)

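$W_{9 \times h};\ b_{1 \times h};\ U_{h \times 1};\ c_{1 \times 1}$, where $h$ is the hidden-layer width (`num_hidden` in the next cell).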

In [9]:
# Build a one-hidden-layer network in the TF 1.x default graph:
#     logits = relu(X @ W + b) @ U + c
# Resetting the graph first makes the cell re-runnable (otherwise
# `get_variable` would fail on the already existing variable names).
tf.reset_default_graph()
# All initial weights are drawn with NumPy, so seeding NumPy fixes them.
np.random.seed(2020)

num_hidden = 100
# Input width is taken from the data instead of being hard-coded as 9, so the
# graph follows any change to the feature set.
num_features = X_train.shape[1]

X_plh = tf.placeholder(np.float64, shape=[None, num_features], name='X')
y_plh = tf.placeholder(np.float64, shape=[None, 1], name='y')

# NOTE(review): the 1/3 standard deviation presumably stands for 1/sqrt(9 inputs),
# mirroring sqrt(1/num_hidden) used for U -- confirm before changing the feature count.
W = tf.get_variable('W', initializer=np.random.normal(0, 1/3, size=[num_features, num_hidden]))
# Small random biases: one tenth of the scale used for W.
b = tf.get_variable('b', initializer=np.random.normal(0, 1/3, size=[num_hidden]) / 10)
U = tf.get_variable('U', initializer=np.random.normal(0, np.sqrt(1/num_hidden), size=[num_hidden, 1]))
c = tf.get_variable('c', initializer=np.zeros(1, dtype='float64'))

# `out` holds raw logits of shape [batch, 1]; a logit > 0 corresponds to a
# predicted probability > 0.5.
out = (tf.nn.relu(X_plh @ W + b) @ U + c)
# Per-example loss (shape [batch, 1]); it is intentionally left unreduced here.
# `minimize` on a non-scalar tensor differentiates the sum of its elements.
loss = tf.nn.sigmoid_cross_entropy_with_logits(labels=y_plh, logits=out)

# Despite the name, this is the training op returned by `minimize`: running it
# performs one Adam update step.
optimizer = tf.train.AdamOptimizer().minimize(loss)

Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


In [10]:
# Training hyper-parameters.
num_epochs = 100   # full passes over the training set
batch_size = 1024  # rows per optimisation step
# Number of training rows, read from the split itself (instead of repeating
# the literal 8000) so the mini-batch loop cannot drift out of sync with it.
num_obs = len(X_train)

In [11]:
# Turn the label vectors into (n, 1) columns to match `y_plh` (shape [None, 1]).
# reshape(-1, 1) is idempotent, whereas `[:, None]` would append another axis
# every time this cell is re-run.
y_train, y_val = y_train.reshape(-1, 1), y_val.reshape(-1, 1)

In [12]:
# Train the network with mini-batch Adam and report accuracy after every epoch.
# `acc_vals` collects the validation accuracy per epoch; `auc_vals` is created
# here but not filled by this cell.
auc_vals, acc_vals = [], []

# The session is left open on purpose so later cells can keep using `sess`.
sess = tf.Session()
init = tf.global_variables_initializer()
sess.run(init)

for epoch in tqdm(range(num_epochs)):
    # Step through the training rows in order, one batch at a time. Stepping by
    # `batch_size` never produces an empty trailing batch, even when `num_obs`
    # is an exact multiple of `batch_size`; the last batch may be shorter.
    for start in range(0, num_obs, batch_size):
        stop = start + batch_size
        X_batch = X_train[start:stop]
        y_batch = y_train[start:stop]

        sess.run(optimizer, feed_dict={X_plh: X_batch, y_plh: y_batch})

    # `out` depends only on the inputs, so no labels need to be fed here.
    train_out = sess.run(out, feed_dict={X_plh: X_train})
    val_out = sess.run(out, feed_dict={X_plh: X_val})

    # A logit above 0 means a predicted probability above 0.5.
    acc_train = np.mean((train_out > 0) == (y_train > 0))
    acc_val = np.mean((val_out > 0) == (y_val > 0))
    acc_vals.append(acc_val)

    print(f'Epoch {epoch}; Accuracy train: {acc_train:.4f}; Accuracy val: {acc_val:.4f}')

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  import sys


HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Epoch 0; Accuracy train: 0.4358; Accuracy val: 0.4445
Epoch 1; Accuracy train: 0.5949; Accuracy val: 0.5734
Epoch 2; Accuracy train: 0.6760; Accuracy val: 0.6547
Epoch 3; Accuracy train: 0.8016; Accuracy val: 0.7828
Epoch 4; Accuracy train: 0.8080; Accuracy val: 0.7836
Epoch 5; Accuracy train: 0.8085; Accuracy val: 0.7836
Epoch 6; Accuracy train: 0.8176; Accuracy val: 0.7852
Epoch 7; Accuracy train: 0.8240; Accuracy val: 0.7914
Epoch 8; Accuracy train: 0.8281; Accuracy val: 0.7969
Epoch 9; Accuracy train: 0.8327; Accuracy val: 0.8023
Epoch 10; Accuracy train: 0.8383; Accuracy val: 0.8102
Epoch 11; Accuracy train: 0.8425; Accuracy val: 0.8148
Epoch 12; Accuracy train: 0.8455; Accuracy val: 0.8187
Epoch 13; Accuracy train: 0.8511; Accuracy val: 0.8250
Epoch 14; Accuracy train: 0.8534; Accuracy val: 0.8281
Epoch 15; Accuracy train: 0.8552; Accuracy val: 0.8313
Epoch 16; Accuracy train: 0.8552; Accuracy val: 0.8305
Epoch 17; Accuracy train: 0.8570; Accuracy val: 0.8328
Epoch 18; Accuracy t

In [15]:
from sklearn.linear_model import LogisticRegression

In [16]:
# Baseline: logistic regression (default settings) on the same features.
lr = LogisticRegression()

# scikit-learn works with 1-D label vectors.
# NOTE: this rebinds `y_train` / `y_val` to flat arrays. The TensorFlow
# training cell feeds (n, 1) labels, so the label-reshape cell must be run
# again before that training cell can be re-executed.
y_train = (y_train > 0).astype(int).ravel()
y_val = (y_val > 0).astype(int).ravel()

lr.fit(X_train, y_train)
# Column 1 of predict_proba is the probability of the positive class.
train_out = lr.predict_proba(X_train)[:, 1]
val_out = lr.predict_proba(X_val)[:, 1]

# These hold average precision (a summary of the precision-recall curve),
# not accuracy, so they are not directly comparable with the accuracy values
# printed by the network's training loop. The `auc_*` names are kept as they were.
auc_train = average_precision_score(y_train, train_out)
auc_val = average_precision_score(y_val, val_out)

# The label now names the metric that is actually computed.
print(f'Average precision train: {auc_train:.4f}; Average precision val: {auc_val:.4f}')

Accuracy train: 0.9022; Accuracy val: 0.8713
