# 3. Deep NN

In [1]:
import tensorflow as tf
import numpy as np

from reader import Reader

def reset_graph(seed=42):
    """Start over with an empty default TensorFlow graph and reseed the RNGs.

    Args:
        seed: Integer handed to both ``np.random.seed`` and
            ``tf.set_random_seed``.
    """
    # NumPy's generator does not depend on the TensorFlow graph, so it can be
    # seeded up front.
    np.random.seed(seed)
    # Seed TensorFlow only after the reset so the seed is set on the new
    # default graph rather than on the one being discarded.
    tf.reset_default_graph()
    tf.set_random_seed(seed)

## 3.1 Data

5 fold cross validation 사용

In [2]:
# Load the complete data set through the project-local ``Reader`` helper.
# NOTE(review): presumably X_data holds the feature rows, y_data the class
# labels and id_data the sample identifiers -- confirm against reader.py.
my_reader = Reader()
X_data, y_data, id_data = my_reader.get_entire_data()

In [3]:
# Build the (train, validation) pairs for 5-fold cross validation.
# The data is cut into n_folds consecutive chunks (no shuffling); fold ``idx``
# validates on chunk ``idx`` and trains on the remaining chunks.
n_folds = 5
X_splited = np.array_split(X_data, n_folds)
y_splited = np.array_split(y_data, n_folds)
folds_X = []  # folds_X[idx] == (train_X, val_X)
folds_y = []  # folds_y[idx] == (train_y, val_y)
for idx in range(n_folds):
    # np.array_split returns a plain list, so drop chunk ``idx`` with list
    # slicing. Calling np.delete on the list would first coerce it to an
    # array, which fails on recent NumPy when the chunks have unequal length
    # (i.e. whenever the sample count is not a multiple of n_folds).
    tmp_train_X = np.concatenate(X_splited[:idx] + X_splited[idx + 1:])
    tmp_val_X = X_splited[idx]
    folds_X.append((tmp_train_X, tmp_val_X))

    tmp_train_y = np.concatenate(y_splited[:idx] + y_splited[idx + 1:])
    tmp_val_y = y_splited[idx]
    folds_y.append((tmp_train_y, tmp_val_y))

## 3.2 Model

- he initializer 사용
- ReLU 사용
- 구성: hidden layer 3개, layer당 50 neurons (`dnn` 함수의 기본값)

In [4]:
# He initializer, shared by every dense layer in this notebook.
he_init = tf.contrib.layers.variance_scaling_initializer()


def dnn(inputs, n_hidden_layers=3, n_neurons=50, name=None,
        activation=tf.nn.relu, initializer=he_init):
    """Stack fully connected hidden layers on top of ``inputs``.

    With the defaults this builds 3 hidden layers of 50 neurons each.

    Args:
        inputs: Tensor fed into the first hidden layer.
        n_hidden_layers: Number of dense layers to stack.
        n_neurons: Number of units in every hidden layer.
        name: Variable-scope name; the scope falls back to "dnn" when None.
        activation: Activation function applied by each hidden layer.
        initializer: Kernel initializer used by each hidden layer.

    Returns:
        The output tensor of the last hidden layer, or ``inputs`` itself when
        ``n_hidden_layers`` is 0.
    """
    hidden = inputs
    with tf.variable_scope(name, "dnn"):
        # Layers are named hidden1, hidden2, ... inside the scope.
        for layer_no in range(1, n_hidden_layers + 1):
            hidden = tf.layers.dense(
                hidden, n_neurons,
                activation=activation,
                kernel_initializer=initializer,
                name="hidden%d" % layer_no)
    return hidden

In [5]:
n_inputs = 30 # 30 features
n_outputs = 2 # M, B

# Start from an empty graph so re-running this cell does not hit name clashes
# with ops left over from a previous run.
reset_graph()

# Mini-batch inputs: one row of n_inputs features per sample.
X = tf.placeholder(tf.float32, shape=(None, n_inputs), name="X")
# Integer class labels, one per sample. ``(None,)`` declares a rank-1 vector;
# the former ``(None)`` is simply ``None`` and left the shape fully unknown.
y = tf.placeholder(tf.int64, shape=(None,), name="y")

# Hidden stack built with the default settings of dnn().
dnn_outputs = dnn(X)

# Linear output layer producing one unnormalised score per class, plus the
# softmax probabilities derived from those scores.
logits = tf.layers.dense(dnn_outputs, n_outputs, kernel_initializer=he_init, name="logits")
Y_proba = tf.nn.softmax(logits, name="Y_proba")

- learning rate : 0.01
- loss : cross entropy 사용
- optimizer : Adam
- top 1 rate 사용

In [6]:
# Step size handed to the Adam optimizer below.
learning_rate = 0.01

# Loss: mean softmax cross entropy between the integer labels ``y`` and the
# raw ``logits`` (the op applies softmax itself, so Y_proba is not used here).
xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)
loss = tf.reduce_mean(xentropy, name="loss")

# Adam optimizer; running ``training_op`` performs one update step on ``loss``.
optimizer = tf.train.AdamOptimizer(learning_rate)
training_op = optimizer.minimize(loss, name="training_op")

# Top-1 accuracy: a sample counts as correct when its label is the class with
# the highest logit; the boolean hits are averaged as floats.
correct = tf.nn.in_top_k(logits, y, 1)
accuracy = tf.reduce_mean(tf.cast(correct, tf.float32), name="accuracy")

# Created last so that variables added by the optimizer are covered by both
# the initializer and the saver.
init = tf.global_variables_initializer()
saver = tf.train.Saver()

## 3.3 Training

- Early stopping : validation loss를 더 줄이지 못한 채 진행된 epoch 수가 내가 설정한 횟수(20)를 초과하면 학습을 멈춘다.
- validation loss가 가장 낮게 갱신될 때마다 Saver를 활용해서 그 때의 weight 값을 저장해둔다. 학습이 끝나면 저장해둔 최적 weight를 불러와서 전체 데이터(training + validation)에 적용해 accuracy를 측정한다.
- 5 fold로 아래 코드의 data_idx를 0에서 4까지 바꿔가며 학습해보았다.
    + 0: 90.16%
    + 1: 95.08%
    + 2: 93.85%
    + 3: 93.50%
    + 4: 93.32%
- 평균 accuracy : 93.18%

In [7]:
data_idx = 4 # Check 0 to 4
kfold_data_X = folds_X[data_idx]
kfold_data_y = folds_y[data_idx]
n_epochs = 1000
batch_size = 20 # mini batch size

# Early-stopping state: training stops once the validation loss has failed to
# improve for more than max_checks_without_progress consecutive epochs.
max_checks_without_progress = 20
checks_without_progress = 0
best_loss = np.inf  # np.infty was removed in NumPy 2.0

# Each fold entry is a (train, validation) pair.
train_X, val_X = kfold_data_X
train_y, val_y = kfold_data_y
n_train = len(train_X)
# At least one batch, so np.array_split never receives 0 sections when the
# training set is smaller than batch_size.
n_batches = max(1, n_train // batch_size)

# The best weights seen so far are written here and restored after training.
checkpoint_path = "./dnn_checkpoints/wdbc.ckpt"

with tf.Session() as sess:
    init.run()

    for epoch in range(n_epochs):
        # Visit the training samples in a fresh random order every epoch.
        rnd_idx = np.random.permutation(n_train)
        # Cut the shuffled indices into n_batches nearly equal mini batches
        # and run one optimizer step per batch.
        for rnd_indices in np.array_split(rnd_idx, n_batches):
            X_batch, y_batch = train_X[rnd_indices], train_y[rnd_indices]
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
        loss_val, acc_val = sess.run([loss, accuracy], feed_dict={X: val_X, y: val_y})

        # New best validation loss: checkpoint the weights and reset the
        # patience counter.
        if loss_val < best_loss:
            save_path = saver.save(sess, checkpoint_path)
            best_loss = loss_val
            checks_without_progress = 0
        # No improvement: count the epoch and stop once the patience budget
        # is exceeded.
        else:
            checks_without_progress += 1
            if checks_without_progress > max_checks_without_progress:
                print("Early stopping!")
                break
        print("{}\tValidation loss: {:.6f}\tBest loss: {:.6f}\tAccuracy: {:.2f}%".format(
            epoch, loss_val, best_loss, acc_val * 100))

# Reload the best checkpoint and score it on the entire data set.
# NOTE(review): X_data contains the samples this fold was trained on, so the
# figure printed below is not a held-out accuracy.
with tf.Session() as sess:
    saver.restore(sess, checkpoint_path)
    acc_test = accuracy.eval(feed_dict={X: X_data, y: y_data})
    print("Final test accuracy: {:.2f}%".format(acc_test * 100))

0	Validation loss: 4.281433	Best loss: 4.281433	Accuracy: 75.22%
1	Validation loss: 1.074118	Best loss: 1.074118	Accuracy: 92.04%
2	Validation loss: 0.966664	Best loss: 0.966664	Accuracy: 91.15%
3	Validation loss: 2.910038	Best loss: 0.966664	Accuracy: 68.14%
4	Validation loss: 1.482485	Best loss: 0.966664	Accuracy: 92.04%
5	Validation loss: 2.366786	Best loss: 0.966664	Accuracy: 92.04%
6	Validation loss: 1.266086	Best loss: 0.966664	Accuracy: 94.69%
7	Validation loss: 0.729205	Best loss: 0.729205	Accuracy: 92.04%
8	Validation loss: 1.166198	Best loss: 0.729205	Accuracy: 92.04%
9	Validation loss: 1.186710	Best loss: 0.729205	Accuracy: 74.34%
10	Validation loss: 0.424935	Best loss: 0.424935	Accuracy: 90.27%
11	Validation loss: 0.222514	Best loss: 0.222514	Accuracy: 92.04%
12	Validation loss: 0.194688	Best loss: 0.194688	Accuracy: 92.04%
13	Validation loss: 0.215423	Best loss: 0.194688	Accuracy: 92.92%
14	Validation loss: 0.343392	Best loss: 0.194688	Accuracy: 88.50%
15	Validation loss: 

126	Validation loss: 0.208271	Best loss: 0.141730	Accuracy: 92.92%
127	Validation loss: 0.143881	Best loss: 0.141730	Accuracy: 96.46%
128	Validation loss: 0.190151	Best loss: 0.141730	Accuracy: 93.81%
Early stopping!
INFO:tensorflow:Restoring parameters from ./dnn_checkpoints/wdbc.ckpt
Final test accuracy: 93.32%
