### 경고 무시

In [2]:
import warnings

warnings.filterwarnings(action='ignore') 

### 필요 라이브러리 Import 

In [3]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
import pandas as pd
import tensorflow as tf 
import os

from tensorflow.keras.layers import Input, Dense
from tensorflow.keras.models import Model 

from sklearn.metrics import mean_absolute_error
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.callbacks import Callback

from tensorflow.keras.callbacks import TensorBoard
from sklearn.metrics import roc_auc_score
from sklearn.metrics import classification_report

### 경로 설정 및 함수 정의

In [4]:
# Locations of the three dataset splits, relative to the working directory.
_DATA_ROOT = './data/chapter_4'
TRAIN_DATA = _DATA_ROOT + '/train/train_data.csv'
VAL_DATA = _DATA_ROOT + '/val/val_data.csv'
TEST_DATA = _DATA_ROOT + '/test/test_data.csv'

In [6]:
# Data loading helper
def load_data():
    """Read the train/val/test CSV files and return scaled features and labels.

    The ``'y'`` column of every split is removed from the frame and stored as
    that split's label vector. The remaining columns are standardised with a
    ``StandardScaler`` that is fit on the training split only and then applied
    unchanged to the validation and test splits.

    Returns:
        dict with the keys ``'train_y'``, ``'val_y'``, ``'test_y'`` (label
        columns), ``'train_X'``, ``'val_X'``, ``'test_X'`` (scaled feature
        arrays) and ``'scaler'`` (the fitted ``StandardScaler``).
    """
    frames = {
        'train': pd.read_csv(TRAIN_DATA),
        'val': pd.read_csv(VAL_DATA),
        'test': pd.read_csv(TEST_DATA),
    }

    data = {}
    # pop() strips the label column in place, leaving only features behind.
    for split in ('train', 'val', 'test'):
        data[split + '_y'] = frames[split].pop('y')

    # Fit on the training features only so that no validation/test statistics
    # leak into the scaling parameters.
    scaler = StandardScaler()
    data['train_X'] = scaler.fit_transform(frames['train'])
    data['val_X'] = scaler.transform(frames['val'])
    data['test_X'] = scaler.transform(frames['test'])

    # Keep the fitted scaler so the same feature scaling can be reused
    # (or inverted) later.
    data['scaler'] = scaler

    return data

In [7]:
# Load every split once; `data` is the dict of scaled features, labels and the
# fitted scaler returned by load_data().
data = load_data()

In [9]:
# Model factory
def build_network(input_features=None):
    """Build and compile a fully connected binary classifier.

    The network is a stack of five ReLU ``Dense`` layers (128, 64, 32, 16 and
    8 units, named ``hidden1`` .. ``hidden5``) followed by a single sigmoid
    unit named ``final``. It is compiled with the Adam optimizer,
    binary cross-entropy loss and an accuracy metric.

    Args:
        input_features: Number of input features; used as the length of the
            1-D input shape.

    Returns:
        The compiled Keras ``Model``.
    """
    inputs = Input(shape=(input_features, ), name='input')

    # Funnel-shaped hidden stack: each layer halves the width of the previous.
    hidden = inputs
    for position, units in enumerate((128, 64, 32, 16, 8), start=1):
        hidden = Dense(units, activation='relu', name='hidden%d' % position)(hidden)

    prediction = Dense(1, activation='sigmoid', name='final')(hidden)

    network = Model(inputs=inputs, outputs=prediction)
    network.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    return network

In [8]:
data['train_X'].shape

(9200, 178)

### ROC AUC 사용자 지정 콜백 생성 

In [10]:
class RocAUCScore(Callback):
    """Keras callback that reports ROC AUC on two datasets after every epoch.

    Args:
        training_data: Sequence whose first item is the training features and
            whose second item is the training labels.
        validation_data: Sequence whose first item is the validation features
            and whose second item is the validation labels.
    """

    def __init__(self, training_data, validation_data):
        super(RocAUCScore, self).__init__()
        self.x = training_data[0]
        self.y = training_data[1]
        self.x_val = validation_data[0]
        self.y_val = validation_data[1]

    def on_epoch_end(self, epoch, logs=None):
        """Compute and print the ROC AUC for both datasets at the end of an epoch.

        Args:
            epoch: Index of the epoch that just finished (unused).
            logs: Metric dict supplied by Keras, or ``None``. When a dict is
                given, the scores are also stored in it under ``'roc_auc'``
                and ``'val_roc_auc'``.
        """
        # `logs` defaults to None rather than a shared mutable `{}`.
        roc = roc_auc_score(self.y, self.model.predict(self.x))
        roc_val = roc_auc_score(self.y_val, self.model.predict(self.x_val))
        print('\n *** ROC AUC Score: %s - roc-auc_val: %s ***' % (str(roc), str(roc_val)))

        # Expose the scores through the epoch logs as well as on stdout.
        if logs is not None:
            logs['roc_auc'] = float(roc)
            logs['val_roc_auc'] = float(roc_val)

        return

### Callback 생성 

In [11]:
def create_callbacks(data):
    """Assemble the callbacks used during training.

    Args:
        data: Mapping that provides ``'train_X'``, ``'train_y'``, ``'val_X'``
            and ``'val_y'``; these are handed to the ROC AUC callback.

    Returns:
        A list ``[TensorBoard, RocAUCScore, ModelCheckpoint]``:
        TensorBoard logs to ``./ch4_tb_log/mlp``; the checkpoint callback
        monitors ``val_acc`` and only saves when it improves.
    """
    # NOTE(review): the `batch_size`/`write_grads` arguments and the `val_acc`
    # metric name are specific to the Keras version in use; confirm before
    # upgrading TensorFlow.
    return [
        TensorBoard(
            log_dir='./ch4_tb_log/mlp',
            histogram_freq=1,
            batch_size=32,
            write_graph=True,
            write_grads=False,
        ),
        RocAUCScore(
            training_data=(data['train_X'], data['train_y']),
            validation_data=(data['val_X'], data['val_y']),
        ),
        ModelCheckpoint(
            filepath='./model-weights.{epoch:02d}-{val_acc:.6f}.hdf5',
            monitor='val_acc',
            verbose=1,
            save_best_only=True,
        ),
    ]

In [12]:
# Build the callback list (TensorBoard, ROC AUC reporter, checkpointing) that
# is passed to model.fit.
callbacks = create_callbacks(data)

In [13]:
data['train_X'].shape

(9200, 178)

In [14]:
data['val_X'].shape, data['val_y'].shape

((1150, 178), (1150,))

### 모델 학습 

In [15]:
# The input width of the network is the column count of the training matrix.
input_features = data['train_X'].shape[1]
model = build_network(input_features=input_features)

# Train for 20 epochs in mini-batches of 32, evaluating on the validation
# split after each epoch and running the callbacks created earlier.
model.fit(
    x=data['train_X'],
    y=data['train_y'],
    batch_size=32,
    epochs=20,
    verbose=1,
    validation_data=(data['val_X'], data['val_y']),
    callbacks=callbacks,
)

Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where
Train on 9200 samples, validate on 1150 samples
Epoch 1/20
 *** ROC AUC Score: 0.990110281851847 - roc-auc_val: 0.9911686695215375 ***

Epoch 00001: val_acc improved from -inf to 0.95913, saving model to ./model-weights.01-0.959130.hdf5
Epoch 2/20
 *** ROC AUC Score: 0.9975708696101282 - roc-auc_val: 0.9932085685638221 ***

Epoch 00002: val_acc improved from 0.95913 to 0.98000, saving model to ./model-weights.02-0.980000.hdf5
Epoch 3/20
 *** ROC AUC Score: 0.9977542324081539 - roc-auc_val: 0.9908081062525623 ***

Epoch 00003: val_acc improved from 0.98000 to 0.98087, saving model to ./model-weights.03-0.980870.hdf5
Epoch 4/20
 *** ROC AUC Score: 0.9987938200726387 - roc-auc_val: 0.9887533895416896 ***

Epoch 00004: val_acc did not improve from 0.98087
Epoch 5/20
 *** ROC AUC Sc

<tensorflow.python.keras.callbacks.History at 0x7f5b5de5eba8>

### 정밀도, 재현율 및 f1 점수 측정 

In [16]:
def class_from_prob(x, operating_point=0.5):
    """Turn predicted probabilities into hard 0/1 class labels.

    Values greater than or equal to ``operating_point`` become 1 and values
    below it become 0. The input is left untouched; a thresholded copy with
    the same shape and dtype is returned.

    Args:
        x: Array-like that supports ``.copy()`` and boolean-mask assignment
            (e.g. a NumPy array), holding the predicted scores.
        operating_point: Decision threshold. Defaults to 0.5.

    Returns:
        A new object of the same type as ``x`` containing the class labels.
    """
    # Evaluate both masks against the original values before writing anything.
    # Writing the 1s first and comparing afterwards would re-classify them as
    # "below threshold" whenever operating_point > 1.
    is_positive = x >= operating_point
    is_negative = x < operating_point

    # Work on a copy so the caller's probabilities are not overwritten.
    labels = x.copy()
    labels[is_positive] = 1
    labels[is_negative] = 0
    return labels

In [19]:
# Score the validation split, threshold the predictions at the default
# operating point (0.5) and print per-class precision / recall / F1.
y_prob_val = model.predict(data['val_X'])
y_hat_val = class_from_prob(y_prob_val)

print(classification_report(data['val_y'], y_hat_val))

              precision    recall  f1-score   support

           0       0.99      0.98      0.98       933
           1       0.91      0.95      0.93       217

    accuracy                           0.97      1150
   macro avg       0.95      0.97      0.96      1150
weighted avg       0.97      0.97      0.97      1150



### 텐서보드 시각화

In [21]:
# Serve the training logs under ./ch4_tb_log/ with TensorBoard on port 6008.
# The server runs in the foreground until interrupted, so this cell does not
# finish on its own.
!tensorboard --logdir ./ch4_tb_log/ --port 6008

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
Serving TensorBoard on localhost; to expose to the network, use a proxy or pass --bind_all
TensorBoard 2.8.0 at http://localhost:6008/ (Press CTRL+C to quit)
W0221 21:13:07.305130 140413237593856 plugin_event_accumulator.py:319] Found more than one graph event per run, or there was a metagraph containing a graph_def, as well as one or more graph events.  Overwriting the graph with the newest event.
W0221 21:13:07.305858 140413237593856 plugin_event_accumulator.py:331] Found more than one metagraph event per run. Overwriting the metagraph with the newest event.
W0221 21:13:07.316735 140413237593856 plugin_event_accumulator.py:319] Found more than one graph event per run, or there was