## 피마 인디언 당뇨병 예측
- 최적 모델 찾기, 조기 종료

In [68]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
seed = 2023
np.random.seed(seed)
tf.random.set_seed(seed)
import warnings
warnings.filterwarnings('ignore')

In [69]:
df = pd.read_csv('data/pima-indians-diabetes.csv', skiprows=9, header=None)
df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1


- 데이터 전처리

In [70]:
from sklearn.preprocessing import StandardScaler
X_scaled = StandardScaler().fit_transform(df.iloc[:,:-1].values)

In [71]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(
    X_scaled, df[8].values, stratify=df[8].values, test_size=0.2, random_state=seed
)

- 모델 정의/설정/학습/평가

In [72]:
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Dense
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping

In [73]:
model = Sequential([
    Dense(20, input_dim=8, activation='relu'),
    Dense(12, activation='relu'),
    Dense(8, activation='relu'),
    Dense(1, activation='sigmoid')
])
model.summary()

Model: "sequential_17"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_58 (Dense)            (None, 20)                180       
                                                                 
 dense_59 (Dense)            (None, 12)                252       
                                                                 
 dense_60 (Dense)            (None, 8)                 104       
                                                                 
 dense_61 (Dense)            (None, 1)                 9         
                                                                 
Total params: 545 (2.13 KB)
Trainable params: 545 (2.13 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


- Callback 사용

In [253]:
from keras.optimizers import Adam
from keras.layers import Dropout


In [278]:
model2 = Sequential([
    Dense(32, input_dim=8, activation='relu'),
    Dropout(0.2),
    Dense(24, activation='relu'),
    Dropout(0.2),
    Dense(16, activation='relu'),
    Dropout(0.2),
    Dense(1, activation='sigmoid')
])

optimizer = Adam(learning_rate=0.002)
model2.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])

In [279]:
mc = ModelCheckpoint('models/pima_best.h5', monitor='val_loss',
                     verbose=0, save_best_only=True)
# best를 찾은 뒤 10회 epoch 동안 best가 나오지 않으면, 학습을 강제 종료함
es = EarlyStopping(monitor='val_loss', patience=10) 

In [280]:
hist2 = model2.fit(X_train, y_train, validation_split=0.2,
                   epochs=225, batch_size=105, verbose=0,
                   callbacks=[mc, es])

In [281]:
best_model = load_model('models/pima_best.h5')
best_model.evaluate(X_test, y_test)



[0.46384304761886597, 0.7922077775001526]