In [40]:
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Dropout, Flatten, Conv2D, Dense
from sklearn.preprocessing import MinMaxScaler as MMS, StandardScaler as SDS
from tensorflow.keras.callbacks import EarlyStopping 
import numpy as np

# 1. 데이터

In [41]:
# Load the scikit-learn diabetes regression dataset (feature matrix and target vector).
dataset = load_diabetes()
x = dataset.data
y = dataset.target

print("원본 데이터")
print(x.shape, y.shape)

# Keep 70% of the samples for training; the fixed random_state makes the split reproducible.
x_train, x_test, y_train, y_test = train_test_split(x, y, train_size=0.7, random_state=3333)

# Standardize the features. The scaler is fitted on the training split only and
# then applied to the test split, so test statistics never leak into training.
# (MMS, the MinMaxScaler alias imported above, is a drop-in alternative.)
scaler = SDS()
x_train = scaler.fit_transform(x_train)
x_test = scaler.transform(x_test)

print("split + scailing 데이터")
print(x_train.shape, x_test.shape)

# Reshape to 4-D (samples, features, 1, 1) so the tabular data can feed Conv2D layers.
# The dimensions are taken from the arrays themselves instead of hard-coding
# 309 / 133 / 10, so the cell keeps working when train_size or the dataset changes.
n_features = x_train.shape[1]
x_train = x_train.reshape(len(x_train), n_features, 1, 1)
x_test = x_test.reshape(len(x_test), n_features, 1, 1)


원본 데이터
(442, 10) (442,)
split + scailing 데이터
(309, 10) (133, 10)


# 2. 모델


In [42]:
# Small CNN regressor: two (2, 1) convolutions slide along the feature axis,
# followed by a dense head that ends in a single linear output unit.
# Dropout layers are interleaved to reduce overfitting.
model = Sequential([
    Conv2D(32, (2, 1), input_shape=(10, 1, 1), activation='relu'),
    Dropout(0.5),
    Conv2D(16, (2, 1), activation='relu'),
    Dropout(0.3),
    Flatten(),  # collapse the conv feature maps to 2-D (batch, features) for the Dense layers
    Dense(32, activation='relu'),
    Dropout(0.2),
    Dense(16, activation='relu'),
    Dense(1),   # no activation: raw regression output
])

# 3. 컴파일 및 훈련

In [43]:
# The target is a continuous value, so this is a regression problem trained with
# mean squared error (a categorical cross-entropy loss does not apply here).
model.compile(loss='mse', optimizer='adam')

# Stop when val_loss has not improved for 4 epochs and restore the best weights seen.
ES = EarlyStopping(monitor='val_loss', mode='auto', patience=4, restore_best_weights=True)

# 20% of the training data is held out for validation; verbose=2 prints one line per epoch.
# The returned History is kept so the loss curves can be inspected afterwards
# (and so the cell does not end with a bare History repr).
history = model.fit(x_train, y_train, epochs=64, batch_size=1, validation_split=0.2, callbacks=[ES], verbose=2)

Epoch 1/64
247/247 - 1s - loss: 15806.5029 - val_loss: 4912.2861 - 585ms/epoch - 2ms/step
Epoch 2/64
247/247 - 0s - loss: 5726.3120 - val_loss: 4334.8193 - 240ms/epoch - 972us/step
Epoch 3/64
247/247 - 0s - loss: 4813.2046 - val_loss: 3280.1272 - 267ms/epoch - 1ms/step
Epoch 4/64
247/247 - 0s - loss: 4470.4272 - val_loss: 2991.1970 - 228ms/epoch - 923us/step
Epoch 5/64
247/247 - 0s - loss: 4528.3081 - val_loss: 2792.0554 - 236ms/epoch - 955us/step
Epoch 6/64
247/247 - 0s - loss: 4496.5425 - val_loss: 3494.9568 - 232ms/epoch - 939us/step
Epoch 7/64
247/247 - 0s - loss: 4017.3801 - val_loss: 2349.0566 - 222ms/epoch - 899us/step
Epoch 8/64
247/247 - 0s - loss: 3832.0339 - val_loss: 2355.3784 - 231ms/epoch - 935us/step
Epoch 9/64
247/247 - 0s - loss: 4041.6638 - val_loss: 2566.6802 - 231ms/epoch - 935us/step
Epoch 10/64
247/247 - 0s - loss: 4175.0430 - val_loss: 2220.2268 - 233ms/epoch - 943us/step
Epoch 11/64
247/247 - 0s - loss: 4371.4673 - val_loss: 2309.9968 - 226ms/epoch - 916us/step


<keras.callbacks.History at 0x22664ae26d0>

# 4. 평가 및 예측

In [44]:
# Test-set loss as reported by Keras (MSE, matching the compiled loss).
loss = model.evaluate(x_test, y_test, verbose=2)
print('loss(mse): ', loss)

# Predict on the whole test set, then show one sample's input, target and prediction.
y_predict = model.predict(x_test)
sample_idx = 5
for label, values in (
    ('x_test:\n', x_test),
    ('y_test:\n', y_test),
    ('y_predict:\n', y_predict),
):
    print(label, values[sample_idx])

# Root mean squared error, in the same units as the target.
test_mse = mean_squared_error(y_test, y_predict)
RMSE = np.sqrt(test_mse)
print("RMSE: ", RMSE)

# Coefficient of determination on the test set.
r2 = r2_score(y_test, y_predict)
print("R2: ", r2)

5/5 - 0s - loss: 3088.6470 - 70ms/epoch - 14ms/step
loss(mse):  3088.64697265625
x_test:
 [[[-0.85919798]]

 [[-0.93419873]]

 [[-0.97595706]]

 [[-0.69258933]]

 [[-0.10625264]]

 [[-0.44073313]]

 [[ 1.76115174]]

 [[-0.77860724]]

 [[-1.34549314]]

 [[ 0.2283082 ]]]
y_test:
 128.0
y_predict:
 [81.55428]
RMSE:  55.57559781089035
R2:  0.43674579165863947
