In [9]:
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Dropout, Flatten, Conv2D, Dense
from sklearn.preprocessing import MinMaxScaler as MMS, StandardScaler as SDS
from tensorflow.keras.callbacks import EarlyStopping 
import numpy as np

# 1. 데이터

In [10]:
# Load the California housing data: x is the feature matrix, y the regression target.
dataset = fetch_california_housing()
x = dataset.data
y = dataset.target

print("원본 데이터")
print(x.shape, y.shape)

# Use 70% of the rows for training and the rest for testing.
# The fixed random_state makes the split repeatable across runs.
x_train, x_test, y_train, y_test = train_test_split(x, y, train_size=0.7, random_state=3333)

# Standardize the features. The scaler is fitted on the training rows only and
# then applied unchanged to the test rows, so test statistics never influence it.
# (MMS, the MinMaxScaler alias imported above, can be swapped in here instead.)
scaler = SDS()
x_train = scaler.fit_transform(x_train)
x_test = scaler.transform(x_test)

print("split + scailing 데이터")
print(x_train.shape, x_test.shape)

# ---------- Reshape to 4D (samples, features, 1, 1) so the data can feed the Conv2D model ---------- #
# Both dimensions are read from the arrays rather than hard-coded, so the cell
# keeps working if train_size, random_state or the dataset itself changes.
x_train = x_train.reshape(x_train.shape[0], x_train.shape[1], 1, 1)
x_test = x_test.reshape(x_test.shape[0], x_test.shape[1], 1, 1)


원본 데이터
(20640, 8) (20640,)
split + scailing 데이터
(14447, 8) (6193, 8)


# 2. 모델


In [11]:
# Small CNN regressor over inputs shaped (8, 1, 1): two (2, 1) convolutions
# followed by a dense head that ends in a single output unit.
model = Sequential([
    Conv2D(filters=32, kernel_size=(2, 1), activation='relu', input_shape=(8, 1, 1)),
    Dropout(rate=0.5),  # dropout to curb overfitting
    Conv2D(filters=16, kernel_size=(2, 1), activation='relu'),
    Dropout(rate=0.3),  # dropout to curb overfitting
    Flatten(),  # flatten the feature maps so the Dense layers can consume them
    Dense(units=64, activation='relu'),
    Dropout(rate=0.2),  # dropout to curb overfitting
    Dense(units=64, activation='relu'),
    Dense(units=1),  # no activation is set on the final regression output
])

# 3. 컴파일 및 훈련

In [12]:
# The target is a continuous value (regression), so the model is trained with
# mean squared error as the loss and Adam as the optimizer.
model.compile(loss='mse', optimizer='adam')

# Stop training once val_loss has gone 4 epochs without improving, and restore
# the weights from the best epoch seen so far.
ES = EarlyStopping(monitor='val_loss', mode='auto', patience=4, restore_best_weights=True)

# validation_split=0.2 sets aside 20% of the training data for validation;
# verbose=2 prints one summary line per epoch.
# The returned History object is bound to a name so the per-epoch losses stay
# available for inspection and the cell does not end by echoing the object's repr.
history = model.fit(
    x_train,
    y_train,
    epochs=256,
    batch_size=64,
    validation_split=0.2,
    callbacks=[ES],
    verbose=2,
)

Epoch 1/256
181/181 - 1s - loss: 1.6582 - val_loss: 0.7799 - 798ms/epoch - 4ms/step
Epoch 2/256
181/181 - 0s - loss: 0.9385 - val_loss: 0.7231 - 371ms/epoch - 2ms/step
Epoch 3/256
181/181 - 0s - loss: 0.8210 - val_loss: 0.6833 - 370ms/epoch - 2ms/step
Epoch 4/256
181/181 - 0s - loss: 0.7521 - val_loss: 0.5664 - 364ms/epoch - 2ms/step
Epoch 5/256
181/181 - 0s - loss: 0.6888 - val_loss: 0.6134 - 367ms/epoch - 2ms/step
Epoch 6/256
181/181 - 0s - loss: 0.6810 - val_loss: 0.5314 - 371ms/epoch - 2ms/step
Epoch 7/256
181/181 - 0s - loss: 0.6585 - val_loss: 0.5403 - 364ms/epoch - 2ms/step
Epoch 8/256
181/181 - 0s - loss: 0.6156 - val_loss: 0.5003 - 368ms/epoch - 2ms/step
Epoch 9/256
181/181 - 0s - loss: 0.6132 - val_loss: 0.5013 - 360ms/epoch - 2ms/step
Epoch 10/256
181/181 - 0s - loss: 0.6033 - val_loss: 0.4866 - 364ms/epoch - 2ms/step
Epoch 11/256
181/181 - 0s - loss: 0.5731 - val_loss: 0.4599 - 362ms/epoch - 2ms/step
Epoch 12/256
181/181 - 0s - loss: 0.5603 - val_loss: 0.4731 - 373ms/epoch 

<keras.callbacks.History at 0x251f38becd0>

# 4. 평가 및 예측

In [13]:
# Score the trained model on the held-out test set; evaluate() returns the
# loss the model was compiled with.
loss = model.evaluate(x_test, y_test, verbose=2)
print('loss(mse): ', loss)

# Predict every test row, then show one sample's inputs, true target and
# prediction one after another.
y_predict = model.predict(x_test)
sample_idx = 5
for sample_label, sample_source in (
    ('x_test:\n', x_test),
    ('y_test:\n', y_test),
    ('y_predict:\n', y_predict),
):
    print(sample_label, sample_source[sample_idx])

# RMSE is the square root of the mean squared error between targets and predictions.
mse = mean_squared_error(y_test, y_predict)
RMSE = np.sqrt(mse)
print("RMSE: ", RMSE)

# R2 score of the predictions against the true targets.
r2 = r2_score(y_test, y_predict)
print("R2: ", r2)

194/194 - 0s - loss: 0.3438 - 107ms/epoch - 551us/step
loss(mse):  0.3437986969947815
x_test:
 [[[-1.16390401]]

 [[-0.12999977]]

 [[-0.22882731]]

 [[-0.06416469]]

 [[-0.04198844]]

 [[ 0.08611436]]

 [[-0.1298398 ]]

 [[ 0.28999479]]]
y_test:
 0.498
y_predict:
 [0.79450804]
RMSE:  0.5863433865433628
R2:  0.744469385807191
