In [1]:
from sklearn.datasets import load_boston
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Dropout, Flatten, Conv2D, Dense
from sklearn.preprocessing import MinMaxScaler as MMS, StandardScaler as SDS
from tensorflow.keras.callbacks import EarlyStopping 
import numpy as np

# 1. 데이터

In [2]:
# Load the Boston housing data: `x` is the feature matrix, `y` the regression target.
# NOTE(review): scikit-learn discourages this dataset (see the warning printed in
# this cell's output); newer releases may not ship load_boston at all — TODO confirm
# the installed version or migrate to one of the suggested alternatives.
dataset = load_boston()
x = dataset.data
y = dataset.target

print("원본 데이터")
print(x.shape, y.shape)

# Hold out 30% of the rows for testing; the fixed random_state keeps the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(x, y, train_size=0.7, random_state=3333)

# Fit the scaler on the training split only, then apply the same transform to the
# test split so that no test-set statistics are used during fitting.
# Alternative: scaler = MMS() for min-max scaling instead of standardization.
scaler = SDS()
x_train = scaler.fit_transform(x_train)
x_test = scaler.transform(x_test)

print("split + scailing 데이터")
print(x_train.shape, x_test.shape)

# ---------- Reshape to 4-D (samples, features, 1, 1) so the data fits a Conv2D model ---------- #
# The sample axis is inferred with -1 and the feature count is read from the array,
# so this keeps working if the split ratio or the dataset size changes.
n_features = x_train.shape[1]
x_train = x_train.reshape(-1, n_features, 1, 1)
x_test = x_test.reshape(-1, n_features, 1, 1)


원본 데이터
(506, 13) (506,)
split + scailing 데이터
(354, 13) (152, 13)



    The Boston housing prices dataset has an ethical problem. You can refer to
    the documentation of this function for further details.

    The scikit-learn maintainers therefore strongly discourage the use of this
    dataset unless the purpose of the code is to study and educate about
    ethical issues in data science and machine learning.

    In this special case, you can fetch the dataset from the original
    source::

        import pandas as pd
        import numpy as np

        data_url = "http://lib.stat.cmu.edu/datasets/boston"
        raw_df = pd.read_csv(data_url, sep="\s+", skiprows=22, header=None)
        data = np.hstack([raw_df.values[::2, :], raw_df.values[1::2, :2]])
        target = raw_df.values[1::2, 2]

    Alternative datasets include the California housing dataset (i.e.
    :func:`~sklearn.datasets.fetch_california_housing`) and the Ames housing
    dataset. You can load the datasets as follows::

        from sklearn.datasets import fetch_california_ho

# 2. 모델


In [3]:
# CNN regressor over the (13, 1, 1) reshaped feature columns:
# two (2, 1) convolutions, then a dense head ending in a single linear output unit.
# Dropout layers are interleaved to reduce overfitting.
model = Sequential([
    Conv2D(32, (2, 1), input_shape=(13, 1, 1), activation='relu'),
    Dropout(0.5),
    Conv2D(16, (2, 1), activation='relu'),
    Dropout(0.3),
    Flatten(),  # collapse the conv feature maps to 2-D (batch, features) for the Dense layers
    Dense(64, activation='relu'),
    Dropout(0.2),
    Dense(64, activation='relu'),
    Dense(1),   # single unit, no activation: raw regression output
])

# 3. 컴파일 및 훈련

In [4]:
# The target is a continuous value (regression), so the model is trained with
# mean squared error; a categorical cross-entropy loss does not apply here.
model.compile(loss='mse', optimizer='adam')

# Stop training once val_loss has failed to improve for 4 epochs and restore the
# weights from the best epoch seen.
ES = EarlyStopping(monitor='val_loss', mode='auto', patience=4, restore_best_weights=True)

# validation_split=0.2 sets aside 20% of the training data to compute val_loss;
# verbose=2 logs one line per epoch.
# The returned History is kept in `history` so the loss curves can be inspected later
# and the cell does not end by echoing the History object's repr.
history = model.fit(
    x_train,
    y_train,
    epochs=64,
    batch_size=5,
    validation_split=0.2,
    callbacks=[ES],
    verbose=2,
)

Epoch 1/64
57/57 - 1s - loss: 397.7469 - val_loss: 91.8751 - 643ms/epoch - 11ms/step
Epoch 2/64
57/57 - 0s - loss: 86.8427 - val_loss: 56.0173 - 89ms/epoch - 2ms/step
Epoch 3/64
57/57 - 0s - loss: 71.7245 - val_loss: 41.9752 - 97ms/epoch - 2ms/step
Epoch 4/64
57/57 - 0s - loss: 60.9937 - val_loss: 38.1709 - 93ms/epoch - 2ms/step
Epoch 5/64
57/57 - 0s - loss: 61.0833 - val_loss: 29.3963 - 87ms/epoch - 2ms/step
Epoch 6/64
57/57 - 0s - loss: 56.7961 - val_loss: 26.1245 - 101ms/epoch - 2ms/step
Epoch 7/64
57/57 - 0s - loss: 57.0135 - val_loss: 22.5400 - 91ms/epoch - 2ms/step
Epoch 8/64
57/57 - 0s - loss: 44.0047 - val_loss: 25.7890 - 88ms/epoch - 2ms/step
Epoch 9/64
57/57 - 0s - loss: 42.2128 - val_loss: 27.4385 - 89ms/epoch - 2ms/step
Epoch 10/64
57/57 - 0s - loss: 41.6357 - val_loss: 20.0170 - 101ms/epoch - 2ms/step
Epoch 11/64
57/57 - 0s - loss: 35.9649 - val_loss: 28.8277 - 101ms/epoch - 2ms/step
Epoch 12/64
57/57 - 0s - loss: 44.2535 - val_loss: 25.6705 - 102ms/epoch - 2ms/step
Epoch 

<keras.callbacks.History at 0x165cfc3caf0>

# 4. 평가 및 예측

In [7]:
# Score the trained model on the held-out test split (the compiled loss is MSE).
loss = model.evaluate(x_test, y_test, verbose=2)
print('loss(mse): ', loss)

# Predict the whole test split and show one sample next to its true target
# as a quick sanity check.
y_predict = model.predict(x_test)
sample_idx = 5
print('x_test:\n', x_test[sample_idx])
print('y_test:\n', y_test[sample_idx])

print('y_predict:\n', y_predict[sample_idx])

# RMSE is the square root of the MSE, i.e. the error in the target's own units.
test_mse = mean_squared_error(y_test, y_predict)
RMSE = np.sqrt(test_mse)
print("RMSE: ", RMSE)

# Coefficient of determination of the predictions against the true targets.
r2 = r2_score(y_test, y_predict)
print("R2: ", r2)

5/5 - 0s - loss: 15.3766 - 18ms/epoch - 4ms/step
loss(mse):  15.376649856567383
x_test:
 [[[-0.39848448]]

 [[-0.46923384]]

 [[-0.0909565 ]]

 [[-0.25742001]]

 [[-1.26668521]]

 [[-0.27176278]]

 [[-2.1768987 ]]

 [[ 0.7914537 ]]

 [[-0.63969222]]

 [[-0.634042  ]]

 [[ 0.36721029]]

 [[ 0.38567854]]

 [[-1.01949614]]]
y_test:
 22.8
y_predict:
 [24.729757]
RMSE:  3.92130711967423
R2:  0.7865315226026884
