In [1]:
from keras.datasets import boston_housing
from keras import models
from keras.layers import Dense
from keras import optimizers

In [13]:
# Load the Boston housing dataset as (features, targets) pairs for the train and test splits.
(train_data, train_targets), (test_data, test_targets) = boston_housing.load_data()

# Report the array shapes of both feature matrices.
shape_report = "train data shape : {}, test data shape : {}"
print(shape_report.format(train_data.shape, test_data.shape))

train data shape : (404, 13), test data shape : (102, 13)


In [14]:
# Display the first ten training targets.
train_targets[:10]

array([15.2, 42.3, 50. , 21.1, 17.7, 18.5, 11.3, 15.6, 15.6, 14.4])

In [17]:
# Normalization: shift and scale every feature so that, on the training set,
# it has mean 0 and standard deviation 1.
# The statistics are taken from the training data only and reused for the
# test data; they are captured before train_data is overwritten.
feature_mean = train_data.mean(axis=0)
feature_std = train_data.std(axis=0)
train_data = (train_data - feature_mean) / feature_std
test_data = (test_data - feature_mean) / feature_std

In [18]:
# Model factory.
# The same model is created several times, so construction is wrapped in a function.
# Because there are few samples, the network is kept small: 2 hidden layers of 64 units.
def build_model(input_dim=13):
    """Build and compile a small fully connected regression network.

    Args:
        input_dim: Number of input features per sample. Defaults to 13,
            the second dimension of the training data shape (404, 13).

    Returns:
        A compiled Sequential model made of Dense(64, relu) -> Dense(64, relu)
        -> Dense(1), using the 'rmsprop' optimizer, 'mse' loss and the 'mae'
        metric.
    """
    model = models.Sequential()
    model.add(Dense(64, input_shape=(input_dim,), activation='relu'))
    model.add(Dense(64, activation='relu'))
    # Single output unit with no activation argument: one scalar prediction per sample.
    model.add(Dense(1))
    model.compile(optimizer='rmsprop', loss='mse', metrics=['mae'])
    return model

In [19]:
# K-fold cross-validation: each of the k folds is held out once as the
# validation set while a fresh model is trained on the remaining rows.
import numpy as np

k = 4
num_epochs = 100
num_val_samples = len(train_data) // k
all_scores = []

for i in range(k):
    # Rows [fold_start, fold_end) are held out as fold i.
    fold_start = i * num_val_samples
    fold_end = fold_start + num_val_samples
    val_data = train_data[fold_start:fold_end]
    val_targets = train_targets[fold_start:fold_end]

    # Everything outside the held-out range forms the training partition.
    partial_train_data = np.concatenate(
        [train_data[:fold_start], train_data[fold_end:]], axis=0)
    partial_train_targets = np.concatenate(
        [train_targets[:fold_start], train_targets[fold_end:]], axis=0)

    # Train a new model from scratch (silently), then score it on the held-out fold.
    model = build_model()
    model.fit(x=partial_train_data, y=partial_train_targets,
              epochs=num_epochs, batch_size=1, verbose=0)
    val_mse, val_mae = model.evaluate(val_data, val_targets, verbose=0)
    all_scores.append(val_mae)

In [25]:
# Print the list of per-fold scores, then their mean on the next line.
print(all_scores, np.mean(all_scores), sep="\n")

[2.0406161570312955, 2.272351319246953, 2.9175872330618375, 2.278880035523141]
2.377358686215807


validation score가 2.04~2.9까지 폭이 크다. <br>
각각의 validation score보다 평균값이 더 신뢰할 만하다.

In [None]:
# Keep a per-epoch log of the validation MAE for every fold.
# The epoch count is raised to 500 to see how the score changes with longer training.
k = 4

all_mae_histories = []

num_val_samples = len(train_data) // k
num_epochs = 500

for i in range(k):
    # Rows [i*num_val_samples, (i+1)*num_val_samples) are held out as fold i.
    val_data = train_data[i*num_val_samples : (i+1)*num_val_samples]
    val_targets = train_targets[i*num_val_samples : (i+1)*num_val_samples]

    # The remaining rows (before and after the held-out range) are used for training.
    partial_train_data = np.concatenate([train_data[:i*num_val_samples],train_data[(i+1)*num_val_samples:]],axis=0)
    partial_train_targets = np.concatenate([train_targets[:i*num_val_samples],train_targets[(i+1)*num_val_samples:]],axis=0)

    model = build_model()
    # Passing validation_data makes fit() record validation metrics after each epoch.
    hist = model.fit(x=partial_train_data, y=partial_train_targets,
                     epochs=num_epochs, batch_size=1, verbose=0,
                     validation_data=(val_data, val_targets))
    # fit() returns a History object (bound to `hist`); its .history dict maps
    # metric names to per-epoch value lists. The validation-MAE key is
    # 'val_mae' in newer Keras releases and 'val_mean_absolute_error' in older
    # ones, so accept either.
    mae_key = 'val_mae' if 'val_mae' in hist.history else 'val_mean_absolute_error'
    mae_history = hist.history[mae_key]
    all_mae_histories.append(mae_history)