## House Price Prediction: A Regression Problem

In [1]:
from keras.datasets import boston_housing

# Load the Boston housing dataset; load_data() returns a
# (features, targets) pair for the training split and another for the test split.
(train_data, train_targets), (test_data, test_targets) = boston_housing.load_data()

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


Downloading data from https://s3.amazonaws.com/keras-datasets/boston_housing.npz


In [2]:
# Sanity check of the split sizes: feature arrays are 2-D (samples, features),
# target arrays are 1-D with one value per sample.
train_data.shape, train_targets.shape, test_data.shape, test_targets.shape

((404, 13), (404,), (102, 13), (102,))

#### 👇数据标准化：

In [4]:
# Standardise every feature column to zero mean and unit variance.
# The statistics come from the training split only and are then reused for the
# test split, so no information from the test data leaks into preprocessing.
mean = train_data.mean(axis=0)  # per-column mean
train_data -= mean
std = train_data.std(axis=0)    # per-column standard deviation
# A constant column has std == 0; dividing by it would fill the column with
# NaN/inf. Use 1 instead so such a column simply stays at 0 after centring.
std[std == 0] = 1.0
train_data /= std

test_data -= mean
test_data /= std

In [10]:
# Standardisation is element-wise and in place, so the array shape is unchanged.
train_data.shape

(404, 13)

In [9]:
# A single sample (one row) is a 1-D vector with one entry per feature.
train_data[1].shape

(13,)

In [12]:
# Number of feature columns, as a plain int.
train_data.shape[1]

13

In [11]:
# The trailing comma wraps the int in a 1-tuple; this is the form passed as
# input_shape in build_model below.
train_data.shape[1],

(13,)

#### 👇定义模型：

In [29]:
def build_model(input_dim=None):
    """Build and compile a small fully connected regression network.

    The network has one hidden Dense layer (64 units, ReLU) followed by a
    single linear output unit, which is the usual head for scalar regression.

    Args:
        input_dim: Number of input features. When omitted, the number of
            columns of the notebook-level ``train_data`` array is used, which
            is what the original zero-argument call did.

    Returns:
        A compiled Keras ``Sequential`` model using the RMSprop optimizer,
        mean squared error as the loss and mean absolute error as a metric.
    """
    # The notebook only imports `keras` (never `tensorflow as tf`), so the
    # previous `tf.keras...` references raised NameError on a fresh kernel.
    # Importing locally keeps this cell runnable on its own.
    from keras import layers, models

    if input_dim is None:
        input_dim = train_data.shape[1]

    model = models.Sequential([
        layers.Dense(64, activation='relu', input_shape=(input_dim,)),
        layers.Dense(1),  # no activation: unconstrained scalar prediction
    ])

    model.compile(optimizer='rmsprop',
                  loss='mse',       # mean squared error
                  metrics=['mae'])  # mean absolute error
    return model

#### 👇K-fold Cross Validation：

In [19]:
import numpy as np

# K-fold cross-validation: train a fresh model k times, each time holding out
# a different contiguous slice of the training data for validation, and keep
# the validation MAE of every fold.
k = 4
num_val_samples = len(train_data) // k
num_epochs = 100
all_scores = []

for i in range(k):
    print('processing fold #', i)

    # Rows [start, stop) are the validation set of the i-th fold.
    start, stop = i * num_val_samples, (i + 1) * num_val_samples
    val_data, val_targets = train_data[start:stop], train_targets[start:stop]

    # Every other row is used for training. axis=0 is the sample (row) axis,
    # so whole samples are removed and the feature columns stay intact.
    partial_train_data = np.delete(train_data, slice(start, stop), axis=0)
    partial_train_targets = np.delete(train_targets, slice(start, stop), axis=0)

    # A new, untrained model per fold so that folds do not share weights.
    model = build_model()
    model.fit(
        partial_train_data,
        partial_train_targets,
        epochs=num_epochs,
        batch_size=1,
        verbose=0,  # silent mode
    )

    val_mse, val_mae = model.evaluate(val_data, val_targets, verbose=0)
    all_scores.append(val_mae)

processing fold # 0
processing fold # 1
processing fold # 2
processing fold # 3


#### 👇交叉验证结果：各fold的验证MAE及其平均值

In [20]:
all_scores

[1.8758149, 2.1908677, 2.465655, 2.2012665]

In [21]:
np.mean(all_scores)

2.183401

#### 👇训练500个epoch，并保存每个fold的验证结果

In [33]:
# Same K-fold procedure as before, but with 500 training epochs per fold.
# Note: despite its name, all_mae_histories ends up holding one final
# validation MAE per fold (from model.evaluate), not a per-epoch history.
num_epochs = 500
k = 4
num_val_samples = len(train_data) // k
all_mae_histories = []

for i in range(k):
    print('processing fold #', i)

    # Contiguous window of rows held out for validation in this fold.
    fold = slice(i * num_val_samples, (i + 1) * num_val_samples)
    val_data = train_data[fold]
    val_targets = train_targets[fold]

    # Boolean mask selecting every row outside the validation window
    # (assumes train_data and train_targets have the same length).
    keep = np.ones(len(train_data), dtype=bool)
    keep[fold] = False
    partial_train_data = train_data[keep]
    partial_train_targets = train_targets[keep]

    # Fresh model for each fold.
    model = build_model()
    model.fit(
        partial_train_data,
        partial_train_targets,
        epochs=num_epochs,
        batch_size=1,
        verbose=0,
    )

    val_mse, val_mae = model.evaluate(val_data, val_targets, verbose=0)
    all_mae_histories.append(val_mae)

processing fold # 0
processing fold # 1
processing fold # 2
processing fold # 3


In [34]:
all_mae_histories

[2.1035893, 2.8346531, 2.9892159, 2.5115106]

In [40]:
np.mean(all_mae_histories)

2.6097422