In [1]:
'''3_24 加载波士顿房价数据
2019_12_6
'''
from keras.datasets import boston_housing

# Load the Boston housing dataset; load_data() yields a train pair and a test
# pair, each unpacked here into an inputs array and a targets array.
(train_data, train_targets), (test_data, test_targets) = boston_housing.load_data()

Using TensorFlow backend.


In [2]:
# Inspect the dimensions of the training inputs.
train_data.shape

(404, 13)

In [3]:
# Inspect the dimensions of the test inputs.
test_data.shape

(102, 13)

In [4]:
# Display the raw (not yet standardized) test inputs.
test_data

array([[1.80846e+01, 0.00000e+00, 1.81000e+01, ..., 2.02000e+01,
        2.72500e+01, 2.90500e+01],
       [1.23290e-01, 0.00000e+00, 1.00100e+01, ..., 1.78000e+01,
        3.94950e+02, 1.62100e+01],
       [5.49700e-02, 0.00000e+00, 5.19000e+00, ..., 2.02000e+01,
        3.96900e+02, 9.74000e+00],
       ...,
       [1.83377e+00, 0.00000e+00, 1.95800e+01, ..., 1.47000e+01,
        3.89610e+02, 1.92000e+00],
       [3.58090e-01, 0.00000e+00, 6.20000e+00, ..., 1.74000e+01,
        3.91700e+02, 9.71000e+00],
       [2.92400e+00, 0.00000e+00, 1.95800e+01, ..., 1.47000e+01,
        2.40160e+02, 9.81000e+00]])

In [5]:
# Inspect the dimensions of the training targets.
train_targets.shape

(404,)

In [6]:
'''3_25 数据标准化
2019_12_6
'''
# Feature-wise standardization. Both statistics come from the training inputs
# only and are then applied, in place, to the training and the test inputs.

# mean: per-column average of the training inputs.
mean = train_data.mean(axis=0)
for features in (train_data, test_data):
    features -= mean

# std: per-column standard deviation, measured on the centered training inputs.
std = train_data.std(axis=0)
for features in (train_data, test_data):
    features /= std

In [7]:
'''3_26 模型定义
2019_12_6
'''
from keras import models
from keras import layers

# The same architecture has to be instantiated several times, so the model is
# built by a function.
def build_model():
    """Create and compile a fresh regression network.

    The network is two 64-unit ReLU hidden layers followed by a single output
    unit. The input width is read from the module-level ``train_data``.

    Returns:
        The compiled ``Sequential`` model (optimizer 'rmsprop', loss 'mse',
        metric 'mae').
    """
    n_features = train_data.shape[1]
    network = models.Sequential([
        layers.Dense(64, activation='relu', input_shape=(n_features,)),
        layers.Dense(64, activation='relu'),
        # Final layer has no activation, i.e. it is linear: the typical setup
        # for scalar regression. An activation would restrict the output range;
        # a purely linear layer lets the network predict values in any range.
        layers.Dense(1),
    ])
    network.compile(
        optimizer='rmsprop',
        # mse (mean squared error): the square of the difference between the
        # predictions and the targets.
        loss='mse',
        # mae (mean absolute error): the absolute value of the difference
        # between the predictions and the targets.
        metrics=['mae'],
    )
    return network

In [8]:
'''3_27 K折验证
2019_12_6
'''

import numpy as np

k = 4
num_val_samples = len(train_data) // k
num_epochs = 100
all_scores = []

for i in range(k):
    print('processing fold #', i)
    # Row range of the held-out partition for this fold (partition number i).
    fold_start = i * num_val_samples
    fold_stop = fold_start + num_val_samples

    # Validation data: the rows of partition i.
    val_data = train_data[fold_start:fold_stop]
    val_targets = train_targets[fold_start:fold_stop]

    # Training data: the rows of every other partition. With axis=0,
    # concatenate() stacks the pieces before and after the held-out range
    # along the first axis (row-wise).
    partial_train_data = np.concatenate(
        (train_data[:fold_start], train_data[fold_stop:]), axis=0)
    partial_train_targets = np.concatenate(
        (train_targets[:fold_start], train_targets[fold_stop:]), axis=0)

    # Build a fresh Keras model (already compiled) for this fold.
    model = build_model()
    # Train in silent mode (verbose=0).
    model.fit(partial_train_data, partial_train_targets,
              epochs=num_epochs, batch_size=1, verbose=0)
    # Evaluate on the held-out partition and record only the MAE.
    val_mse, val_mae = model.evaluate(val_data, val_targets, verbose=0)
    all_scores.append(val_mae)
    

processing fold # 0
Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Use tf.cast instead.
processing fold # 1
processing fold # 2
processing fold # 3


In [10]:
# Validation MAE recorded for each fold, in fold order.
all_scores

[2.0452951842015334,
 2.3342570455947724,
 2.8091593119177487,
 2.3743369785865935]

In [11]:
# Average of the per-fold validation MAE values.
np.mean(all_scores)

2.390762130075162