In [26]:
# 导入需要用到的package
import numpy as np
import json
# 读入训练数据
datafile = './data/housing.data'
data = np.fromfile(datafile, sep=' ')

In [27]:
# 读入之后的数据被转化成1维array，其中array的第0-13项是第一条数据，第14-27项是第二条数据，以此类推.... 
# 这里对原始数据做reshape，变成N x 14的形式
feature_names = [ 'CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE','DIS', 
                 'RAD', 'TAX', 'PTRATIO', 'B', 'LSTAT', 'MEDV' ]
feature_num = len(feature_names)
data = data.reshape([data.shape[0] // feature_num, feature_num])

In [18]:
# 查看数据
x = data[0]
print(x.shape)
print(x)

(14,)
[6.320e-03 1.800e+01 2.310e+00 0.000e+00 5.380e-01 6.575e+00 6.520e+01
 4.090e+00 1.000e+00 2.960e+02 1.530e+01 3.969e+02 4.980e+00 2.400e+01]


In [19]:
ratio = 0.8
offset = int(data.shape[0] * ratio)
training_data = data[:offset]
training_data.shape

(404, 14)

### 数据归一化处理

对每个特征进行归一化处理，使得每个特征的取值缩放到0~1之间。这样做有两个好处：一是模型训练更高效；二是特征前的权重大小可以代表该变量对预测结果的贡献度（因为每个特征值本身的范围相同）。

In [20]:
# 计算train数据集的最大值，最小值，平均值
maximums, minimums, avgs, std = \
                     training_data.max(axis=0), \
                     training_data.min(axis=0), \
     training_data.sum(axis=0) / training_data.shape[0], \
         training_data.std(axis=0)
# 对数据进行归一化处理
for i in range(feature_num):
    #print(maximums[i], minimums[i], avgs[i])
    #data[:, i] = (data[:, i] - minimums[i]) / (maximums[i] - minimums[i])
    data[:, i] = (data[:, i] - avgs[i]) / std[i]

### 封装成load data函数

将上述几个数据处理操作封装成`load data`函数，以便下一步模型的调用，实现方法如下。

In [21]:
def load_data():
    # 从文件导入数据
    datafile = './work/housing.data'
    data = np.fromfile(datafile, sep=' ')

    # 每条数据包括14项，其中前面13项是影响因素，第14项是相应的房屋价格中位数
    feature_names = [ 'CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE', \
                      'DIS', 'RAD', 'TAX', 'PTRATIO', 'B', 'LSTAT', 'MEDV' ]
    feature_num = len(feature_names)

    # 将原始数据进行Reshape，变成[N, 14]这样的形状
    data = data.reshape([data.shape[0] // feature_num, feature_num])

    # 将原数据集拆分成训练集和测试集
    # 这里使用80%的数据做训练，20%的数据做测试
    # 测试集和训练集必须是没有交集的
    ratio = 0.8
    offset = int(data.shape[0] * ratio)
    training_data = data[:offset]

    # 计算训练集的最大值，最小值，平均值
    maximums, minimums, avgs = training_data.max(axis=0), training_data.min(axis=0), \
                                 training_data.sum(axis=0) / training_data.shape[0]

    # 对数据进行归一化处理
    for i in range(feature_num):
        #print(maximums[i], minimums[i], avgs[i])
        data[:, i] = (data[:, i] - minimums[i]) / (maximums[i] - minimums[i])

    # 训练集和测试集的划分比例
    training_data = data[:offset]
    test_data = data[offset:]
    return training_data, test_data

将上述计算预测输出的过程以“类和对象”的方式来描述，类成员变量有参数$w$和$b$。通过写一个`forward`函数（代表“前向计算”）完成上述从特征和参数到输出预测值的计算过程，代码如下所示。

In [22]:
class Network(object):
    def __init__(self, num_of_weights):
        np.random.seed(0) 
        self.w1 = np.random.randn(num_of_weights, 10)
        self.b1 = np.zeros(10)
        self.w2 = np.random.randn(10,1)
        self.b2 = np.zeros(1)
    
    def Relu(self,x):
        return np.where(x < 0,0,x)

    def forward(self, x):
        z1 = np.dot(x, self.w1) + self.b1
        z1 = self.Relu(z1)
        z = np.dot(z1, self.w2) + self.b2
        return z
        
    def loss(self, z, y):
        error = z - y
        num_samples = error.shape[0]
        cost = error * error
        cost = np.sum(cost) / num_samples
        return cost

    def gradient(self, x, y):
        z = self.forward(x)
        N = x.shape[0]
        gradient_w1 = 1. / N * np.sum((z - y) * x, axis = 0)
        gradient_w1 = gradient_w1[:, np.newaxis]
        gradient_b1 = 1. / N * np.sum(z - y)

        
        return gradient_w1, gradient_b1

    def updata(self, gradient_w1, gradient_b1, eta = 0.01):
        self.w1 = self.w1 + eta * gradient_w1
        self.b1 = self.b1 + eta * gradient_b1


    def train(self, training_data, num_epochs, batch_size = 10, eta = 0.01):
        n = len(training_data)
        losses = []
        for epoch_id in range(num_epochs):
            np.random.shuffle(training_data)
            mini_batches = [training_data[k:k + batch_size] for k in range(0, n, batch_size)]
            for iter_id, mini_batch in enumerate(mini_batches):
                x = mini_batch[:, :-1]
                y = mini_batch[:, -1:]
                a = self.forward(x)
                loss = self.loss(a, y)
                gradient_w1, gradient_b1 = self.gradient(x, y)
                self.updata(gradient_w1, gradient_b1, eta)
                losses.append(loss)
                print('epoch{:3d} / iter{:3d}, loss = {:.4f}'.format(epoch_id, iter_id, loss))
        return losses

基于Network类的定义，模型的计算过程如下所示。

In [23]:
def train():
    train_data, test_data = load_data()
    net = Network(13)
    losses = net.train(train_data, num_epochs = 50, batch_size = 100, eta = 0.1)
    plot_x = np.arange(len(losses))
    plot_y = np.array(losses)
    plt.plot(plot_x, plot_y)
    plt.show()

In [24]:
def plot_3D_neural_work_weight():

    training_data, test_data = load_data()
    x = training_data[:, :-1]
    y = training_data[:, -1:]

    net = Network(13)
    losses = []
    w5 = np.arange(-160.0, 160.0, 1.0)
    w9 = np.arange(-160.0, 160.0, 1.0)
    losses = np.zeros([len(w5), len(w9)])

    for i in range(len(w5)):
        for j in range(len(w9)):
            net.w1[5] = w5[i]
            net.w1[9] = w9[j]
            z = net.forward(x)
            loss = net.loss(z, y)
            losses[i, j] = loss

    fig = plt.figure()
    ax = Axes3D(fig)

    w5, w9 = np.meshgrid(w5, w9)

    ax.plot_surface(w5, w9, losses, rstride=1, cstride=1, cmap='rainbow')
    plt.show()


In [25]:
if __name__ == '__main__':
    plot_3D_neural_work_weight()
    train()

FileNotFoundError: [Errno 2] No such file or directory: './work/housing.data'