# Boston Home Price Forecasting Through PaddlePaddle

In [34]:
import paddle
from paddle.nn import Linear
import paddle.nn.functional as F
import numpy as np
import os
import random

datafile = './housing.data'
data = np.fromfile(datafile, sep = ' ')
feature_names = ['CRIM', 'zn', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE', 'DIS',
                'RAD', 'TAX', 'PTRATIO', 'B', 'LSTAT', 'MEDV']
ratio = 0.8

def load_data(data, feature_names, ratio):
    feature_num = len(feature_names)

    # reshape the data into [N, 14]
    data = data.reshape([data.shape[0] // feature_num, feature_num])

    # calculate the maximums, minimums, and averages for every column
    maximums = data.max(axis = 0)
    minimums = data.min(axis = 0)
    avgs = data.sum(axis = 0) / data.shape[0]

    print(data)
    # normalize the data into [0, 1]
    for i in range(feature_num):
        data[:, i] = (data[:, i] - minimums[i]) / (maximums[i] - minimums[i])

    # spilt the data into training data(80%) and testing data(20%)
    offset = int(data.shape[0] * ratio)
    train_data = data[: offset]
    test_data = data[offset : -1]

    return train_data, test_data, maximums, minimums, avgs

## Model Definition

Create a Python class to define your model's **initialization and forward function**.

In [35]:
class Regressor(paddle.nn.Layer):
    def __init__(self):
        super(Regressor, self).__init__()

        # define one fully connected layer, 
        # which has 13 input dimensions and 1 output dimensions
        self.fc = Linear(in_features = 13, out_features = 1)
    
    def forward(self, inputs):
        x = self.fc(inputs)
        return x

## Training Configurations

In [36]:
# declare the predefined model
model = Regressor()

# open the model training on
model.train()

# load_data
train_data, test_data, max_values, min_values, avg_values = \
            load_data(data, feature_names, ratio)

# define optimization algorithm(SGD)
# define the learning rate as 0.01
opt = paddle.optimizer.SGD(learning_rate = 0.01, parameters = model.parameters())

[[6.3200e-03 1.8000e+01 2.3100e+00 ... 3.9690e+02 4.9800e+00 2.4000e+01]
 [2.7310e-02 0.0000e+00 7.0700e+00 ... 3.9690e+02 9.1400e+00 2.1600e+01]
 [2.7290e-02 0.0000e+00 7.0700e+00 ... 3.9283e+02 4.0300e+00 3.4700e+01]
 ...
 [6.0760e-02 0.0000e+00 1.1930e+01 ... 3.9690e+02 5.6400e+00 2.3900e+01]
 [1.0959e-01 0.0000e+00 1.1930e+01 ... 3.9345e+02 6.4800e+00 2.2000e+01]
 [4.7410e-02 0.0000e+00 1.1930e+01 ... 3.9690e+02 7.8800e+00 1.1900e+01]]


## Training

In [37]:
# set epochs and batch size
EPOCH_NUM = 10
BATCH_SIZE = 10
n = len(train_data)

# 
for epoch_id in range(EPOCH_NUM):
    # random arrange the training data in every epoch
    np.random.shuffle(train_data)
    # spilt the training data in batch size
    mini_batches = [train_data[k : k + BATCH_SIZE] for k in range(0, n, BATCH_SIZE)]
    for iter_id, mini_batch in enumerate(mini_batches):
        x = mini_batch[:, : -1]
        y = mini_batch[:, -1 :]
        # convert numpy to tensor type
        house_features = paddle.to_tensor(x, dtype="float32")
        prices = paddle.to_tensor(y, dtype="float32")
        
        # 1. forward calculation
        predicts = model(house_features)
        # 2. loss
        loss = F.square_error_cost(predicts, label = prices)
        avg_loss = paddle.mean(loss)
        if iter_id % 20 == 0:
            print("epoch: {} / iter: {}, loss = {}".format(epoch_id, iter_id, float(avg_loss)))
        # 3. Backpropagation
        avg_loss.backward()
        # 4. update parameters
        opt.step()
        # 5. clear the gradient variables
        opt.clear_grad()
        

epoch: 0 / iter: 0, loss = 0.9022024869918823
epoch: 0 / iter: 20, loss = 0.27136388421058655
epoch: 0 / iter: 40, loss = 0.24334724247455597
epoch: 1 / iter: 0, loss = 0.11687825620174408
epoch: 1 / iter: 20, loss = 0.1603066474199295
epoch: 1 / iter: 40, loss = 0.046303536742925644
epoch: 2 / iter: 0, loss = 0.20476026833057404
epoch: 2 / iter: 20, loss = 0.08621034771203995
epoch: 2 / iter: 40, loss = 0.05824138969182968
epoch: 3 / iter: 0, loss = 0.11593504995107651
epoch: 3 / iter: 20, loss = 0.12232111394405365
epoch: 3 / iter: 40, loss = 0.10337256640195847
epoch: 4 / iter: 0, loss = 0.04896111041307449
epoch: 4 / iter: 20, loss = 0.09840662032365799
epoch: 4 / iter: 40, loss = 0.11095009744167328
epoch: 5 / iter: 0, loss = 0.015612234361469746
epoch: 5 / iter: 20, loss = 0.0559270866215229
epoch: 5 / iter: 40, loss = 0.02309376560151577
epoch: 6 / iter: 0, loss = 0.0636683851480484
epoch: 6 / iter: 20, loss = 0.047666728496551514
epoch: 6 / iter: 40, loss = 0.020763441920280457

# Save Model

In [38]:
# save model
paddle.save(model.state_dict(), 'LR_model.pdparams')
print("save Successfully in ./LR_model.pdparms")

save Successfully in ./LR_model.pdparms


# Test Model

In [39]:
def load_example(test_data):
    idx = np.random.randint(0, test_data.shape[0])
    one_data, label = test_data[idx, : -1], test_data[idx, -1]
    # reshape the selected testing data
    one_data = one_data.reshape([1, -1])

    return one_data, label

In [44]:
# load model
model_dict = paddle.load('LR_model.pdparams')
model.load_dict(model_dict)
# start evaluation
model.eval()

one_data, label = load_example(test_data)
# Varialbel mode in dynamic graph
one_data = paddle.to_tensor(one_data, dtype="float32")
predict = model(one_data)

# normalize the result
predict = predict * (max_values[-1] - min_values[-1]) + avg_values[-1]
label = label * (max_values[-1] - min_values[-1]) + avg_values[-1]

print("Inference result is {}, the corresponding label is {}".format(float(predict), label))

Inference result is 32.806671142578125, the corresponding label is 31.632806324110696
