In [19]:
# Import necessary libraries
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
import tensorflow as tf
import keras
import pandas as pd

# A. Prepare the dataset

In [2]:
# Fix the CSV file.
# You only need to run this cell ONCE, right after downloading the source dataset.
# The original CSV cannot be parsed directly, so it is rewritten with plain
# read/write operations: only the odd-numbered lines are kept (line 0 and every
# other even-numbered line are dropped), and on each kept line everything from
# the last `,"` onward is discarded.
# NOTE(review): presumably each record's trailing quoted field wraps onto the
# following line in the raw file -- confirm against the downloaded CSV.
#
# Both files are opened in `with` blocks so they are closed (and the output is
# flushed to disk) before any later cell reads the fixed file. The source is
# opened read-only because it is never written to.
with open('../datasets/USA_Housing.csv', 'r') as srcfile:
    srclist = srcfile.readlines()

with open('../datasets/USA_Housing_Fixed.csv', 'w') as destfile:
    # Index 0 is even, so starting at 1 with a step of 2 also skips line 0.
    for line in srclist[1::2]:
        destfile.write(line.rpartition(",\"")[0] + "\n")

In [3]:
# Parse the cleaned, comma-separated file into a float64 NumPy array.
dataset = np.genfromtxt(
    '../datasets/USA_Housing_Fixed.csv',
    dtype='float64',
    delimiter=',',
)
dataset

array([[7.95454586e+04, 5.68286132e+00, 7.00918814e+00, 4.09000000e+00,
        2.30868005e+04, 1.05903356e+06],
       [7.92486425e+04, 6.00289981e+00, 6.73082102e+00, 3.09000000e+00,
        4.01730722e+04, 1.50589091e+06],
       [6.12870672e+04, 5.86588984e+00, 8.51272743e+00, 5.13000000e+00,
        3.68821594e+04, 1.05898799e+06],
       ...,
       [6.37132729e+04, 4.78748780e+00, 8.01791445e+00, 3.12000000e+00,
        4.25076117e+04, 1.03234687e+06],
       [6.30791724e+04, 6.38116562e+00, 8.99598369e+00, 3.05000000e+00,
        2.19407472e+04, 1.20111029e+06],
       [7.52631156e+04, 3.60740473e+00, 7.96045754e+00, 5.36000000e+00,
        3.08887505e+04, 1.18516086e+06]])

We split the dataset into the input data (features) and the targets.

In [4]:
# Column index 5 is used as the regression target: pull it out first, then
# remove that column so `dataset` holds only the input features.
# NOTE: this cell rebinds `dataset`, so running it a second time would strip
# another column -- re-run the loading cell before re-running this one.
targets = dataset[:, 5]
dataset = np.delete(dataset, obj=5, axis=1)
dataset

array([[7.95454586e+04, 5.68286132e+00, 7.00918814e+00, 4.09000000e+00,
        2.30868005e+04],
       [7.92486425e+04, 6.00289981e+00, 6.73082102e+00, 3.09000000e+00,
        4.01730722e+04],
       [6.12870672e+04, 5.86588984e+00, 8.51272743e+00, 5.13000000e+00,
        3.68821594e+04],
       ...,
       [6.37132729e+04, 4.78748780e+00, 8.01791445e+00, 3.12000000e+00,
        4.25076117e+04],
       [6.30791724e+04, 6.38116562e+00, 8.99598369e+00, 3.05000000e+00,
        2.19407472e+04],
       [7.52631156e+04, 3.60740473e+00, 7.96045754e+00, 5.36000000e+00,
        3.08887505e+04]])

Next, we split the dataset into train and test data.

In [5]:
# Hold out 20% of the rows for testing; the fixed random_state keeps the
# split the same from run to run.
(
    dataset_train,
    dataset_test,
    targets_train,
    targets_test,
) = train_test_split(
    dataset,
    targets,
    test_size=0.2,
    random_state=0,
)

# B. Creating the model

Because the feature values span very different scales (from single digits to tens of thousands), we need to normalize them first.

In [83]:
# Normalization layer applied to the model inputs (axis=-1, i.e. the last axis).
normalizer = tf.keras.layers.Normalization(axis=-1)

# Fit the layer's statistics on the training split only, so the test split
# does not influence the preprocessing.
normalizer.adapt(data=dataset_train)

# Show the learned means as a quick check.
print(normalizer.mean.numpy())

[[6.8615773e+04 5.9660344e+00 7.0001631e+00 3.9816835e+00 3.6168910e+04]]


Linear regression with multiple inputs.

In [51]:
# A single Dense unit stacked on the normalizer: one weight per input
# feature plus a bias, i.e. linear regression with multiple inputs.
linear_model = tf.keras.Sequential(layers=[
    normalizer,
    tf.keras.layers.Dense(units=1),
])

Let's run the still-untrained model on the first 10 training rows to check that it produces one prediction per row.

In [52]:
# Sanity check on the untrained model: predict for the first 10 training rows.
linear_model.predict(x=dataset_train[0:10])



array([[-0.2097762 ],
       [ 0.27382514],
       [ 1.7428455 ],
       [-1.5083934 ],
       [-1.2699938 ],
       [ 0.12023443],
       [-0.58222455],
       [ 0.35414168],
       [-0.9193488 ],
       [ 0.61535835]], dtype=float32)

When you call a model for the first time, its weight matrices are built; you can inspect them through the Dense layer's `kernel` attribute.

In [53]:
# Layer 0 is the normalizer; layer 1 is the Dense layer whose weights we inspect.
linear_model.get_layer(index=1).kernel

<tf.Variable 'dense_9/kernel:0' shape=(5, 1) dtype=float32, numpy=
array([[-0.90284944],
       [ 0.191041  ],
       [-0.3287394 ],
       [ 0.25876045],
       [-0.5714843 ]], dtype=float32)>

Compile the model with `Model.compile` and train it with `Model.fit` for 100 epochs.

In [54]:
# Configure training: Adam optimizer, mean absolute error as the loss.
# NOTE(review): the test MAE reported for this model further down (~1.22e6) is
# on the same order as the target values themselves, which suggests the model
# barely moves from its initial weights with unscaled targets at this learning
# rate -- consider scaling the targets; confirm before relying on the result.
linear_model.compile(
    loss='mean_absolute_error',
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.1),
)

# Train for 100 epochs, holding back 20% of the training rows for validation.
linear_model.fit(
    x=dataset_train,
    y=targets_train,
    validation_split=0.2,
    epochs=100,
)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

<keras.callbacks.History at 0x24c1c329a10>

In [55]:
# Start the results table: model name -> loss on the held-out test split.
test_results = {
    'linear_model': linear_model.evaluate(dataset_test, targets_test),
}



Regression with a deep neural network.

In [101]:
# Same normalizer as the linear model, followed by two 128-unit hidden layers
# (ELU, then ReLU) and a single-unit output for the regression value.
dnn_model = tf.keras.Sequential(layers=[
    normalizer,
    tf.keras.layers.Dense(units=128, activation=tf.keras.activations.elu),
    tf.keras.layers.Dense(units=128, activation=tf.keras.activations.relu),
    tf.keras.layers.Dense(units=1),
])

# Same loss as the linear model so the two test scores are comparable.
dnn_model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
    loss='mean_absolute_error',
)

Let's train the model.

In [102]:
# Train for 100 epochs with a 20% validation hold-out; `errors` keeps the
# object returned by fit for later inspection.
errors = dnn_model.fit(
    x=dataset_train,
    y=targets_train,
    epochs=100,
    validation_split=0.2,
)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

In [99]:
# Record the DNN's loss on the held-out test split next to the linear model's.
test_results['dnn_model'] = dnn_model.evaluate(
    x=dataset_test,
    y=targets_test,
)



In [100]:
# One row per model, one column holding its test-set loss.
pd.Series(test_results, name='Mean absolute error ').to_frame()

Unnamed: 0,Mean absolute error
linear_model,1223946.0
dnn_model,83469.2


In [88]:
test_predictions = dnn_model.predict(x=dataset_test)

# Collapse the predictions to 1-D so they subtract element-wise against
# targets_test; the result is the signed error (prediction - actual) per row.
test_predictions.reshape(-1) - targets_test



array([-4.27180469e+04,  5.09120141e+04,  6.54148266e+04,  1.27115890e+05,
        2.39850217e+05,  8.58556551e+04, -1.52335699e+05,  8.28027108e+04,
       -9.22203601e+04,  3.29590995e+04,  1.18392783e+04,  9.21501395e+04,
        4.74883328e+03, -5.79757988e+04,  1.78821023e+05,  1.46221593e+04,
       -9.71651379e+04, -1.31417193e+05, -1.24705634e+04,  2.19014963e+05,
       -1.03927794e+05, -2.56763848e+05,  1.18968884e+05,  1.84988501e+05,
       -1.19523030e+04, -1.17463119e+05,  7.81239411e+04, -1.37696188e+05,
       -1.90663459e+05, -1.22979489e+05,  2.65419178e+04, -1.76145439e+05,
        1.48128882e+05,  4.02035763e+04, -8.97690755e+04,  1.35754173e+04,
       -4.70758303e+04, -1.71257474e+04,  3.32482181e+04,  1.45042317e+04,
       -3.18648876e+04, -9.08736031e+04,  8.44530665e+04, -4.38490838e+04,
        4.77438963e+04, -1.21078219e+05,  7.62003537e+04, -1.74295882e+05,
        9.10034292e+04,  1.22640021e+05,  2.12493611e+04,  3.74498102e+05,
       -6.22182199e+04,  