In [None]:
import os, sys
import keras
import numpy as np
import pandas as pd

import tensorflow as tf
# Only show TensorFlow log messages at ERROR level or above.
# `tf.logging` exists in TensorFlow 1.x only; TensorFlow 2.x exposes the same
# API under `tf.compat.v1.logging`, so fall back to it when the legacy
# attribute is missing.
try:
    tf.logging.set_verbosity(tf.logging.ERROR)
except AttributeError:
    tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)

In [None]:
# Fetch the Boston housing dataset as (features, targets) pairs for the
# train and test splits.
(raw_train_data, train_targets), (raw_test_data, test_targets) = (
    keras.datasets.boston_housing.load_data()
)

# Standardise every feature column. Both statistics come from the training
# split only and are reused unchanged for the test split.
mean = raw_train_data.mean(axis=0)
train_data = raw_train_data - mean
std = train_data.std(axis=0)
train_data = train_data / std

test_data = (raw_test_data - mean) / std

# Show the per-feature statistics used for the scaling.
for stat in (mean, std):
    print(stat)

In [None]:
# Wrap the raw (unscaled) training features in a labelled DataFrame so the
# preview and summary statistics are readable.
bos = pd.DataFrame(
    raw_train_data,
    columns=['CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE',
             'DIS', 'RAD', 'TAX', 'PTRATIO', 'B', 'LSTAT'],  # boston.feature_names
)
for view in (bos.head(), bos.describe()):
    print(view)

# Same preview/summary for the training targets.
medv = pd.DataFrame(train_targets, columns=['MEDV'])
for view in (medv.head(), medv.describe()):
    print(view)


In [None]:
# Regression network: two 64-unit ReLU hidden layers feeding a single output
# unit that is created without an explicit activation.
n_features = train_data.shape[1]
model = keras.models.Sequential([
    keras.layers.Dense(64, activation='relu', input_shape=(n_features,)),
    keras.layers.Dense(64, activation='relu'),
    keras.layers.Dense(1),
])

# Optimise log-cosh loss with RMSprop and track mean absolute percentage error.
model.compile(optimizer='rmsprop', loss='logcosh', metrics=['mape'])
model.summary()

In [None]:
# Train for 300 epochs in mini-batches of 16 and keep the per-epoch metrics.
# NOTE(review): the test split is passed as validation data here, so the
# validation curves are computed on the same samples used for the later
# predictions.
history = model.fit(
    train_data,
    train_targets,
    validation_data=(test_data, test_targets),
    epochs=300,
    batch_size=16,
    verbose=1,
)

In [None]:
import matplotlib.pyplot as plt
%matplotlib inline

epochs = range(10, len(history.history['loss']) + 1)
plt.plot(epochs, history.history['loss'][9:], label='train loss')
plt.plot(epochs, history.history['val_loss'][9:], label='validation loss')
plt.legend()
plt.show()

plt.plot(epochs, history.history['mean_absolute_percentage_error'][9:], label='train MAPE')
plt.plot(epochs, history.history['val_mean_absolute_percentage_error'][9:], label='validation MAPE')
plt.legend()
plt.show()

In [None]:
# CRIM    - per capita crime rate by town
# ZN      - proportion of residential land zoned for lots over 25,000 sq.ft.
# INDUS   - proportion of non-retail business acres per town
# CHAS    - Charles River dummy variable (1 if tract bounds river; 0 otherwise)
# NOX     - nitric oxides concentration (parts per 10 million)
# RM      - average number of rooms per dwelling
# AGE     - proportion of owner-occupied units built prior to 1940
# DIS     - weighted distances to five Boston employment centres
# RAD     - index of accessibility to radial highways
# TAX     - full-value property-tax rate per $10,000
# PTRATIO - pupil-teacher ratio by town
# B       - 1000(Bk - 0.63)^2 where Bk is the proportion of blacks by town
# LSTAT   - % lower status of the population
#
# MEDV    - median value of owner-occupied homes in $1000's
 
# Index of the test row to inspect.
sample = 5
rule = '-' * 60

# The model expects a batch, so give the standardised row a leading axis.
pred_input = test_data[sample][np.newaxis, :]
print(pred_input)

print(rule)
print(" CRIM    ZN    INDUS   CHAS   NOX    RM    AGE      DIS   RAD    TAX     PTR     B     LSTAT ---   MEDV")
# Print the 13 raw (unscaled) feature values on one line, followed by the
# true target value for the same row.
raw_row = raw_test_data[sample]
formatted_features = ["{: 6.2f}".format(raw_row[i]) for i in range(13)]
print(' '.join(formatted_features), end=' ')
print("    {: 6.2f}".format(test_targets[sample]))
print(rule)
print("Predicted MEDV: {: 6.2f}".format(model.predict(pred_input)[0][0]))
print(rule)


In [None]:
# Sweep the first feature (CRIM) of test row 12 over nine increasing values
# and print the model's prediction for each, leaving all other features as-is.
for i in range(1, 10):
    crime_rate = 0.06 * 100.0 * i

    # Work on a copy so the raw test data is never modified.
    new_test_data = raw_test_data[12].copy()
    new_test_data[0] = crime_rate

    # Apply the same standardisation that was used for the training data.
    new_test_data = (new_test_data - mean) / std

    pred_input = new_test_data[np.newaxis, :]
    res = model.predict(pred_input)[0][0]
    print("Crime ratio: {} Prediction MEDV {}".format(crime_rate, res))