In [593]:
import pandas as pd
from datetime import datetime
import tensorflow as tf
import matplotlib.pyplot as plt
import matplotlib
import time
import numpy as np
%matplotlib inline
%load_ext tensorboard

The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard


In [594]:
tf.__version__

'2.3.0'

In [595]:
DATASET_PATH = "./data/melb_data.csv"

In [596]:
melbourne_data= pd.read_csv(DATASET_PATH) 

In [597]:
melbourne_data.columns

Index(['Suburb', 'Address', 'Rooms', 'Type', 'Price', 'Method', 'SellerG',
       'Date', 'Distance', 'Postcode', 'Bedroom2', 'Bathroom', 'Car',
       'Landsize', 'BuildingArea', 'YearBuilt', 'CouncilArea', 'Lattitude',
       'Longtitude', 'Regionname', 'Propertycount'],
      dtype='object')

In [598]:
EXCLUDE_COLUMNS = ['Address','Method','SellerG','Date','Postcode','CouncilArea','Lattitude','Longtitude']
melbourne_data = melbourne_data.drop(columns=EXCLUDE_COLUMNS)

In [599]:
target_column = melbourne_data.pop('Price')

In [600]:
melbourne_data.head()

Unnamed: 0,Suburb,Rooms,Type,Distance,Bedroom2,Bathroom,Car,Landsize,BuildingArea,YearBuilt,Regionname,Propertycount
0,Abbotsford,2,h,2.5,2.0,1.0,1.0,202.0,,,Northern Metropolitan,4019.0
1,Abbotsford,2,h,2.5,2.0,1.0,0.0,156.0,79.0,1900.0,Northern Metropolitan,4019.0
2,Abbotsford,3,h,2.5,3.0,2.0,0.0,134.0,150.0,1900.0,Northern Metropolitan,4019.0
3,Abbotsford,3,h,2.5,3.0,2.0,1.0,94.0,,,Northern Metropolitan,4019.0
4,Abbotsford,4,h,2.5,3.0,1.0,2.0,120.0,142.0,2014.0,Northern Metropolitan,4019.0


In [601]:
# lower 
melbourne_data['Suburb']=melbourne_data['Suburb'].str.lower()
melbourne_data['Regionname']=melbourne_data['Regionname'].str.lower()

numeric_features = melbourne_data.dtypes[melbourne_data.dtypes != 'object'].index
melbourne_data[numeric_features] = melbourne_data[numeric_features].apply(
    lambda x: (x - x.mean()) / (x.std()))
melbourne_data[numeric_features] = melbourne_data[numeric_features].fillna(-1)

In [602]:
melbourne_data.head()

Unnamed: 0,Suburb,Rooms,Type,Distance,Bedroom2,Bathroom,Car,Landsize,BuildingArea,YearBuilt,Regionname,Propertycount
0,abbotsford,-0.981427,h,-1.301437,-0.947,-0.772347,-0.633757,-0.089312,-1.0,-1.0,northern metropolitan,-0.784596
1,abbotsford,-0.981427,h,-1.301437,-0.947,-0.772347,-1.672574,-0.100839,-0.134872,-1.735382,northern metropolitan,-0.784596
2,abbotsford,0.064874,h,-1.301437,0.088281,0.673342,-1.672574,-0.106352,-0.003637,-1.735382,northern metropolitan,-0.784596
3,abbotsford,0.064874,h,-1.301437,0.088281,0.673342,-0.633757,-0.116375,-1.0,-1.0,northern metropolitan,-0.784596
4,abbotsford,1.111175,h,-1.301437,0.088281,-0.772347,0.40506,-0.10986,-0.018424,1.32307,northern metropolitan,-0.784596


In [603]:
melbourne_data = pd.get_dummies(melbourne_data, dummy_na=True)
melbourne_data.shape

(13580, 337)

In [604]:
melbourne_data.head()

Unnamed: 0,Rooms,Distance,Bedroom2,Bathroom,Car,Landsize,BuildingArea,YearBuilt,Propertycount,Suburb_abbotsford,...,Type_nan,Regionname_eastern metropolitan,Regionname_eastern victoria,Regionname_northern metropolitan,Regionname_northern victoria,Regionname_south-eastern metropolitan,Regionname_southern metropolitan,Regionname_western metropolitan,Regionname_western victoria,Regionname_nan
0,-0.981427,-1.301437,-0.947,-0.772347,-0.633757,-0.089312,-1.0,-1.0,-0.784596,1,...,0,0,0,1,0,0,0,0,0,0
1,-0.981427,-1.301437,-0.947,-0.772347,-1.672574,-0.100839,-0.134872,-1.735382,-0.784596,1,...,0,0,0,1,0,0,0,0,0,0
2,0.064874,-1.301437,0.088281,0.673342,-1.672574,-0.106352,-0.003637,-1.735382,-0.784596,1,...,0,0,0,1,0,0,0,0,0,0
3,0.064874,-1.301437,0.088281,0.673342,-0.633757,-0.116375,-1.0,-1.0,-0.784596,1,...,0,0,0,1,0,0,0,0,0,0
4,1.111175,-1.301437,0.088281,-0.772347,0.40506,-0.10986,-0.018424,1.32307,-0.784596,1,...,0,0,0,1,0,0,0,0,0,0


In [605]:
melbourne_data["Price"]=target_column

In [606]:
melbourne_data = melbourne_data.sample(frac=1)
melbourne_data.head()

Unnamed: 0,Rooms,Distance,Bedroom2,Bathroom,Car,Landsize,BuildingArea,YearBuilt,Propertycount,Suburb_abbotsford,...,Regionname_eastern metropolitan,Regionname_eastern victoria,Regionname_northern metropolitan,Regionname_northern victoria,Regionname_south-eastern metropolitan,Regionname_southern metropolitan,Regionname_western metropolitan,Regionname_western victoria,Regionname_nan,Price
813,2.157476,0.641063,2.158844,0.673342,2.482694,0.014931,-0.01103,-0.393956,0.802676,0,...,0,0,0,0,0,1,0,0,0,950000.0
819,0.064874,0.641063,0.088281,0.673342,-0.633757,-0.051474,-1.0,-1.0,0.802676,0,...,0,0,0,0,0,1,0,0,0,800000.0
12794,1.111175,0.692182,1.123562,-0.772347,1.443877,0.043247,-0.012879,0.142615,1.350114,0,...,1,0,0,0,0,0,0,0,0,1335000.0
9585,1.111175,-0.210911,1.123562,0.673342,0.40506,-0.073776,-0.022121,0.94747,-1.042442,0,...,1,0,0,0,0,0,0,0,0,828000.0
8843,1.111175,0.658103,1.123562,-0.772347,-0.633757,0.033474,0.053663,-0.125671,-1.508118,0,...,0,0,1,0,0,0,0,0,0,650000.0


In [607]:
train = melbourne_data.sample(frac=0.8,random_state=200)
target_train = train.pop('Price')
test = melbourne_data.drop(train.index)
target_test = test.pop('Price')



In [608]:
train_dataset = tf.data.Dataset.from_tensor_slices((train.values, target_train.values))
test_dataset = tf.data.Dataset.from_tensor_slices((test.values, target_test.values))

In [609]:
logdir = "logs/scalars/" + datetime.now().strftime("%Y%m%d-%H%M%S")
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=logdir)

loss = tf.keras.losses.MeanSquaredError()

def custom_loss_function(y_actual,y_predicted):
    clipped_preds = tf.clip_by_value(y_predicted, 1, float('inf'))
    return tf.sqrt(tf.reduce_mean(loss(
        tf.math.log(y_actual), tf.math.log(clipped_preds))))


class MultiPerceptron(tf.keras.Model):

    def __init__(self):
        super(MultiPerceptron, self).__init__()
        self.dense1 = tf.keras.layers.Dense(train.shape[1],activation=tf.nn.relu)
        self.dense2 = tf.keras.layers.Dense(50,activation=tf.nn.relu)
        self.dense3 = tf.keras.layers.Dense(50,activation=tf.nn.relu)
        self.dense4 = tf.keras.layers.Dense(1)
        self.dropout = tf.keras.layers.Dropout(0.5)

    def call(self, inputs, training=False):
        x = self.dense1(inputs)
        if training:
            x = self.dropout(x, training=training)
        y = self.dense2(x)
        y2 = self.dense3(y)
        return self.dense4(y2)

model = MultiPerceptron()
mod_loss = loss_wrapper()
model.compile(optimizer='adam',
                loss=custom_loss_function,
                metrics=['accuracy'],
             )


In [610]:
train_dataset = dataset.shuffle(len(train)).batch(1)

In [611]:
test_dataset = test_dataset.batch(1)

In [612]:
model.fit(train_dataset, epochs=12, callbacks=[tensorboard_callback])


Epoch 1/12


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

Epoch 2/12
Epoch 3/12
Epoch 4/12
Epoch 5/12
Epoch 6/12
Epoch 7/12
Epoch 8/12
Epoch 9/12
Epoch 10/12
Epoch 11/12
Epoch 12/12


<tensorflow.python.keras.callbacks.History at 0x7f8aacb93520>

In [615]:
res = model.predict(test_dataset)

In [622]:
average_diff = 0
for pred_price, orig_price in zip(res,target_test):
    average_diff = (average_diff + abs(orig_price - pred_price[0])) / 2
    print(f'Original price: {orig_price};    Prediciton price: {pred_price[0]}. \t Difference: {abs(orig_price-pred_price[0])}')
    
print('\n')
print(f'Average loss: {average_diff}')

Original price: 1335000.0;    Prediciton price: 1440786.625. 	 Difference: 105786.625
Original price: 828000.0;    Prediciton price: 945452.1875. 	 Difference: 117452.1875
Original price: 3225000.0;    Prediciton price: 2376892.25. 	 Difference: 848107.75
Original price: 636000.0;    Prediciton price: 592867.875. 	 Difference: 43132.125
Original price: 770000.0;    Prediciton price: 835180.3125. 	 Difference: 65180.3125
Original price: 695000.0;    Prediciton price: 605233.1875. 	 Difference: 89766.8125
Original price: 413000.0;    Prediciton price: 477943.15625. 	 Difference: 64943.15625
Original price: 585000.0;    Prediciton price: 444591.40625. 	 Difference: 140408.59375
Original price: 725000.0;    Prediciton price: 756047.9375. 	 Difference: 31047.9375
Original price: 925000.0;    Prediciton price: 874469.3125. 	 Difference: 50530.6875
Original price: 1770000.0;    Prediciton price: 1566675.875. 	 Difference: 203324.125
Original price: 591000.0;    Prediciton price: 500638.0625. 

Original price: 585000.0;    Prediciton price: 733194.875. 	 Difference: 148194.875
Original price: 440000.0;    Prediciton price: 464025.5625. 	 Difference: 24025.5625
Original price: 1190000.0;    Prediciton price: 1032738.25. 	 Difference: 157261.75
Original price: 1275000.0;    Prediciton price: 1339187.875. 	 Difference: 64187.875
Original price: 2375000.0;    Prediciton price: 2084532.125. 	 Difference: 290467.875
Original price: 885000.0;    Prediciton price: 912632.4375. 	 Difference: 27632.4375
Original price: 2690000.0;    Prediciton price: 2160005.75. 	 Difference: 529994.25
Original price: 390000.0;    Prediciton price: 405962.0. 	 Difference: 15962.0
Original price: 2750000.0;    Prediciton price: 2303204.0. 	 Difference: 446796.0
Original price: 815000.0;    Prediciton price: 866340.4375. 	 Difference: 51340.4375
Original price: 735000.0;    Prediciton price: 712675.25. 	 Difference: 22324.75
Original price: 722500.0;    Prediciton price: 512938.65625. 	 Difference: 20956

In [None]:
%tensorboard --logdir logs/scalars