In [150]:
import pandas as pd
from datetime import datetime
import tensorflow as tf
import matplotlib.pyplot as plt
import matplotlib
import time
import numpy as np
%matplotlib inline
%load_ext tensorboard

The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard


In [151]:
tf.__version__

'2.3.0'

In [152]:
DATASET_PATH = "./data/melb_data.csv"
EXCLUDE_COLUMNS = ['Address','Method','SellerG','Date','Postcode','CouncilArea','Lattitude','Longtitude']
TARGET_COLUMN_NAME = 'Price'

In [153]:
melbourne_data= pd.read_csv(DATASET_PATH) 

In [154]:
melbourne_data.columns

Index(['Suburb', 'Address', 'Rooms', 'Type', 'Price', 'Method', 'SellerG',
       'Date', 'Distance', 'Postcode', 'Bedroom2', 'Bathroom', 'Car',
       'Landsize', 'BuildingArea', 'YearBuilt', 'CouncilArea', 'Lattitude',
       'Longtitude', 'Regionname', 'Propertycount'],
      dtype='object')

In [155]:
melbourne_data = melbourne_data.drop(columns=EXCLUDE_COLUMNS)

In [156]:
target_column = melbourne_data.pop(TARGET_COLUMN_NAME)

In [157]:
melbourne_data.head()

Unnamed: 0,Suburb,Rooms,Type,Distance,Bedroom2,Bathroom,Car,Landsize,BuildingArea,YearBuilt,Regionname,Propertycount
0,Abbotsford,2,h,2.5,2.0,1.0,1.0,202.0,,,Northern Metropolitan,4019.0
1,Abbotsford,2,h,2.5,2.0,1.0,0.0,156.0,79.0,1900.0,Northern Metropolitan,4019.0
2,Abbotsford,3,h,2.5,3.0,2.0,0.0,134.0,150.0,1900.0,Northern Metropolitan,4019.0
3,Abbotsford,3,h,2.5,3.0,2.0,1.0,94.0,,,Northern Metropolitan,4019.0
4,Abbotsford,4,h,2.5,3.0,1.0,2.0,120.0,142.0,2014.0,Northern Metropolitan,4019.0


In [158]:
# lower 
melbourne_data['Suburb']=melbourne_data['Suburb'].str.lower()
melbourne_data['Regionname']=melbourne_data['Regionname'].str.lower()

# covenrt hige numbers to small
numeric_features = melbourne_data.dtypes[melbourne_data.dtypes != 'object'].index
melbourne_data[numeric_features] = melbourne_data[numeric_features].apply(
    lambda x: (x - x.mean()) / (x.std()))
melbourne_data[numeric_features] = melbourne_data[numeric_features].fillna(-1)

In [159]:
melbourne_data.head()

Unnamed: 0,Suburb,Rooms,Type,Distance,Bedroom2,Bathroom,Car,Landsize,BuildingArea,YearBuilt,Regionname,Propertycount
0,abbotsford,-0.981427,h,-1.301437,-0.947,-0.772347,-0.633757,-0.089312,-1.0,-1.0,northern metropolitan,-0.784596
1,abbotsford,-0.981427,h,-1.301437,-0.947,-0.772347,-1.672574,-0.100839,-0.134872,-1.735382,northern metropolitan,-0.784596
2,abbotsford,0.064874,h,-1.301437,0.088281,0.673342,-1.672574,-0.106352,-0.003637,-1.735382,northern metropolitan,-0.784596
3,abbotsford,0.064874,h,-1.301437,0.088281,0.673342,-0.633757,-0.116375,-1.0,-1.0,northern metropolitan,-0.784596
4,abbotsford,1.111175,h,-1.301437,0.088281,-0.772347,0.40506,-0.10986,-0.018424,1.32307,northern metropolitan,-0.784596


In [160]:
melbourne_data = pd.get_dummies(melbourne_data, dummy_na=True) # add Nan and convert word to one hot dataframe column
melbourne_data.shape

(13580, 337)

In [161]:
melbourne_data.head()

Unnamed: 0,Rooms,Distance,Bedroom2,Bathroom,Car,Landsize,BuildingArea,YearBuilt,Propertycount,Suburb_abbotsford,...,Type_nan,Regionname_eastern metropolitan,Regionname_eastern victoria,Regionname_northern metropolitan,Regionname_northern victoria,Regionname_south-eastern metropolitan,Regionname_southern metropolitan,Regionname_western metropolitan,Regionname_western victoria,Regionname_nan
0,-0.981427,-1.301437,-0.947,-0.772347,-0.633757,-0.089312,-1.0,-1.0,-0.784596,1,...,0,0,0,1,0,0,0,0,0,0
1,-0.981427,-1.301437,-0.947,-0.772347,-1.672574,-0.100839,-0.134872,-1.735382,-0.784596,1,...,0,0,0,1,0,0,0,0,0,0
2,0.064874,-1.301437,0.088281,0.673342,-1.672574,-0.106352,-0.003637,-1.735382,-0.784596,1,...,0,0,0,1,0,0,0,0,0,0
3,0.064874,-1.301437,0.088281,0.673342,-0.633757,-0.116375,-1.0,-1.0,-0.784596,1,...,0,0,0,1,0,0,0,0,0,0
4,1.111175,-1.301437,0.088281,-0.772347,0.40506,-0.10986,-0.018424,1.32307,-0.784596,1,...,0,0,0,1,0,0,0,0,0,0


In [162]:
melbourne_data[TARGET_COLUMN_NAME]=target_column

In [163]:
train = melbourne_data.sample(frac=0.8,random_state=200)
target_train = train.pop('Price')
test = melbourne_data.drop(train.index)
target_test = test.pop('Price')

In [164]:
train_dataset = tf.data.Dataset.from_tensor_slices((train.values, target_train.values))
test_dataset = tf.data.Dataset.from_tensor_slices((test.values, target_test.values))

In [174]:
logdir = "logs/scalars/" + datetime.now().strftime("%Y%m%d-%H%M%S")
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=logdir)

def custom_loss_function(y_actual,y_predicted):
    loss = tf.keras.losses.MeanSquaredError()
    clipped_preds = tf.clip_by_value(y_predicted, 1, float('inf'))
    return tf.sqrt(tf.reduce_mean(loss(
        tf.math.log(y_actual), tf.math.log(clipped_preds))))


class MultiPerceptron(tf.keras.Model):

    def __init__(self):
        super(MultiPerceptron, self).__init__()
        self.dense1 = tf.keras.layers.Dense(train.shape[1],activation=tf.nn.sigmoid)
        self.dense2 = tf.keras.layers.Dense(50,activation=tf.nn.sigmoid)
        self.dense3 = tf.keras.layers.Dense(50,activation=tf.nn.sigmoid)
        self.dense4 = tf.keras.layers.Dense(1)
        self.dropout = tf.keras.layers.Dropout(0.5)

    def call(self, inputs, training=False):
        x = self.dense1(inputs)
        if training:
            x = self.dropout(x, training=training)
        y = self.dense2(x)
        y2 = self.dense3(y)
        return self.dense4(y2)
   

model = MultiPerceptron()
model.compile(optimizer='adam',
                loss='mean_squared_logarithmic_error',
                metrics=['mean_squared_logarithmic_error'],
             )


In [166]:
train_dataset = train_dataset.shuffle(len(train)).batch(1)

In [167]:
test_dataset = test_dataset.batch(1)

In [175]:
model.fit(
    train_dataset, 
    epochs=10, 
    callbacks=[tensorboard_callback],
    validation_data=(test.values,target_test.values),
    use_multiprocessing=True
)


Epoch 1/10


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x7ff5522cddf0>

In [169]:
res = model.predict(test_dataset)

In [173]:
average_diff = 0
for pred_price, orig_price in zip(res,target_test):
    average_diff = (average_diff + abs(orig_price - pred_price[0])) / 2
#     print(f'Original price: {orig_price};    Prediciton price: {pred_price[0]}. \t Difference: {abs(orig_price-pred_price[0])}')
    
# print('\n')
print(f'Average loss: {average_diff}')

Average loss: 94568.98212369798


In [172]:
%tensorboard --logdir logs/scalars

Reusing TensorBoard on port 6006 (pid 12733), started 0:03:37 ago. (Use '!kill 12733' to kill it.)