In [365]:
import pandas as pd
from datetime import datetime
import tensorflow as tf
import matplotlib.pyplot as plt
import matplotlib
import time
import numpy as np
import os
%matplotlib inline
%load_ext tensorboard

The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard


In [366]:
# Record the TensorFlow version and the devices it reports, so the
# environment that produced the results below is visible in the notebook.
print(tf.__version__)
print(tf.config.list_physical_devices("GPU"))
print(tf.config.list_logical_devices("CPU"))

2.3.0
[]
[LogicalDevice(name='/device:CPU:0', device_type='CPU')]


In [367]:
# Path of the raw housing CSV, relative to the working directory.
DATASET_PATH = "./data/melb_data.csv"
# Columns removed before modelling. 'Lattitude' and 'Longtitude' are spelled
# exactly as in the CSV header, so they must not be "corrected" here.
EXCLUDE_COLUMNS = ['Address','Method','SellerG','Date','Postcode','CouncilArea','Lattitude','Longtitude']
# Name of the regression target column.
TARGET_COLUMN_NAME = 'Price'

In [368]:
# Read the raw housing records into a DataFrame.
melbourne_data = pd.read_csv(filepath_or_buffer=DATASET_PATH)

In [369]:
melbourne_data.columns

Index(['Suburb', 'Address', 'Rooms', 'Type', 'Price', 'Method', 'SellerG',
       'Date', 'Distance', 'Postcode', 'Bedroom2', 'Bathroom', 'Car',
       'Landsize', 'BuildingArea', 'YearBuilt', 'CouncilArea', 'Lattitude',
       'Longtitude', 'Regionname', 'Propertycount'],
      dtype='object')

In [370]:
# Discard the columns listed in EXCLUDE_COLUMNS.
# NOTE: not re-runnable as is; once the columns are gone a second run
# raises KeyError, so reload the CSV first.
melbourne_data = melbourne_data.drop(columns=EXCLUDE_COLUMNS)

In [371]:
# Separate the target from the features. `pop` removes the column from
# melbourne_data in place, so the preprocessing that follows only touches
# feature columns (and this cell cannot be run twice without reloading).
target_column = melbourne_data.pop(TARGET_COLUMN_NAME)
target_column.head()

0    1480000.0
1    1035000.0
2    1465000.0
3     850000.0
4    1600000.0
Name: Price, dtype: float64

In [372]:
melbourne_data.head()

Unnamed: 0,Suburb,Rooms,Type,Distance,Bedroom2,Bathroom,Car,Landsize,BuildingArea,YearBuilt,Regionname,Propertycount
0,Abbotsford,2,h,2.5,2.0,1.0,1.0,202.0,,,Northern Metropolitan,4019.0
1,Abbotsford,2,h,2.5,2.0,1.0,0.0,156.0,79.0,1900.0,Northern Metropolitan,4019.0
2,Abbotsford,3,h,2.5,3.0,2.0,0.0,134.0,150.0,1900.0,Northern Metropolitan,4019.0
3,Abbotsford,3,h,2.5,3.0,2.0,1.0,94.0,,,Northern Metropolitan,4019.0
4,Abbotsford,4,h,2.5,3.0,1.0,2.0,120.0,142.0,2014.0,Northern Metropolitan,4019.0


In [373]:
# Lower-case the free-text categorical columns.
for text_column in ('Suburb', 'Regionname'):
    melbourne_data[text_column] = melbourne_data[text_column].str.lower()

# Standardise every non-object column to zero mean and unit standard
# deviation, then replace the values that are still missing with -1.
numeric_features = melbourne_data.dtypes[melbourne_data.dtypes != 'object'].index
melbourne_data[numeric_features] = (
    melbourne_data[numeric_features]
    .apply(lambda column: (column - column.mean()) / column.std())
    .fillna(-1)
)

In [374]:
melbourne_data.head()

Unnamed: 0,Suburb,Rooms,Type,Distance,Bedroom2,Bathroom,Car,Landsize,BuildingArea,YearBuilt,Regionname,Propertycount
0,abbotsford,-0.981427,h,-1.301437,-0.947,-0.772347,-0.633757,-0.089312,-1.0,-1.0,northern metropolitan,-0.784596
1,abbotsford,-0.981427,h,-1.301437,-0.947,-0.772347,-1.672574,-0.100839,-0.134872,-1.735382,northern metropolitan,-0.784596
2,abbotsford,0.064874,h,-1.301437,0.088281,0.673342,-1.672574,-0.106352,-0.003637,-1.735382,northern metropolitan,-0.784596
3,abbotsford,0.064874,h,-1.301437,0.088281,0.673342,-0.633757,-0.116375,-1.0,-1.0,northern metropolitan,-0.784596
4,abbotsford,1.111175,h,-1.301437,0.088281,-0.772347,0.40506,-0.10986,-0.018424,1.32307,northern metropolitan,-0.784596


In [375]:
# Expand the remaining string columns into 0/1 indicator columns.
# `dummy_na=True` adds a `<column>_nan` indicator for each encoded column so
# that a missing category becomes its own feature.
melbourne_data = pd.get_dummies(melbourne_data, dummy_na=True) # one-hot encode; NaN gets its own indicator column
melbourne_data.shape

(13580, 337)

In [376]:
melbourne_data.head()

Unnamed: 0,Rooms,Distance,Bedroom2,Bathroom,Car,Landsize,BuildingArea,YearBuilt,Propertycount,Suburb_abbotsford,...,Type_nan,Regionname_eastern metropolitan,Regionname_eastern victoria,Regionname_northern metropolitan,Regionname_northern victoria,Regionname_south-eastern metropolitan,Regionname_southern metropolitan,Regionname_western metropolitan,Regionname_western victoria,Regionname_nan
0,-0.981427,-1.301437,-0.947,-0.772347,-0.633757,-0.089312,-1.0,-1.0,-0.784596,1,...,0,0,0,1,0,0,0,0,0,0
1,-0.981427,-1.301437,-0.947,-0.772347,-1.672574,-0.100839,-0.134872,-1.735382,-0.784596,1,...,0,0,0,1,0,0,0,0,0,0
2,0.064874,-1.301437,0.088281,0.673342,-1.672574,-0.106352,-0.003637,-1.735382,-0.784596,1,...,0,0,0,1,0,0,0,0,0,0
3,0.064874,-1.301437,0.088281,0.673342,-0.633757,-0.116375,-1.0,-1.0,-0.784596,1,...,0,0,0,1,0,0,0,0,0,0
4,1.111175,-1.301437,0.088281,-0.772347,0.40506,-0.10986,-0.018424,1.32307,-0.784596,1,...,0,0,0,1,0,0,0,0,0,0


In [377]:
# Put the target back next to the features so each price stays attached to
# its row while the frame is shuffled and split into train/test.
melbourne_data[TARGET_COLUMN_NAME]=target_column

In [378]:
# One seed for both random steps. The original shuffle was unseeded, so the
# seeded 80 % draw still selected different rows on every run.
SPLIT_SEED = 200

# Shuffle all rows, then draw 80 % for training; the rest becomes the test set.
melbourne_data = melbourne_data.sample(frac=1, random_state=SPLIT_SEED)
train = melbourne_data.sample(frac=0.8, random_state=SPLIT_SEED)
target_train = train.pop(TARGET_COLUMN_NAME)

# Everything not drawn for training (matched by index label) is held out.
test = melbourne_data.drop(train.index)
target_test = test.pop(TARGET_COLUMN_NAME)

In [379]:
# Wrap the feature matrix and the price vector of each split in a tf.data
# pipeline; every element is one (feature row, price) pair.
train_dataset = tf.data.Dataset.from_tensor_slices((train.values, target_train.values))
test_dataset = tf.data.Dataset.from_tensor_slices((test.values, target_test.values))

In [380]:
# Samples per batch, shared by the training and evaluation pipelines.
BATCH_SIZE = 1

# Shuffle with a buffer as large as the training set, then batch.
train_dataset = train_dataset.shuffle(buffer_size=len(train)).batch(BATCH_SIZE)
# The evaluation data keeps its order; it is only batched.
test_dataset = test_dataset.batch(BATCH_SIZE)

In [382]:
# TensorBoard: each run logs into its own timestamped folder under logs/.
logdir = "logs/" + datetime.now().strftime("%Y%m%d-%H%M%S")
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=logdir)

# Checkpoints: the {epoch}/{loss} placeholders are filled in by the callback
# at save time, so checkpoint_dir holds the unformatted template directory.
checkpoint_path = "models/training__{epoch:02d}__{loss:.2f}/cp.ckpt"
checkpoint_dir = os.path.dirname(checkpoint_path)

# Weights-only checkpointing that writes a file only when the monitored
# quantity improves.
cp_callback = tf.keras.callbacks.ModelCheckpoint(
    checkpoint_path,
    save_weights_only=True,
    save_best_only=True,
    verbose=1,
)

def custom_loss_function(y_actual, y_predicted):
    """Root-mean-squared logarithmic error between actual and predicted values.

    Args:
        y_actual: Ground-truth values; must be strictly positive because the
            logarithm is taken without clipping.
        y_predicted: Model outputs; values below 1 are raised to 1 before the
            logarithm is taken.

    Returns:
        A scalar tensor: sqrt(mean((log(y_actual) - log(clipped prediction))^2)).
    """
    # Floor predictions at 1 so the logarithm stays finite (and non-negative)
    # even when the network emits zero or a negative number.
    clipped_preds = tf.clip_by_value(y_predicted, 1, float('inf'))
    # With its default reduction MeanSquaredError already returns the batch
    # mean as a scalar, so no further reduce_mean is required.
    mean_squared_log_error = tf.keras.losses.MeanSquaredError()(
        tf.math.log(y_actual), tf.math.log(clipped_preds))
    return tf.sqrt(mean_squared_log_error)


class MultiPerceptron(tf.keras.Model):
    """Fully connected regressor that outputs one value per input row.

    Layer stack: Dense(first_layer_units, ReLU) -> Dropout -> Dense(50, ReLU)
    -> Dense(50, ReLU) -> Dense(1).
    """

    def __init__(self, first_layer_units=None, dropout_rate=0.5):
        """Create the layers.

        Args:
            first_layer_units: Width of the first hidden layer. When omitted,
                falls back to the column count of the notebook-level `train`
                frame, which keeps `MultiPerceptron()` working as before.
            dropout_rate: Fraction of first-layer activations dropped while
                training.
        """
        super().__init__()
        if first_layer_units is None:
            # Backward-compatible default: one unit per input feature.
            first_layer_units = train.shape[1]
        self.dense1 = tf.keras.layers.Dense(first_layer_units, activation=tf.nn.relu)
        self.dense2 = tf.keras.layers.Dense(50, activation=tf.nn.relu)
        self.dense3 = tf.keras.layers.Dense(50, activation=tf.nn.relu)
        self.dense4 = tf.keras.layers.Dense(1)
        self.dropout = tf.keras.layers.Dropout(dropout_rate)

    def call(self, inputs, training=False):
        """Run the forward pass.

        Args:
            inputs: Batch of feature rows.
            training: Whether dropout should be active.

        Returns:
            The output of the final single-unit Dense layer.
        """
        hidden = self.dense1(inputs)
        # Dropout passes its input through unchanged unless `training` is
        # truthy, so no explicit branch on the flag is needed.
        hidden = self.dropout(hidden, training=training)
        hidden = self.dense2(hidden)
        hidden = self.dense3(hidden)
        return self.dense4(hidden)
    

In [383]:
# Build the network and attach optimiser, loss and metric. The custom
# log-error function is used both as the training loss and as the metric.
model = MultiPerceptron()
model.compile(
    optimizer='adam',
    loss=custom_loss_function,
    metrics=[custom_loss_function],
)
# To resume from saved weights instead of starting fresh:
# model.load_weights('training_1/cp.ckpt')

In [None]:
# Train on the CPU for 6 epochs, logging to TensorBoard and checkpointing.
# NOTE(review): the held-out test split doubles as validation data here. If
# the checkpoint callback monitors validation loss (the Keras default), the
# "best" weights are picked using the same rows that are evaluated afterwards;
# consider carving out a separate validation split.
with tf.device('/cpu:0'):
    model.fit(
        train_dataset, 
        epochs=6, 
        validation_data=(test.values,target_test.values),
        callbacks=[tensorboard_callback, cp_callback]
    )

In [385]:
# Predict a price for every row of the held-out test pipeline; the loop in
# the following cell reads the prediction as element 0 of each row of `res`.
res = model.predict(test_dataset)

In [386]:
# Collect the absolute error of every prediction so the summary below is a
# true arithmetic mean. The previous running update,
# (average + error) / 2, halved the weight of all earlier rows at each step
# and therefore mostly reflected the last few rows.
absolute_errors = []
for pred_price, orig_price in zip(res, target_test):
    difference = abs(orig_price - pred_price[0])
    absolute_errors.append(difference)
    print(f'Original price: {orig_price};    Prediciton price: {pred_price[0]}. \t Difference: {difference}')

# Guard against an empty test set so the cell never divides by zero.
average_diff = sum(absolute_errors) / len(absolute_errors) if absolute_errors else 0
print(f'Average loss: {average_diff}')

Original price: 855000.0;    Prediciton price: 1219204.625. 	 Difference: 364204.625
Original price: 710000.0;    Prediciton price: 790964.4375. 	 Difference: 80964.4375
Original price: 860000.0;    Prediciton price: 686031.125. 	 Difference: 173968.875
Original price: 500000.0;    Prediciton price: 545390.625. 	 Difference: 45390.625
Original price: 830000.0;    Prediciton price: 827184.6875. 	 Difference: 2815.3125
Original price: 1026000.0;    Prediciton price: 892150.25. 	 Difference: 133849.75
Original price: 420000.0;    Prediciton price: 479644.625. 	 Difference: 59644.625
Original price: 306000.0;    Prediciton price: 375396.8125. 	 Difference: 69396.8125
Original price: 1751000.0;    Prediciton price: 1513212.5. 	 Difference: 237787.5
Original price: 990000.0;    Prediciton price: 1234131.625. 	 Difference: 244131.625
Original price: 250000.0;    Prediciton price: 351311.15625. 	 Difference: 101311.15625
Original price: 1852000.0;    Prediciton price: 1606828.125. 	 Difference

Original price: 2825000.0;    Prediciton price: 2042145.75. 	 Difference: 782854.25
Original price: 857700.0;    Prediciton price: 735990.75. 	 Difference: 121709.25
Original price: 1302500.0;    Prediciton price: 1256155.75. 	 Difference: 46344.25
Original price: 568000.0;    Prediciton price: 581058.0625. 	 Difference: 13058.0625
Original price: 470500.0;    Prediciton price: 613888.1875. 	 Difference: 143388.1875
Original price: 450000.0;    Prediciton price: 399777.0625. 	 Difference: 50222.9375
Original price: 599000.0;    Prediciton price: 592812.0625. 	 Difference: 6187.9375
Original price: 1550000.0;    Prediciton price: 1527990.625. 	 Difference: 22009.375
Original price: 912000.0;    Prediciton price: 881264.125. 	 Difference: 30735.875
Original price: 899000.0;    Prediciton price: 689725.0. 	 Difference: 209275.0
Original price: 1905000.0;    Prediciton price: 1932336.25. 	 Difference: 27336.25
Original price: 680000.0;    Prediciton price: 715504.125. 	 Difference: 35504.1

In [387]:
%tensorboard --logdir logs