In [39]:
# Provided: notebook bootstrapping
# Keras Models
import tensorflow as tf
import keras 
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten, BatchNormalization, Conv2D, MaxPooling2D

# # Aditional Libs
import numpy as np
import os
import pandas as pd
import pickle
import time


import sys
module_path = os.path.abspath(os.path.join('..'))
print(module_path)
if module_path not in sys.path:
    sys.path.append(module_path) 

from warehouse import DataWarehouse

module_path = os.path.abspath(os.path.join('../..'))
print(module_path)
if module_path not in sys.path:
    sys.path.append(module_path) 
from db.etl import MultilayerPerceptronPipeline

pipe = MultilayerPerceptronPipeline()
schema_name = 'ml__MultiLayerPerceptron'


c:\Grace's Projects\ezproperty\db
c:\Grace's Projects\ezproperty
Connecting to 'localhost' with user 'root'
Database | Using database is3107g6.
Pipeline | ml__MultiLayerPerceptron   | Retreiving data from data lake with _id.
Pipeline | ml__MultiLayerPerceptron   | Loading 0 documents to data lake.
Pipeline | ml__MultiLayerPerceptron   | Load process to data warehouse failed.


In [40]:
db = DataWarehouse()

dataset = db.query('''
    SELECT district, floorRangeStart, floorRangeEnd, area, transactionDate, resale, price FROM main__PropertyTransaction
    LIMIT 5000
''')


# Data pre-processing to convert all to float and standardise magnitude
df = pd.DataFrame(dataset)
for column in df.columns:
    if column == "transactionDate":
        df[column] = pd.to_datetime(df[column])
        df[column] = (df[column].max() - df[column]) / np.timedelta64(1,'Y')
    if column == "price": # price is in millions
        df[column] = df[column].astype(float) / 1e6
    if column == "area": # area is in 100 square feet
        df[column] = df[column].astype(float) / 100
    else:
        df[column] = df[column].astype(float)
print(df.tail())
print(df.dtypes)

Connecting to 'localhost' with user 'root'
Database | Using database is3107g6.
Database | Query executed successfully.
      district  floorRangeStart  floorRangeEnd  area  transactionDate  resale  \
4995       3.0             16.0           20.0  0.76         0.999336     0.0   
4996       3.0             16.0           20.0  0.76         0.999336     0.0   
4997       3.0              6.0           10.0  1.40         0.999336     0.0   
4998       3.0             21.0           25.0  1.06         0.665311     0.0   
4999       3.0             26.0           30.0  1.06         0.665311     0.0   

      price  
4995  2.273  
4996  2.245  
4997  4.110  
4998  3.240  
4999  3.280  
district           float64
floorRangeStart    float64
floorRangeEnd      float64
area               float64
transactionDate    float64
resale             float64
price              float64
dtype: object


In [41]:
train = df.sample(frac=0.9,random_state=200)
test = df.drop(train.index)

X_train, Y_train = train[[column for column in df.columns if column != 'price']], train['price']
X_test, Y_test = test[[column for column in df.columns if column != 'price']], test['price']

print(X_train.shape, X_test.shape, Y_train.shape, Y_test.shape)

(4500, 6) (500, 6) (4500,) (500,)


In [42]:
# Build model
def build_mlp_model():
  model = keras.Sequential()
  model.add(Dense(6, activation='relu', input_shape=(6,)))
  model.add(Dense(64, activation='relu'))
  model.add(Dense(64, activation='relu'))
  model.add(Dense(64, activation='relu'))
  model.add(Dense(1, activation='linear'))
  return model

mlp_model = build_mlp_model()
mlp_model.summary()

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_10 (Dense)            (None, 6)                 42        
                                                                 
 dense_11 (Dense)            (None, 64)                448       
                                                                 
 dense_12 (Dense)            (None, 64)                4160      
                                                                 
 dense_13 (Dense)            (None, 64)                4160      
                                                                 
 dense_14 (Dense)            (None, 1)                 65        
                                                                 
Total params: 8,875
Trainable params: 8,875
Non-trainable params: 0
_________________________________________________________________


In [43]:

# Compile the model
mlp_model.compile(optimizer= tf.keras.optimizers.Adam(learning_rate=0.001), loss='mae', metrics=['mae'])

In [44]:
mlp_history = mlp_model.fit(X_train, Y_train, epochs=10, batch_size=16)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [45]:
loss, acc = mlp_model.evaluate(X_test, Y_test)
print(X_test.shape)
print(f'test loss is {loss}')
print(f'test accuracy is {acc}')

(500, 6)
test loss is 0.46735242009162903
test accuracy is 0.46735242009162903


Saving to MongoDB

In [46]:
def save_model_to_db():

    #pickling the model
    pickled_model = pickle.dumps(mlp_model)
    
    # creating other attributes
    model = [{ "model": pickled_model, 'name': "MLP", 'created_time': time.time()}]
    pipe.dl_loader(model, schema_name)

save_model_to_db()

Pipeline | ml__MultiLayerPerceptron   | Loading 1 documents to data lake.


In [49]:
def load_from_db_and_predict(district, floorRangeStart, floorRangeEnd, area, transactionDate, resale):
    json_data = {}

    result = pipe.dl_getter('ml__MultiLayerPerceptron')
    model = pickle.loads(result[0]["model"])
    print(model)

    input_data  = np.array([[district, floorRangeStart, floorRangeEnd, area, transactionDate, resale]])
    pred_price = model.predict(input_data)
    print(pred_price)

load_from_db_and_predict(district, floorRangeStart, floorRangeEnd, area, transactionDate, resale)    

Pipeline | ml__MultiLayerPerceptron   | Retreiving data from data lake with _id.
Pipeline | ml__MultiLayerPerceptron   | Load success. Retrieved 1 documents.
<keras.engine.sequential.Sequential object at 0x0000019F851F4940>
[[2.1240528]]
