In [1]:
from flask import Flask, jsonify, render_template
import numpy as np
import datetime as dt
from sqlalchemy.ext.automap import automap_base
from sqlalchemy.orm import Session
from sqlalchemy import create_engine, func
from config import password
import pandas as pd
import joblib


In [32]:
connection_string = f"postgres:{password}@perth-property-market.cptzycsh4y3w.ap-southeast-2.rds.amazonaws.com:5432/perth-property-market"

# Create the database engine (to the PostgreSQL database)
engine = create_engine(f'postgresql://{connection_string}')
conn = engine.connect()
session = Session(bind=engine)

In [33]:
Base = automap_base()

Base.prepare(engine, reflect=True)

market_data = Base.classes.perth_market

In [34]:
properties = session.query(
    market_data.price, market_data.bedrooms, market_data.bathrooms, market_data.land_area,
    market_data.floor_area, market_data.build_year, market_data.cbd_dist, market_data.year_sold).all()

In [35]:
session.close()


In [36]:
property_list = []
for row in properties:
    property = list(np.ravel(row))
    property_list.append(property)
    

df = pd.DataFrame(property_list, columns=["Price", "Bedrooms", "Bathrooms", "Land_Area", 
"Floor_Area", "Build_Year", "CBD_Dist", "Year_Sold"])

df.head(2)

Unnamed: 0,Price,Bedrooms,Bathrooms,Land_Area,Floor_Area,Build_Year,CBD_Dist,Year_Sold
0,565000,4,2,600,160,2003,18300,2018
1,365000,3,2,351,139,2013,26900,2019


In [21]:
# Round the price values (i.e. $564000 -> 560000)
# df["Price"] = (round(df["Price"]/10000)*10000).astype("int")

# MLP Regression

In [37]:
X = df.drop(["Price"], axis=1)
y = df["Price"].values.reshape(-1, 1)

print(X.shape, y.shape)

(25683, 7) (25683, 1)


In [38]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y)

In [39]:
from sklearn.preprocessing import StandardScaler

# Create a StandardScater model and fit it to the training data

X_scaler = StandardScaler().fit(X_train)
y_scaler = StandardScaler().fit(y_train)


In [40]:
# Transform the training and testing data using the X_scaler and y_scaler models

X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)
y_train_scaled = y_scaler.transform(y_train)
y_test_scaled = y_scaler.transform(y_test)


In [45]:
from sklearn.neural_network import MLPRegressor


regr = MLPRegressor().fit(X_train_scaled, y_train_scaled)


  return f(*args, **kwargs)


In [46]:
prediction = regr.predict(X_test_scaled)
prediction

array([-0.06830512, -0.2610633 ,  1.88552315, ..., -0.12133931,
       -0.89791091, -0.09358437])

### Inversing the transformation

In [47]:
y_scaler.inverse_transform(prediction)

array([ 610704.0435863 ,  542436.57601887, 1302674.18854467, ...,
        591921.39197417,  316889.86880671,  601751.11318915])

In [48]:
print(f"Training Data Score: {regr.score(X_train_scaled, y_train_scaled)}")
print(f"Testing Data Score: {regr.score(X_test_scaled, y_test_scaled)}")

Training Data Score: 0.7016568432515304
Testing Data Score: 0.6742998937255298


## Save Model

In [49]:
# save the model to disk
filename = '../models/price_model_MLPReg.sav'
joblib.dump(regr, filename)


['../models/price_model_MLPReg.sav']

In [19]:
# save the scaler to disk
xscaler_path = '../models/price_model_MLPReg_xscaler.sav'
yscaler_path = '../models/price_model_MLPReg_yscaler.sav'

joblib.dump(X_scaler, xscaler_path)
joblib.dump(y_scaler, yscaler_path)


['../models/price_model_MLPReg_yscaler.sav']

In [21]:
# load the model from disk
loaded_model = joblib.load(filename)
loaded_xscaler = joblib.load(xscaler_path)
loaded_yscaler = joblib.load(yscaler_path)


In [22]:
result = loaded_model.score(X_test_scaled, y_test_scaled)
print(result)

0.6780905173468472
