In [1]:
from flask import Flask, jsonify, render_template
import numpy as np
import datetime as dt
from sqlalchemy.ext.automap import automap_base
from sqlalchemy.orm import Session
from sqlalchemy import create_engine, func
from config import password
import pandas as pd

In [2]:
connection_string = f"postgres:{password}@perth-property-market.cptzycsh4y3w.ap-southeast-2.rds.amazonaws.com:5432/perth-property-market"

# Create the database engine (to the PostgreSQL database)
engine = create_engine(f'postgresql://{connection_string}')
conn = engine.connect()
session = Session(bind=engine)

In [3]:
Base = automap_base()

Base.prepare(engine, reflect=True)

market_data = Base.classes.perth_market

In [4]:
properties = session.query(
    market_data.price, market_data.bedrooms, market_data.bathrooms, market_data.land_area,
    market_data.floor_area, market_data.build_year, market_data.cbd_dist, market_data.year_sold).all()

In [5]:
session.close()


In [6]:
property_list = []
for row in properties:
    property = list(np.ravel(row))
    property_list.append(property)
    

df = pd.DataFrame(property_list, columns=["Price", "Bedrooms", "Bathrooms", "Land_Area", 
"Floor_Area", "Build_Year", "CBD_Dist", "Year_Sold"])

df.head(2)

Unnamed: 0,Price,Bedrooms,Bathrooms,Land_Area,Floor_Area,Build_Year,CBD_Dist,Year_Sold
0,565000,4,2,600,160,2003,18300,2018
1,365000,3,2,351,139,2013,26900,2019


In [13]:
# Round the sale price (i.e. $456000 -> 450000)
# df["Price"] = (round(df["Price"]/10000)*10000).astype("int")

# Support Vector Regression

In [7]:
X = df.drop(["Price"], axis=1)
y = df["Price"].values.reshape(-1, 1)
print(X.shape, y.shape)

(25683, 7) (25683, 1)


In [8]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y)

In [9]:
from sklearn.preprocessing import StandardScaler

# Create a StandardScater model and fit it to the training data

X_scaler = StandardScaler().fit(X_train)
y_scaler = StandardScaler().fit(y_train)


In [10]:
# Transform the training and testing data using the X_scaler and y_scaler models

X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)
y_train_scaled = y_scaler.transform(y_train)
y_test_scaled = y_scaler.transform(y_test)


In [12]:
# Support vector machine rbf regression
from sklearn.svm import SVR
model_rbf = SVR(kernel="rbf")
model_rbf.fit(X_train_scaled, y_train_scaled)
# model_rbf.fit(X_train_scaled, np.ravel(y_train_scaled))

  return f(*args, **kwargs)


SVR()

In [13]:
print("Training Data Score: %.3f" % model_rbf.score(X_train_scaled, y_train_scaled))
print("Test Data Score: %.3f" % model_rbf.score(X_test_scaled, y_test_scaled))

Training Data Score: 0.687
Test Data Score: 0.658


In [14]:
# Support vector machine poly regression
from sklearn.svm import SVR
model_poly = SVR(kernel="poly")
model_poly.fit(X_train_scaled, y_train_scaled.ravel())

SVR(kernel='poly')

In [15]:
print("Training Data Score: %.3f" % model_poly.score(X_train_scaled, y_train_scaled))
print("Training Data Score: %.3f" % model_poly.score(X_test_scaled, y_test_scaled))

Training Data Score: 0.303
Training Data Score: 0.327


In [16]:
# Support vector machine linear regression
from sklearn.svm import SVR
model_linear = SVR(kernel="linear")
model_linear.fit(X_train_scaled, y_train_scaled.ravel())

SVR(kernel='linear')

In [17]:
print("Training Data Score: %.3f" % model_linear.score(X_train_scaled, y_train_scaled))
print("Training Data Score: %.3f" % model_linear.score(X_test_scaled, y_test_scaled))

Training Data Score: 0.531
Training Data Score: 0.556


## Save Model

In [15]:
# save the model to disk
import joblib
filename = '../models/price_model_supportVectorMachine_rbf.sav'
joblib.dump(model_rbf, filename)

['../models/price_model_supportVectorMachine_rbf.sav']

In [16]:
# Save x & y Scalars
xscaler_path = '../models/price_model_SVR_xscaler.sav'
yscaler_path = '../models/price_model_SVR_yscaler.sav'

joblib.dump(X_scaler, xscaler_path)
joblib.dump(y_scaler, yscaler_path) 

['../models/price_model_SVR_yscaler.sav']

In [17]:
# load the model from disk
loaded_model = joblib.load(filename)
result = loaded_model.score(X_test_scaled, y_test_scaled)
print(result)

0.6578249597955634
