# Regression Housing Pricing Project 3 for Fathers who want to buy a House:

# Predicting Housing Prices

# By Sequential API

In [1]:
import numpy as np
import pandas as pd
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras import layers
from tensorflow.keras import models
from tensorflow.keras.wrappers.scikit_learn import KerasRegressor
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline

In [2]:
# load dataset
dataframe = pd.read_csv("housing.csv", delim_whitespace=True, header=None)
dataset = dataframe.values
# split into input (X) and output (Y) variables
X = dataset[:,0:13]
Y = dataset[:,13]

# Baseline Model

In [3]:
# define base model
def baseline_model():
    # create model
    model = models.Sequential()
    model.add(layers.Dense(13, activation = 'relu', input_shape = (13,)))
    model.add(layers.Dense(1))

    # Compile model
    model.compile(optimizer = 'adam',
                 loss = 'mse',
                 metrics = ['mae'])
    return model

In [4]:
# fix random seed for reproducibility
seed = 7
np.random.seed(seed)
# evaluate model with standardized dataset
estimator = KerasRegressor(build_fn=baseline_model, epochs=100, batch_size=5, verbose=0)

In [5]:
kfold = KFold(n_splits=10, random_state=seed)
results = cross_val_score(estimator, X, Y, cv=kfold)
print("Baseline: %.2f (%.2f) MSE" % (results.mean(), results.std()))

Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
Baseline: -39.75 (26.24) MSE


# Baseline Model With Standarized Dataset

In [6]:
# evaluate model with standardized dataset
np.random.seed(seed)
estimators = []
estimators.append(('standardize', StandardScaler()))
estimators.append(('mlp', KerasRegressor(build_fn=baseline_model, epochs=50, batch_size=5, verbose=0)))
pipeline = Pipeline(estimators)
kfold = KFold(n_splits=10, random_state=seed)
results = cross_val_score(pipeline, X, Y, cv=kfold)
print("Standardized: %.2f (%.2f) MSE" % (results.mean(), results.std()))

Standardized: -23.04 (20.65) MSE


# Larger Model

In [7]:
# define larger model
def larger_model():
    # create model
    model = models.Sequential()
    model.add(layers.Dense(13, activation = 'relu', input_shape = (13,)))
    model.add(layers.Dense(6, activation = 'relu'))
    model.add(layers.Dense(1))

    # Compile model
    model.compile(optimizer = 'Adam',
                 loss = 'mse',
                 metrics = ['mae'])
    return model

In [8]:
np.random.seed(seed)
estimators = []
estimators.append(('standardize', StandardScaler()))
estimators.append(('mlp', KerasRegressor(build_fn=larger_model, epochs=50, batch_size=5, verbose=0)))
pipeline = Pipeline(estimators)
kfold = KFold(n_splits=10, random_state=seed)
results = cross_val_score(pipeline, X, Y, cv=kfold)
print("Larger: %.2f (%.2f) MSE" % (results.mean(), results.std()))

Larger: -21.47 (26.56) MSE


# Wider Model

In [9]:

# define wider model
def wider_model():
    # create model
    model = models.Sequential()
    model.add(layers.Dense(20, activation = 'relu', input_shape = (13,)))
    model.add(layers.Dense(1))

    # Compile model
    model.compile(optimizer = 'Adam',
                 loss = 'mse',
                 metrics = ['mae'])
    return model

In [10]:

np.random.seed(seed)
estimators = []
estimators.append(('standardize', StandardScaler()))
estimators.append(('mlp', KerasRegressor(build_fn=wider_model, epochs=100, batch_size=5, verbose=0)))
pipeline = Pipeline(estimators)
kfold = KFold(n_splits=10, random_state=seed)
results = cross_val_score(pipeline, X, Y, cv=kfold)
print("Wider: %.2f (%.2f) MSE" % (results.mean(), results.std()))


Wider: -20.86 (22.31) MSE


# Overfit Model

In [11]:
# define overfit model
def overfit_model():
    # create model
    model = models.Sequential()
    model.add(layers.Dense(13, activation = 'relu', input_shape = (13,)))
    model.add(layers.Dense(13, activation = 'relu'))
    model.add(layers.Dense(13, activation = 'relu'))
    model.add(layers.Dense(6, activation = 'relu'))
    model.add(layers.Dense(1))

    # Compile model
    model.compile(optimizer = 'Adam',
                 loss = 'mse',
                 metrics = ['mae'])
    return model


In [12]:
np.random.seed(seed)
estimators = []
estimators.append(('standardize', StandardScaler()))
estimators.append(('mlp', KerasRegressor(build_fn=wider_model, epochs=200, batch_size=50, verbose=0)))
pipeline = Pipeline(estimators)
kfold = KFold(n_splits=10, random_state=seed)
results = cross_val_score(pipeline, X, Y, cv=kfold)
print("Overfit: %.2f (%.2f) MSE" % (results.mean(), results.std()))

Overfit: -32.23 (31.53) MSE


# Without Using Scikit-Learn


# K-Fold Cross Validation

In [13]:
kf = 10
num_val_samples = len(X) // kf
num_epochs = 50
all_scores = []

In [14]:
for i in range(kf):
    print('processing fold #', i)
    val_data = X[i * num_val_samples: (i + 1) * num_val_samples]
    val_targets = Y[i * num_val_samples: (i + 1) * num_val_samples]
    partial_train_data = np.concatenate(
        [X[:i * num_val_samples],
         X[(i + 1) * num_val_samples:]],
        axis=0)
    partial_train_targets = np.concatenate(
        [Y[:i * num_val_samples],
         Y[(i + 1) * num_val_samples:]],
        axis=0)
    
    model = baseline_model()
    
    model.compile(optimizer='adam',
    loss='mse',
              metrics=['mae'])
    model.fit(partial_train_data, partial_train_targets, epochs=num_epochs, batch_size=5, verbose=0)
    mse, mae = model.evaluate(val_data, val_targets, verbose=0)
    all_scores.append(mse)

processing fold # 0
processing fold # 1
processing fold # 2
processing fold # 3
processing fold # 4
processing fold # 5
processing fold # 6
processing fold # 7
processing fold # 8
processing fold # 9


In [15]:
all_scores

[20.94009468078613,
 24.616104736328126,
 23.956162567138673,
 85.4762548828125,
 84.03016723632813,
 96.83247039794922,
 51.52581756591797,
 84.35399375915527,
 37.207648010253905,
 13.615547409057617]

In [16]:
###### all_scores
print("K-Fold: %.2f (%.2f)" % (np.mean(all_scores), np.std(all_scores)))

K-Fold: 52.26 (30.65)
