In [None]:
import pickle as pkl
import warnings
from math import floor

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow as tf
from bayes_opt import BayesianOptimization
from keras.models import Sequential
from keras.layers import Dense, BatchNormalization, Dropout
from keras.optimizers import Adam, SGD, RMSprop, Adadelta, Adagrad, Adamax, Nadam, Ftrl
from keras.callbacks import EarlyStopping, ModelCheckpoint
from keras.wrappers.scikit_learn import KerasClassifier
from keras.wrappers.scikit_learn import KerasRegressor
from sklearn.metrics import make_scorer, accuracy_score, mean_squared_error, mean_absolute_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold
from sklearn.model_selection import StratifiedKFold
# Suppress every warning category for the rest of the session.
warnings.simplefilter('ignore')
# Do not truncate columns when a DataFrame is displayed.
pd.options.display.max_columns = None

**IMPORT & PREPARE DATA**

In [None]:
# Read the four pre-split CSV files and drop the 'Unnamed: 0' column each one
# carries (presumably the index written by an earlier to_csv - TODO confirm).
X_train = pd.read_csv("X_train.csv").drop(columns=['Unnamed: 0'])
X_test = pd.read_csv("X_test.csv").drop(columns=['Unnamed: 0'])
y_train = pd.read_csv("y_train.csv").drop(columns=['Unnamed: 0'])
y_test = pd.read_csv("y_test.csv").drop(columns=['Unnamed: 0'])

In [None]:
def _load_pickle(path):
    """Return the object stored in the pickle file at `path`.

    NOTE(review): unpickling executes arbitrary code, so these files must come
    from a trusted source.
    """
    with open(path, 'rb') as handle:
        return pkl.load(handle)


# Transformers saved by an earlier step; only .transform() is called on them here.
scaler = _load_pickle('scaler.pkl')
selectkbest_f_regression = _load_pickle('selectkbest_f_regression.pkl')
selectkbest_mutual_info_regression = _load_pickle('selectkbest_mutual_info_regression.pkl')

In [None]:
# Scaled copies of both splits, produced by the loaded scaler.
X_train_scaled_arr, X_test_scaled_arr = (
    scaler.transform(split) for split in (X_train, X_test)
)

# SelectKBest (f_regression) applied to the original, unscaled frames.
X_train_f_reg_arr, X_test_f_reg_arr = (
    selectkbest_f_regression.transform(split) for split in (X_train, X_test)
)

# SelectKBest (mutual_info_regression) applied to the original, unscaled frames.
X_train_mi_reg_arr, X_test_mi_reg_arr = (
    selectkbest_mutual_info_regression.transform(split) for split in (X_train, X_test)
)

In [None]:
# Label the scaled arrays with the column names of the frame they were
# computed from, instead of a hand-maintained copy of the 74 feature names
# that silently goes stale (or raises) whenever the CSV schema changes.
# NOTE(review): assumes the loaded scaler keeps the column order of its input
# (true for plain scikit-learn scalers) - confirm for scaler.pkl.
columns_scaled = list(X_train.columns)
X_train_scaled = pd.DataFrame(X_train_scaled_arr, columns=columns_scaled)
X_test_scaled = pd.DataFrame(X_test_scaled_arr, columns=columns_scaled)

# WITHOUT SCALED TRAINING DATA

**BUILD THE MODEL**

**Hyperparameter Tuning**

In [None]:
# Scorer used by the hyperparameter search. MSE is an error (lower is better),
# so it is declared with greater_is_better=False: make_scorer then returns the
# negated MSE, and maximising the score minimises the error. With the default
# (greater_is_better=True) a maximiser would look for the WORST model.
score_acc = make_scorer(mean_squared_error, greater_is_better=False)

In [None]:
def function1(neurons, activation, optimizer, learning_rate, batch_size, epochs,
              layers1, layers2, normalization, dropout, dropout_rate):
    """Bayesian-optimisation objective for the unscaled training data.

    Builds a feed-forward regression network from the proposed hyperparameters
    and scores it with 5-fold cross-validation on ``X_train`` / ``y_train``.

    Parameters
    ----------
    neurons : float
        Width of every Dense layer (rounded to an int).
    activation : float
        Rounded and used as an index into ``activationL`` (both entries 'relu').
    optimizer : float
        Rounded and used as an index into ``optimizerL`` (both entries 'Adam').
    learning_rate : float
        Learning rate handed to Adam.
    batch_size, epochs : float
        Rounded and passed to the scikit-learn wrapper.
    layers1 : float
        Number of Dense layers added before the optional Dropout (rounded).
    layers2 : float
        Number of Dense layers added after the optional Dropout (rounded).
    normalization : float
        A BatchNormalization layer is inserted when this is > 0.5.
    dropout : float
        A Dropout layer is inserted when this is > 0.5.
    dropout_rate : float
        Rate of that Dropout layer.

    Returns
    -------
    float
        Mean *negative* MSE over the folds. Higher is better, so
        ``BayesianOptimization.maximize`` minimises the error.
    """
    optimizerL = ['Adam', 'Adam'] #Fixing the optimizer to Adam
    activationL = ['relu', 'relu'] #Fixing the activation to relu
    neurons = round(neurons)
    activation = activationL[round(activation)]
    optimizer_name = optimizerL[round(optimizer)]
    batch_size = round(batch_size)
    epochs = round(epochs)
    layers1 = round(layers1)
    layers2 = round(layers2)
    def nn_cl_fun():
        # Build the optimizer here so that every fold's model gets its own
        # instance; sharing one optimizer object across models carries its
        # state from one fold into the next.
        optimizerD = {'Adam': Adam(learning_rate=learning_rate)}
        nn = Sequential()
        # Input width follows the data instead of a hard-coded 74.
        nn.add(Dense(neurons, input_dim=X_train.shape[1], activation=activation))
        if normalization > 0.5:
            nn.add(BatchNormalization())
        for i in range(layers1):
            nn.add(Dense(neurons, activation=activation))
        if dropout > 0.5:
            nn.add(Dropout(dropout_rate, seed=2023))
        for i in range(layers2):
            nn.add(Dense(neurons, activation=activation))
        nn.add(Dense(1))
        nn.compile(loss='mse', optimizer=optimizerD[optimizer_name], metrics=['mse'])
        return nn
    es = EarlyStopping(monitor='mse', mode='min', verbose=0, patience=20)
    # The target is a continuous price: use the regressor wrapper (the
    # classifier wrapper turns network outputs into class labels) and plain
    # KFold (stratification is meant for class labels).
    nn = KerasRegressor(build_fn=nn_cl_fun, epochs=epochs, batch_size=batch_size, verbose=0)
    kfold = KFold(n_splits=5, shuffle=True, random_state=2023)
    # 'neg_mean_squared_error' keeps the "higher is better" convention the optimiser expects.
    score = cross_val_score(nn, X_train, y_train, scoring='neg_mean_squared_error', cv=kfold, fit_params={'callbacks':[es]}).mean()
    return score

In [None]:
# Search bounds (low, high) for each argument of function1.
params_nn1 = dict(
    neurons=(10, 100),
    activation=(0.01, 1),
    optimizer=(0, 1),
    learning_rate=(0.01, 1),
    batch_size=(200, 1000),
    epochs=(20, 100),
    layers1=(1, 5),
    layers2=(1, 5),
    normalization=(0, 1),
    dropout=(0, 1),
    dropout_rate=(0, 0.3),
)
# Bayesian optimisation: 25 initial random probes, then 4 guided iterations.
nn_bo1 = BayesianOptimization(f=function1, pbounds=params_nn1, random_state=2023)
nn_bo1.maximize(init_points=25, n_iter=4)

|   iter    |  target   | activa... | batch_... |  dropout  | dropou... |  epochs   |  layers1  |  layers2  | learni... |  neurons  | normal... | optimizer |
-------------------------------------------------------------------------------------------------------------------------------------------------------------
| [0m1        [0m | [0m2.22e+13 [0m | [0m0.3288   [0m | [0m912.3    [0m | [0m0.5881   [0m | [0m0.03798  [0m | [0m31.31    [0m | [0m2.872    [0m | [0m1.088    [0m | [0m0.73     [0m | [0m57.19    [0m | [0m0.5449   [0m | [0m0.4564   [0m |
| [0m2        [0m | [0m2.22e+13 [0m | [0m0.5064   [0m | [0m515.6    [0m | [0m0.1512   [0m | [0m0.1083   [0m | [0m32.97    [0m | [0m2.352    [0m | [0m1.721    [0m | [0m0.3971   [0m | [0m13.21    [0m | [0m0.5649   [0m | [0m0.2035   [0m |
| [95m3        [0m | [95m2.224e+13[0m | [95m0.3274   [0m | [95m501.3    [0m | [95m0.1841   [0m | [95m0.03119  [0m | [95m56.39    [0m | [95m1.783 

**BEST PARAMS**

In [None]:
# Take the best point found by the search and decode its continuous values
# into usable hyperparameters (ints, activation name, optimizer object).
params_nn_ = nn_bo1.max['params']
learning_rate = params_nn_['learning_rate']
activationL = ['relu', 'relu']
optimizerL = ['Adam', 'Adam']
optimizerD = {'Adam': Adam(lr=learning_rate)}
params_nn_.update({
    'activation': activationL[round(params_nn_['activation'])],
    'optimizer': optimizerD[optimizerL[round(params_nn_['optimizer'])]],
    # Count-like hyperparameters are rounded to the nearest integer.
    **{name: round(params_nn_[name])
       for name in ('batch_size', 'epochs', 'layers1', 'layers2', 'neurons')},
})
params_nn_

{'activation': 'relu',
 'batch_size': 501,
 'dropout': 0.1840541418275664,
 'dropout_rate': 0.031185550905878476,
 'epochs': 56,
 'layers1': 2,
 'layers2': 3,
 'learning_rate': 0.931226639688736,
 'neurons': 78,
 'normalization': 0.7707642376587938,
 'optimizer': <keras.optimizers.legacy.adam.Adam at 0x12183e51220>}

**MODEL**

In [None]:
# Rebuild the architecture selected by the search and fit it on the full
# unscaled training set.
model_1 = Sequential()
# Input width follows the data instead of a hard-coded 74.
model_1.add(Dense(params_nn_['neurons'], input_dim=X_train.shape[1], activation=params_nn_['activation']))
if params_nn_['normalization'] > 0.5:
    model_1.add(BatchNormalization())
for i in range(params_nn_['layers1']):
    model_1.add(Dense(params_nn_['neurons'], activation=params_nn_['activation']))
if params_nn_['dropout'] > 0.5:
    model_1.add(Dropout(params_nn_['dropout_rate'], seed=2023))
for i in range(params_nn_['layers2']):
    model_1.add(Dense(params_nn_['neurons'], activation=params_nn_['activation']))
model_1.add(Dense(1))
# Compile with the tuned learning rate; optimizer='adam' would silently fall
# back to Adam's default learning rate and discard that part of the search.
# A fresh Adam instance is created so the cell can be re-run safely.
model_1.compile(loss='mse', optimizer=Adam(learning_rate=params_nn_['learning_rate']), metrics=['mse'])

model_1.fit(X_train, y_train, epochs=params_nn_['epochs'], batch_size=params_nn_['batch_size'],verbose=1)

Epoch 1/56
Epoch 2/56
Epoch 3/56
Epoch 4/56
Epoch 5/56
Epoch 6/56
Epoch 7/56
Epoch 8/56
Epoch 9/56
Epoch 10/56
Epoch 11/56
Epoch 12/56
Epoch 13/56
Epoch 14/56
Epoch 15/56
Epoch 16/56
Epoch 17/56
Epoch 18/56
Epoch 19/56
Epoch 20/56
Epoch 21/56
Epoch 22/56
Epoch 23/56
Epoch 24/56
Epoch 25/56
Epoch 26/56
Epoch 27/56
Epoch 28/56
Epoch 29/56
Epoch 30/56
Epoch 31/56
Epoch 32/56
Epoch 33/56
Epoch 34/56
Epoch 35/56
Epoch 36/56
Epoch 37/56
Epoch 38/56
Epoch 39/56
Epoch 40/56
Epoch 41/56
Epoch 42/56
Epoch 43/56
Epoch 44/56
Epoch 45/56
Epoch 46/56
Epoch 47/56
Epoch 48/56
Epoch 49/56
Epoch 50/56
Epoch 51/56
Epoch 52/56
Epoch 53/56
Epoch 54/56
Epoch 55/56
Epoch 56/56


<keras.callbacks.History at 0x121809cf160>

**EVALUATION**

In [None]:
# Evaluate model_1 (trained on the unscaled features) on the held-out test set.
predicted_prices = model_1.predict(X_test)
mse = mean_squared_error(y_test, predicted_prices)

# RMSE is the square root of the MSE computed above. The `squared=False`
# argument of mean_squared_error is deprecated and absent from newer
# scikit-learn releases, so it is not used here.
rmse = np.sqrt(mse)

# Calculate mean absolute error
mae = mean_absolute_error(y_test, predicted_prices)

# Calculate coefficient of determination (R-squared)
r2 = r2_score(y_test, predicted_prices)

# y_test is a one-column DataFrame, so std()/max()/min() print as Series.
print('Standard Deviation:', y_test.std())
print('Range:', y_test.max() - y_test.min())
print('Mean squared error:', mse)
print('Mean absolute error:', mae)
print('RMSE:', rmse)
print('R-squared:', r2)
print('Percentage of error compared to SD:', mae/y_test.std() * 100)

Standard Deviation: price    5.275527e+06
dtype: float64
Range: price    342535000
dtype: int64
Mean squared error: 594849691614.395
Mean absolute error: 554803.5541750158
RMSE: 771264.9944178687
R-squared: 0.9786219984129205
Percentage of error compared to SD: price    10.516552
dtype: float64


# **With SCALED training data**

**Hyperparameter Tuning**

In [None]:
def function2(neurons, activation, optimizer, learning_rate, batch_size, epochs,
              layers1, layers2, normalization, dropout, dropout_rate):
    """Bayesian-optimisation objective for the scaled training data.

    Builds a feed-forward regression network from the proposed hyperparameters
    and scores it with 5-fold cross-validation on ``X_train_scaled`` /
    ``y_train``.

    Parameters
    ----------
    neurons : float
        Width of every Dense layer (rounded to an int).
    activation : float
        Rounded and used as an index into ``activationL`` (both entries 'relu').
    optimizer : float
        Rounded and used as an index into ``optimizerL`` (both entries 'Adam').
    learning_rate : float
        Learning rate handed to Adam.
    batch_size, epochs : float
        Rounded and passed to the scikit-learn wrapper.
    layers1 : float
        Number of Dense layers added before the optional Dropout (rounded).
    layers2 : float
        Number of Dense layers added after the optional Dropout (rounded).
    normalization : float
        A BatchNormalization layer is inserted when this is > 0.5.
    dropout : float
        A Dropout layer is inserted when this is > 0.5.
    dropout_rate : float
        Rate of that Dropout layer.

    Returns
    -------
    float
        Mean *negative* MSE over the folds. Higher is better, so
        ``BayesianOptimization.maximize`` minimises the error.
    """
    optimizerL = ['Adam', 'Adam'] #Fixing optimizer to Adam
    activationL = ['relu', 'relu'] #Fixing activation to relu
    neurons = round(neurons)
    activation = activationL[round(activation)]
    optimizer_name = optimizerL[round(optimizer)]
    batch_size = round(batch_size)
    epochs = round(epochs)
    layers1 = round(layers1)
    layers2 = round(layers2)
    def nn_cl_fun():
        # Build the optimizer here so that every fold's model gets its own
        # instance; sharing one optimizer object across models carries its
        # state from one fold into the next.
        optimizerD = {'Adam': Adam(learning_rate=learning_rate)}
        nn = Sequential()
        # Input width follows the data instead of a hard-coded 74.
        nn.add(Dense(neurons, input_dim=X_train_scaled.shape[1], activation=activation))
        if normalization > 0.5:
            nn.add(BatchNormalization())
        for i in range(layers1):
            nn.add(Dense(neurons, activation=activation))
        if dropout > 0.5:
            nn.add(Dropout(dropout_rate, seed=2023))
        for i in range(layers2):
            nn.add(Dense(neurons, activation=activation))
        nn.add(Dense(1))
        nn.compile(loss='mse', optimizer=optimizerD[optimizer_name], metrics=['mse'])
        return nn
    es = EarlyStopping(monitor='mse', mode='min', verbose=0, patience=20)
    # The target is a continuous price: use the regressor wrapper (the
    # classifier wrapper turns network outputs into class labels) and plain
    # KFold (stratification is meant for class labels).
    nn = KerasRegressor(build_fn=nn_cl_fun, epochs=epochs, batch_size=batch_size, verbose=0)
    kfold = KFold(n_splits=5, shuffle=True, random_state=2023)
    # 'neg_mean_squared_error' keeps the "higher is better" convention the optimiser expects.
    score = cross_val_score(nn, X_train_scaled, y_train, scoring='neg_mean_squared_error', cv=kfold, fit_params={'callbacks':[es]}).mean()
    return score

In [None]:
# Search bounds (low, high) for each argument of function2.
params_nn2 = dict(
    neurons=(10, 100),
    activation=(0.01, 1),
    optimizer=(0, 1),
    learning_rate=(0.01, 1),
    batch_size=(200, 1000),
    epochs=(20, 100),
    layers1=(1, 5),
    layers2=(1, 5),
    normalization=(0, 1),
    dropout=(0, 1),
    dropout_rate=(0, 0.3),
)
# Bayesian optimisation: 25 initial random probes, then 4 guided iterations.
nn_bo2 = BayesianOptimization(f=function2, pbounds=params_nn2, random_state=2023)
nn_bo2.maximize(init_points=25, n_iter=4)

|   iter    |  target   | activa... | batch_... |  dropout  | dropou... |  epochs   |  layers1  |  layers2  | learni... |  neurons  | normal... | optimizer |
-------------------------------------------------------------------------------------------------------------------------------------------------------------
| [0m1        [0m | [0m2.222e+13[0m | [0m0.3288   [0m | [0m912.3    [0m | [0m0.5881   [0m | [0m0.03798  [0m | [0m31.31    [0m | [0m2.872    [0m | [0m1.088    [0m | [0m0.73     [0m | [0m57.19    [0m | [0m0.5449   [0m | [0m0.4564   [0m |
| [0m2        [0m | [0m2.22e+13 [0m | [0m0.5064   [0m | [0m515.6    [0m | [0m0.1512   [0m | [0m0.1083   [0m | [0m32.97    [0m | [0m2.352    [0m | [0m1.721    [0m | [0m0.3971   [0m | [0m13.21    [0m | [0m0.5649   [0m | [0m0.2035   [0m |
| [95m3        [0m | [95m2.224e+13[0m | [95m0.3274   [0m | [95m501.3    [0m | [95m0.1841   [0m | [95m0.03119  [0m | [95m56.39    [0m | [95m1.783 

**BEST PARAMS**

In [None]:
# Take the best point found by the search and decode its continuous values
# into usable hyperparameters (ints, activation name, optimizer object).
params_nn_2 = nn_bo2.max['params']
learning_rate = params_nn_2['learning_rate']
activationL = ['relu', 'relu']
optimizerL = ['Adam', 'Adam']
optimizerD = {'Adam': Adam(lr=learning_rate)}
params_nn_2.update({
    'activation': activationL[round(params_nn_2['activation'])],
    'optimizer': optimizerD[optimizerL[round(params_nn_2['optimizer'])]],
    # Count-like hyperparameters are rounded to the nearest integer.
    **{name: round(params_nn_2[name])
       for name in ('batch_size', 'epochs', 'layers1', 'layers2', 'neurons')},
})
params_nn_2

{'activation': 'relu',
 'batch_size': 250,
 'dropout': 0.7314779358819499,
 'dropout_rate': 0.22015325553707069,
 'epochs': 67,
 'layers1': 4,
 'layers2': 4,
 'learning_rate': 0.7781105957058365,
 'neurons': 72,
 'normalization': 0.7978975925046432,
 'optimizer': <keras.optimizers.legacy.adam.Adam at 0x121fe925310>}

**MODEL**

In [None]:
# Rebuild the architecture selected by the search and fit it on the full
# scaled training set.
model_2 = Sequential()
# Input width follows the data instead of a hard-coded 74.
model_2.add(Dense(params_nn_2['neurons'], input_dim=X_train_scaled.shape[1], activation=params_nn_2['activation']))
if params_nn_2['normalization'] > 0.5:
    model_2.add(BatchNormalization())
for i in range(params_nn_2['layers1']):
    model_2.add(Dense(params_nn_2['neurons'], activation=params_nn_2['activation']))
if params_nn_2['dropout'] > 0.5:
    model_2.add(Dropout(params_nn_2['dropout_rate'], seed=2023))
for i in range(params_nn_2['layers2']):
    model_2.add(Dense(params_nn_2['neurons'], activation=params_nn_2['activation']))
model_2.add(Dense(1))
# Compile with the tuned learning rate; optimizer='adam' would silently fall
# back to Adam's default learning rate and discard that part of the search.
# A fresh Adam instance is created so the cell can be re-run safely.
model_2.compile(loss='mse', optimizer=Adam(learning_rate=params_nn_2['learning_rate']), metrics=['mse'])

model_2.fit(X_train_scaled, y_train, epochs=params_nn_2['epochs'], batch_size=params_nn_2['batch_size'], verbose=1)

Epoch 1/67
Epoch 2/67
Epoch 3/67
Epoch 4/67
Epoch 5/67
Epoch 6/67
Epoch 7/67
Epoch 8/67
Epoch 9/67
Epoch 10/67
Epoch 11/67
Epoch 12/67
Epoch 13/67
Epoch 14/67
Epoch 15/67
Epoch 16/67
Epoch 17/67
Epoch 18/67
Epoch 19/67
Epoch 20/67
Epoch 21/67
Epoch 22/67
Epoch 23/67
Epoch 24/67
Epoch 25/67
Epoch 26/67
Epoch 27/67
Epoch 28/67
Epoch 29/67
Epoch 30/67
Epoch 31/67
Epoch 32/67
Epoch 33/67
Epoch 34/67
Epoch 35/67
Epoch 36/67
Epoch 37/67
Epoch 38/67
Epoch 39/67
Epoch 40/67
Epoch 41/67
Epoch 42/67
Epoch 43/67
Epoch 44/67
Epoch 45/67
Epoch 46/67
Epoch 47/67
Epoch 48/67
Epoch 49/67
Epoch 50/67
Epoch 51/67
Epoch 52/67
Epoch 53/67
Epoch 54/67
Epoch 55/67
Epoch 56/67
Epoch 57/67
Epoch 58/67
Epoch 59/67
Epoch 60/67
Epoch 61/67
Epoch 62/67
Epoch 63/67
Epoch 64/67
Epoch 65/67
Epoch 66/67
Epoch 67/67


<keras.callbacks.History at 0x121802d8c70>

**EVALUATION**

In [None]:
# Evaluate model_2 (trained on the scaled features) on the scaled test set.
predicted_prices2 = model_2.predict(X_test_scaled)
mse2 = mean_squared_error(y_test, predicted_prices2)

# Calculate mean absolute error
mae2 = mean_absolute_error(y_test, predicted_prices2)

# RMSE is the square root of the MSE computed above. The `squared=False`
# argument of mean_squared_error is deprecated and absent from newer
# scikit-learn releases, so it is not used here.
rmse2 = np.sqrt(mse2)

# Calculate coefficient of determination (R-squared)
r2_2 = r2_score(y_test, predicted_prices2)

# y_test is a one-column DataFrame, so std()/max()/min() print as Series.
print('Standard Deviation:', y_test.std())
print('Range:', y_test.max() - y_test.min())
print('Mean squared error:', mse2)
print('Mean absolute error:', mae2)
print('RMSE:', rmse2)
print('R-squared:', r2_2)
print('Percentage of error compared to SD:', mae2/y_test.std() * 100)

Standard Deviation: price    5.275527e+06
dtype: float64
Range: price    342535000
dtype: int64
Mean squared error: 17739932846785.258
Mean absolute error: 449198.51814622444
RMSE: 4211879.965856726
R-squared: 0.36245354431636856
Percentage of error compared to SD: price    8.514761
dtype: float64


# With SelectKBest F Regression Training Data

In [None]:
# Wrap the f_regression-selected training features in a DataFrame (columns get
# default integer labels) and display it.
X_train_f_reg_df = pd.DataFrame(data=X_train_f_reg_arr)
X_train_f_reg_df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20
0,10.0,53.0,0.209648,4.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0
1,18.0,67.0,0.048992,4.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
2,10.0,87.0,0.209648,4.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0
3,6.0,141.0,0.197689,4.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
4,11.0,85.0,0.182874,4.8,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
18957,11.0,128.0,0.350559,4.8,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0
18958,19.0,46.0,0.066560,4.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0
18959,19.0,71.0,0.140742,4.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0
18960,17.0,63.0,0.087684,4.8,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0


**Hyperparameter Tuning**

In [None]:
def function3(neurons, activation, optimizer, learning_rate, batch_size, epochs,
              layers1, layers2, normalization, dropout, dropout_rate):
    """Bayesian-optimisation objective for the SelectKBest f_regression features.

    Builds a feed-forward regression network from the proposed hyperparameters
    and scores it with 5-fold cross-validation on ``X_train_f_reg_df`` /
    ``y_train``.

    Parameters
    ----------
    neurons : float
        Width of every Dense layer (rounded to an int).
    activation : float
        Rounded and used as an index into ``activationL`` (both entries 'relu').
    optimizer : float
        Rounded and used as an index into ``optimizerL`` (both entries 'Adam').
    learning_rate : float
        Learning rate handed to Adam.
    batch_size, epochs : float
        Rounded and passed to the scikit-learn wrapper.
    layers1 : float
        Number of Dense layers added before the optional Dropout (rounded).
    layers2 : float
        Number of Dense layers added after the optional Dropout (rounded).
    normalization : float
        A BatchNormalization layer is inserted when this is > 0.5.
    dropout : float
        A Dropout layer is inserted when this is > 0.5.
    dropout_rate : float
        Rate of that Dropout layer.

    Returns
    -------
    float
        Mean *negative* MSE over the folds. Higher is better, so
        ``BayesianOptimization.maximize`` minimises the error.
    """
    optimizerL = ['Adam', 'Adam'] #Fixing optimizer to Adam
    activationL = ['relu', 'relu'] #Fixing activation to relu
    neurons = round(neurons)
    activation = activationL[round(activation)]
    optimizer_name = optimizerL[round(optimizer)]
    batch_size = round(batch_size)
    epochs = round(epochs)
    layers1 = round(layers1)
    layers2 = round(layers2)
    def nn_cl_fun():
        # Build the optimizer here so that every fold's model gets its own
        # instance; sharing one optimizer object across models carries its
        # state from one fold into the next.
        optimizerD = {'Adam': Adam(learning_rate=learning_rate)}
        nn = Sequential()
        # Input width follows the selected feature count instead of a hard-coded 21.
        nn.add(Dense(neurons, input_dim=X_train_f_reg_df.shape[1], activation=activation))
        if normalization > 0.5:
            nn.add(BatchNormalization())
        for i in range(layers1):
            nn.add(Dense(neurons, activation=activation))
        if dropout > 0.5:
            nn.add(Dropout(dropout_rate, seed=2023))
        for i in range(layers2):
            nn.add(Dense(neurons, activation=activation))
        nn.add(Dense(1))
        nn.compile(loss='mse', optimizer=optimizerD[optimizer_name], metrics=['mse'])
        return nn
    es = EarlyStopping(monitor='mse', mode='min', verbose=0, patience=20)
    # The target is a continuous price: use the regressor wrapper (the
    # classifier wrapper turns network outputs into class labels) and plain
    # KFold (stratification is meant for class labels).
    nn = KerasRegressor(build_fn=nn_cl_fun, epochs=epochs, batch_size=batch_size, verbose=0)
    kfold = KFold(n_splits=5, shuffle=True, random_state=2023)
    # 'neg_mean_squared_error' keeps the "higher is better" convention the optimiser expects.
    score = cross_val_score(nn, X_train_f_reg_df, y_train, scoring='neg_mean_squared_error', cv=kfold, fit_params={'callbacks':[es]}).mean()
    return score

In [None]:
# Search bounds (low, high) for each argument of function3.
params_nn3 = dict(
    neurons=(10, 100),
    activation=(0.01, 1),
    optimizer=(0, 1),
    learning_rate=(0.01, 1),
    batch_size=(200, 1000),
    epochs=(20, 100),
    layers1=(1, 5),
    layers2=(1, 5),
    normalization=(0, 1),
    dropout=(0, 1),
    dropout_rate=(0, 0.3),
)
# Bayesian optimisation: 25 initial random probes, then 4 guided iterations.
nn_bo3 = BayesianOptimization(f=function3, pbounds=params_nn3, random_state=2023)
nn_bo3.maximize(init_points=25, n_iter=4)

|   iter    |  target   | activa... | batch_... |  dropout  | dropou... |  epochs   |  layers1  |  layers2  | learni... |  neurons  | normal... | optimizer |
-------------------------------------------------------------------------------------------------------------------------------------------------------------
| [0m1        [0m | [0m2.224e+13[0m | [0m0.3288   [0m | [0m912.3    [0m | [0m0.5881   [0m | [0m0.03798  [0m | [0m31.31    [0m | [0m2.872    [0m | [0m1.088    [0m | [0m0.73     [0m | [0m57.19    [0m | [0m0.5449   [0m | [0m0.4564   [0m |
| [0m2        [0m | [0m2.22e+13 [0m | [0m0.5064   [0m | [0m515.6    [0m | [0m0.1512   [0m | [0m0.1083   [0m | [0m32.97    [0m | [0m2.352    [0m | [0m1.721    [0m | [0m0.3971   [0m | [0m13.21    [0m | [0m0.5649   [0m | [0m0.2035   [0m |
| [95m3        [0m | [95m2.224e+13[0m | [95m0.3274   [0m | [95m501.3    [0m | [95m0.1841   [0m | [95m0.03119  [0m | [95m56.39    [0m | [95m1.783 

**BEST PARAMS**

In [None]:
# Take the best point found by the search and decode its continuous values
# into usable hyperparameters (ints, activation name, optimizer object).
params_nn_3 = nn_bo3.max['params']
learning_rate = params_nn_3['learning_rate']
activationL = ['relu', 'relu']
optimizerL = ['Adam', 'Adam']
optimizerD = {'Adam': Adam(lr=learning_rate)}
params_nn_3.update({
    'activation': activationL[round(params_nn_3['activation'])],
    'optimizer': optimizerD[optimizerL[round(params_nn_3['optimizer'])]],
    # Count-like hyperparameters are rounded to the nearest integer.
    **{name: round(params_nn_3[name])
       for name in ('batch_size', 'epochs', 'layers1', 'layers2', 'neurons')},
})
params_nn_3

{'activation': 'relu',
 'batch_size': 501,
 'dropout': 0.1840541418275664,
 'dropout_rate': 0.031185550905878476,
 'epochs': 56,
 'layers1': 2,
 'layers2': 3,
 'learning_rate': 0.931226639688736,
 'neurons': 78,
 'normalization': 0.7707642376587938,
 'optimizer': <keras.optimizers.legacy.adam.Adam at 0x121806ac520>}

**MODEL**

In [None]:
# Rebuild the architecture selected by the search and fit it on the full
# SelectKBest f_regression training set.
model_3 = Sequential()
# Input width follows the selected feature count instead of a hard-coded 21.
model_3.add(Dense(params_nn_3['neurons'], input_dim=X_train_f_reg_df.shape[1], activation=params_nn_3['activation']))
if params_nn_3['normalization'] > 0.5:
    model_3.add(BatchNormalization())
for i in range(params_nn_3['layers1']):
    model_3.add(Dense(params_nn_3['neurons'], activation=params_nn_3['activation']))
if params_nn_3['dropout'] > 0.5:
    model_3.add(Dropout(params_nn_3['dropout_rate'], seed=2023))
for i in range(params_nn_3['layers2']):
    model_3.add(Dense(params_nn_3['neurons'], activation=params_nn_3['activation']))
model_3.add(Dense(1))
# Compile with the tuned learning rate; optimizer='adam' would silently fall
# back to Adam's default learning rate and discard that part of the search.
# A fresh Adam instance is created so the cell can be re-run safely.
model_3.compile(loss='mse', optimizer=Adam(learning_rate=params_nn_3['learning_rate']), metrics=['mse'])

model_3.fit(X_train_f_reg_df, y_train, epochs=params_nn_3['epochs'], batch_size=params_nn_3['batch_size'],verbose=1)

Epoch 1/56
Epoch 2/56
Epoch 3/56
Epoch 4/56
Epoch 5/56
Epoch 6/56
Epoch 7/56
Epoch 8/56
Epoch 9/56
Epoch 10/56
Epoch 11/56
Epoch 12/56
Epoch 13/56
Epoch 14/56
Epoch 15/56
Epoch 16/56
Epoch 17/56
Epoch 18/56
Epoch 19/56
Epoch 20/56
Epoch 21/56
Epoch 22/56
Epoch 23/56
Epoch 24/56
Epoch 25/56
Epoch 26/56
Epoch 27/56
Epoch 28/56
Epoch 29/56
Epoch 30/56
Epoch 31/56
Epoch 32/56
Epoch 33/56
Epoch 34/56
Epoch 35/56
Epoch 36/56
Epoch 37/56
Epoch 38/56
Epoch 39/56
Epoch 40/56
Epoch 41/56
Epoch 42/56
Epoch 43/56
Epoch 44/56
Epoch 45/56
Epoch 46/56
Epoch 47/56
Epoch 48/56
Epoch 49/56
Epoch 50/56
Epoch 51/56
Epoch 52/56
Epoch 53/56
Epoch 54/56
Epoch 55/56
Epoch 56/56


<keras.callbacks.History at 0x12180951cd0>

**EVALUATION**

In [None]:
# Evaluate model_3 (trained on the SelectKBest f_regression features) on the
# matching selection of the test set.
X_test_f_reg_df = pd.DataFrame(X_test_f_reg_arr)
predicted_prices3 = model_3.predict(X_test_f_reg_df)
mse3 = mean_squared_error(y_test, predicted_prices3)

# RMSE is the square root of the MSE computed above. The `squared=False`
# argument of mean_squared_error is deprecated and absent from newer
# scikit-learn releases, so it is not used here.
rmse3 = np.sqrt(mse3)

# Calculate mean absolute error
mae3 = mean_absolute_error(y_test, predicted_prices3)

# Calculate coefficient of determination (R-squared)
r2_3 = r2_score(y_test, predicted_prices3)

# y_test is a one-column DataFrame, so std()/max()/min() print as Series.
print('Standard Deviation:', y_test.std())
print('Range:', y_test.max() - y_test.min())
print('Mean squared error:', mse3)
print('Mean absolute error:', mae3)
print('RMSE:', rmse3)
print('R-squared:', r2_3)
print('Percentage of error compared to SD:', mae3/y_test.std() * 100)

Standard Deviation: price    5.275527e+06
dtype: float64
Range: price    342535000
dtype: int64
Mean squared error: 900300898706.1031
Mean absolute error: 459639.27273386414
RMSE: 948841.8723402247
R-squared: 0.9676445422890719
Percentage of error compared to SD: price    8.71267
dtype: float64


# With SelectKBest Mutual Info Regression Training Data

In [None]:
# Wrap the mutual_info_regression-selected training features in a DataFrame
# (columns get default integer labels) and display it.
X_train_mi_reg_df = pd.DataFrame(data=X_train_mi_reg_arr)
X_train_mi_reg_df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20
0,10.0,53.0,6.0,0.209648,52.000000,18041.0,41.000000,26.0,21.0,4.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0
1,18.0,67.0,3.0,0.048992,14.022488,1541.0,13.027174,16.0,21.0,4.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
2,10.0,87.0,1.0,0.209648,14.022488,18041.0,13.027174,46.0,21.0,4.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0
3,6.0,141.0,5.0,0.197689,14.022488,3579.0,2.000000,56.0,18.0,4.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
4,11.0,85.0,4.0,0.182874,5.000000,10121.0,13.027174,26.0,22.0,4.8,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
18957,11.0,128.0,9.0,0.350559,14.022488,14151.0,13.027174,6.0,19.0,4.8,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0
18958,19.0,46.0,5.0,0.066560,14.022488,17081.0,13.027174,6.0,22.0,4.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,1.0
18959,19.0,71.0,12.0,0.140742,14.022488,17131.0,13.027174,46.0,20.0,4.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0
18960,17.0,63.0,2.0,0.087684,14.022488,6031.0,13.027174,56.0,23.0,4.8,0.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0


**Hyperparameter Tuning**

In [None]:
def function4(neurons, activation, optimizer, learning_rate, batch_size, epochs,
              layers1, layers2, normalization, dropout, dropout_rate):
    """Bayesian-optimisation objective for the SelectKBest mutual_info_regression features.

    Builds a feed-forward regression network from the proposed hyperparameters
    and scores it with 5-fold cross-validation on ``X_train_mi_reg_df`` /
    ``y_train``.

    Parameters
    ----------
    neurons : float
        Width of every Dense layer (rounded to an int).
    activation : float
        Rounded and used as an index into ``activationL`` (both entries 'relu').
    optimizer : float
        Rounded and used as an index into ``optimizerL`` (both entries 'Adam').
    learning_rate : float
        Learning rate handed to Adam.
    batch_size, epochs : float
        Rounded and passed to the scikit-learn wrapper.
    layers1 : float
        Number of Dense layers added before the optional Dropout (rounded).
    layers2 : float
        Number of Dense layers added after the optional Dropout (rounded).
    normalization : float
        A BatchNormalization layer is inserted when this is > 0.5.
    dropout : float
        A Dropout layer is inserted when this is > 0.5.
    dropout_rate : float
        Rate of that Dropout layer.

    Returns
    -------
    float
        Mean *negative* MSE over the folds. Higher is better, so
        ``BayesianOptimization.maximize`` minimises the error.
    """
    optimizerL = ['Adam', 'Adam'] #Fixing optimizer to Adam
    activationL = ['relu', 'relu'] #Fixing activation to relu
    neurons = round(neurons)
    activation = activationL[round(activation)]
    optimizer_name = optimizerL[round(optimizer)]
    batch_size = round(batch_size)
    epochs = round(epochs)
    layers1 = round(layers1)
    layers2 = round(layers2)
    def nn_cl_fun():
        # Build the optimizer here so that every fold's model gets its own
        # instance; sharing one optimizer object across models carries its
        # state from one fold into the next.
        optimizerD = {'Adam': Adam(learning_rate=learning_rate)}
        nn = Sequential()
        # Input width follows the selected feature count instead of a hard-coded 21.
        nn.add(Dense(neurons, input_dim=X_train_mi_reg_df.shape[1], activation=activation))
        if normalization > 0.5:
            nn.add(BatchNormalization())
        for i in range(layers1):
            nn.add(Dense(neurons, activation=activation))
        if dropout > 0.5:
            nn.add(Dropout(dropout_rate, seed=2023))
        for i in range(layers2):
            nn.add(Dense(neurons, activation=activation))
        nn.add(Dense(1))
        nn.compile(loss='mse', optimizer=optimizerD[optimizer_name], metrics=['mse'])
        return nn
    es = EarlyStopping(monitor='mse', mode='min', verbose=0, patience=20)
    # The target is a continuous price: use the regressor wrapper (the
    # classifier wrapper turns network outputs into class labels) and plain
    # KFold (stratification is meant for class labels).
    nn = KerasRegressor(build_fn=nn_cl_fun, epochs=epochs, batch_size=batch_size, verbose=0)
    kfold = KFold(n_splits=5, shuffle=True, random_state=2023)
    # 'neg_mean_squared_error' keeps the "higher is better" convention the optimiser expects.
    score = cross_val_score(nn, X_train_mi_reg_df, y_train, scoring='neg_mean_squared_error', cv=kfold, fit_params={'callbacks':[es]}).mean()
    return score

In [None]:
# Search bounds (low, high) for each argument of function4.
params_nn4 = dict(
    neurons=(10, 100),
    activation=(0.01, 1),
    optimizer=(0, 1),
    learning_rate=(0.01, 1),
    batch_size=(200, 1000),
    epochs=(20, 100),
    layers1=(1, 5),
    layers2=(1, 5),
    normalization=(0, 1),
    dropout=(0, 1),
    dropout_rate=(0, 0.3),
)
# Bayesian optimisation: 25 initial random probes, then 4 guided iterations.
nn_bo4 = BayesianOptimization(f=function4, pbounds=params_nn4, random_state=2023)
nn_bo4.maximize(init_points=25, n_iter=4)

|   iter    |  target   | activa... | batch_... |  dropout  | dropou... |  epochs   |  layers1  |  layers2  | learni... |  neurons  | normal... | optimizer |
-------------------------------------------------------------------------------------------------------------------------------------------------------------
| [0m1        [0m | [0m2.223e+13[0m | [0m0.3288   [0m | [0m912.3    [0m | [0m0.5881   [0m | [0m0.03798  [0m | [0m31.31    [0m | [0m2.872    [0m | [0m1.088    [0m | [0m0.73     [0m | [0m57.19    [0m | [0m0.5449   [0m | [0m0.4564   [0m |
| [0m2        [0m | [0m2.22e+13 [0m | [0m0.5064   [0m | [0m515.6    [0m | [0m0.1512   [0m | [0m0.1083   [0m | [0m32.97    [0m | [0m2.352    [0m | [0m1.721    [0m | [0m0.3971   [0m | [0m13.21    [0m | [0m0.5649   [0m | [0m0.2035   [0m |
| [95m3        [0m | [95m2.224e+13[0m | [95m0.3274   [0m | [95m501.3    [0m | [95m0.1841   [0m | [95m0.03119  [0m | [95m56.39    [0m | [95m1.783 

**BEST PARAMS**

In [None]:
# Take the best point found by the search and decode its continuous values
# into usable hyperparameters (ints, activation name, optimizer object).
params_nn_4 = nn_bo4.max['params']
learning_rate = params_nn_4['learning_rate']
activationL = ['relu', 'relu']
optimizerL = ['Adam', 'Adam']
optimizerD = {'Adam': Adam(lr=learning_rate)}
params_nn_4.update({
    'activation': activationL[round(params_nn_4['activation'])],
    'optimizer': optimizerD[optimizerL[round(params_nn_4['optimizer'])]],
    # Count-like hyperparameters are rounded to the nearest integer.
    **{name: round(params_nn_4[name])
       for name in ('batch_size', 'epochs', 'layers1', 'layers2', 'neurons')},
})
params_nn_4

{'activation': 'relu',
 'batch_size': 978,
 'dropout': 0.41131105432155857,
 'dropout_rate': 0.21649932098793595,
 'epochs': 73,
 'layers1': 2,
 'layers2': 2,
 'learning_rate': 0.732481446371382,
 'neurons': 88,
 'normalization': 0.39172036421755163,
 'optimizer': <keras.optimizers.legacy.adam.Adam at 0x121871249a0>}

**MODEL**

In [None]:
# Rebuild the architecture selected by the search and fit it on the full
# SelectKBest mutual_info_regression training set.
model_4 = Sequential()
# Input width follows the selected feature count instead of a hard-coded 21.
model_4.add(Dense(params_nn_4['neurons'], input_dim=X_train_mi_reg_df.shape[1], activation=params_nn_4['activation']))
if params_nn_4['normalization'] > 0.5:
    model_4.add(BatchNormalization())
for i in range(params_nn_4['layers1']):
    model_4.add(Dense(params_nn_4['neurons'], activation=params_nn_4['activation']))
if params_nn_4['dropout'] > 0.5:
    model_4.add(Dropout(params_nn_4['dropout_rate'], seed=2023))
for i in range(params_nn_4['layers2']):
    model_4.add(Dense(params_nn_4['neurons'], activation=params_nn_4['activation']))
model_4.add(Dense(1))
# Compile with the tuned learning rate; optimizer='adam' would silently fall
# back to Adam's default learning rate and discard that part of the search.
# A fresh Adam instance is created so the cell can be re-run safely.
model_4.compile(loss='mse', optimizer=Adam(learning_rate=params_nn_4['learning_rate']), metrics=['mse'])

model_4.fit(X_train_mi_reg_df, y_train, epochs=params_nn_4['epochs'], batch_size=params_nn_4['batch_size'],verbose=1)

Epoch 1/73
Epoch 2/73
Epoch 3/73
Epoch 4/73
Epoch 5/73
Epoch 6/73
Epoch 7/73
Epoch 8/73
Epoch 9/73
Epoch 10/73
Epoch 11/73
Epoch 12/73
Epoch 13/73
Epoch 14/73
Epoch 15/73
Epoch 16/73
Epoch 17/73
Epoch 18/73
Epoch 19/73
Epoch 20/73
Epoch 21/73
Epoch 22/73
Epoch 23/73
Epoch 24/73
Epoch 25/73
Epoch 26/73
Epoch 27/73
Epoch 28/73
Epoch 29/73
Epoch 30/73
Epoch 31/73
Epoch 32/73
Epoch 33/73
Epoch 34/73
Epoch 35/73
Epoch 36/73
Epoch 37/73
Epoch 38/73
Epoch 39/73
Epoch 40/73
Epoch 41/73
Epoch 42/73
Epoch 43/73
Epoch 44/73
Epoch 45/73
Epoch 46/73
Epoch 47/73
Epoch 48/73
Epoch 49/73
Epoch 50/73
Epoch 51/73
Epoch 52/73
Epoch 53/73
Epoch 54/73
Epoch 55/73
Epoch 56/73
Epoch 57/73
Epoch 58/73
Epoch 59/73
Epoch 60/73
Epoch 61/73
Epoch 62/73
Epoch 63/73
Epoch 64/73
Epoch 65/73
Epoch 66/73
Epoch 67/73
Epoch 68/73
Epoch 69/73
Epoch 70/73
Epoch 71/73
Epoch 72/73
Epoch 73/73


<keras.callbacks.History at 0x12191043220>

**EVALUATION**

In [None]:
# Evaluate model_4 (trained on the SelectKBest mutual_info_regression
# features) on the matching selection of the test set.
X_test_mi_reg_df = pd.DataFrame(X_test_mi_reg_arr)
predicted_prices4 = model_4.predict(X_test_mi_reg_df)
mse4 = mean_squared_error(y_test, predicted_prices4)

# RMSE is the square root of the MSE computed above. The `squared=False`
# argument of mean_squared_error is deprecated and absent from newer
# scikit-learn releases, so it is not used here.
rmse4 = np.sqrt(mse4)

# Calculate mean absolute error
mae4 = mean_absolute_error(y_test, predicted_prices4)

# Calculate coefficient of determination (R-squared)
r2_4 = r2_score(y_test, predicted_prices4)

# y_test is a one-column DataFrame, so std()/max()/min() print as Series.
print('Standard Deviation:', y_test.std())
print('Range:', y_test.max() - y_test.min())
print('Mean squared error:', mse4)
print('Mean absolute error:', mae4)
print('RMSE:', rmse4)
print('R-squared:', r2_4)
print('Percentage of error compared to SD:', mae4/y_test.std() * 100)

Standard Deviation: price    5.275527e+06
dtype: float64
Range: price    342535000
dtype: int64
Mean squared error: 7424866286318.158
Mean absolute error: 402564.63379297615
RMSE: 2724860.7829241767
R-squared: 0.7331614935833962
Percentage of error compared to SD: price    7.630794
dtype: float64


# EVALUATIONS COMBINED

In [None]:
def _print_metrics(label, mse_value, mae_value, rmse_value, r2_value):
    """Print one model's test metrics, every line prefixed with `label`.

    RRMSE is reported as the RMSE divided by the standard deviation of y_test.
    """
    print(f'\n{label}: Mean squared error:', mse_value)
    print(f'{label}: Mean absolute error:', mae_value)
    print(f'{label}: RMSE:', rmse_value)
    print(f'{label}: R-squared:', r2_value)
    print(f'{label}: RRMSE:', rmse_value/y_test.std())


# Spread of the test target, for context when reading the errors below.
print('Standard Deviation:', y_test.std())
print('Range:', y_test.max() - y_test.min())

# Side-by-side summary of the models evaluated in the sections above.
_print_metrics('Scaled Training Data', mse2, mae2, rmse2, r2_2)
_print_metrics('SelectKBest F Regressor', mse3, mae3, rmse3, r2_3)
_print_metrics('SelectKBest Mutual Info Regressor', mse4, mae4, rmse4, r2_4)

Standard Deviation: price    5.275527e+06
dtype: float64
Range: price    342535000
dtype: int64

Scaled Training Data: Mean squared error: 17739932846785.258
Scaled Training Data: Mean absolute error: 449198.51814622444
Scaled Training Data: RMSE: 4211879.965856726
Scaled Training Data: R-squared: 0.36245354431636856
Scaled Training Data: RRMSE: price    0.798381
dtype: float64

SelectKBest F Regressor: Mean squared error: 900300898706.1031
SelectKBest F Regressor: Mean absolute error: 459639.27273386414
SelectKBest F Regressor: RMSE: 948841.8723402247
SelectKBest F Regressor: R-squared: 0.9676445422890719
SelectKBest F Regressor: RRMSE: price    0.179857
dtype: float64

SelectKBest Mutual Info Regressor: Mean squared error: 7424866286318.158
SelectKBest Mutual Info Regressor: Mean absolute error: 402564.63379297615
SelectKBest Mutual Info Regressor: RMSE: 2724860.7829241767
SelectKBest Mutual Info Regressor: R-squared: 0.7331614935833962
SelectKBest Mutual Info Regressor: RRMSE: price