In [97]:
import itertools
import pandas as pd
from keras import Sequential
from keras.src.layers import Dense, Input
from keras.src.optimizers import Adam
from sklearn.metrics import mean_absolute_error
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [93]:
# Load the housing data, separate the "Price" target, one-hot encode the
# "Neighborhood" column and hold out 20% of the rows as a test set.
SEED = 42  # fixed seed so the train/test partition is the same on every Restart & Run All

data = pd.read_csv("housing_price_dataset.csv")
labels = data["Price"]
data = data.drop("Price", axis=1)
print(f"Before dummies: {data.shape}")
data = pd.get_dummies(data, columns=["Neighborhood"], dtype=int)
print(f"Before split: {data.shape}")
# Without random_state the split is reshuffled on each run, so downstream
# metrics would not be comparable between executions.
data, test_data, labels, test_labels = train_test_split(
    data, labels, test_size=0.2, random_state=SEED
)
print(f"After split: {data.shape}")

Before dummies: (50000, 5)
Before split: (50000, 7)
After split: (40000, 7)


In [95]:
# Standardise the features: the scaler's statistics come from the training
# split only and are then applied unchanged to the held-out test split.
ss = StandardScaler()
ss.fit(data)

data = ss.transform(data)
test_data = ss.transform(test_data)

In [66]:
# Grid search over a perceptron with one hidden layer.
# Every combination tuple follows the key order of `params`:
# (epochs, learning_rate, batch_size, layer_size).
params = dict(
    epochs=[10, 15, 25],
    learning_rate=[0.001],
    batch_size=[8, 16],
    layer_size=[512, 1024],
)

# Cartesian product of all candidate values, materialised so it can be iterated more than once.
param_combinations = [*itertools.product(*params.values())]

print("All combinations:")
print(*param_combinations, sep="\n")

All combinations:
(10, 0.001, 8, 512)
(10, 0.001, 8, 1024)
(10, 0.001, 16, 512)
(10, 0.001, 16, 1024)
(15, 0.001, 8, 512)
(15, 0.001, 8, 1024)
(15, 0.001, 16, 512)
(15, 0.001, 16, 1024)
(25, 0.001, 8, 512)
(25, 0.001, 8, 1024)
(25, 0.001, 16, 512)
(25, 0.001, 16, 1024)


In [67]:
# Bookkeeping for the grid search: one row of `results` per trained model,
# with the fitted models themselves kept in `models` in the same order.
hyperparameter_columns = ["epochs", "learning_rate", "batch_size", "hidden_layer_size"]
metric_columns = ["mae", "loss", "val_mae", "val_loss"]

results = pd.DataFrame(columns=hyperparameter_columns + metric_columns)
models = []

In [68]:
# Grid search: train one single-hidden-layer regressor per hyper-parameter
# combination and record its last-epoch training/validation metrics.
# Each tuple is ordered like the keys of `params`:
# (epochs, learning_rate, batch_size, layer_size).
n_features = data.shape[1]  # input width taken from the training matrix rather than hard-coded

for epochs, learning_rate, batch_size, layer_size in param_combinations:
    model = Sequential([
        Input(shape=(n_features,)),
        Dense(units=layer_size, activation="relu"),
        Dense(units=1),  # single linear output: the predicted price
    ])

    # `metrics` is documented as a list of metrics, so pass a list rather than a bare string.
    model.compile(loss="mse", optimizer=Adam(learning_rate=learning_rate), metrics=["mae"])

    # validation_split=0.1 makes Keras hold out part of the training data for the val_* metrics.
    r = model.fit(data, labels, epochs=epochs, batch_size=batch_size, validation_split=0.1)

    # Keep only the final-epoch value of each tracked metric.
    final = {metric: values[-1] for metric, values in r.history.items()}
    row = [epochs, learning_rate, batch_size, layer_size,
           final["mae"], final["loss"], final["val_mae"], final["val_loss"]]

    # `models` and `results` are appended in lockstep so that a row's index
    # in `results` is also the position of its model in `models`.
    models.append(model)
    results.loc[len(results)] = row

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15

In [70]:
# Display the table of per-combination metrics collected by the grid search.
results

Unnamed: 0,epochs,learning_rate,batch_size,hidden_layer_size,mae,loss,val_mae,val_loss
0,10.0,0.001,8.0,512.0,39928.644531,2498813000.0,39274.695312,2435763000.0
1,10.0,0.001,8.0,1024.0,39929.035156,2498493000.0,39268.140625,2434915000.0
2,10.0,0.001,16.0,512.0,39965.015625,2504259000.0,39315.355469,2438877000.0
3,10.0,0.001,16.0,1024.0,39918.117188,2497333000.0,39264.070312,2435350000.0
4,15.0,0.001,8.0,512.0,39927.617188,2498795000.0,39267.128906,2435404000.0
5,15.0,0.001,8.0,1024.0,39922.21875,2498165000.0,39256.773438,2434730000.0
6,15.0,0.001,16.0,512.0,39906.769531,2496432000.0,39258.820312,2435258000.0
7,15.0,0.001,16.0,1024.0,39912.128906,2496564000.0,39266.027344,2435016000.0
8,25.0,0.001,8.0,512.0,39921.742188,2497817000.0,39255.148438,2434577000.0
9,25.0,0.001,8.0,1024.0,39921.648438,2497756000.0,39267.4375,2434915000.0


In [103]:
# Pick the run with the lowest final validation MAE. `results` is filled one row
# per model in the same order as `models`, so its index label is also the list position.
best = results["val_mae"].idxmin()

best_model = models[best]
predictions = best_model.predict(test_data).ravel()  # flatten the single-output column to 1-D

# Side-by-side view of true and predicted prices; the frame takes the index of test_labels.
df = pd.DataFrame({"true": test_labels, "pred": predictions})

df



Unnamed: 0,true,pred
13283,233789.961348,190912.234375
175,354268.475247,306929.125000
40033,114850.946697,152850.281250
43140,292396.891566,231222.828125
4417,175360.049774,153361.531250
...,...,...
11991,-4910.415323,144974.250000
3644,112637.059289,180517.343750
38356,342591.732777,311957.250000
37543,158719.810064,180922.906250


In [105]:
# Score the selected model's predictions against the true prices of the test split.
test_mae = mean_absolute_error(df["true"], df["pred"])
print(f"Mean absolute error: {test_mae}")

Mean absolute error: 40122.61707166073
