In [1]:
import tensorflow as tf
import pandas as pd
import matplotlib.pyplot as plt

### Load data

In [2]:
boston_data = pd.read_csv("https://raw.githubusercontent.com/dphi-official/Datasets/master/Boston_Housing/Training_set_boston.csv")
boston_data.head()

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT,MEDV
0,15.0234,0.0,18.1,0.0,0.614,5.304,97.3,2.1007,24.0,666.0,20.2,349.48,24.91,12.0
1,0.62739,0.0,8.14,0.0,0.538,5.834,56.5,4.4986,4.0,307.0,21.0,395.62,8.47,19.9
2,0.03466,35.0,6.06,0.0,0.4379,6.031,23.3,6.6407,1.0,304.0,16.9,362.25,7.83,19.4
3,7.05042,0.0,18.1,0.0,0.614,6.103,85.1,2.0218,24.0,666.0,20.2,2.52,23.29,13.4
4,0.7258,0.0,8.14,0.0,0.538,5.727,69.5,3.7965,4.0,307.0,21.0,390.95,11.28,18.2


In [3]:
# separate IV and DV (input and output)
X = boston_data.drop('MEDV', axis=1)
y = boston_data.MEDV

### Split the data

In [4]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [5]:
n_features = X.shape[1]
n_features

13

### Training

1. Define model
2. Compile model
3. Fit the model
4. Make predictions
5. Evaluate the model

In [6]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from numpy.random import seed

In [7]:
model = Sequential()
model.add(Dense(10, activation='relu', input_shape=(n_features,)))
model.add(Dense(8, activation='relu'))
model.add(Dense(1))

In [8]:
from tensorflow.keras.optimizers import RMSprop

lr = 0.01
optimizer = RMSprop(lr)

In [9]:
model.compile(loss='mean_squared_error', optimizer=optimizer)

In [10]:
seed_value = 42
seed(seed_value)

import os
os.environ['PYTHONHASHSEED']=str(seed_value)

import random
random.seed(seed_value)

tf.random.set_seed(seed_value)
model.fit(X_train,y_train, epochs=10, batch_size=32, verbose=1)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x20b8c1978c8>

In [11]:
model.evaluate(X_test, y_test)



77.77896881103516

### Hyperparameter tuning

Most problems require a lot of hyperparameter tuning. Few steps I've learned, experienced and found:
- **Training loss** should decrease, steep then slowly as it the slope reaches zero.
- If **training loss** does not converge, train for more epochs.
- If **training loss** decreases too slowly, increase `learning rate`.
- High `learning rate` may prevent the convergence of loss.
- If **training loss** has high variance, decrease `learning rate`.
- Lowering `learning rate` and increasing `epochs`/`batch size` can help loss convergence.
- Small `batch size` can cause instability, try large size then decrease as degradation begins to show.
- Fit as much training data as possible into memory by adjusting `batch size`.

#### Implementation of hyperparameter tuning with `sklearn`

1. Define the model
2. Define hyperparameters grid to be validated
3. Run `GridSearchCV` process
4. Select the best model.

In [12]:
from sklearn.model_selection import GridSearchCV
from tensorflow.keras import Sequential
from tensorflow.keras.wrappers.scikit_learn import KerasRegressor

def buildModel(optimizer=RMSprop(0.1)):
    model = Sequential()
    model.add(Dense(10, activation='relu', input_shape=(n_features,)))
    model.add(Dense(8, activation='relu'))
    model.add(Dense(1))
    model.compile(loss='mse', metrics=['mse'], optimizer=optimizer)
    return model

batch_size = [10, 20, 30, 40, 50]
epochs = [10, 100, 200]
param_grid = dict(batch_size=batch_size, epochs=epochs)

In [13]:
K_model = KerasRegressor(build_fn=buildModel, verbose=1)

grid = GridSearchCV(
    estimator=K_model, param_grid=param_grid,
    scoring="neg_mean_squared_error",
    cv=5, n_jobs=-1)

grid_result = grid.fit(X_train, y_train)

Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 74/200
Epoch 75/200
Epoch 76/200
Epoch 77/200
Epoch 78

In [14]:
print(f'Best params: {grid_result.best_params_}')

Best params: {'batch_size': 10, 'epochs': 200}


In [15]:
from sklearn.model_selection import cross_val_score

results = cross_val_score(grid.best_estimator_, X_test, y_test, cv=5)
print(results.mean())
print(results.std())

Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 74/200
Epoch 75/200
Epoch 76/200
Epoch 77/200
Epoch 78

#### Implementing hyperparameter tuning with Keras

1. Define model
2. Define hyperparameters
3. Run tuner process
4. Select best model/parameters

In [16]:
import kerastuner as kt

def buildModel(hp):
    model = Sequential(
        [
            Dense(10, activation='relu', input_shape=(n_features,)),
            Dense(8, activation='relu'),
            Dense(1)
        ]
    )
    hp_lr = hp.Choice('learning_rate', values=[1e-1, 1e-2, 1e-3, 1e-4])
    optimizer = RMSprop(learning_rate=hp_lr)
    model.compile(loss='mse', metrics=['mse'], optimizer=optimizer)
    return model

In [19]:
tuner_rs = kt.RandomSearch(
    buildModel,
    objective='mse',
    seed=42,
    max_trials=5,
    directory='random_search/test_01'
)

In [20]:
tuner_rs.search(X_train, y_train, epochs=10, validation_split=0.2, verbose=1)

Trial 4 Complete [00h 00m 01s]
mse: 80.45980072021484

Best mse So Far: 80.45980072021484
Total elapsed time: 00h 00m 04s
INFO:tensorflow:Oracle triggered exit


In [21]:
best_model = tuner_rs.get_best_models(num_models=1)
best_model

[<tensorflow.python.keras.engine.sequential.Sequential at 0x20b946f1e48>]

In [22]:
best_model = best_model[0]
best_model.evaluate(X_test, y_test)



[55.21622848510742, 55.21622848510742]

In [23]:
best_model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 10)                140       
_________________________________________________________________
dense_1 (Dense)              (None, 8)                 88        
_________________________________________________________________
dense_2 (Dense)              (None, 1)                 9         
Total params: 237
Trainable params: 237
Non-trainable params: 0
_________________________________________________________________
