### Load and prepare data

In [7]:
from keras.datasets import boston_housing # Predict average house prices base on properties.

(train_data,train_targets), (test_data,test_targets) = boston_housing.load_data()

'''training data: (404 x 13), testing data: (102 x 13). 13 properties for each entry, eg. crime rate, average number of rooms.
Targets are (404 x 1) and (102 x 1) for train and test respectively, with each entry the numerical value (in thousands).'''

'training data: (404 x 13), testing data: (102 x 13). 13 properties for each entry, eg. crime rate, average number of rooms.\nTargets are (404 x 1) and (102 x 1) for train and test respectively, with each entry the numerical value (in thousands).'

In [8]:
import numpy as np

# Normalise each entries data into z values (minus mean, divide by std dev) to reduce input activations and speed learning.
# eg. normalise across all crime rates, and room numbers, etc...
def z_normalisation(data,args=None):
    if args is None:
        data = (data - np.mean(data,axis=0)) / np.std(data,axis=0)
    else:
        mu,sigma = args[0],args[1]
        data = (data - mu) / sigma
    return data

train_data_z = z_normalisation(train_data)
test_data_z = z_normalisation(test_data,args=[np.mean(train_data,axis=0),np.std(train_data,axis=0)])

### Building the model - build as a function for quick repetition

In [9]:
from keras import layers
from keras import models

def build_model(hidden_layers=2, neurons=[13,64,64,1], activations=['relu','relu'],
                optimiser='RMSprop', loss='mse', metrics='mae'):
    assert len(neurons) == hidden_layers + 2, 'Layer number mismatch'
    layer = [None] * len(neurons)
    layer[0] = layers.Input(shape=(neurons[0],))
    for i in range(1,hidden_layers+1):
        layer[i] = layers.Dense(neurons[i],activation=activations[i-1])(layer[i-1])
    layer[-1] = layers.Dense(neurons[-1])(layer[-2]) 
    # Output layer has no activation function (returns true value), note this requires `ReLU` functions to do this (no squashing).
    model = models.Model(inputs=layer[0],outputs=layer[-1])
    model.compile(optimizer=optimiser,loss=loss,metrics=[metrics])
    return  model

### Validate approach with K-fold validation

Better for small training sets, as validation set performance has larger variance, making model evaluation more difficult.

Therefore training data split into 'K' partitions and trained on 'K-1', while validated on 1 partition. The validation score is then the average over each partition.

In [10]:
import tensorflow as tf
gpus = tf.config.experimental.list_physical_devices('GPU')
print(gpus)
if gpus:
    try:
        # Restrict TensorFlow to only use the fourth GPU
        tf.config.experimental.set_visible_devices(gpus[0], 'GPU')

        # Currently, memory growth needs to be the same across GPUs
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
        logical_gpus = tf.config.experimental.list_logical_devices('GPU')
        print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")
    except RuntimeError as e:
        # Memory growth must be set before GPUs have been initialized
        print(e)

[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]
Physical devices cannot be modified after being initialized


In [11]:
K = 4

n_epochs = 100
batch = 10
samples_per_part = len(train_data_z) // K

MAEs = []
for i in range(K):
    print('Running fold {0} of {1}'.format(i+1,K))
    validate_x = train_data_z[i*samples_per_part:(i+1)*samples_per_part]
    validate_y = train_targets[i*samples_per_part:(i+1)*samples_per_part]
    partial_x = [vctr for j,vctr in enumerate(train_data_z) if j not in np.arange(i*samples_per_part,(i+1)*samples_per_part)]
    partial_y = [num for j,num in enumerate(train_targets) if j not in np.arange(i*samples_per_part,(i+1)*samples_per_part)]
    
    model = build_model()
    history = model.fit(np.squeeze(partial_x), partial_y, 
              validation_data=(validate_x,validate_y),
              epochs=n_epochs, batch_size=batch, verbose=1)
    mae = history.history['val_mae']
    MAEs.append(mae)
final_mean_mae = np.mean(MAEs)
mean_per_epoch = np.mean(MAEs, axis=0)

Running fold 1 of 4
Train on 303 samples, validate on 101 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
 10/303 [..............................] - ETA: 0s - loss: nan - mae: nan

KeyboardInterrupt: 

In [None]:
import matplotlib.pyplot as plt

plt.plot(np.arange(n_epochs),mean_per_epoch)
plt.ylabel('Validation MAE')
plt.xlabel('Epoch number')
plt.show()

In [None]:
final_mean_mae

### You would then retrain the model with all training set having optimised parameters with (iterative K-fold validation.