# Keras & Regression

_This notebook contains some sample code from chapter 10._


# Setup

In [None]:
!pip install tensorflow

### Import Needed Modules

In [None]:
# Common imports
import pandas as pd
import numpy as np

# TensorFlow required imports
import tensorflow as tf
from tensorflow import keras

# Sklearn modules
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GridSearchCV

# To plot pretty figures
import matplotlib as mpl
import matplotlib.pyplot as plt

In [None]:
# helper that plots the learning curves recorded while fitting a neural network
def plot_learning_curve(history):
    """Plot every series stored in ``history.history`` on a single figure.

    ``history`` is any object whose ``history`` attribute maps a series name
    to a sequence of values (for example the object returned by ``model.fit``).
    The y-axis is clamped to the smallest and largest value found across all
    series so the curves fill the plot.
    """
    recorded = history.history
    axes = pd.DataFrame(recorded).plot(figsize=(8, 5))
    axes.grid(True)

    # overall extremes across every recorded series
    lowest = min(min(series) for series in recorded.values())
    highest = max(max(series) for series in recorded.values())
    axes.set_ylim(lowest, highest)

    axes.set_xlabel("EPOCHS")
    plt.show()

In [None]:
# define a function that will create AND compile a Sequential model with n_hidden layers
# of n_neurons each, trained with SGD at the given learning_rate
# the model defaults to using ReLU activation and is currently designed for 1 output (Regression)
def build_RegMLP_model(n_hidden=1, n_neurons=30, learning_rate=1e-3, input_shape=[8]):
    """Build and compile a regression MLP.

    Parameters
    ----------
    n_hidden : int
        Number of hidden Dense layers to stack.
    n_neurons : int
        Number of units in each hidden layer.
    learning_rate : float
        Learning rate handed to the SGD optimizer.
    input_shape : sequence of int
        Shape of one input sample (without the batch dimension).

    Returns
    -------
    A compiled ``keras.models.Sequential`` model with a single linear output
    unit and mean-squared-error loss.
    """
    model = keras.models.Sequential()
    model.add(keras.layers.Flatten(input_shape=input_shape))
    for _ in range(n_hidden):
        model.add(keras.layers.Dense(n_neurons, activation="relu"))
    # single output neuron with no activation: the raw value is the prediction
    model.add(keras.layers.Dense(1))
    # pass `learning_rate`, not the deprecated `lr` alias, so the requested
    # value is actually applied by the optimizer
    optimizer = keras.optimizers.SGD(learning_rate=learning_rate)
    model.compile(loss="mean_squared_error", optimizer=optimizer)
    return model

# Regression MLP

In [None]:
# seed both NumPy's and TensorFlow's random generators with the same fixed
# value so this example is as repeatable as possible
np.random.seed(42)
tf.random.set_seed(42)

Let's load, split and scale the California housing dataset (the original one, not the modified one as in chapter 2):

In [None]:
# fetch the California housing dataset bundled with scikit-learn
from sklearn.datasets import fetch_california_housing
housing = fetch_california_housing()
housing.feature_names  # housing is a Bunch; display the names of its feature columns

In [None]:
# first hold out a test set from the complete dataset (features and target)
X_train_full, X_test, y_train_full, y_test = train_test_split(
    housing.data,
    housing.target,
    random_state=42,
)

# then carve a validation set out of what remains for training
X_train, X_valid, y_train, y_valid = train_test_split(
    X_train_full,
    y_train_full,
    random_state=42,
)

In [None]:
# inspect the dimensions of the validation split
X_valid.shape

In [None]:
# scale all data
# the scaler is fit on the training split only; the validation and test splits
# are then transformed with that same fitted scaler
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_valid = scaler.transform(X_valid)
X_test = scaler.transform(X_test)

In [None]:
# build a Sequential model with 3 layers
# the 1st layer (Flatten) accepts the input data (which is already a single dimensional array per sample)
# the 2nd layer is Dense with 30 neurons and uses ReLU activation
# the last layer - the output layer - is just one neuron because a regression model only needs a single output value
# note too that the last layer does not have an activation function (not needed for regression)
model = keras.models.Sequential([
    keras.layers.Flatten(input_shape=X_train.shape[1:]),
    keras.layers.Dense(30, activation="relu"),
    keras.layers.Dense(1)
])

In [None]:
# examine the model structure (the layers that were just stacked above)
model.summary()

In [None]:
# compile the model
# because this is a regression model, our loss function is mean_squared_error
# note that we are setting the learning rate for the SGD optimizer; it is passed
# as `learning_rate` (not the deprecated `lr` alias) so the value is actually applied
model.compile(loss="mean_squared_error", optimizer=keras.optimizers.SGD(learning_rate=1e-3))

In [None]:
# fit the model for 20 epochs and capture the details of the fit in a variable called history
# the validation set is passed so validation loss is tracked alongside training loss;
# it is optional and the model can be fit without it
history = model.fit(X_train, y_train, epochs=20, validation_data=(X_valid, y_valid))

In [None]:
# plot the loss learning curves captured in history by the fit above
plot_learning_curve(history)

### Evaluate Model using the Test Set

In [None]:
# evaluate on the Test set
# the model was compiled with mean_squared_error as its only loss, hence the name mse_test
mse_test = model.evaluate(X_test, y_test)

In [None]:
# predict 3 test examples (rows 5, 6 and 7 of the scaled test set)
X_new = X_test[5:8]
y_pred = model.predict(X_new)
y_pred

In [None]:
# the actual target values for the same 3 test examples, for comparison with y_pred
y_test[5:8]

# Hyperparameter Tuning

In [None]:
# reset the Keras backend before the tuning section starts building new models
keras.backend.clear_session()

# re-apply the same random seed value used in the first example
np.random.seed(42)
tf.random.set_seed(42)

In [None]:
# to do hypertuning of parameters, need to create several models using different values for
# learning rate, # of layers, and # of neurons

# to begin this, need to use the Keras Regressor - this tool basically handles processing multiple
# models by fitting and reporting on the results of each model
# we are sending to this function the function called build_RegMLP_model - a function that constructs
# the layers and neurons for each model configuration
# NOTE(review): keras.wrappers.scikit_learn appears to have been removed from recent
# TensorFlow releases - confirm the installed version still provides it (the separate
# SciKeras package is the usual replacement)
keras_reg = keras.wrappers.scikit_learn.KerasRegressor(build_RegMLP_model)

In [None]:
# fit the data to the model
# no hyperparameters are passed here, so build_RegMLP_model runs with its defaults
# (1 hidden layer, 30 neurons, learning rate 1e-3) - the same architecture as the model above
# NOTE that we expect the same results on the test set as we got above (see next cell)
keras_reg.fit(X_train, y_train, epochs=20,
              validation_data=(X_valid, y_valid))

In [None]:
 # use the Regressor score function to evaluate performance on the test set
 # NOTE(review): the wrapper's score is believed to return the NEGATED loss (so that
 # higher is better for scikit-learn) - if so, mse_test here is negative; confirm
mse_test = keras_reg.score(X_test, y_test)

In [None]:
# build a parameter grid - the model will be fit against all of these parameter combinations
# 3 x 3 x 3 = 27 combinations, each cross-validated on 3 folds (81 fits of 20 epochs),
# so this cell takes a while to run
param_distribs = {
    "n_hidden": [2, 3, 4],
    "n_neurons": [30, 40, 50],
    "learning_rate": [.01,.005, .001],
}

# the extra keyword arguments to fit (epochs, validation_data) are the same ones used
# for the single-model fits earlier in the notebook
grid_search = GridSearchCV(keras_reg, param_distribs, cv=3, verbose=2)
grid_search.fit(X_train, y_train, epochs=20,
                  validation_data=(X_valid, y_valid) )

In [None]:
# the parameter combination that scored best in the grid search
grid_search.best_params_

In [None]:
# the cross-validated score of that best combination
# NOTE(review): presumably a negated MSE (higher is better) - confirm against the wrapper's score()
grid_search.best_score_

In [None]:
# build a fresh, compiled (but not yet trained) model from the best hyperparameters found
best_model = build_RegMLP_model(**grid_search.best_params_)

In [None]:
# examine the structure of the tuned model
best_model.summary()

In [None]:
# train the tuned model with the same data and epoch count as the first model,
# capturing the fit details in history (this overwrites the earlier history variable)
history = best_model.fit(X_train, y_train, epochs=20, validation_data=(X_valid, y_valid))

In [None]:
# plot the loss learning curves of the tuned model
plot_learning_curve(history)

### Evaluate Model using the Test Set

In [None]:
# evaluate the tuned model on the Test set (this overwrites the earlier mse_test value)
mse_test = best_model.evaluate(X_test, y_test)

In [None]:
# predict the same 3 test examples as before, this time with the tuned model
X_new = X_test[5:8]
y_pred = best_model.predict(X_new)
y_pred

In [None]:
# the actual target values for those 3 test examples, for comparison with y_pred
y_test[5:8]