## Load libraries

In [None]:
import numpy as np
from sklearn import datasets
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import classification_report
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline

__Load and preprocess the digits data (scikit-learn's small, MNIST-like handwritten digits dataset)__

See Session 7 notebook for more details.

In [None]:
# Fetch scikit-learn's bundled handwritten digits dataset
digits = datasets.load_digits()

# Classifiers expect a 2-D (samples, features) matrix, so every image
# is flattened into a single row of pixel values
n_samples = digits.images.shape[0]
X = digits.images.reshape(n_samples, -1)
y = digits.target

# Hold out 20% of the samples for testing (80/20 split); the fixed
# random_state keeps the split reproducible across runs
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=0
)

__Initialise Gridsearch parameters__

In [None]:
# Single-step pipeline; the step name 'classifier' is the prefix used by
# the parameter-grid keys below
pipe = Pipeline(steps=[('classifier', LogisticRegression())])

# Candidate values for each tunable hyperparameter
penalties = ['l1', 'l2']  # regularization penalties to compare
C = [1.0, 0.1, 0.01]      # C values controlling regularization strength
solvers = ['liblinear']   # solvers to try - see the sklearn docs for others

# Parameter grid: keys follow the '<step name>__<parameter>' convention so
# that each value list is routed to the 'classifier' step of the pipeline
parameters = {
    'classifier__penalty': penalties,
    'classifier__C': C,
    'classifier__solver': solvers,
}

# Metrics to optimise for, one grid search per metric
scores = ['precision', 'recall', 'f1']

__Iterate over scoring types__

For example, we first optimise for the parameters which result in the best weighted precision score; next we optimise for weighted recall; and lastly for weighted F1.

This allows us to inspect the model in a more nuanced way, seeing how different parameters affect performance across different metrics.

In [None]:
# Run one grid search per metric in `scores`, report the cross-validated
# results for every parameter combination, then evaluate the best model
# on the held-out test set.
for score in scores:
    print(f"# Tuning hyper-parameters for {score}")
    print()

    # Initialise the grid search with the predefined pipeline and grid;
    # candidates are ranked by the class-weighted variant of the metric
    clf = GridSearchCV(pipe,
                       parameters,
                       scoring=f"{score}_weighted",
                       cv=10)  # use 10-fold cross-validation
    # Fit on the training split only; the test split stays untouched
    clf.fit(X_train, y_train)

    # Print the best parameter combination found by cross-validation
    print("Best parameters set found on training data:")
    # blank lines separate the sections of the report
    print()
    print(clf.best_params_)
    print()
    print("Grid scores on training data:")
    print()
    # mean cross-validated score of every parameter combination
    means = clf.cv_results_['mean_test_score']
    # standard deviation of the score across folds
    stds = clf.cv_results_['std_test_score']
    # the parameter combinations themselves
    params = clf.cv_results_['params']

    # Print mean, spread, and parameters for every run. The spread is
    # reported as TWO standard deviations (roughly a 95% interval if the
    # fold scores were normally distributed - see the 68-95-99.7 rule),
    # so it is labelled "2SD" rather than "SD".
    for i, (mean, stdev, param) in enumerate(zip(means, stds, params)):
        print(f"Run {i}: {round(mean,3)} (±2SD={round(stdev*2, 3)}), using {param}")
    print()

    # Print a detailed per-class report on the held-out test set
    print("Detailed classification report:")
    print()
    print("The model is trained on the full training set.")
    print("The scores are computed on the full test set.")
    print()
    # clf.predict delegates to the estimator refit with the best parameters
    # (GridSearchCV's default refit behaviour)
    y_true, y_pred = y_test, clf.predict(X_test)
    print(classification_report(y_true, y_pred))
    print()

## More complex DL models

__Load tools from ```tf.keras```__

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import SGD, Adam
from tensorflow.keras.wrappers.scikit_learn import KerasClassifier

__Inspect image shapes for input layer size__

In [None]:
# Shape of a single flattened sample, i.e. the size the input layer must accept
X.shape[1:]

__Define model__

We begin by defining our model, just as we normally would. The only difference is that we wrap the whole thing in a function definition.

In [None]:
def nn_model(optimizer='adam', input_dim=64, n_classes=10):
    """Build and compile a small fully connected classification network.

    The architecture is input -> Dense(32, ReLU) -> Dense(16, ReLU) ->
    Dense(n_classes, softmax), compiled with categorical cross-entropy
    loss and accuracy as the reported metric.

    Parameters
    ----------
    optimizer : str or optimizer instance, default 'adam'
        Passed straight through to ``model.compile``.
    input_dim : int, default 64
        Number of features per sample (size of the input layer).
    n_classes : int, default 10
        Number of output classes (size of the softmax layer).

    Returns
    -------
    The compiled ``Sequential`` model.
    """
    # create a sequential model
    model = Sequential()
    # input layer of `input_dim` nodes feeding a hidden layer of 32, ReLU activation
    model.add(Dense(32, input_shape=(input_dim,), activation="relu"))
    # hidden layer of 16 nodes, ReLU activation
    model.add(Dense(16, activation="relu"))
    # classification layer, one node per class with softmax activation
    model.add(Dense(n_classes, activation="softmax"))
    # categorical cross-entropy, optimizer defined in function call
    model.compile(loss="categorical_crossentropy",
                  optimizer=optimizer,
                  metrics=["accuracy"])

    # return the compiled model
    return model

__Create model for ```sklearn```__

We take the predefined neural network model above and run it through ```KerasClassifier```. This returns an object that can be used in the ```sklearn``` pipeline, just like a ```LogisticRegression()``` classifier, for example.

In [None]:
# Wrap the network-building function so that scikit-learn tooling can use
# the Keras model like any other estimator.
# NOTE(review): the `tensorflow.keras.wrappers.scikit_learn` wrapper is
# deprecated/removed in newer TensorFlow releases (SciKeras is its
# successor) - confirm the TensorFlow version this notebook is pinned to.
model = KerasClassifier(
    build_fn=nn_model,  # function that builds the model defined in nn_model
    verbose=0,          # set to 1 for verbose output during training
)

__Define grid search parameters__

In [None]:
# Hyperparameters explored by the grid search: optimizer, epochs, batch size
optimizers = ['sgd', 'adam']  # optimizers to compare
epochs = [5, 10]              # number of training epochs to try
batches = [5, 10]             # batch sizes to try

# Search grid mapping each parameter name to its candidate values
param_grid = {
    'optimizer': optimizers,
    'epochs': epochs,
    'batch_size': batches,
}

__Initialise Gridsearch with model and parameter grid__

In [None]:
# Grid search over the wrapped Keras model, ranked by accuracy
grid = GridSearchCV(
    model,
    param_grid,
    scoring='accuracy',
    cv=5,        # 5-fold cross-validation
    n_jobs=-1,   # use all available cores
)

__Fit to the data and labels__

In [None]:
# Run the search; cross-validation here is performed on the complete
# dataset (X, y) rather than on the training split used earlier
grid_result = grid.fit(X=X, y=y)

__Print best results__

In [None]:
# Report the best mean cross-validated score (rounded to 3 decimal places)
# with its parameter combination, followed by a blank line
print(
    f"Best run: {round(grid_result.best_score_,3)} using {grid_result.best_params_}",
    end="\n\n",
)

__Show all runs__

In [None]:
# mean cross-validated score of every parameter combination
means = grid_result.cv_results_['mean_test_score']
# standard deviation of the score across folds
stds = grid_result.cv_results_['std_test_score']
# the parameter combinations themselves
params = grid_result.cv_results_['params']

# Print mean, spread, and parameters for every run. The spread shown is
# TWO standard deviations of the fold scores, so it is labelled "2SD"
# rather than "SD" to avoid understating the actual standard deviation.
for i, (mean, stdev, param) in enumerate(zip(means, stds, params)):
    print(f"Run {i}: {round(mean,3)} (±2SD={round(2*stdev, 3)}), using {param}")