# Selecting hyperparameters


In [12]:
import tensorflow as tf
# Run tf.function code eagerly; this works around some issues seen when running on Google Colab.
# NOTE(review): eager execution is usually slower than graph mode — confirm it is still needed.
tf.config.run_functions_eagerly(True)


## Import all needed libraries

In [2]:
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import accuracy_score, classification_report
from tensorflow.keras.datasets import mnist
from tensorflow.keras import layers, models
from tensorflow.keras.utils import to_categorical
from abc import ABC, abstractmethod

## Implementing the interface and grid search for the Random Forest classifier

This code defines an abstract classifier interface for MNIST models.  
RandomForestMnistClassifier extends it and uses **GridSearchCV** ([docs](https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.GridSearchCV.html)) to find the best hyperparameters for RandomForestClassifier. After tuning, it trains the best model on the full dataset and uses it for predictions.


In [5]:
class MnistClassifierInterface(ABC):
    """Common contract shared by every MNIST classifier in this notebook.

    Subclasses must override both ``train`` and ``predict``; the interface
    itself cannot be instantiated.
    """

    @abstractmethod
    def train(self, X_train, y_train):
        """Fit the classifier on ``X_train`` using the labels ``y_train``."""

    @abstractmethod
    def predict(self, X_test):
        """Return the classifier's predictions for ``X_test``."""

class RandomForestMnistClassifier(MnistClassifierInterface):
    """Random forest MNIST classifier tuned with an exhaustive grid search."""

    def __init__(self):
        # Base estimator handed to GridSearchCV in train().
        self.model = RandomForestClassifier(n_jobs=-1)
        # Best estimator found by train(); stays None until train() has run.
        self.best = None

    def train(self, X_train, y_train):
        """Grid-search the forest hyperparameters and keep the best estimator.

        The winning estimator is printed and stored in ``self.best``.

        Args:
            X_train: Training samples passed to ``GridSearchCV.fit``.
            y_train: Training labels passed to ``GridSearchCV.fit``.
        """
        params = {
            'n_estimators': [100, 200],
            'max_depth': [10, 20, None],
            'min_samples_split': [2, 4],
            'min_samples_leaf': [1, 2],
            'bootstrap': [True, False],
        }
        gs = GridSearchCV(
            self.model,
            params,
            scoring='accuracy',
            n_jobs=-1,
            cv=3,
            verbose=2,
            refit=True,
        )
        gs.fit(X_train, y_train)
        # With refit=True the search has already refit the best candidate on all
        # of X_train, so fitting best_estimator_ again would only repeat that work.
        self.best = gs.best_estimator_
        print(self.best)

    def predict(self, X_test):
        """Predict labels for ``X_test`` with the estimator chosen by ``train``.

        Raises:
            NotFittedError: If ``train`` has not been called yet. The exception
                subclasses ``AttributeError``, which is what the bare ``None``
                access used to raise.
        """
        if self.best is None:
            from sklearn.exceptions import NotFittedError
            raise NotFittedError(
                "RandomForestMnistClassifier is not trained yet; call train() before predict()."
            )
        return self.best.predict(X_test)

### Data preparation

Here I prepare data for training. This format is suitable for **all** models.

In [3]:
# Load MNIST, flatten every image into a single row and scale the pixel values by 1/255.
(X_train, y_train), (X_test, y_test) = mnist.load_data()
X_train = X_train.reshape(X_train.shape[0], -1) / 255.0
X_test = X_test.reshape(X_test.shape[0], -1) / 255.0

# Hold out 10% of the training data. The fixed seed keeps the split identical
# across re-runs, and stratifying keeps the label proportions equal in both parts.
X_train, X_val, y_train, y_val = train_test_split(
    X_train,
    y_train,
    test_size=0.1,
    random_state=42,
    stratify=y_train,
)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz
[1m11490434/11490434[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


### The best params for Random forest and accuracy evaluation

In [None]:
# Tune and fit the random forest, then score it on the test set.
rf = RandomForestMnistClassifier()
rf.train(X_train, y_train)
y_pred_rf = rf.predict(X_test)

rf_report = classification_report(y_test, y_pred_rf)
rf_accuracy = accuracy_score(y_test, y_pred_rf)
print("\nClassification Report:\n", rf_report)
print(f"Random Forest Test Accuracy: {rf_accuracy:.4f}")

Fitting 3 folds for each of 48 candidates, totalling 144 fits
RandomForestClassifier(bootstrap=False, n_estimators=200, n_jobs=-1)

Classification Report:
               precision    recall  f1-score   support

           0       0.98      0.99      0.98       980
           1       0.99      0.99      0.99      1135
           2       0.97      0.97      0.97      1032
           3       0.97      0.97      0.97      1010
           4       0.98      0.98      0.98       982
           5       0.98      0.97      0.98       892
           6       0.98      0.98      0.98       958
           7       0.97      0.97      0.97      1028
           8       0.96      0.96      0.96       974
           9       0.96      0.95      0.96      1009

    accuracy                           0.97     10000
   macro avg       0.97      0.97      0.97     10000
weighted avg       0.97      0.97      0.97     10000

Random Forest Test Accuracy: 0.9739


## Implementing and finding parameters for Feed Forward neural network

FeedForwardMnistClassifier extends the interface above and uses [Keras Tuner](https://www.tensorflow.org/tutorials/keras/keras_tuner) (the tutorial even includes an example on a dataset similar to MNIST) with **Hyperband** to find the best hyperparameters for a feed-forward neural network. After tuning, it keeps the best model found during the search and uses it for predictions.


In [8]:
!pip install keras_tuner

Collecting keras_tuner
  Downloading keras_tuner-1.4.7-py3-none-any.whl.metadata (5.4 kB)
Collecting kt-legacy (from keras_tuner)
  Downloading kt_legacy-1.0.5-py3-none-any.whl.metadata (221 bytes)
Downloading keras_tuner-1.4.7-py3-none-any.whl (129 kB)
[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/129.1 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[90m╺[0m[90m━[0m [32m122.9/129.1 kB[0m [31m3.6 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m129.1/129.1 kB[0m [31m2.8 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading kt_legacy-1.0.5-py3-none-any.whl (9.6 kB)
Installing collected packages: kt-legacy, keras_tuner
Successfully installed keras_tuner-1.4.7 kt-legacy-1.0.5


In [9]:
import keras_tuner as kt

class FeedForwardMnistClassifier(MnistClassifierInterface):
    """Feed-forward MNIST classifier tuned with Keras Tuner's Hyperband search."""

    def __init__(self):
        # Best model found by train(); stays None until train() has run.
        self.model = None

    def build_model(self, hp):
        """Build and compile one candidate network for the tuner.

        Args:
            hp: Hyperparameter container supplied by the tuner. It is queried
                for the two hidden-layer widths, the two dropout rates and the
                optimizer name.

        Returns:
            The compiled ``Sequential`` model.
        """
        model = models.Sequential()
        model.add(layers.Dense(
            units=hp.Int('units1', min_value=128, max_value=256, step=32),
            activation='relu',
            input_shape=(28*28,),
        ))
        model.add(layers.Dropout(
            rate=hp.Float('dropout1', min_value=0.1, max_value=0.3, step=0.1),
        ))
        model.add(layers.Dense(
            units=hp.Int('units2', min_value=64, max_value=128, step=32),
            activation='relu',
        ))
        model.add(layers.Dropout(
            rate=hp.Float('dropout2', min_value=0.1, max_value=0.3, step=0.1),
        ))
        model.add(layers.Dense(10, activation='softmax'))
        model.compile(
            optimizer=hp.Choice('optimizer', values=['adam', 'rmsprop']),
            loss='categorical_crossentropy',
            metrics=['accuracy'],
        )
        return model

    def train(self, X_train, y_train, epochs=10, batch_size=64):
        """Run the Hyperband search and keep the best model it produced.

        The best trial's hyperparameter values are printed, one per line.

        Args:
            X_train: Training samples, one flattened image per row.
            y_train: Class labels; they are one-hot encoded into 10 classes here.
            epochs: Used both as Hyperband's ``max_epochs`` and as the
                ``epochs`` argument of the search.
            batch_size: Batch size forwarded to the search.
        """
        y_train = to_categorical(y_train, 10)
        tuner = kt.Hyperband(
            self.build_model,
            objective='val_accuracy',
            max_epochs=epochs,
            factor=3,
            directory='mybin1',
            project_name='mnist_tuning',
        )
        # 20% of the training data is split off to compute val_accuracy.
        tuner.search(
            X_train,
            y_train,
            epochs=epochs,
            batch_size=batch_size,
            validation_split=0.2,
        )
        best_hp = tuner.oracle.get_best_trials(num_trials=1)[0].hyperparameters
        for hp_name in best_hp.values:
            print(f"{hp_name}: {best_hp.get(hp_name)}")
        self.model = tuner.get_best_models(num_models=1)[0]

    def predict(self, X_test):
        """Return the predicted class index for every row of ``X_test``.

        Raises:
            NotFittedError: If ``train`` has not been called yet. The exception
                subclasses ``AttributeError``, which is what the bare ``None``
                access used to raise.
        """
        if self.model is None:
            from sklearn.exceptions import NotFittedError
            raise NotFittedError(
                "FeedForwardMnistClassifier is not trained yet; call train() before predict()."
            )
        # The model outputs one score per class; argmax picks the winning class.
        return np.argmax(self.model.predict(X_test), axis=1)


### The best params for Feed-Forward Neural Network and accuracy evaluation

In [None]:
# Run the hyperparameter search for the feed-forward network; train() prints
# the best trial's hyperparameter values when the search ends.
classifier = FeedForwardMnistClassifier()
classifier.train(X_train, y_train)

Trial 30 Complete [00h 00m 39s]
val_accuracy: 0.9779629707336426

Best val_accuracy So Far: 0.9790740609169006
Total elapsed time: 00h 08m 30s
units1: 192
dropout1: 0.1
units2: 96
dropout2: 0.1
optimizer: adam
tuner/epochs: 10
tuner/initial_epoch: 4
tuner/bracket: 1
tuner/round: 1
tuner/trial_id: 0019


## Implementing and finding parameters for Convolutional neural network

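The CNN classifier follows the same approach as above: it extends the interface and uses Keras Tuner with **Hyperband** to search for the best hyperparameters.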

In [6]:
class CNNMnistClassifier(MnistClassifierInterface):
    """Convolutional MNIST classifier tuned with Keras Tuner's Hyperband search."""

    def __init__(self):
        # Best model found by train(); stays None until train() has run.
        self.model = None

    def build_model(self, hp):
        """Build and compile one candidate CNN for the tuner.

        Args:
            hp: Hyperparameter container supplied by the tuner. It is queried
                for the filter counts and kernel sizes of both convolutions,
                the dense-layer width, the dropout rate and the optimizer name.

        Returns:
            The compiled ``Sequential`` model.
        """
        model = models.Sequential()
        # Inputs arrive as flat 784-vectors; turn them back into 28x28x1 images.
        model.add(layers.Reshape((28, 28, 1), input_shape=(28*28,)))
        # Reshape already fixes the input shape, so Conv2D needs no input_shape of its own.
        model.add(layers.Conv2D(
            filters=hp.Int('filters1', min_value=32, max_value=128, step=32),
            kernel_size=hp.Choice('kernel_size1', values=[3, 5]),
            activation='relu',
        ))
        model.add(layers.MaxPooling2D(pool_size=2))
        model.add(layers.Conv2D(
            filters=hp.Int('filters2', min_value=64, max_value=128, step=32),
            kernel_size=hp.Choice('kernel_size2', values=[3, 5]),
            activation='relu',
        ))
        model.add(layers.MaxPooling2D(pool_size=2))
        model.add(layers.Flatten())
        model.add(layers.Dense(
            units=hp.Int('dense_units', min_value=64, max_value=256, step=64),
            activation='relu',
        ))
        model.add(layers.Dropout(
            rate=hp.Float('dropout', min_value=0.2, max_value=0.4, step=0.1),
        ))
        model.add(layers.Dense(10, activation='softmax'))
        model.compile(
            optimizer=hp.Choice('optimizer', values=['adam', 'rmsprop']),
            loss='categorical_crossentropy',
            metrics=['accuracy'],
        )
        return model

    def train(self, X_train, y_train, epochs=10, batch_size=64):
        """Run the Hyperband search and keep the best model it produced.

        Args:
            X_train: Training samples, one flattened image per row.
            y_train: Class labels; they are one-hot encoded into 10 classes here.
            epochs: Used both as Hyperband's ``max_epochs`` and as the
                ``epochs`` argument of the search.
            batch_size: Batch size forwarded to the search.
        """
        y_train = to_categorical(y_train, 10)
        tuner = kt.Hyperband(
            self.build_model,
            objective='val_accuracy',
            max_epochs=epochs,
            factor=3,
        )
        # 20% of the training data is split off to compute val_accuracy.
        tuner.search(
            X_train,
            y_train,
            epochs=epochs,
            batch_size=batch_size,
            validation_split=0.2,
            verbose=1,
        )
        self.model = tuner.get_best_models(num_models=1)[0]

    def predict(self, X_test):
        """Return the predicted class index for every row of ``X_test``.

        Raises:
            NotFittedError: If ``train`` has not been called yet. The exception
                subclasses ``AttributeError``, which is what the bare ``None``
                access used to raise.
        """
        if self.model is None:
            from sklearn.exceptions import NotFittedError
            raise NotFittedError(
                "CNNMnistClassifier is not trained yet; call train() before predict()."
            )
        # The model outputs one score per class; argmax picks the winning class.
        return np.argmax(self.model.predict(X_test), axis=1)


### The best params for Convolutional Neural Network and accuracy evaluation

Unfortunately, the search did not finish because the Google Colab runtime ended, but I still got some partial results.

In [None]:
# Run the hyperparameter search for the CNN and keep the best model.
# Note: this rebinds `classifier`, which previously held the feed-forward classifier.
classifier = CNNMnistClassifier()
classifier.train(X_train, y_train)

# NOTE(review): get_config() returns the model's configuration, so the loop below
# prints one config entry per layer rather than the tuner's hyperparameter values.
best_hps = classifier.model.get_config()
print("\nBest Hyperparameters:")
for layer in best_hps['layers']:
    print(layer)

Trial 21 Complete [00h 16m 34s]
val_accuracy: 0.9912037253379822

Best val_accuracy So Far: 0.9918518662452698
Total elapsed time: 01h 22m 25s

Search: Running Trial #22

Value             |Best Value So Far |Hyperparameter
32                |96                |filters1
3                 |3                 |kernel_size1
96                |128               |filters2
3                 |5                 |kernel_size2
64                |192               |dense_units
0.3               |0.3               |dropout
rmsprop           |adam              |optimizer
10                |4                 |tuner/epochs
4                 |0                 |tuner/initial_epoch
1                 |1                 |tuner/bracket
1                 |0                 |tuner/round
0014              |None              |tuner/trial_id

Epoch 5/10
[1m675/675[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 60ms/step - accuracy: 0.9883 - loss: 0.0383 - val_accuracy: 0.9886 - val_loss: 0.0444
Epoch 6