# Scott Breitbach
# Hyperparameter Tuning

In [1]:
# Load Libraries
import pandas as pd
import numpy as np

# Set random seed
np.random.seed(42)

### Load Data

In [2]:
# Load data to a DataFrame
beanDF = pd.read_excel('data/Dry_Bean_Dataset.xlsx')

In [3]:
# Drop least useful features
beanDF.drop(columns=['Extent', 'Solidity', 'Eccentricity', 'ShapeFactor3'], inplace=True)

## Model Selection & Evaluation
Used GridSearchCV or RandomizedSearchCV to find the best hyperparameters for each modeling type

In [4]:
# Load libraries
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV

### Split Training and Testing Data

In [5]:
# Set up features target sets
X = beanDF[beanDF.columns[:-1]]
y = beanDF.Class

# Split the data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size =0.3, 
                                                    random_state=42)

# Standardize Features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train) 
X_test  = scaler.transform(X_test)

In [6]:
# Set up dictionary for model results
models_dict = {}

### Logistic Regression

In [7]:
# Load libraries
from sklearn.linear_model import LogisticRegression

# Create logistic regression
logistic = LogisticRegression(class_weight='balanced', 
                              random_state=42, 
                              max_iter=300, 
                              multi_class='auto',
                              n_jobs=-1)

# Get a baseline model
baseline = logistic.fit(X_train, y_train)

# Create range of candidate penalty hyperparameter values
parameter_space = {
    'penalty': ['l1', 'l2', 'elasticnet', 'none'], 
    'tol': [1e-3, 1e-4, 1e-5],
    'C': np.logspace(0, 5, 6), 
    'solver': ['newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga'], 
}
grid = GridSearchCV(logistic, parameter_space, n_jobs=-1, cv=5)

In [8]:
%%time
# Fit models
grid_result = grid.fit(X_train, y_train)

 0.91823147 0.91802166 0.91833657        nan        nan        nan
 0.91455809 0.91455809 0.91455809 0.91466297 0.91466297 0.91466297
 0.91455803 0.91455803 0.91455803 0.91329874 0.91329874 0.91329874
 0.9144531  0.91455803 0.91455803 0.91466297 0.9144531  0.9144531
        nan        nan        nan        nan        nan        nan
        nan        nan        nan        nan        nan        nan
        nan        nan        nan 0.92012091 0.92012085 0.9201208
 0.91865147 0.91865147 0.91865147        nan        nan        nan
 0.91550269 0.91550269 0.91550269 0.91518779 0.91518779 0.91518779
        nan        nan        nan        nan        nan        nan
 0.91718188 0.91938633 0.91928123        nan        nan        nan
 0.91518773 0.91518773 0.91518773 0.91623744 0.91623744 0.91623744
 0.91623744 0.91623744 0.91623744 0.91728676 0.91728676 0.91728676
 0.91571267 0.91571267 0.91571267 0.91529267 0.91529267 0.91529267
        nan        nan        nan        nan        nan        n

Wall time: 55min 29s


In [9]:
# Show best parameters
print('Best parameters found:\n', grid_result.best_params_, '\n')
# Get accuracy scores
baseScore = round(baseline.score(X_test, y_test)*100, 2)
score = round(grid_result.score(X_test, y_test)*100, 2)
print(f"Baseline Accuracy:\t{baseScore}")
print(f"Tuned Accuracy:\t\t{score}")

Best parameters found:
 {'C': 1000.0, 'penalty': 'l1', 'solver': 'liblinear', 'tol': 0.0001} 

Baseline Accuracy:	91.92
Tuned Accuracy:		92.24


### Random Forest

In [10]:
# Load libraries
from sklearn.ensemble import RandomForestClassifier

# Create classifier
rfc = RandomForestClassifier(random_state=42,
                             class_weight='balanced', 
                             n_jobs=-1)

# Get a baseline model
baseline = rfc.fit(X_train, y_train)

# Create range of candidate penalty hyperparameter values
parameter_space = {
    'n_estimators': [30, 100, 300, 1000], 
    'criterion': ['gini', 'entropy'], 
    'max_features': ['sqrt', 'log2', None], 
}
grid = GridSearchCV(rfc, parameter_space, n_jobs=-1, cv=5)

In [11]:
%%time
# Fit models
grid_result = grid.fit(X_train, y_train)

Wall time: 11min 5s


In [12]:
# Show best parameters
print('Best parameters found:\n', grid_result.best_params_, '\n')
# Get accuracy scores
baseScore = round(baseline.score(X_test, y_test)*100, 2)
score = round(grid_result.score(X_test, y_test)*100, 2)
print(f"Baseline Accuracy:\t{baseScore}")
print(f"Tuned Accuracy:\t\t{score}")

Best parameters found:
 {'criterion': 'entropy', 'max_features': 'sqrt', 'n_estimators': 300} 

Baseline Accuracy:	92.07
Tuned Accuracy:		92.19


### Decision Tree

In [13]:
# Load libraries
from sklearn.tree import DecisionTreeClassifier

# Create decision tree regressor object
decisiontree = DecisionTreeClassifier(random_state=42, class_weight='balanced')

# Get a baseline model
baseline = decisiontree.fit(X_train, y_train)

# Create range of candidate penalty hyperparameter values
parameter_space = {
    'criterion': ['gini', 'entropy'], 
    'splitter': ['best', 'random'], 
    'max_features': ['sqrt', 'log2', None], 
}
grid = GridSearchCV(decisiontree, parameter_space, verbose=2, n_jobs=-1, cv=5)

In [14]:
%%time
# Fit models
grid_result = grid.fit(X_train, y_train)

Fitting 5 folds for each of 12 candidates, totalling 60 fits
Wall time: 1.24 s


In [15]:
# Show best parameters
print('Best parameters found:\n', grid_result.best_params_, '\n')
# Get accuracy scores
baseScore = round(baseline.score(X_test, y_test)*100, 2)
score = round(grid_result.score(X_test, y_test)*100, 2)
print(f"Baseline Accuracy:\t{baseScore}")
print(f"Tuned Accuracy:\t\t{score}")

Best parameters found:
 {'criterion': 'entropy', 'max_features': None, 'splitter': 'random'} 

Baseline Accuracy:	89.42
Tuned Accuracy:		88.96


In [15]:
# Show best parameters
print('Best parameters found:\n', grid_result.best_params_, '\n')
# Get accuracy scores
baseScore = round(baseline.score(X_test, y_test)*100, 2)
score = round(grid_result.score(X_test, y_test)*100, 2)
print(f"Baseline Accuracy:\t{baseScore}")
print(f"Tuned Accuracy:\t\t{score}")

Best parameters found:
 {'criterion': 'entropy', 'max_features': 'sqrt', 'splitter': 'best'} 

Baseline Accuracy:	89.42
Tuned Accuracy:		89.03


### AdaBoost

In [16]:
# Load libraries
from sklearn.ensemble import AdaBoostClassifier

# Create classifier
adaboost = AdaBoostClassifier(random_state=42)

# Get a baseline model
baseline = adaboost.fit(X_train, y_train)

# Create range of candidate penalty hyperparameter values
parameter_space = {
    'n_estimators': [10, 30, 100, 300, 1000], 
    'learning_rate': [0.1, 1, 10],
    'algorithm': ['SAMME', 'SAMME.R'], 
}
grid = GridSearchCV(adaboost, parameter_space, n_jobs=-1, cv=5)

In [17]:
%%time
# Fit models
grid_result = grid.fit(X_train, y_train)

 0.80539476 0.83488539 0.80256579 0.73024812 0.2585314         nan
        nan        nan        nan 0.45984858 0.52534618 0.59021287
 0.54812536 0.60323719 0.67177583 0.67177583 0.67177583 0.67177583
 0.67177583 0.39173082 0.48839482 0.52913314 0.51790985 0.48326418]


Wall time: 4min


In [18]:
# Show best parameters
print('Best parameters found:\n', grid_result.best_params_, '\n')
# Get accuracy scores
baseScore = round(baseline.score(X_test, y_test)*100, 2)
score = round(grid_result.score(X_test, y_test)*100, 2)
print(f"Baseline Accuracy:\t{baseScore}")
print(f"Tuned Accuracy:\t\t{score}")

Best parameters found:
 {'algorithm': 'SAMME', 'learning_rate': 1, 'n_estimators': 100} 

Baseline Accuracy:	64.67
Tuned Accuracy:		86.29


### Support Vector Classifier (Linear)

In [20]:
# Load libraries
from sklearn.svm import LinearSVC

# Create support vector classifier
svc = LinearSVC(random_state=42, dual=False, class_weight='balanced', max_iter=100000)

# Get a baseline model
baseline = svc.fit(X_train, y_train)

# Create range of candidate penalty hyperparameter values
parameter_space = {
    'penalty': ['l1', 'l2'], 
    'loss': ['hinge', 'squared_hinge'], 
    'dual': [True, False], 
    'tol': [1e-3, 1e-4, 1e-5],
    'C': [1, 5, 30, 200, 1000], 
    'multi_class': ['ovr', 'crammer_singer'], 
}
grid = GridSearchCV(svc, parameter_space, n_jobs=-1, cv=5)

In [21]:
%%time
# Fit models
grid_result = grid.fit(X_train, y_train)

 0.91518806 0.91508308 0.91508308 0.91518806 0.91508308 0.91508308
        nan        nan        nan 0.9159222  0.9159222  0.9159222
 0.91518806 0.91508308 0.91508308 0.91518806 0.91508308 0.91508308
        nan        nan        nan        nan        nan        nan
 0.91518806 0.91508308 0.91508308 0.91518806 0.91508308 0.91508308
 0.91655179 0.91707673 0.91718171 0.91613207 0.9159222  0.9159222
 0.91518806 0.91508308 0.91508308 0.91518806 0.91508308 0.91508308
        nan        nan        nan 0.91833662 0.91833662 0.91844156
 0.91644747 0.91644747 0.91644747 0.91644747 0.91644747 0.91644747
        nan        nan        nan 0.91707662 0.91707662 0.91707662
 0.91644747 0.91644747 0.91644747 0.91644747 0.91644747 0.91644747
        nan        nan        nan        nan        nan        nan
 0.91644747 0.91644747 0.91644747 0.91644747 0.91644747 0.91644747
 0.91728676 0.91844123 0.91875608 0.91697168 0.91697163 0.91697163
 0.91644747 0.91644747 0.91644747 0.91644747 0.91644747 0.916447

Wall time: 1h 19min 11s




In [22]:
# Show best parameters
print('Best parameters found:\n', grid_result.best_params_, '\n')
# Get accuracy scores
baseScore = round(baseline.score(X_test, y_test)*100, 2)
score = round(grid_result.score(X_test, y_test)*100, 2)
print(f"Baseline Accuracy:\t{baseScore}")
print(f"Tuned Accuracy:\t\t{score}")

Best parameters found:
 {'C': 200, 'dual': True, 'loss': 'hinge', 'multi_class': 'crammer_singer', 'penalty': 'l1', 'tol': 0.001} 

Baseline Accuracy:	91.58
Tuned Accuracy:		92.38


### Support Vector Classifier (SVC)

In [23]:
# Load libraries
from sklearn.svm import SVC

# Create support vector classifier
svc = SVC(random_state=42, class_weight='balanced', max_iter=10000)

# Get a baseline model
baseline = svc.fit(X_train, y_train)

# Create range of candidate penalty hyperparameter values
parameter_space = {
    'C': [10, 30, 100, 300, 1000], 
    'kernel': ['linear', 'poly', 'rbf', 'sigmoid'],
    'gamma': ['scale', 'auto'], 
    'shrinking': [True, False], 
    'tol': [1e-2, 1e-3, 1e-4],
    'decision_function_shape': ['ovo', 'ovr'], 
    'break_ties': [True],
}
grid = GridSearchCV(svc, parameter_space, n_jobs=-1, cv=5)

In [24]:
%%time
# Fit models
grid_result = grid.fit(X_train, y_train)

        nan        nan        nan        nan        nan        nan
        nan        nan        nan        nan        nan        nan
        nan        nan        nan        nan        nan        nan
        nan        nan        nan        nan        nan        nan
        nan        nan        nan        nan        nan        nan
        nan        nan        nan        nan        nan        nan
        nan        nan        nan        nan        nan        nan
 0.91991137 0.92022628 0.92012129 0.91991137 0.92012129 0.92012129
 0.9153971  0.91539716 0.91539716 0.9153971  0.91539716 0.91539716
 0.92347993 0.92347993 0.92347993 0.92347993 0.92347993 0.92347993
 0.60354703 0.60365196 0.60365196 0.60354703 0.60365196 0.60365196
 0.91991137 0.92022628 0.92012129 0.91991137 0.92012129 0.92012129
 0.91539716 0.91529217 0.91529217 0.91539716 0.91529217 0.91529217
 0.92358492 0.92347993 0.92347993 0.92358492 0.92347993 0.92347993
 0.60354697 0.6036519  0.6036519  0.60354697 0.6036519  0.6036

Wall time: 14min 38s


In [25]:
# Show best parameters
print('Best parameters found:\n', grid_result.best_params_, '\n')
# Get accuracy scores
baseScore = round(baseline.score(X_test, y_test)*100, 2)
score = round(grid_result.score(X_test, y_test)*100, 2)
print(f"Baseline Accuracy:\t{baseScore}")
print(f"Tuned Accuracy:\t\t{score}")

Best parameters found:
 {'C': 100, 'break_ties': True, 'decision_function_shape': 'ovr', 'gamma': 'scale', 'kernel': 'rbf', 'shrinking': True, 'tol': 0.001} 

Baseline Accuracy:	92.83
Tuned Accuracy:		92.85


In [26]:
# Show best parameters
print('Best parameters found:\n', grid_result.best_params_, '\n')
# Get accuracy scores
baseScore = round(baseline.score(X_test, y_test)*100, 2)
score = round(grid_result.score(X_test, y_test)*100, 2)
print(f"Baseline Accuracy:\t{baseScore}")
print(f"Tuned Accuracy:\t\t{score}")

Best parameters found:
 {'C': 300, 'decision_function_shape': 'ovo', 'gamma': 'scale', 'kernel': 'rbf', 'shrinking': False} 

Baseline Accuracy:	92.83
Tuned Accuracy:		92.97


### Gaussian Naive Bayes Classifier

In [26]:
# Load libraries
from sklearn.naive_bayes import GaussianNB

# Create Gaussian naive Bayes object
nBayes = GaussianNB()

# Get a baseline model
baseline = nBayes.fit(X_train, y_train)

# Create range of candidate hyperparameter values
parameter_space = {
    'var_smoothing': np.logspace(-10, -1, 10),
}
grid = GridSearchCV(nBayes, parameter_space, n_jobs=-1, cv=5)

In [27]:
%%time
# Fit models
grid_result = grid.fit(X_train, y_train)

Wall time: 241 ms


In [28]:
# Show best parameters
print('Best parameters found:\n', grid_result.best_params_, '\n')
# Get accuracy scores
baseScore = round(baseline.score(X_test, y_test)*100, 2)
score = round(grid_result.score(X_test, y_test)*100, 2)
print(f"Baseline Accuracy:\t{baseScore}")
print(f"Tuned Accuracy:\t\t{score}")

Best parameters found:
 {'var_smoothing': 0.01} 

Baseline Accuracy:	89.96
Tuned Accuracy:		89.99


### MLPClassifier

In [35]:
# Load libraries
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import RandomizedSearchCV
import random as rd
from scipy.stats import uniform, randint

# Create classifier
mlp = MLPClassifier(max_iter=5000)

# Get a baseline model
baseline = mlp.fit(X_train, y_train)

# Create range of candidate hyperparameter values
parameter_space = {
    'hidden_layer_sizes': [(rd.randint(10,100), rd.randint(10,100), rd.randint(10,100))],
    'activation': ['relu'], #'tanh', 'identity', 'logistic'], 
    'solver': ['adam'], #'sgd', 'lbfgs'], 
    'alpha': [0.001, 0.0001, 0.00001], #uniform(loc=0, scale=0.0005), 
#     'learning_rate': ['constant', 'adaptive', 'invscaling'], # only when solver='sgd' 
    'learning_rate_init': [0.01, 0.001, 0.0001], 
    'tol': [1e-3, 1e-4, 1e-5],
    'early_stopping': [True], 
    'beta_1': [0.9, 0.99], 
    'beta_2': [0.99, 0.999, 0.9999],
    'epsilon': [1e-9, 1e-8, 1e-7],
}
rand = RandomizedSearchCV(mlp, parameter_space, random_state=42, n_iter=500, n_jobs=-1, cv=5)

Note: ~1.14min / iteration

In [36]:
%%time
# Fit models
rand_result = rand.fit(X_train, y_train)



Wall time: 34min 3s


In [37]:
# Show best parameters
print('Best parameters found:\n', rand_result.best_params_, '\n')
# Get accuracy scores
baseScore = round(baseline.score(X_test, y_test)*100, 2)
score = round(rand_result.score(X_test, y_test)*100, 2)
print(f"Baseline Accuracy:\t{baseScore}")
print(f"Tuned Accuracy:\t\t{score}")

Best parameters found:
 {'tol': 0.001, 'solver': 'adam', 'learning_rate_init': 0.001, 'hidden_layer_sizes': (77, 67, 65), 'epsilon': 1e-09, 'early_stopping': True, 'beta_2': 0.999, 'beta_1': 0.9, 'alpha': 0.0001, 'activation': 'relu'} 

Baseline Accuracy:	92.97
Tuned Accuracy:		92.56


`n_iter=500`: 9h 30min

In [40]:
# Show best parameters
print('Best parameters found:\n', rand_result.best_params_, '\n')
# Get accuracy scores
baseScore = round(baseline.score(X_test, y_test)*100, 2)
score = round(rand_result.score(X_test, y_test)*100, 2)
print(f"Baseline Accuracy:\t{baseScore}")
print(f"Tuned Accuracy:\t\t{score}")

Best parameters found:
 {'activation': 'relu', 'alpha': 0.00020919989433382637, 'early_stopping': True, 'hidden_layer_sizes': (18, 72, 45), 'learning_rate': 'invscaling', 'solver': 'adam'} 

Baseline Accuracy:	93.0
Tuned Accuracy:		92.7


`n_iter=108`: 2h 4min

In [36]:
# Show best parameters
print('Best parameters found:\n', rand_result.best_params_, '\n')
# Get accuracy scores
baseScore = round(baseline.score(X_test, y_test)*100, 2)
score = round(rand_result.score(X_test, y_test)*100, 2)
print(f"Baseline Accuracy:\t{baseScore}")
print(f"Tuned Accuracy:\t\t{score}")

Best parameters found:
 {'solver': 'adam', 'learning_rate': 'invscaling', 'hidden_layer_sizes': (77, 62, 33), 'early_stopping': True, 'alpha': 0.0001, 'activation': 'relu'} 

Baseline Accuracy:	92.85
Tuned Accuracy:		93.02


## Neural Network - Keras

In [38]:
# Load libraries
from sklearn.preprocessing import LabelEncoder
from keras.utils.np_utils import to_categorical

In [39]:
# Encode the target variables
le = LabelEncoder()
le.fit(y)

le_y_train = le.transform(y_train)
le_y_test  = le.transform(y_test)

cat_y_train = to_categorical(le_y_train)
cat_y_test  = to_categorical(le_y_test)

In [40]:
# Load libraries
import numpy as np
import tensorflow as tf
from keras.models import Sequential
from keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import Adam
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV

# Set random seed
np.random.seed(42)

In [41]:
# Number of features
number_of_features = X.shape[1]

# Create function returning a compiled network
def create_network(optimizer='rmsprop', hiddenLayerOne=48, hiddenLayerTwo=24, 
                   initDrop=0.2, hiddenDrop=0.5, learnRate=0.01):
    
    # Start neural network
    network = Sequential()
    
    # Add a dropout layer for input layer
    network.add(Dropout(initDrop, input_shape=(number_of_features,)))
    
    # Add fully connected layer w/a ReLU activation function
    network.add(Dense(units=hiddenLayerOne, activation='relu'))#,
#                       input_shape=(number_of_features,)))
    
    # Add a dropout layer for previous hidden layer
    network.add(Dropout(hiddenDrop))

    # Add fully connected layer w/a ReLU activation function
    network.add(Dense(units=hiddenLayerTwo, activation='relu'))
                
    # Add a dropout layer for previous hidden layer
    network.add(Dropout(hiddenDrop))

    # Add fully connected layer w/a softmax activation function
    network.add(Dense(units=7, activation='softmax'))

    # Compile neural network
    network.compile(loss='categorical_crossentropy', 
                    optimizer=Adam(learning_rate=learnRate),
                    metrics=['accuracy'])

    # Return compiled network
    return network

In [51]:
# Get a baseline model
model = create_network()
baseline = model.fit(X_train, cat_y_train, 
              validation_data=(X_test, cat_y_test), 
              batch_size=8, 
              epochs=10)

accuracy = model.evaluate(X_test, cat_y_test)[1]
print(f"\nAccuracy:  {round(accuracy*100, 2)}")

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10

Accuracy:  90.5


In [43]:
# Wrap Keras model so it can be used by scikit-learn
keras = KerasClassifier(build_fn=create_network, verbose=0)

# Create hyperparameter space and initialize random search
parameter_space = {
    'epochs': [5, 10, 15],
    'batch_size': [4, 8, 16, 32],
    'hiddenLayerOne': [48, 96, 120],
    'hiddenLayerTwo': [24, 48, 96],
    'initDrop': [0.1, 0.2],
    'hiddenDrop': [0.3, 0.4, 0.5], 
    'learnRate': [1e-4, 1e-3, 1e-2],
}
rand = RandomizedSearchCV(keras, parameter_space, random_state=42, n_iter=200, 
                          scoring='accuracy', n_jobs=-1, cv=3)

In [44]:
%%time
# Fit models
rand_result = rand.fit(X_train, cat_y_train, 
                       validation_data=(X_test, cat_y_test))

 nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan
 nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan
 nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan
 nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan
 nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan
 nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan
 nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan
 nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan
 nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan
 nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan
 nan nan]


Wall time: 51min 43s


In [45]:
# Summarize search information
bestScore = rand_result.best_score_
bestParams = rand_result.best_params_
print(f"Best score is {round(bestScore*100, 2)} using {bestParams}")

Best score is nan using {'learnRate': 0.01, 'initDrop': 0.1, 'hiddenLayerTwo': 48, 'hiddenLayerOne': 120, 'hiddenDrop': 0.5, 'epochs': 5, 'batch_size': 32}


In [49]:
# Show best parameters
print('Best parameters found:\n', rand_result.best_params_, '\n')
# Get accuracy scores
baseScore = round(baseline.history['accuracy'][np.argmin(baseline.history['loss'])]*100, 2)
score = round(rand_result.best_score_*100, 2)
print(f"Baseline Accuracy:\t{baseScore}")
print(f"Tuned Accuracy:\t\t{score}")

Best parameters found:
 {'learnRate': 0.01, 'initDrop': 0.1, 'hiddenLayerTwo': 48, 'hiddenLayerOne': 120, 'hiddenDrop': 0.5, 'epochs': 5, 'batch_size': 32} 

Baseline Accuracy:	78.48
Tuned Accuracy:		nan


In [52]:
# Initialize model with best parameters
model = create_network( 
              learnRate=0.01, 
              initDrop=0.1, 
              hiddenLayerOne=120, 
              hiddenLayerTwo=48, 
              hiddenDrop=0.5)

# Train network 
H = model.fit(X_train, cat_y_train,
              validation_data=(X_test, cat_y_test), 
              batch_size=32, 
              epochs=5)

accuracy = model.evaluate(X_test, cat_y_test)[1]
print(f"\nAccuracy:  {round(accuracy*100, 2)}")

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5

Accuracy:  91.36


### Try Keras without Dropout

In [59]:
# Number of features
number_of_features = X.shape[1]

# Create function returning a compiled network
def create_network(optimizer='rmsprop', hiddenLayerOne=48, hiddenLayerTwo=24, 
                   learnRate=0.01):
    
    # Start neural network
    network = Sequential()
    
    # Add fully connected layer w/a ReLU activation function
    network.add(Dense(units=hiddenLayerOne, activation='relu', 
                input_shape=(number_of_features,)))

    # Add fully connected layer w/a ReLU activation function
    network.add(Dense(units=hiddenLayerTwo, activation='relu'))

    # Add fully connected layer w/a softmax activation function
    network.add(Dense(units=7, activation='softmax'))

    # Compile neural network
    network.compile(loss='categorical_crossentropy', 
                    optimizer=Adam(learning_rate=learnRate),
                    metrics=['accuracy'])

    # Return compiled network
    return network

In [60]:
# Wrap Keras model so it can be used by scikit-learn
keras = KerasClassifier(build_fn=create_network, verbose=0)

# Create hyperparameter space and initialize random search
parameter_space = {
    'epochs': [5, 10, 15],
    'batch_size': [4, 8, 16, 32],
    'hiddenLayerOne': [96, 120, 144],
    'hiddenLayerTwo': [48, 96, 120],
    'learnRate': [1e-5, 1e-4, 1e-3],
}
rand = RandomizedSearchCV(keras, parameter_space, random_state=42, n_iter=200, 
                          scoring='accuracy', n_jobs=-1, cv=3)

In [61]:
%%time
# Fit models
rand_result = rand.fit(X_train, cat_y_train, 
                       validation_data=(X_test, cat_y_test))

Wall time: 54min 2s


In [65]:
# Show best parameters
print('Best parameters found:\n', rand_result.best_params_, '\n')
# Get accuracy scores
baseScore = round(baseline.history['accuracy'][np.argmin(baseline.history['loss'])]*100, 2)
score = round(rand_result.best_score_*100, 2)
print(f"Baseline Accuracy:\t{baseScore}")
print(f"Tuned Accuracy:\t\t{score}")

Best parameters found:
 {'learnRate': 0.0001, 'hiddenLayerTwo': 96, 'hiddenLayerOne': 120, 'epochs': 10, 'batch_size': 8} 

Baseline Accuracy:	77.53
Tuned Accuracy:		nan


In [68]:
# Initialize model with default values
keras = create_network( 
              learnRate=0.0001, 
              hiddenLayerOne=120, 
              hiddenLayerTwo=96)

# Train network 
history = keras.fit(X_train, cat_y_train,
              validation_data=(X_test, cat_y_test), 
              batch_size=8, 
              epochs=10)

accuracy = keras.evaluate(X_test, cat_y_test)[1]
print(f"\nAccuracy:  {round(accuracy*100, 2)}")

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10

Accuracy:  92.65


In [71]:
history = keras.evaluate(X_test, cat_y_test)[1]
print(f"\nAccuracy:  {round(accuracy*100, 2)}")


Accuracy:  92.65
