In [3]:
from sklearn.ensemble import RandomForestClassifier
from sklearn .preprocessing import LabelEncoder
from sklearn.metrics import confusion_matrix
from skimage import feature
from imutils import build_montages
from imutils import paths
import numpy as np
import argparse
import cv2
import os
import xgboost as xgb

In [4]:
def quantify_image(image):
    # compute the HOG feature vector for the input image
    features = feature.hog(image, 
                           orientations=9,
                           pixels_per_cell=(10, 10),
                           cells_per_block=(2, 2),
                           transform_sqrt=True, 
                           block_norm="L1")
    return features

In [5]:
def load_split(path):
    # grab the list of images in the input directory,
    # then initialize the list of data(i.e., images) and class labels
    imagePaths = list(paths.list_images(path))
#     print(imagePaths[0])
    data = []
    labels = []
    
    # loop over the image paths
    for imagePath in imagePaths:
        # extract the class label from the filename
        label = imagePath.split(os.path.sep)[-2]
        # load the input image, convert it to grayscale, 
        # and resize it to 200x2000 pixels, ignoring aspect ratio
        image = cv2.imread(imagePath)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        image = cv2.resize(image, (200,200))
        # Threshold the image such that the drawing appears as white on a black background
        image = cv2.threshold(image, 0, 255, cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU)[1]
        # quantify the image
        features = quantify_image(image)
        # update the data and labels lists, respectively
        data.append(features)
        labels.append(label)
    return (np.array(data), np.array(labels))

### Data loading and pre-processing

In [10]:
dataset_path = r'dataset\wave'
trial_num = 5
trainingPath = os.path.sep.join([dataset_path,"training"])
testingPath = os.path.sep.join([dataset_path, "testing"])

# load the traning and testing data
print("[INFO] loading data...")
(trainX, trainY) = load_split(trainingPath)
(testX, testY) = load_split(testingPath)

# Encode the labels as integers
le = LabelEncoder()
trainY = le.fit_transform(trainY)
testY = le.transform(testY)

#initialize or trials dicitonary
trials = {}

[INFO] loading data...


In [11]:
trainX.shape

(0,)

### Model training with `Random Forest`, and performance displayed in confusion metrics

In [33]:
# loop over the number of trials to run
for i in range(0, trial_num):
    # train the model
    print(f"[INFO] training model {i+1} of {trial_num}...")
    model = RandomForestClassifier(n_estimators=100)
    model.fit(trainX, trainY)
    
    # make predictions on the testing data and initialize a dictionary to store our computed metreics
    predictions = model.predict(testX)
    metrics = {}
    
    # compute the confusion matrix and use it to derive the raw accuracy, ensitivity, and specificity
    cm = confusion_matrix(testY, predictions).flatten()
    (tn, fp, fn, tp) = cm
    metrics["acc"] = (tp + tn) / float(cm.sum())
    metrics["sensitivity"] = tp / float(tp + fn)
    metrics["specificity"] = tn / float(tn + fp)
    
    # loop over the metrics
    for (k,v) in metrics.items():
        # update the trialas dictionary with the list of values for the current metric
        l = trials.get(k, [])
        l.append(v)
        trials[k] = l

# loop over our metrics
for metric in ("acc", "sensitivity", "specificity"):
    # grab the list of values for the current metric, then compute the mean and standard deviation
    values = trials[metric]
    mean = np.mean(values)
    std = np.std(values)
    
    # show the computed metrics for the statistic
    print(metric)
    print('='*len(metric))
    print(f'mean={mean:.4f}, std={std:.4f} \n')    

[INFO] training model 1 of 5...
[INFO] training model 2 of 5...
[INFO] training model 3 of 5...
[INFO] training model 4 of 5...
[INFO] training model 5 of 5...
acc
===
mean=0.8133, std=0.0400 

sensitivity
mean=0.7467, std=0.0777 

specificity
mean=0.8800, std=0.0267 



### Hyper parameter tunining for RF

In [34]:
print(trainX.shape)
print(trainY.shape)

(72, 12996)
(72,)


In [35]:
from hyperopt import space_eval, hp, tpe, STATUS_OK, Trials, fmin
from hyperopt.pyll import scope
from sklearn.model_selection import cross_val_score, KFold

In [93]:
# Bayesian optimization for RandomForest
space = {
        'n_estimators' : hp.quniform('n_estimators', 100, 1000, 25),
        'max_depth' : hp.quniform('max_depth', 1,20,1),
        'max_features': hp.choice('max_features', ['auto','sqrt','log2', 0.5, None]),
        'min_samples_leaf': hp.quniform('min_samples_leaf', 4,30,2),
}
trial = Trials()
def hyperopt_RandomForest(space):
    model = RandomForestClassifier(n_estimators= int(space['n_estimators']),
                                   max_features= space['max_features'],
                                   max_depth= int(space['max_depth']),
                                   min_samples_leaf= int(space['min_samples_leaf'])                        
                           )
    
    score = cross_val_score(model,
                            trainX, 
                            trainY, 
                            cv  = 5,
                            scoring='accuracy'                            
                           )
    
    print(f"Accuracy Score {score.mean():.3f} params {space}")
    return {'loss':(1 - score.mean()), 'status': STATUS_OK}
best = fmin(fn=hyperopt_RandomForest,
            space=space,
            algo=tpe.suggest,
            trials=trial,
            max_evals=300)
print(best)
clf = RandomForestClassifier(n_estimators= int(best['n_estimators']),
                                           max_features= ['auto','sqrt','log2', 0.5, None][best['max_features']],
                                           max_depth= int(best['max_depth']),
                                           min_samples_leaf= int(best['min_samples_leaf']))                      
                           
# final_score3 = cross_val_score(clf,
#                                trainX,
#                                trainY,
#                                cv  = 5,
#                                scoring='accuracy'                            
#                        )


print('Cooresponding loss:', final_score3.mean(), sep='\n')

Accuracy Score 0.655 params {'max_depth': 14.0, 'max_features': 'auto', 'min_samples_leaf': 8.0, 'n_estimators': 550.0}
Accuracy Score 0.500 params {'max_depth': 17.0, 'max_features': 'log2', 'min_samples_leaf': 24.0, 'n_estimators': 925.0}
Accuracy Score 0.500 params {'max_depth': 7.0, 'max_features': 'auto', 'min_samples_leaf': 30.0, 'n_estimators': 125.0}
Accuracy Score 0.629 params {'max_depth': 9.0, 'max_features': 'sqrt', 'min_samples_leaf': 12.0, 'n_estimators': 625.0}
Accuracy Score 0.500 params {'max_depth': 17.0, 'max_features': 'auto', 'min_samples_leaf': 26.0, 'n_estimators': 550.0}
Accuracy Score 0.500 params {'max_depth': 16.0, 'max_features': 'sqrt', 'min_samples_leaf': 26.0, 'n_estimators': 250.0}
Accuracy Score 0.684 params {'max_depth': 13.0, 'max_features': 'sqrt', 'min_samples_leaf': 10.0, 'n_estimators': 200.0}
Accuracy Score 0.598 params {'max_depth': 17.0, 'max_features': 0.5, 'min_samples_leaf': 12.0, 'n_estimators': 600.0}
Accuracy Score 0.500 params {'max_dept

Accuracy Score 0.641 params {'max_depth': 9.0, 'max_features': 'sqrt', 'min_samples_leaf': 10.0, 'n_estimators': 325.0}
Accuracy Score 0.570 params {'max_depth': 6.0, 'max_features': 'log2', 'min_samples_leaf': 16.0, 'n_estimators': 225.0}
Accuracy Score 0.614 params {'max_depth': 3.0, 'max_features': None, 'min_samples_leaf': 14.0, 'n_estimators': 150.0}
Accuracy Score 0.668 params {'max_depth': 1.0, 'max_features': 'log2', 'min_samples_leaf': 6.0, 'n_estimators': 375.0}
Accuracy Score 0.657 params {'max_depth': 5.0, 'max_features': 0.5, 'min_samples_leaf': 4.0, 'n_estimators': 200.0}
Accuracy Score 0.682 params {'max_depth': 2.0, 'max_features': 'log2', 'min_samples_leaf': 8.0, 'n_estimators': 250.0}
Accuracy Score 0.670 params {'max_depth': 4.0, 'max_features': 'auto', 'min_samples_leaf': 8.0, 'n_estimators': 100.0}
Accuracy Score 0.655 params {'max_depth': 8.0, 'max_features': 'log2', 'min_samples_leaf': 12.0, 'n_estimators': 300.0}
Accuracy Score 0.698 params {'max_depth': 10.0, '

Accuracy Score 0.714 params {'max_depth': 4.0, 'max_features': 'log2', 'min_samples_leaf': 4.0, 'n_estimators': 375.0}
Accuracy Score 0.680 params {'max_depth': 1.0, 'max_features': 'log2', 'min_samples_leaf': 6.0, 'n_estimators': 300.0}
Accuracy Score 0.684 params {'max_depth': 2.0, 'max_features': None, 'min_samples_leaf': 8.0, 'n_estimators': 475.0}
Accuracy Score 0.695 params {'max_depth': 3.0, 'max_features': 'log2', 'min_samples_leaf': 6.0, 'n_estimators': 425.0}
Accuracy Score 0.614 params {'max_depth': 5.0, 'max_features': 0.5, 'min_samples_leaf': 8.0, 'n_estimators': 250.0}
Accuracy Score 0.668 params {'max_depth': 4.0, 'max_features': 'log2', 'min_samples_leaf': 4.0, 'n_estimators': 400.0}
Accuracy Score 0.696 params {'max_depth': 5.0, 'max_features': 'log2', 'min_samples_leaf': 10.0, 'n_estimators': 325.0}
Accuracy Score 0.654 params {'max_depth': 1.0, 'max_features': 'auto', 'min_samples_leaf': 6.0, 'n_estimators': 275.0}
Accuracy Score 0.698 params {'max_depth': 2.0, 'max_

In [90]:
score = [0.6,0.5, 0.5, 0.5, 0.5]

## Bayesian optimization for XGBClassifier

In [36]:
# Define the search parameter spacs
space = {
        'max_depth': hp.quniform('max_depth', 2, 20, 1),
        'min_child_samples': hp.quniform ('min_child_samples', 1, 20, 1),
        'subsample': hp.uniform ('subsample', 0.8, 1),
        'n_estimators' : hp.quniform('n_estimators', 50,1000,25),
        'learning_rate' : hp.loguniform('learning_rate', np.log(0.005), np.log(0.2)),
        'gamma' : hp.uniform('gamma', 0, 1),
        'colsample_bytree' : hp.uniform('colsample_bytree', 0.3, 1.0)
}
trial = Trials()

# Define the objective to minimize
def hyperopt_XGBClassifier(space):
    model = xgb.XGBClassifier(n_estimators= int(space['n_estimators']),
                               max_depth = int(space['max_depth']),
                               min_child_samples = space['min_child_samples'],
                               subsample = space['subsample'],
                               learning_rate= space['learning_rate'],
                               gamma = space['gamma'],
                               colsample_bytree = space['colsample_bytree']
                               )
    
    model.fit(trainX, trainY)
    
    # make predictions on the testing data and initialize a dictionary to store our computed metreics
    predictions = model.predict(testX)
    metrics = {}
    
    # compute the confusion matrix and use it to derive the raw accuracy, ensitivity, and specificity
    cm = confusion_matrix(testY, predictions).flatten()
    (tn, fp, fn, tp) = cm
    metrics["acc"] = (tp + tn) / float(cm.sum())
    print("Accuracy Score {:.3f} params {}".format(metrics["acc"], space))
    
    # Maximize the accuracy by minimize (1-accuracy)
    return {'loss':(1 - metrics["acc"]), 'status': STATUS_OK}



best = fmin(fn=hyperopt_XGBClassifier,
            space=space,
            algo=tpe.suggest,
            trials=trial,
            max_evals=500)
print(best)
clf = xgb.XGBClassifier(n_estimators = int(best['n_estimators']),
                   max_depth = int(best['max_depth']),
                   min_child_samples = best['min_child_samples'],
                   subsample = best['subsample'],
                   learning_rate = best['learning_rate'],
                   gamma = best['gamma'],
                   colsample_bytree = best['colsample_bytree'])
# final_score3 = cross_val_score(clf,
#                         train_cl[ML_columns], 
#                         train_cl['Target'], 
#                         cv  = cv,
#                         scoring='roc_auc'                            
#                        )
# clf.fit(trainX,trainY)
# final_prediction = clf.predict(testX)
# cm_final = confusion_matrix(testY,predictions).flatten()
# (tn, fp, fn, tp) = cm_final
# accuracy_final = (tp + tn) / float(cm_final.sum())


print('Cooresponding loss:', accuracy_final, sep='\n')

Accuracy Score 0.833 params {'colsample_bytree': 0.6103680944248027, 'gamma': 0.5724439047623937, 'learning_rate': 0.16624515548640778, 'max_depth': 9.0, 'min_child_samples': 19.0, 'n_estimators': 500.0, 'subsample': 0.8592170507317379}
Accuracy Score 0.767 params {'colsample_bytree': 0.8103404540412369, 'gamma': 0.6444931213198691, 'learning_rate': 0.028266758865937196, 'max_depth': 14.0, 'min_child_samples': 19.0, 'n_estimators': 675.0, 'subsample': 0.8787338410762001}
Accuracy Score 0.767 params {'colsample_bytree': 0.7831970267527404, 'gamma': 0.3626056250434858, 'learning_rate': 0.060040938757403906, 'max_depth': 5.0, 'min_child_samples': 3.0, 'n_estimators': 300.0, 'subsample': 0.930797607492455}
Accuracy Score 0.767 params {'colsample_bytree': 0.6676109250623357, 'gamma': 0.6412361578254506, 'learning_rate': 0.020682612579811882, 'max_depth': 14.0, 'min_child_samples': 15.0, 'n_estimators': 850.0, 'subsample': 0.8370209813250722}
Accuracy Score 0.767 params {'colsample_bytree': 

Accuracy Score 0.767 params {'colsample_bytree': 0.45488923869820796, 'gamma': 0.43553043329209495, 'learning_rate': 0.08895723659001387, 'max_depth': 14.0, 'min_child_samples': 17.0, 'n_estimators': 400.0, 'subsample': 0.8901309970852033}
Accuracy Score 0.767 params {'colsample_bytree': 0.4089312388649835, 'gamma': 0.8868944838500497, 'learning_rate': 0.19549475003741743, 'max_depth': 3.0, 'min_child_samples': 12.0, 'n_estimators': 625.0, 'subsample': 0.8571963301520451}
Accuracy Score 0.800 params {'colsample_bytree': 0.5766618957056168, 'gamma': 0.6225163037090209, 'learning_rate': 0.11895438106264519, 'max_depth': 8.0, 'min_child_samples': 9.0, 'n_estimators': 725.0, 'subsample': 0.8778129363122741}
Accuracy Score 0.767 params {'colsample_bytree': 0.43581697246115647, 'gamma': 0.045995154548102934, 'learning_rate': 0.09770762372603763, 'max_depth': 19.0, 'min_child_samples': 13.0, 'n_estimators': 250.0, 'subsample': 0.9288212402347136}
Accuracy Score 0.833 params {'colsample_bytree

Accuracy Score 0.767 params {'colsample_bytree': 0.47036963520182795, 'gamma': 0.6420232689370586, 'learning_rate': 0.19895034665622607, 'max_depth': 6.0, 'min_child_samples': 13.0, 'n_estimators': 600.0, 'subsample': 0.8465744364655781}
Accuracy Score 0.767 params {'colsample_bytree': 0.505285625286181, 'gamma': 0.7989183688711874, 'learning_rate': 0.010387567058481073, 'max_depth': 7.0, 'min_child_samples': 12.0, 'n_estimators': 800.0, 'subsample': 0.8561386672326445}
Accuracy Score 0.733 params {'colsample_bytree': 0.6386021031956721, 'gamma': 0.7872090037090476, 'learning_rate': 0.12964544929700586, 'max_depth': 4.0, 'min_child_samples': 4.0, 'n_estimators': 500.0, 'subsample': 0.8625166167715681}
Accuracy Score 0.800 params {'colsample_bytree': 0.9910740271998448, 'gamma': 0.9895261107695605, 'learning_rate': 0.11822491331173071, 'max_depth': 4.0, 'min_child_samples': 3.0, 'n_estimators': 550.0, 'subsample': 0.8296161727777344}
Accuracy Score 0.767 params {'colsample_bytree': 0.55

Accuracy Score 0.867 params {'colsample_bytree': 0.6265915726937883, 'gamma': 0.9013893823196186, 'learning_rate': 0.1653075165723116, 'max_depth': 5.0, 'min_child_samples': 7.0, 'n_estimators': 575.0, 'subsample': 0.8352196979695199}
Accuracy Score 0.767 params {'colsample_bytree': 0.7391465836553219, 'gamma': 0.7853664461334013, 'learning_rate': 0.021648962064725642, 'max_depth': 4.0, 'min_child_samples': 8.0, 'n_estimators': 575.0, 'subsample': 0.8260862338161601}
Accuracy Score 0.733 params {'colsample_bytree': 0.5366987946699406, 'gamma': 0.4746401167709876, 'learning_rate': 0.07238180057165368, 'max_depth': 19.0, 'min_child_samples': 9.0, 'n_estimators': 275.0, 'subsample': 0.8205390012126265}
Accuracy Score 0.733 params {'colsample_bytree': 0.31658989569273194, 'gamma': 0.3837659488756278, 'learning_rate': 0.026654251427635813, 'max_depth': 16.0, 'min_child_samples': 20.0, 'n_estimators': 150.0, 'subsample': 0.8742386582680441}
Accuracy Score 0.800 params {'colsample_bytree': 0.

Accuracy Score 0.800 params {'colsample_bytree': 0.5513424168789567, 'gamma': 0.7795368385370307, 'learning_rate': 0.19904009296161598, 'max_depth': 4.0, 'min_child_samples': 4.0, 'n_estimators': 550.0, 'subsample': 0.8383688441277373}
Accuracy Score 0.800 params {'colsample_bytree': 0.5190887422393302, 'gamma': 0.05053032380385968, 'learning_rate': 0.16569644676225945, 'max_depth': 4.0, 'min_child_samples': 5.0, 'n_estimators': 675.0, 'subsample': 0.8429252417424581}
Accuracy Score 0.733 params {'colsample_bytree': 0.638258571050524, 'gamma': 0.32192689433836114, 'learning_rate': 0.09834779813030416, 'max_depth': 16.0, 'min_child_samples': 19.0, 'n_estimators': 175.0, 'subsample': 0.9582735416824553}
Accuracy Score 0.733 params {'colsample_bytree': 0.3404389995720548, 'gamma': 0.45448890851366186, 'learning_rate': 0.07108188057965475, 'max_depth': 12.0, 'min_child_samples': 17.0, 'n_estimators': 725.0, 'subsample': 0.9078249658619221}
Accuracy Score 0.800 params {'colsample_bytree': 0

Accuracy Score 0.800 params {'colsample_bytree': 0.5589663062584331, 'gamma': 0.904048094366994, 'learning_rate': 0.10747534287604243, 'max_depth': 9.0, 'min_child_samples': 1.0, 'n_estimators': 925.0, 'subsample': 0.8378806919322227}
Accuracy Score 0.767 params {'colsample_bytree': 0.32427050604012897, 'gamma': 0.39505374624448497, 'learning_rate': 0.08199601694609718, 'max_depth': 13.0, 'min_child_samples': 16.0, 'n_estimators': 775.0, 'subsample': 0.894201537035208}
Accuracy Score 0.800 params {'colsample_bytree': 0.6851813105866281, 'gamma': 0.7763124943561845, 'learning_rate': 0.1430347819771408, 'max_depth': 3.0, 'min_child_samples': 5.0, 'n_estimators': 425.0, 'subsample': 0.8602818808948547}
Accuracy Score 0.767 params {'colsample_bytree': 0.6452923307096224, 'gamma': 0.9534295922377641, 'learning_rate': 0.12425223433778035, 'max_depth': 6.0, 'min_child_samples': 6.0, 'n_estimators': 550.0, 'subsample': 0.8056708470669608}
Accuracy Score 0.800 params {'colsample_bytree': 0.5040

Accuracy Score 0.767 params {'colsample_bytree': 0.6303277627806482, 'gamma': 0.9186487521554887, 'learning_rate': 0.12938997644916717, 'max_depth': 7.0, 'min_child_samples': 13.0, 'n_estimators': 800.0, 'subsample': 0.8183092428881436}
Accuracy Score 0.800 params {'colsample_bytree': 0.7301918286446111, 'gamma': 0.7917423466244199, 'learning_rate': 0.14473571035344862, 'max_depth': 2.0, 'min_child_samples': 4.0, 'n_estimators': 575.0, 'subsample': 0.8401130263991419}
Accuracy Score 0.800 params {'colsample_bytree': 0.7558028661132592, 'gamma': 0.8859095513144456, 'learning_rate': 0.18411008441784024, 'max_depth': 3.0, 'min_child_samples': 6.0, 'n_estimators': 525.0, 'subsample': 0.8605858851654324}
Accuracy Score 0.800 params {'colsample_bytree': 0.5906057831443964, 'gamma': 0.9650917209150547, 'learning_rate': 0.14799533394016132, 'max_depth': 5.0, 'min_child_samples': 14.0, 'n_estimators': 675.0, 'subsample': 0.8692310236306082}
Accuracy Score 0.733 params {'colsample_bytree': 0.953

Accuracy Score 0.800 params {'colsample_bytree': 0.35757430846177146, 'gamma': 0.8876800532265126, 'learning_rate': 0.13515209550206947, 'max_depth': 6.0, 'min_child_samples': 11.0, 'n_estimators': 400.0, 'subsample': 0.8686684974841113}
Accuracy Score 0.767 params {'colsample_bytree': 0.3248744243631154, 'gamma': 0.8526368430599044, 'learning_rate': 0.15341869260575308, 'max_depth': 6.0, 'min_child_samples': 12.0, 'n_estimators': 375.0, 'subsample': 0.8642447107823812}
Accuracy Score 0.767 params {'colsample_bytree': 0.41647895644344834, 'gamma': 0.9037558371255334, 'learning_rate': 0.12498252732755316, 'max_depth': 7.0, 'min_child_samples': 12.0, 'n_estimators': 450.0, 'subsample': 0.8747148184221001}
Accuracy Score 0.733 params {'colsample_bytree': 0.48279351078951177, 'gamma': 0.5459090093747272, 'learning_rate': 0.057091314689673114, 'max_depth': 17.0, 'min_child_samples': 17.0, 'n_estimators': 425.0, 'subsample': 0.9198268020786518}
Accuracy Score 0.767 params {'colsample_bytree'

In [37]:
best

{'colsample_bytree': 0.6265915726937883,
 'gamma': 0.9013893823196186,
 'learning_rate': 0.1653075165723116,
 'max_depth': 5.0,
 'min_child_samples': 7.0,
 'n_estimators': 575.0,
 'subsample': 0.8352196979695199}

In [43]:
# loop over the number of trials to run
for i in range(0, trial_num):
    # train the model
    print(f"[INFO] training model {i+1} of {trial_num}...")
    clf = xgb.XGBClassifier(n_estimators = int(best['n_estimators']),
                           max_depth = int(best['max_depth']),
                           min_child_samples = best['min_child_samples'],
                           subsample = best['subsample'],
                           learning_rate = best['learning_rate'],
                           gamma = best['gamma'],
                           colsample_bytree = best['colsample_bytree'])
    clf.fit(trainX, trainY)
    
    # make predictions on the testing data and initialize a dictionary to store our computed metreics
    predictions = clf.predict(testX)
    metrics = {}
    
    # compute the confusion matrix and use it to derive the raw accuracy, ensitivity, and specificity
    cm = confusion_matrix(testY, predictions).flatten()
    (tn, fp, fn, tp) = cm
    metrics["acc"] = (tp + tn) / float(cm.sum())
    metrics["sensitivity"] = tp / float(tp + fn)
    metrics["specificity"] = tn / float(tn + fp)
    
    # loop over the metrics
    for (k,v) in metrics.items():
        # update the trialas dictionary with the list of values for the current metric
        l = trials.get(k, [])
        l.append(v)
        trials[k] = l

# loop over our metrics
for metric in ("acc", "sensitivity", "specificity"):
    # grab the list of values for the current metric, then compute the mean and standard deviation
    values = trials[metric]
    mean = np.mean(values)
    std = np.std(values)
    
    # show the computed metrics for the statistic
    print(metric)
    print('='*len(metric))
    print(f'mean={mean:.4f}, std={std:.4f} \n') 

[INFO] training model 1 of 5...
[INFO] training model 2 of 5...
[INFO] training model 3 of 5...
[INFO] training model 4 of 5...
[INFO] training model 5 of 5...
acc
===
mean=0.8667, std=0.0000 

sensitivity
mean=0.8000, std=0.0000 

specificity
mean=0.9333, std=0.0000 



In [44]:
trials['acc']

[0.8666666666666667,
 0.8666666666666667,
 0.8666666666666667,
 0.8666666666666667,
 0.8666666666666667]

### Demonstrate testing image predictions

In [66]:
# randomly select a few images and then initialize the output image for the montage
testingPaths = list(paths.list_images(testingPath))
idxs = np.arange(0, len(testingPaths))
idxs = np.random.choice(idxs, size=(25,), replace=False)
images = []

# loop over the trsting samples
for i in idxs:
    # load the teting image, clone it, and resize it
    image = cv2.imread(testingPaths[i])
    output = image.copy()
    output = cv2.resize(output, (128,128))
    
    # pre-process the image in the same manner we did earlier
    image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    image = cv2.resize(image, (200, 200))
    image = cv2.threshold(image, 0, 255,cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU)[1]
    
    # quantify the image and make predictions based on the extracted features using the last trained RF
    features = quantify_image(image)
    preds = model.predict([features])
    label = le.inverse_transform(preds)[0]
    
    # draw the colored class label on the output image and add it to the set of output images
    color = (0, 255, 0) if label == "healthy" else (0, 0, 255)
    cv2.putText(output, label, (3, 20), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)
    images.append(output)    

# create a montage using 128x128 "titles" with 5 rows and 5 columns
montage = build_montages(images, (128,128), (5,5))[0]
# show the output montage
cv2.imshow("Output", montage)
cv2.waitKey(0)
cv2.destroyAllWindows()