# Testing different kinds of models
 

In [1]:
import sys
import os

module_path = os.path.abspath(os.path.join('..'))
sys.path.insert(1, module_path)
import utility

import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.model_selection import GridSearchCV 
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report

## Code to run grid search on different combinations of classifiers

In [2]:
def get_KNN_grid():
    """Build an unfitted 5-fold grid search for a k-nearest-neighbours classifier.

    The search space is the cross product of ``leaf_size`` in 1..4,
    ``n_neighbors`` in 1..2 and ``p`` in {1, 2} (16 candidates).

    Returns:
        A ``GridSearchCV`` wrapping a default ``KNeighborsClassifier``.
    """
    search_space = {
        'leaf_size': [size for size in range(1, 5)],
        'n_neighbors': [count for count in range(1, 3)],
        'p': [1, 2],
    }
    return GridSearchCV(KNeighborsClassifier(), search_space, cv=5)
def get_SVM_grid():
    """Build an unfitted 5-fold grid search for an RBF-kernel support vector classifier.

    ``C`` and ``gamma`` each take five values spaced by powers of ten, giving
    25 candidates. The search is created with ``refit=True`` and ``verbose=3``,
    so it logs every fold while fitting.

    Returns:
        A ``GridSearchCV`` wrapping a default ``SVC``.
    """
    c_values = [0.1, 1, 10, 100, 1000]
    gamma_values = [1, 0.1, 0.01, 0.001, 0.0001]
    search_space = dict(C=c_values, gamma=gamma_values, kernel=['rbf'])
    return GridSearchCV(SVC(), search_space, cv=5, refit=True, verbose=3)
def get_ANN_grid():
    """Build an unfitted 5-fold grid search for a multi-layer perceptron.

    Two hidden-layer layouts and two ``alpha`` values are combined with a
    fixed ReLU activation and the Adam solver (4 candidates).

    Returns:
        A ``GridSearchCV`` wrapping a default ``MLPClassifier``.
    """
    layer_layouts = [(1000, 500), (1000, 500, 250)]
    alpha_values = [0.0001, 0.05]
    search_space = {
        'hidden_layer_sizes': layer_layouts,
        'activation': ['relu'],
        'solver': ['adam'],
        'alpha': alpha_values,
    }
    return GridSearchCV(MLPClassifier(), search_space, cv=5)
def get_random_forest_grid():
    """Build an unfitted 5-fold grid search for a random forest.

    Three forest sizes are crossed with three depth limits (9 candidates).
    ``random_state`` is part of the grid with the single value 0, so every
    candidate forest is seeded identically.

    Returns:
        A ``GridSearchCV`` wrapping a default ``RandomForestClassifier``.
    """
    search_space = dict(
        n_estimators=[100, 200, 300],
        max_depth=[8, 10, 12],
        random_state=[0],
    )
    return GridSearchCV(RandomForestClassifier(), search_space, cv=5)

# Unfitted grid-search objects keyed by the classifier names that
# run_grid_search looks up; every factory is called exactly once, here.
_grid_factories = [
    ('SVM', get_SVM_grid),
    ('KNN', get_KNN_grid),
    ('ANN', get_ANN_grid),
    ('random_forest', get_random_forest_grid),
]
grids = {label: make_grid() for label, make_grid in _grid_factories}

In [3]:
# Names of the signal-decomposition variants understood by utility.get_X_y.
decomp_methods = ['noDecomp', 'EMD', 'EEMD', 'DWT',  'EMD_DWT','EEMD_DWT']

def run_grid_search(classifiers, decomp_methods = decomp_methods):
    """Grid-search every requested classifier on every decomposition method.

    Each dataset is loaded and split once (80/20, ``random_state=1``) and the
    grid search registered in ``grids`` for each classifier is fitted on the
    training part.

    Previously each result slot was simply overwritten, so ``decomp_dict``
    only ever reflected the last classifier and ``classifier_dict`` the last
    decomposition. Now each slot keeps the estimator with the highest
    cross-validation score (``best_score_``); on a tie the first one seen is
    kept. With a single classifier or a single decomposition the result is the
    same as before.

    Note that the objects in ``grids`` are fitted in place, so after the call
    each one holds the state of its most recent fit.

    Args:
        classifiers: Iterable of keys of ``grids`` (e.g. ``'SVM'``).
        decomp_methods: Iterable of decomposition names passed to
            ``utility.get_X_y``; defaults to the module-level list.

    Returns:
        ``(decomp_dict, classifier_dict)`` where ``decomp_dict`` maps each
        decomposition name to the best estimator found for it across all
        classifiers, and ``classifier_dict`` maps each classifier name to the
        best estimator found for it across all decompositions.

    Raises:
        KeyError: If a requested classifier has no entry in ``grids``. This
            is checked before any fitting starts.
    """
    classifiers = list(classifiers)
    # Fail fast: a typo should not surface only after minutes of fitting.
    unknown = [name for name in classifiers if name not in grids]
    if unknown:
        raise KeyError(f'No grid search configured for: {unknown}')

    decomp_dict = {}
    classifier_dict = {}
    decomp_scores = {}
    classifier_scores = {}
    for decomp in decomp_methods:
        # The data depends only on the decomposition, so load/split it once
        # here instead of once per classifier.
        X, y = utility.get_X_y(decomp, feature_type = 'all')
        X_train, _, y_train, _ = train_test_split(X, y, test_size=0.2, random_state=1)
        for classifier in classifiers:
            grid = grids[classifier]
            grid.fit(X_train, y_train)
            score = grid.best_score_
            estimator = grid.best_estimator_
            if decomp not in decomp_dict or score > decomp_scores[decomp]:
                decomp_scores[decomp] = score
                decomp_dict[decomp] = estimator
            if classifier not in classifier_dict or score > classifier_scores[classifier]:
                classifier_scores[classifier] = score
                classifier_dict[classifier] = estimator
    return decomp_dict, classifier_dict

In [17]:
decomp_dict, classifier_dict = run_grid_search(['random_forest','SVM','ANN','KNN'], decomp_methods = ['noDecomp'])

Fitting 5 folds for each of 25 candidates, totalling 125 fits
[CV] C=0.1, gamma=1, kernel=rbf ......................................


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


[CV] .......... C=0.1, gamma=1, kernel=rbf, score=0.673, total=   2.9s
[CV] C=0.1, gamma=1, kernel=rbf ......................................


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    2.9s remaining:    0.0s


[CV] .......... C=0.1, gamma=1, kernel=rbf, score=0.664, total=   3.2s
[CV] C=0.1, gamma=1, kernel=rbf ......................................


[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    6.0s remaining:    0.0s


[CV] .......... C=0.1, gamma=1, kernel=rbf, score=0.667, total=   4.8s
[CV] C=0.1, gamma=1, kernel=rbf ......................................
[CV] .......... C=0.1, gamma=1, kernel=rbf, score=0.678, total=   4.4s
[CV] C=0.1, gamma=1, kernel=rbf ......................................
[CV] .......... C=0.1, gamma=1, kernel=rbf, score=0.637, total=   2.6s
[CV] C=0.1, gamma=0.1, kernel=rbf ....................................
[CV] ........ C=0.1, gamma=0.1, kernel=rbf, score=0.658, total=   3.2s
[CV] C=0.1, gamma=0.1, kernel=rbf ....................................
[CV] ........ C=0.1, gamma=0.1, kernel=rbf, score=0.681, total=   2.8s
[CV] C=0.1, gamma=0.1, kernel=rbf ....................................
[CV] ........ C=0.1, gamma=0.1, kernel=rbf, score=0.670, total=   2.3s
[CV] C=0.1, gamma=0.1, kernel=rbf ....................................
[CV] ........ C=0.1, gamma=0.1, kernel=rbf, score=0.650, total=   2.6s
[CV] C=0.1, gamma=0.1, kernel=rbf ....................................
[CV] .

[CV] ........ C=10, gamma=0.01, kernel=rbf, score=0.667, total=   2.6s
[CV] C=10, gamma=0.01, kernel=rbf ....................................
[CV] ........ C=10, gamma=0.01, kernel=rbf, score=0.680, total=   2.6s
[CV] C=10, gamma=0.01, kernel=rbf ....................................
[CV] ........ C=10, gamma=0.01, kernel=rbf, score=0.692, total=   2.6s
[CV] C=10, gamma=0.01, kernel=rbf ....................................
[CV] ........ C=10, gamma=0.01, kernel=rbf, score=0.644, total=   2.5s
[CV] C=10, gamma=0.01, kernel=rbf ....................................
[CV] ........ C=10, gamma=0.01, kernel=rbf, score=0.648, total=   2.6s
[CV] C=10, gamma=0.001, kernel=rbf ...................................
[CV] ....... C=10, gamma=0.001, kernel=rbf, score=0.634, total=   2.9s
[CV] C=10, gamma=0.001, kernel=rbf ...................................
[CV] ....... C=10, gamma=0.001, kernel=rbf, score=0.672, total=   2.9s
[CV] C=10, gamma=0.001, kernel=rbf ...................................
[CV] .

[CV] ..... C=1000, gamma=0.001, kernel=rbf, score=0.661, total=   2.6s
[CV] C=1000, gamma=0.001, kernel=rbf .................................
[CV] ..... C=1000, gamma=0.001, kernel=rbf, score=0.675, total=   2.6s
[CV] C=1000, gamma=0.0001, kernel=rbf ................................
[CV] .... C=1000, gamma=0.0001, kernel=rbf, score=0.670, total=   2.6s
[CV] C=1000, gamma=0.0001, kernel=rbf ................................
[CV] .... C=1000, gamma=0.0001, kernel=rbf, score=0.670, total=   2.6s
[CV] C=1000, gamma=0.0001, kernel=rbf ................................
[CV] .... C=1000, gamma=0.0001, kernel=rbf, score=0.683, total=   2.6s
[CV] C=1000, gamma=0.0001, kernel=rbf ................................
[CV] .... C=1000, gamma=0.0001, kernel=rbf, score=0.645, total=   2.7s
[CV] C=1000, gamma=0.0001, kernel=rbf ................................
[CV] .... C=1000, gamma=0.0001, kernel=rbf, score=0.642, total=   2.6s


[Parallel(n_jobs=1)]: Done 125 out of 125 | elapsed:  6.1min finished


In [4]:
# Hard-coded estimators, one per classifier family.
# NOTE(review): the hyperparameters all lie inside the grids defined above, so
# they were presumably copied from an earlier run_grid_search run — confirm.
# NOTE(review): MLPClassifier is created without random_state, so the ANN
# results below can vary from run to run.
classifier_dict = dict(
    random_forest=RandomForestClassifier(n_estimators=300, max_depth=12, random_state=0),
    SVM=SVC(C=10, gamma=1),
    ANN=MLPClassifier(hidden_layer_sizes=(1000, 500, 250)),
    KNN=KNeighborsClassifier(n_neighbors=1, leaf_size=1, p=1),
)

## Supervised learning

In [14]:
def get_predicitions(classifier, decomp = 'noDecomp', feature_type = 'all'):
    """Fit ``classifier`` on a fixed train split and predict the held-out split.

    The data comes from ``utility.get_X_y`` with ``pure`` and ``normal``
    enabled, all three ``fs_*`` options disabled and ``k = 10``, and is split
    80/20 with ``random_state=1``.
    NOTE(review): the exact meaning of those ``get_X_y`` flags is defined in
    ``utility`` and is not visible here.

    Args:
        classifier: Estimator exposing ``fit`` and ``predict``; it is fitted
            in place.
        decomp: Decomposition name forwarded to ``utility.get_X_y``.
        feature_type: Feature selection name forwarded to ``utility.get_X_y``.

    Returns:
        ``(predictions, y_test, X_test)`` for the held-out split.
    """
    features, labels = utility.get_X_y(
        decomp, feature_type,
        pure = True, normal = True,
        fs_filter = False, fs_auto_encoder = False, fs_pca = False,
        k = 10,
    )
    X_train, X_test, y_train, y_test = train_test_split(
        features, labels, test_size=0.2, random_state=1
    )
    classifier.fit(X_train, y_train)
    predictions = classifier.predict(X_test)
    return predictions, y_test, X_test

# Fit the 'ANN' entry of classifier_dict and predict on the held-out split
# (default arguments: decomp='noDecomp', feature_type='all').
y_pred,y_test,X_test = get_predicitions(classifier_dict['ANN'])
# Dict form of the report; not read again in the cells visible here.
rep = classification_report(y_test, y_pred, output_dict=True)
# Number of held-out samples whose predicted label differs from the true label.
print(f'Number of mislabeled points out of a total {X_test.shape[0]} points : {(y_test != y_pred).sum()}')
# Per-class precision / recall / F1 as a text table.
print(classification_report(y_test, y_pred))

Number of mislabeled points out of a total 634 points : 99
              precision    recall  f1-score   support

     crackle       0.80      0.89      0.85       303
  no-crackle       0.89      0.80      0.84       331

    accuracy                           0.84       634
   macro avg       0.85      0.85      0.84       634
weighted avg       0.85      0.84      0.84       634

