## 1. Hyperparameter ranges for all classifiers

In [None]:
import warnings
# Silence every warning for the rest of the session: no category or module
# filter is given, so this also hides deprecation warnings from libraries.
warnings.filterwarnings('ignore')

### 1. AdaBoostClassifier

In [139]:
from sklearn.ensemble import AdaBoostClassifier
# Search space for AdaBoostClassifier: 2 * 3 * 3 = 18 combinations.
param_dist_ada = dict(
    algorithm=['SAMME.R', 'SAMME'],
    n_estimators=[50, 100, 500],
    learning_rate=[0.01, 0.1, 2],
)

### 2. BernoulliNB

In [114]:
from sklearn.naive_bayes import BernoulliNB
# Search space for BernoulliNB: 4 * 2 = 8 combinations.
param_dist_ber = dict(
    alpha=[0.01, 0.1, 1, 2],
    fit_prior=[True, False],
)

### 3. DecisionTree

In [115]:
from sklearn.tree import DecisionTreeClassifier
# Search space for DecisionTreeClassifier: 2 * 3 * 3 = 18 combinations.
param_dist_dt = dict(
    criterion=['gini', 'entropy'],
    min_samples_split=[2, 10, 20],
    min_samples_leaf=[2, 10, 20],
)

### 4. ExtraTree

In [155]:
from sklearn.tree import ExtraTreeClassifier
# Search space for ExtraTreeClassifier: 2 * 3 = 6 combinations.
param_dist_et = dict(
    criterion=['gini', 'entropy'],
    min_samples_split=[2, 10, 20],
)

### 5. KNN

In [111]:
from sklearn.neighbors import KNeighborsClassifier
# Search space for KNeighborsClassifier: 3 * 2 * 2 = 12 combinations.
param_dist_knn = dict(
    n_neighbors=[3, 5, 10],
    weights=["uniform", "distance"],
    p=[1, 2],
)

### 6. LDA

In [12]:
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
# Search space for LinearDiscriminantAnalysis: 3 * 3 = 9 combinations.
# n_components may not exceed min(n_classes - 1, n_features). The digits data
# searched in this notebook has 10 classes, so candidates stay within 1..9.
# The earlier candidates 10 and 100 are rejected at fit time by current
# scikit-learn releases, which made two thirds of this grid fail.
param_dist_lda = {
    'n_components': [1, 5, 9],
    'tol': [0.001, 0.01, 0.1],
}

### 7. SGD

In [14]:
from sklearn.linear_model import SGDClassifier
# Search space for SGDClassifier: 5 * 3 * 3 = 45 combinations.
param_dist_sgd = dict(
    loss=["hinge", "log", "modified_huber", "squared_hinge", "perceptron"],
    penalty=["l1", "l2", "elasticnet"],
    alpha=[0.01, 0.1, 1.0],
)

### 8. LibLinear_SVC

In [16]:
from sklearn.svm import LinearSVC
# Search space for LinearSVC: 5 * 4 = 20 combinations.
param_dist_lsvc = dict(
    C=[0.01, 0.1, 1.0, 10.0, 100.0],
    tol=[0.001, 0.01, 0.1, 1.0],
)

### 9. LibSVM_SVC

In [20]:
from sklearn.svm import SVC
# Search space for SVC: 5 * 4 = 20 combinations.
# The variable keeps its "svd" spelling because the param_dists list refers
# to it under that name.
param_dist_svd = dict(
    C=[0.01, 0.1, 1.0, 10.0, 100.0],
    tol=[0.001, 0.01, 0.1, 1.0],
)

### 10. MultinomialNB

In [22]:
from sklearn.naive_bayes import MultinomialNB
# Search space for MultinomialNB: 4 * 2 = 8 combinations.
param_dist_mnb = dict(
    alpha=[0.01, 0.1, 1.0, 10],
    fit_prior=[True, False],
)

### 11. PassiveAggressive

In [26]:
from sklearn.linear_model import PassiveAggressiveClassifier
# Search space for PassiveAggressiveClassifier: 4 * 2 * 4 = 32 combinations.
param_dist_pac = dict(
    C=[0.001, 0.01, 0.1, 1.0],
    loss=["hinge", "squared_hinge"],
    tol=[0.001, 0.01, 0.1, 1.0],
)

### 12. QDA

In [30]:
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
# Search space for QuadraticDiscriminantAnalysis: a single axis, 3 candidates.
param_dist_qda = dict(reg_param=[0.0, 0.5, 1.0])

### 13. RandomForest

In [33]:
from sklearn.ensemble import RandomForestClassifier
# Search space for RandomForestClassifier: 2 * 3 = 6 combinations.
param_dist_rf = dict(
    criterion=["gini", "entropy"],
    n_estimators=[10, 50, 100],
)

### Other 

In [None]:
### GaussianNB (no hyperparameters searched)
# No search space is defined for this classifier: the placeholder is None and
# the matching entries in `classifiers` and `param_dists` are commented out.
from sklearn.naive_bayes import GaussianNB
param_dist_gnb = None

In [None]:
### GradientBoosting (takes too long to run)
from sklearn.ensemble import GradientBoostingClassifier
# Search space for GradientBoostingClassifier: 3 * 3 * 3 = 27 combinations.
param_dist_gbc = dict(
    learning_rate=[0.01, 0.1, 1.0],
    n_estimators=[50, 100, 500],
    criterion=['friedman_mse', 'mse', 'mae'],
)

## 2. Load Data and prepare for Grid Search

In [42]:
# Prints the current module's docstring; in the recorded run this produced the
# IPython "Automatically created module ..." line shown in the cell output.
print(__doc__)

import numpy as np

from time import time
from scipy.stats import randint as sp_randint

from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import RandomizedSearchCV
from sklearn.datasets import load_digits,load_iris
from sklearn.ensemble import RandomForestClassifier

Automatically created module for IPython interactive environment


In [43]:
# Load the digits dataset once; the search helpers below read X and y as
# module-level globals.
digits = load_digits()
X = digits.data
y = digits.target

In [103]:
# Utility function to report best scores
def report(results):
    """Print and return the top-ranked candidate of a parameter search.

    Parameters
    ----------
    results : mapping
        Search results laid out like ``cv_results_``. It must provide the
        parallel sequences ``'rank_test_score'``, ``'mean_test_score'``,
        ``'std_test_score'`` and ``'params'``. The sequences may be NumPy
        arrays or plain lists.

    Returns
    -------
    tuple
        ``(mean_test_score, params)`` of the first candidate whose rank is 1.

    Raises
    ------
    IndexError
        If no candidate has rank 1 (for example, when the results are empty).
    """
    # Convert explicitly so the comparison is element-wise for plain lists
    # too; ``[...] == 1`` on a list would collapse to a single False.
    ranks = np.asarray(results['rank_test_score'])
    # Several candidates can tie for rank 1; only the first is reported.
    best = np.flatnonzero(ranks == 1)[0]

    mean_score = results['mean_test_score'][best]
    best_params = results['params'][best]
    print("Mean validation score: {0:.3f} (std: {1:.3f})".format(
        mean_score, results['std_test_score'][best]))
    print("Parameters: {0}".format(best_params))
    return mean_score, best_params

In [102]:
def grid_search_one_class(clf,param_grid):
    """Run a timed 5-fold grid search for one classifier.

    The search is fitted on the module-level ``X`` and ``y``.

    Parameters
    ----------
    clf : estimator
        Classifier instance handed to ``GridSearchCV``.
    param_grid : dict
        Parameter grid handed to ``GridSearchCV``.

    Returns
    -------
    tuple
        ``(used_time, score, parameter)``: fit duration in seconds, then the
        best mean test score and its parameter dict as returned by ``report``.
    """
    import inspect

    search_kwargs = {'param_grid': param_grid, 'cv': 5}
    # `iid` was removed from scikit-learn's search classes in 0.24 and passing
    # it there raises TypeError. Keep the original iid=False only on releases
    # whose constructor still accepts it.
    if 'iid' in inspect.signature(GridSearchCV.__init__).parameters:
        search_kwargs['iid'] = False
    grid_search = GridSearchCV(clf, **search_kwargs)

    start = time()
    grid_search.fit(X, y)
    used_time = time() - start

    print("GridSearchCV took %.2f seconds for %d candidate parameter settings."
          % (used_time, len(grid_search.cv_results_['params'])))
    score, parameter = report(grid_search.cv_results_)
    return used_time, score, parameter

In [None]:
def random_search_one_class(clf,param_grid,n_iter=20,run_index=None):
    """Run a timed 5-fold randomized search for one classifier.

    The search is fitted on the module-level ``X`` and ``y``.

    Parameters
    ----------
    clf : estimator
        Classifier instance handed to ``RandomizedSearchCV``.
    param_grid : dict
        Parameter distributions handed to ``RandomizedSearchCV``.
    n_iter : int, default 20
        Number of parameter settings to sample.
    run_index : int, optional
        Zero-based index of this run, used only for the progress line. When
        omitted, a module-level ``i`` is used if one exists; otherwise the
        progress line is skipped.

    Returns
    -------
    tuple
        ``(used_time, score, parameter)``: fit duration in seconds, then the
        best mean test score and its parameter dict as returned by ``report``.
    """
    import inspect

    if run_index is None:
        # The function used to read a bare global `i` and raised NameError
        # when it was missing. Keep that source as a fallback so existing
        # driver loops still get their progress line.
        run_index = globals().get('i')
    if run_index is not None:
        print("The {}th random search".format(run_index+1))

    search_kwargs = {'param_distributions': param_grid, 'n_iter': n_iter, 'cv': 5}
    # `iid` was removed from scikit-learn's search classes in 0.24 and passing
    # it there raises TypeError. Keep the original iid=False only on releases
    # whose constructor still accepts it.
    if 'iid' in inspect.signature(RandomizedSearchCV.__init__).parameters:
        search_kwargs['iid'] = False
    random_search = RandomizedSearchCV(clf, **search_kwargs)

    start = time()
    random_search.fit(X, y)
    used_time = time() - start

    print("RandomSearchCV took %.2f seconds for %d candidate parameter settings."
          % (used_time, len(random_search.cv_results_['params'])))
    score, parameter = report(random_search.cv_results_)
    return used_time, score, parameter

In [150]:
# One default-configured estimator per entry. The order must stay in step
# with the param_dists list, which is paired with this one by position.
classifiers = [
    estimator_cls()
    for estimator_cls in (
        AdaBoostClassifier,
        # BernoulliNB,
        DecisionTreeClassifier,
        ExtraTreeClassifier,
        # GaussianNB,
        GradientBoostingClassifier,
        KNeighborsClassifier,
        LinearDiscriminantAnalysis,
        SGDClassifier,
        LinearSVC,
        SVC,
        MultinomialNB,
        PassiveAggressiveClassifier,
        QuadraticDiscriminantAnalysis,
        RandomForestClassifier,
    )
]

In [197]:
# Search spaces in the same order as the classifiers list; entries are paired
# with the estimators by position.
param_dists = [
    param_dist_ada,    # AdaBoostClassifier
    # param_dist_ber,  # BernoulliNB (disabled)
    param_dist_dt,     # DecisionTreeClassifier
    param_dist_et,     # ExtraTreeClassifier
    # param_dist_gnb,  # GaussianNB (disabled)
    param_dist_gbc,    # GradientBoostingClassifier
    param_dist_knn,    # KNeighborsClassifier
    param_dist_lda,    # LinearDiscriminantAnalysis
    param_dist_sgd,    # SGDClassifier
    param_dist_lsvc,   # LinearSVC
    param_dist_svd,    # SVC
    param_dist_mnb,    # MultinomialNB
    param_dist_pac,    # PassiveAggressiveClassifier
    param_dist_qda,    # QuadraticDiscriminantAnalysis
    param_dist_rf,     # RandomForestClassifier
]

## 3. Storage

In [7]:
import pandas as pd
# Load the stored grid-search results (columns: time, parameter, score).
# NOTE(review): the file also carries two "Unnamed" index columns, so it was
# probably written with the DataFrame index included more than once -- confirm
# where the CSV is produced.
gs = pd.read_csv('Storage/GS_best.csv')

In [8]:
gs

Unnamed: 0.1,Unnamed: 0,time,parameter,score
0,0,55.299455,"{'algorithm': 'SAMME', 'learning_rate': 2, 'n_...",0.840545
1,1,0.171836,"{'alpha': 0.01, 'fit_prior': False}",0.82645
2,2,1.280948,"{'criterion': 'entropy', 'min_samples_leaf': 2...",0.806872
3,3,0.093728,"{'criterion': 'entropy', 'min_samples_split': 2}",0.742219
4,4,2.952457,"{'n_neighbors': 3, 'p': 2, 'weights': 'distance'}",0.966665
5,5,0.624827,"{'n_components': 1, 'tol': 0.001}",0.908142
6,6,129.032135,"{'alpha': 1.0, 'loss': 'hinge', 'penalty': 'l2'}",0.929208
7,7,8.279301,"{'C': 0.01, 'tol': 0.001}",0.922142
8,8,35.226117,"{'C': 10.0, 'tol': 1.0}",0.538534
9,9,0.078076,"{'alpha': 10, 'fit_prior': True}",0.875387


In [13]:
# Sum of the 'time' column over all stored grid-search rows (presumably
# seconds, matching what the search helper reports -- confirm).
gs_time = gs['time'].sum()
gs_time

241.052618265152

In [9]:
# Load the stored randomized-search results (columns: time, parameter, score).
rs = pd.read_csv('Storage/RS_best.csv')

In [10]:
rs

Unnamed: 0.1,Unnamed: 0,time,parameter,score
0,0,35.569784,"{'n_estimators': 500, 'learning_rate': 2, 'alg...",0.840545
1,1,0.171804,"{'fit_prior': False, 'alpha': 0.01}",0.82645
2,2,0.71861,"{'min_samples_split': 2, 'min_samples_leaf': 2...",0.808578
3,3,0.093727,"{'min_samples_split': 2, 'criterion': 'gini'}",0.731754
4,4,2.561895,"{'weights': 'distance', 'p': 2, 'n_neighbors': 3}",0.966665
5,5,0.624827,"{'tol': 0.001, 'n_components': 1}",0.908142
6,6,13.762386,"{'penalty': 'l2', 'loss': 'hinge', 'alpha': 1.0}",0.925915
7,7,4.592668,"{'tol': 0.1, 'C': 0.1}",0.919351
8,8,18.152009,"{'tol': 1.0, 'C': 1.0}",0.533548
9,9,0.078078,"{'fit_prior': True, 'alpha': 10}",0.875387


In [14]:
# Sum of the 'time' column over all stored randomized-search rows (presumably
# seconds, matching what the search helper reports -- confirm).
rs_time = rs['time'].sum()
rs_time

81.80887269973755