# Testing the functionality of MetaTuner on the breast cancer dataset

In [1]:
from mango import MetaTuner

In [2]:
# Define different classifiers

from scipy.stats import uniform

from sklearn import datasets
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import cross_val_score

# Load the scikit-learn breast cancer dataset once; the classifier cells
# below all reuse the same feature matrix X and label vector Y.
data = datasets.load_breast_cancer()
X, Y = data.data, data.target

# XGBoost

In [3]:
from xgboost import XGBClassifier

# Hyperparameter search space for XGBClassifier.
# scipy's uniform(loc, scale) samples from [loc, loc + scale].
param_dict_xgboost = {
    "learning_rate": uniform(0, 1),  # continuous, [0, 1]
    "gamma": uniform(0, 5),  # continuous, [0, 5]
    "max_depth": range(1, 16),  # integers 1..15
    "n_estimators": range(1, 4),  # integers 1..3
    "booster": ['gbtree', 'gblinear', 'dart'],
}


# Data read by objective_xgboost (aliases of the shared X and Y).
X_xgboost, Y_xgboost = X, Y

# import warnings
# warnings.filterwarnings('ignore')

def objective_xgboost(args_list):
    """Score XGBoost hyperparameter configurations by 3-fold CV accuracy.

    Args:
        args_list: iterable of dicts; each dict is applied to a fresh
            ``XGBClassifier`` through ``set_params(**params)``.

    Returns:
        A list holding the mean cross-validation accuracy of every
        configuration, in the same order as ``args_list``.
    """
    def _mean_cv_accuracy(params):
        # Fixed seed and silenced logging; the remaining settings come
        # from the sampled configuration.
        model = XGBClassifier(verbosity=0, random_state=0)
        model.set_params(**params)
        fold_scores = cross_val_score(
            model, X_xgboost, Y_xgboost, scoring='accuracy', cv=3
        )
        return fold_scores.mean()

    return [_mean_cv_accuracy(params) for params in args_list]

# KNN

In [4]:
# Hyperparameter search space for KNeighborsClassifier.
param_dict_knn = {
    "n_neighbors": range(1, 101),  # integers 1..100
    'algorithm': ['auto', 'ball_tree', 'kd_tree', 'brute'],
}
# Data read by objective_knn (aliases of the shared X and Y).
X_knn, Y_knn = X, Y

def objective_knn(args_list):
    """Score k-nearest-neighbours configurations by 3-fold CV accuracy.

    Args:
        args_list: iterable of dicts; each dict is applied to a fresh
            ``KNeighborsClassifier`` through ``set_params(**params)``.

    Returns:
        A list holding the mean cross-validation accuracy of every
        configuration, in the same order as ``args_list``.
    """
    def _mean_cv_accuracy(params):
        model = KNeighborsClassifier()
        model.set_params(**params)
        fold_scores = cross_val_score(
            model, X_knn, Y_knn, scoring='accuracy', cv=3
        )
        return fold_scores.mean()

    return [_mean_cv_accuracy(params) for params in args_list]

# SVM

In [5]:
from mango.domain.distribution import loguniform
from sklearn import svm

# Hyperparameter search space for svm.SVC.
# scipy's uniform(loc, scale) samples from [loc, loc + scale], so gamma is
# drawn from [0.1, 4.1].
# NOTE(review): loguniform comes from mango; confirm the meaning of its two
# arguments against the mango documentation.
param_dict_svm = {
    "gamma": uniform(0.1, 4),
    "C": loguniform(-7, 10),
}

# Data read by objective_svm (aliases of the shared X and Y).
X_svm, Y_svm = X, Y


def objective_svm(args_list):
    """Score SVC hyperparameter configurations by 3-fold CV accuracy.

    Args:
        args_list: iterable of dicts; each dict is applied to a fresh
            ``svm.SVC`` through ``set_params(**params)``.

    Returns:
        A list holding the mean cross-validation accuracy of every
        configuration, in the same order as ``args_list``.
    """
    def _mean_cv_accuracy(params):
        model = svm.SVC(random_state=0)
        model.set_params(**params)
        fold_scores = cross_val_score(
            model, X_svm, Y_svm, scoring='accuracy', cv=3
        )
        return fold_scores.mean()

    return [_mean_cv_accuracy(params) for params in args_list]

# Decision Tree

In [6]:
from sklearn.tree import DecisionTreeClassifier

# Hyperparameter search space for DecisionTreeClassifier.
# 'auto' is intentionally not offered for max_features: scikit-learn
# deprecated it in 1.1 and removed it in 1.3, and for classifiers it was
# only an alias of 'sqrt', so dropping it loses no distinct configuration.
param_dict_dtree = {
    "max_features": ['sqrt', 'log2'],
    "max_depth": range(1, 21),  # integers 1..20
    "splitter": ['best', 'random'],
    "criterion": ['gini', 'entropy'],
}


# Data read by objective_dtree (aliases of the shared X and Y).
X_dtree, Y_dtree = X, Y

# Sanity check: show the shapes of the feature matrix and the label vector.
print(X_dtree.shape, Y_dtree.shape)

def objective_dtree(args_list):
    """Score decision-tree configurations by 3-fold CV accuracy.

    Args:
        args_list: iterable of dicts; each dict is applied to a fresh
            ``DecisionTreeClassifier`` through ``set_params(**params)``.

    Returns:
        A list holding the mean cross-validation accuracy of every
        configuration, in the same order as ``args_list``.
    """
    def _mean_cv_accuracy(params):
        model = DecisionTreeClassifier(random_state=0)
        model.set_params(**params)
        fold_scores = cross_val_score(
            model, X_dtree, Y_dtree, scoring='accuracy', cv=3
        )
        return fold_scores.mean()

    return [_mean_cv_accuracy(params) for params in args_list]


(569, 30) (569,)


In [7]:
# Search spaces and objective functions, listed in the same order so that
# entry i of one list belongs with entry i of the other
# (0: KNN, 1: SVM, 2: decision tree, 3: XGBoost).
param_space_list = [
    param_dict_knn,
    param_dict_svm,
    param_dict_dtree,
    param_dict_xgboost,
]
objective_list = [
    objective_knn,
    objective_svm,
    objective_dtree,
    objective_xgboost,
]

In [8]:
# Build a single tuner over all four search spaces and their objectives.
metatuner = MetaTuner(param_space_list, objective_list)

In [9]:
# Run the tuner and keep what it returns; the cells below read it by key.
results = metatuner.run()

  0%|          | 0/20 [00:00<?, ?it/s]

In [10]:
# List the keys available in the evaluation results
for key in results:
    print(key)

random_params
random_params_objective
random_objective_fid
params_tried
objective_values
objective_fid
best_objective
best_params
best_objective_fid


In [11]:
# Report the best objective value, the parameters that produced it, and the
# id of the objective function it came from.
for field in ('best_objective', 'best_params', 'best_objective_fid'):
    print(field + ':', results[field])

best_objective: 0.9420124385036667
best_params: {'booster': 'gbtree', 'gamma': 0.10353367759089294, 'learning_rate': 0.9651837278385165, 'max_depth': 9, 'n_estimators': 3}
best_objective_fid: 3


In [12]:
# Order in which the objective functions were evaluated, by function id;
# the initial order is random.
print(results['objective_fid'])

[0, 0, 1, 1, 2, 2, 3, 3, 0, 3, 3, 3, 2, 3, 2, 2, 3, 2, 3, 2, 2, 3, 2, 2, 2, 1, 0, 2]


In [13]:
# Objective values, listed in the order the evaluations were made
print(results['objective_values'])

[0.9016522788452613, 0.9016244314489928, 0.6264204028589994, 0.6274204028589994, 0.924412884062007, 0.924403601596584, 0.9208948296667595, 0.9402394876079088, 0.91914972616727, 0.8700083542188805, 0.9208948296667595, 0.8893158822983386, 0.9384665367121507, 0.8752900770444629, 0.7431913116123643, 0.924403601596584, 0.8840248770073332, 0.9173767752715122, 0.9208948296667595, 0.9156595191682911, 0.924412884062007, 0.9420124385036667, 0.9261579875614964, 0.9103777963427087, 0.9261951174231876, 0.6274204028589994, 0.9050960735171261, 0.933194096351991]


# A simple chart of function evaluations

In [14]:
def count_elements(seq):
    """Count how often each distinct item occurs in `seq`.

    Returns a dict mapping item -> number of occurrences, with keys in
    order of first appearance.
    """
    tally = {}
    for item in seq:
        if item in tally:
            tally[item] += 1
        else:
            tally[item] = 1
    return tally

def ascii_histogram(seq):
    """Print a horizontal text histogram of the items in `seq`.

    One line is printed per distinct item, in ascending item order: the
    item right-aligned in a 5-character integer field, a space, then one
    '+' per occurrence.
    """
    frequencies = count_elements(seq)
    for value, occurrences in sorted(frequencies.items()):
        bar = '+' * occurrences
        print('{0:5d} {1}'.format(value, bar))
        
# Show how many evaluations each objective function id received.
ascii_histogram(results['objective_fid'])

    0 ++++
    1 +++
    2 ++++++++++++
    3 +++++++++
