In [1]:
from sklearn import datasets
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.svm import SVC
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split

  from numpy.core.umath_tests import inner1d


In [2]:
from auto_opt import EstimatorSelectionHelper

In [3]:
breast_cancer = datasets.load_breast_cancer()
X = breast_cancer.data
y = breast_cancer.target

In [4]:
models1 = {
    'RandomForestClassifier': RandomForestClassifier(),
    'GradientBoostingClassifier': GradientBoostingClassifier(),
    'SVC': SVC()
}

In [5]:
#Separeting a sample to external validation
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size = 0.2)

In [6]:
min_samples = int(X_train.shape[0]/100) #defining min samples according to some percentage of training data

In [7]:
#Grid Params for GradientBoosting
gradient_boosting_params = { 'n_estimators': [250],# 500, 1000],
                            'learning_rate': [0.1],# 0.001],
                            'max_depth':[3,4,5],
                            'min_samples_leaf':[min_samples],#min_samples*2,min_samples*5],
                            'min_samples_split':[min_samples],#min_samples*2,min_samples*5]
                           }

In [8]:
#Grid for SVC
svc_params = [
                {'kernel': ['linear'], 'C': [1, 10]},
                {'kernel': ['rbf'], 'C': [1, 10], 'gamma': [0.001, 0.0001]},
            ]

In [9]:
#Grid Params for RandomForestClassifier
random_forest_params = { 'n_estimators': [250], #500, 1000],
                        'max_depth':[3,4,5],
                        'min_samples_leaf':[min_samples],#,min_samples*2,min_samples*5],
                        'min_samples_split':[min_samples],#,min_samples*2,min_samples*5]
                       }

In [10]:
params1 = {
    'RandomForestClassifier': random_forest_params,
    'GradientBoostingClassifier': gradient_boosting_params,
    'SVC': svc_params
}

# First test should work fine

In [11]:
helper1 = EstimatorSelectionHelper(models1, params1)
helper1.fit(X_train, y_train)

Running GridSearchCV for RandomForestClassifier.
Running GridSearchCV for GradientBoostingClassifier.
Running GridSearchCV for SVC.


In [12]:
helper1.score_summary(sort_by='mean_score')

RandomForestClassifier
GradientBoostingClassifier
SVC


Unnamed: 0,estimator,min_score,mean_score,max_score,std_score,C,gamma,kernel,learning_rate,max_depth,min_samples_leaf,min_samples_split,n_estimators
4,GradientBoostingClassifier,0.957447,0.966486,0.973262,0.006652,,,,0.1,4.0,4.0,4.0,250.0
3,GradientBoostingClassifier,0.962963,0.964812,0.967742,0.00209547,,,,0.1,3.0,4.0,4.0,250.0
5,GradientBoostingClassifier,0.957447,0.964529,0.96875,0.00503875,,,,0.1,5.0,4.0,4.0,250.0
6,SVC,0.953846,0.963334,0.978261,0.0106836,1.0,,linear,,,,,
7,SVC,0.957895,0.961262,0.963731,0.00246594,10.0,,linear,,,,,
1,RandomForestClassifier,0.946809,0.957876,0.972973,0.0110551,,,,,4.0,4.0,4.0,250.0
2,RandomForestClassifier,0.946809,0.957876,0.972973,0.0110551,,,,,5.0,4.0,4.0,250.0
0,RandomForestClassifier,0.941799,0.957845,0.972973,0.0127433,,,,,3.0,4.0,4.0,250.0
9,SVC,0.942408,0.95227,0.967033,0.0106336,1.0,0.0001,rbf,,,,,
11,SVC,0.921466,0.943666,0.962162,0.0168192,10.0,0.0001,rbf,,,,,


In [13]:
model = helper1.get_best_model()

In [18]:
print(classification_report(y_test,model.predict(X_test)))

             precision    recall  f1-score   support

          0       0.97      0.95      0.96        37
          1       0.97      0.99      0.98        77

avg / total       0.97      0.97      0.97       114



# Testing Errors

In [14]:
models1 = {
    'GradientBoostingClassifier': GradientBoostingClassifier(),
    'SVC': SVC()
}

In [15]:
params1 = {
    'RandomForestClassifier': random_forest_params,
    'GradientBoostingClassifier': gradient_boosting_params,
    'SVC': svc_params
}

In [16]:
helper1 = EstimatorSelectionHelper(models1, params1) #Parametros sem estimadores

ValueError: Some parameters are missing estimators: ['RandomForestClassifier']

In [23]:
models1 = {
    'RandomForestClassifier': RandomForestClassifier(),
    'GradientBoostingClassifier': GradientBoostingClassifier(),
    'SVC': SVC()
}

In [24]:
params1 = {
    'GradientBoostingClassifier': gradient_boosting_params,
    'SVC': svc_params
}

In [25]:
helper1 = EstimatorSelectionHelper(models1, params1) #Estimadores sem parametros


ValueError: Some estimators are missing parameters: ['RandomForestClassifier']

In [19]:
helper1 = EstimatorSelectionHelper(models1, ["teste"]) #Verificando assert

AssertionError: Params should be a dict

In [21]:
helper1.score_summary(sort_by="estimator")

AssertionError: sort_by should be min_score,max_score or mean_score