In [1]:
"""
This code is based on the Sklearn example:
https://scikit-learn.org/stable/auto_examples/classification/plot_classifier_comparison.html?highlight=classifier%20comparison
"""

import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score, roc_auc_score
from sklearn.preprocessing import StandardScaler
## data set
from sklearn.datasets import load_breast_cancer
from sklearn.datasets import make_moons
from sklearn.datasets import make_circles
## learners
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.ensemble import RandomForestClassifier


## the data sets
datasets = [
    load_breast_cancer(return_X_y=True),  ## X input, y output
    make_moons(n_samples = 100, noise=0.3, random_state=1),
    make_circles(n_samples = 100, noise=0.2, factor=0.5, random_state=1)
]

## setting the learners parameters
classifiers = [
    MLPClassifier(hidden_layer_sizes = (100,), alpha=0.0001, max_iter=200, \
                    random_state=1),
    AdaBoostClassifier(n_estimators = 100, random_state=1),
    GradientBoostingClassifier(n_estimators=100, learning_rate=1, \
                                 max_depth=1, random_state=1), \
    RandomForestClassifier(max_depth=4, random_state=1)
]


In [3]:
data_names = ["Breast cancer", "Moons", "Circles"]

learner_names = ["Neural Net", "AdaBoost", "Gradient Boosting", "Random Forest"]

## the data sets
datasets = [
    load_breast_cancer(return_X_y=True),  ## X input, y output
    make_moons(n_samples = 100, noise=0.3, random_state=1),
    make_circles(n_samples = 100, noise=0.2, factor=0.5, random_state=1)
]

## 

## setting the learners parameters
classifiers = [
    MLPClassifier (hidden_layer_sizes = (100,), alpha=0.0001, max_iter=200,
                    random_state=1),
    AdaBoostClassifier (n_estimators = 100, random_state=1),
    GradientBoostingClassifier (n_estimators=100, learning_rate=1, \
                                max_depth=1, random_state=1), \
RandomForestClassifier (max_depth=4, random_state=1)
]

xscore_auc = np.zeros((len (datasets), len (classifiers)))

# iterate over datasets
for ds_cnt, ds in enumerate(datasets):
    # preprocess dataset, split into training and test part 
    X, y = ds
    ## transform the output
    y = 2 * y -1 ## from {0,1} to {-1,+1}
    ## scale the input data
    ## X = StandardScaler().fit_transform (X)
    ## select training and test
    X_train, X_test, y_train, y_test = train_test_split (X, y, \
            test_size=0.5, random_state=1)

    # iterate over classifiers
    ilearner = 0
    for name, clf in zip (learner_names, classifiers):
        clf.fit(X_train, y_train)
        yprediction = clf.predict(X_test) 
        ## score= clf.score (X_test, y_test)
        roc_auc = roc_auc_score(y_test, yprediction)
        print(name, roc_auc)
        xscore_auc [ds_cnt, ilearner] = roc_auc
        ilearner += 1
    xmeanscore = np.mean(xscore_auc, 0)
    lscores = [(xmeanscore [i], learner_names[i]) for i in range(len(learner_names))] 
    lscores.sort(reverse=True)
    print()
    print(20*'-')
    for i in range (len(lscores)):
        print(lscores [i] [1], '%6.4f '%lscores [i][0])



Neural Net 0.888536221060493
AdaBoost 0.9370799103808813
Gradient Boosting 0.9474287848074257
Random Forest 0.9315854048863758

--------------------
Gradient Boosting 0.3158 
AdaBoost 0.3124 
Random Forest 0.3105 
Neural Net 0.2962 




Neural Net 0.96
AdaBoost 0.8199999999999998
Gradient Boosting 0.8199999999999998
Random Forest 0.9600000000000001

--------------------
Random Forest 0.6305 
Neural Net 0.6162 
Gradient Boosting 0.5891 
AdaBoost 0.5857 




Neural Net 0.88
AdaBoost 0.76
Gradient Boosting 0.7799999999999999
Random Forest 0.9

--------------------
Random Forest 0.9305 
Neural Net 0.9095 
Gradient Boosting 0.8491 
AdaBoost 0.8390 
