In [0]:
import warnings
warnings.filterwarnings("ignore")

In [0]:
from sklearn.datasets import load_breast_cancer

# Load the breast-cancer dataset bundled with scikit-learn and pull the
# feature matrix (`X`) and the label vector (`y`) out of the returned bunch.
ds = load_breast_cancer()
X, y = ds['data'], ds['target']

In [0]:
from sklearn.preprocessing import StandardScaler
X = StandardScaler().fit_transform(X)

In [0]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

In [0]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import BaggingClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import GradientBoostingClassifier
from xgboost import XGBClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
import numpy as np

In [0]:
# One hyper-parameter grid per estimator. The grids are positional: entry i
# is searched together with classifier[i] / cls_name[i], so the order here
# must match the order of those lists. Each entry is a list holding a single
# dict, which is passed unchanged to GridSearchCV as its parameter grid.
tuned_parameters = [
    # 0: DecisionTreeClassifier
    [{'max_depth': [10, 100, 1000, 10000],
      'min_samples_split': [2, 10, 100],
      'min_samples_leaf': [1, 5, 10],
      'max_features': ["sqrt", "log2"]}],
    # 1: MLPClassifier
    [{'activation': ['logistic', 'tanh', 'relu'],
      'hidden_layer_sizes': [(3,), (6,)],
      'max_iter': [100, 500],
      'alpha': [0.0001, 0.0005]}],
    # 2: SVC
    [{'kernel': ['rbf', 'linear', 'poly'],
      'gamma': [1e-3, 1e-4],
      'C': [1, 10, 100, 1000]}],
    # 3: GaussianNB (a single fixed prior, so nothing is actually searched)
    [{'priors': [np.array([0.1, 0.9])]}],
    # 4: LogisticRegression
    # The 'l1' penalty is only supported by some solvers. Pin 'liblinear',
    # which handles both 'l1' and 'l2', so that every candidate in this grid
    # can actually be fitted instead of silently failing.
    [{'penalty': ['l1', 'l2'],
      'solver': ['liblinear'],
      'tol': [1e-5, 1e-6],
      'max_iter': [10, 100, 1000],
      'fit_intercept': [True, False]}],
    # 5: KNeighborsClassifier
    [{'n_neighbors': [3, 5, 10],
      'weights': ['uniform', 'distance'],
      'algorithm': ['ball_tree', 'kd_tree', 'brute'],
      'p': [1, 2, 3]}],
    # 6: BaggingClassifier
    [{'n_estimators': [10, 25, 50, 100],
      'max_samples': [0.5, 1.0],
      'max_features': [0.5, 1.0],
      'random_state': [None]}],
    # 7: RandomForestClassifier
    [{'n_estimators': [10, 25, 50, 100],
      'max_features': [0.5, 1.0],
      'criterion': ['gini', 'entropy'],
      'max_depth': [None, 100, 200]}],
    # 8: AdaBoostClassifier
    # NOTE(review): the accepted 'algorithm' values depend on the installed
    # scikit-learn version -- confirm 'SAMME.R' is still supported.
    [{'n_estimators': [50, 100, 150, 200],
      'random_state': [None],
      'learning_rate': [1., 0.8, 0.5],
      'algorithm': ['SAMME', 'SAMME.R']}],
    # 9: GradientBoostingClassifier
    # NOTE(review): newer scikit-learn releases name this loss 'log_loss'
    # instead of 'deviance' -- confirm against the installed version.
    [{'loss': ['deviance', 'exponential'],
      'learning_rate': [0.1, 0.5],
      'n_estimators': [100, 250, 400, 500],
      'max_features': ['log2', 'sqrt'],
      'max_depth': [3, 10, 50]}],
    # 10: XGBClassifier
    [{'learning_rate': [0.1, 0.05, 0.2],
      'n_estimators': [100, 250, 400, 500],
      'booster': ['gbtree', 'gblinear', 'dart'],
      'min_child_weight': [1],
      'max_delta_step': [0]}],
]

# Metrics to tune for; each is later expanded to '<metric>_macro' when it is
# handed to GridSearchCV as the scoring string.
scores = ['precision', 'recall']

In [0]:
# Display name paired with a default-constructed estimator. The two parallel
# lists used by the evaluation loop are derived from this single table so
# that names and estimators cannot drift out of step. The order must match
# the order of the grids in `tuned_parameters`.
_named_estimators = [
    ("Decision Tree", DecisionTreeClassifier()),
    ("Neural Networks", MLPClassifier()),
    ("Support Vector Machine", SVC()),
    ("Naive Bayes", GaussianNB()),
    ("Logistic Regression", LogisticRegression()),
    ("K-Nearest Neighbors", KNeighborsClassifier()),
    ("Bagging Method", BaggingClassifier()),
    ("Random Forest Model", RandomForestClassifier()),
    ("AdaBoost Model", AdaBoostClassifier()),
    ("Gradient Boosting", GradientBoostingClassifier()),
    ("XGB Model", XGBClassifier()),
]
classifier = [estimator for _, estimator in _named_estimators]
cls_name = [label for label, _ in _named_estimators]


In [0]:
# For every estimator, run a 5-fold grid search once per metric in `scores`
# (scored as '<metric>_macro'), print the winning parameters and the
# cross-validated score of every candidate, then evaluate the refitted best
# model on the held-out test split.
for name, estimator, param_grid in zip(cls_name, classifier, tuned_parameters):
    print("\n-------------Showing below the results obtained using %s-----------------\n" % name)
    for score in scores:
        print("# Tuning hyper-parameters for %s" % score)
        print()

        clf = GridSearchCV(estimator, param_grid, cv=5,
                           scoring='%s_macro' % score)
        clf.fit(X_train, y_train)

        print("Best parameters set found on development set:")
        print()
        print(clf.best_params_)
        print()
        print("Grid scores on development set:")
        print()
        cv_results = clf.cv_results_
        for mean, std, params in zip(cv_results['mean_test_score'],
                                     cv_results['std_test_score'],
                                     cv_results['params']):
            # Spread is reported as two standard deviations across the folds.
            print("%0.3f (+/-%0.03f) for %r"
                  % (mean, std * 2, params))

        print("Detailed classification report:")
        print()
        print("The model is trained on the full development set.")
        print("The scores are computed on the full evaluation set.")
        print()
        y_true, y_pred = y_test, clf.predict(X_test)
        print(classification_report(y_true, y_pred))
        print("Average Accuracy Score: \n")
        # clf.score() would return the metric the search was tuned for
        # (macro precision or recall), not accuracy, so compute the accuracy
        # explicitly from the predictions to match the label printed above.
        print(accuracy_score(y_true, y_pred))
        print()
        print("#############################################################################################################################")


-------------Showing below the results obtained using Decision Tree-----------------

# Tuning hyper-parameters for precision

Best parameters set found on development set:

{'max_depth': 1000, 'max_features': 'log2', 'min_samples_leaf': 1, 'min_samples_split': 10}

Grid scores on development set:

0.905 (+/-0.038) for {'max_depth': 10, 'max_features': 'sqrt', 'min_samples_leaf': 1, 'min_samples_split': 2}
0.899 (+/-0.043) for {'max_depth': 10, 'max_features': 'sqrt', 'min_samples_leaf': 1, 'min_samples_split': 10}
0.914 (+/-0.052) for {'max_depth': 10, 'max_features': 'sqrt', 'min_samples_leaf': 1, 'min_samples_split': 100}
0.911 (+/-0.068) for {'max_depth': 10, 'max_features': 'sqrt', 'min_samples_leaf': 5, 'min_samples_split': 2}
0.894 (+/-0.060) for {'max_depth': 10, 'max_features': 'sqrt', 'min_samples_leaf': 5, 'min_samples_split': 10}
0.875 (+/-0.065) for {'max_depth': 10, 'max_features': 'sqrt', 'min_samples_leaf': 5, 'min_samples_split': 100}
0.914 (+/-0.039) for {'max_depth'