In [111]:
import pandas as pd
import numpy as np
from sklearn.tree import DecisionTreeClassifier # Import Decision Tree Classifier
from sklearn.model_selection import train_test_split # Import train_test_split function
from sklearn import metrics #Import scikit-learn metrics module for accuracy calculation
from sklearn.metrics import roc_auc_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score
from sklearn.ensemble import BaggingClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.model_selection import GridSearchCV

np.random.seed(123)

df = pd.read_csv("Breastdata.csv")

df= df.drop('concave points_worst', axis = 1)
df= df.drop('concave points_mean', axis = 1)
df= df.drop('radius_worst', axis = 1)
df= df.drop('perimeter_worst', axis = 1)

df= df.drop('id', axis = 1)

df['diagnosis']=df['diagnosis'].apply(lambda x: 1 if x == 'M' else 0 )


df.dropna(inplace= True)
print ("df Shape: ", df.shape)
df = pd.get_dummies(df)
labels = np.array(df['diagnosis'])
df= df.drop('diagnosis', axis = 1)
df_list = list(df.columns)

#df.isnull().sum()

train_x, test_x, train_y, test_y = train_test_split(df, labels, test_size = 0.2, random_state = 42)

print ("Train_x Shape: ",train_x.shape)
print ("Train_y Shape: ", train_y.shape)
print ("Test_x Shape: ", test_x.shape)
print ("Test_y Shape: ", test_y.shape)


df Shape:  (569, 27)
Train_x Shape:  (455, 26)
Train_y Shape:  (455,)
Test_x Shape:  (114, 26)
Test_y Shape:  (114,)


# 1-Decision Tree Classifier

In [257]:
seed_1 = 15
# Create Decision Tree classifer object with max_depth = 2 and criterion = gini
clf = DecisionTreeClassifier(random_state = seed_1,max_depth = 2,criterion = "gini")

X = df
Y = labels
kfold = model_selection.KFold(n_splits = 10, random_state = seed)
results = model_selection.cross_val_score(clf, X, Y, cv = kfold, scoring = "roc_auc") 
print("Mean of Roc_Auc of kfold cross validation results:%0.4f"%results.mean())
results = model_selection.cross_val_score(clf, X, Y, cv = kfold, scoring = "accuracy") 
print("Mean of Accuracy of kfold cross validation results:%0.4f"%results.mean())

# Train Decision Tree Classifer
clf = clf.fit(train_x,train_y)

#Predict the response for test dataset
y_pred_DTC = clf.predict(test_x)

print("Confusion Matrix:")
print(confusion_matrix(test_y, y_pred_DTC))
print("Accuracy: %0.4f"%metrics.accuracy_score(test_y, y_pred_DTC))
print ("AUC Score:%0.4f"%roc_auc_score(test_y, y_pred_DTC))
print ("Precision:%0.4f"%precision_score(test_y, y_pred_DTC))
print ("Recall:%0.4f"%recall_score(test_y, y_pred_DTC))
print ("F1 Score:%0.4f"%f1_score(test_y, y_pred_DTC))

feature_importances_DTC = pd.DataFrame(clf.feature_importances_,
                                   index = train_x.columns,
                                    columns=['importance']).sort_values('importance', ascending=False)

print(feature_importances_DTC.head(10))

Mean of Roc_Auc of kfold cross validation results:0.9450
Mean of Accuracy of kfold cross validation results:0.8824
Confusion Matrix:
[[69  2]
 [ 7 36]]
Accuracy: 0.9211
AUC Score:0.9045
Precision:0.9474
Recall:0.8372
F1 Score:0.8889
                      importance
area_worst              0.840670
compactness_worst       0.117026
concavity_mean          0.042304
radius_mean             0.000000
compactness_se          0.000000
symmetry_worst          0.000000
concavity_worst         0.000000
smoothness_worst        0.000000
texture_worst           0.000000
fractal_dimension_se    0.000000


In [258]:
# Create Decision Tree classifer object with max_depth = 4 and criterion = gini
clf = DecisionTreeClassifier(random_state = seed_1,max_depth = 4,criterion = "gini")

X = df
Y = labels
kfold = model_selection.KFold(n_splits = 10, random_state = seed)
results = model_selection.cross_val_score(clf, X, Y, cv = kfold, scoring = "roc_auc") 
print("Mean of Roc_Auc of kfold cross validation results:%0.4f"%results.mean())
results = model_selection.cross_val_score(clf, X, Y, cv = kfold, scoring = "accuracy") 
print("Mean of Accuracy of kfold cross validation results:%0.4f"%results.mean())

# Train Decision Tree Classifer
clf = clf.fit(train_x,train_y)

#Predict the response for test dataset
y_pred_DTC = clf.predict(test_x)

print("Confusion Matrix:")
print(confusion_matrix(test_y, y_pred_DTC))
print("Accuracy: %0.4f"%metrics.accuracy_score(test_y, y_pred_DTC))
print ("AUC Score:%0.4f"%roc_auc_score(test_y, y_pred_DTC))
print ("Precision:%0.4f"%precision_score(test_y, y_pred_DTC))
print ("Recall:%0.4f"%recall_score(test_y, y_pred_DTC))
print ("F1 Score:%0.4f"%f1_score(test_y, y_pred_DTC))

feature_importances_DTC = pd.DataFrame(clf.feature_importances_,
                                   index = train_x.columns,
                                    columns=['importance']).sort_values('importance', ascending=False)

print(feature_importances_DTC.head(10))

Mean of Roc_Auc of kfold cross validation results:0.9465
Mean of Accuracy of kfold cross validation results:0.9350
Confusion Matrix:
[[68  3]
 [ 3 40]]
Accuracy: 0.9474
AUC Score:0.9440
Precision:0.9302
Recall:0.9302
F1 Score:0.9302
                         importance
area_worst                 0.724879
compactness_worst          0.098404
smoothness_mean            0.038581
concavity_mean             0.035573
texture_mean               0.031073
texture_worst              0.023971
area_mean                  0.016553
concavity_worst            0.013211
fractal_dimension_worst    0.008878
radius_se                  0.008878


In [259]:
# Create Decision Tree classifer object with max_depth = 8 and criterion = gini
clf = DecisionTreeClassifier(random_state = seed_1,max_depth = 8,criterion = "gini")

X = df
Y = labels
kfold = model_selection.KFold(n_splits = 10, random_state = seed)
results = model_selection.cross_val_score(clf, X, Y, cv = kfold, scoring = "roc_auc") 
print("Mean of Roc_Auc of kfold cross validation results:%0.4f"%results.mean())
results = model_selection.cross_val_score(clf, X, Y, cv = kfold, scoring = "accuracy") 
print("Mean of Accuracy of kfold cross validation results:%0.4f"%results.mean())

# Train Decision Tree Classifer
clf = clf.fit(train_x,train_y)

#Predict the response for test dataset
y_pred_DTC = clf.predict(test_x)

print("Confusion Matrix:")
print(confusion_matrix(test_y, y_pred_DTC))
print("Accuracy: %0.4f"%metrics.accuracy_score(test_y, y_pred_DTC))
print ("AUC Score:%0.4f"%roc_auc_score(test_y, y_pred_DTC))
print ("Precision:%0.4f"%precision_score(test_y, y_pred_DTC))
print ("Recall:%0.4f"%recall_score(test_y, y_pred_DTC))
print ("F1 Score:%0.4f"%f1_score(test_y, y_pred_DTC))

feature_importances_DTC = pd.DataFrame(clf.feature_importances_,
                                   index = train_x.columns,
                                    columns=['importance']).sort_values('importance', ascending=False)
print(feature_importances_DTC.head(10))

Mean of Roc_Auc of kfold cross validation results:0.9331
Mean of Accuracy of kfold cross validation results:0.9263
Confusion Matrix:
[[65  6]
 [ 4 39]]
Accuracy: 0.9123
AUC Score:0.9112
Precision:0.8667
Recall:0.9070
F1 Score:0.8864
                   importance
area_worst           0.697739
compactness_worst    0.092748
texture_worst        0.045713
smoothness_mean      0.036363
concavity_mean       0.033528
texture_mean         0.029287
compactness_se       0.015899
area_mean            0.015601
concavity_worst      0.012452
radius_mean          0.011296


In [260]:

# Create Decision Tree classifer object with max_depth = 2 and criterion = entropy
clf = DecisionTreeClassifier(random_state = seed_1,max_depth = 2,criterion = "entropy")

X = df
Y = labels
kfold = model_selection.KFold(n_splits = 10, random_state = seed)
results = model_selection.cross_val_score(clf, X, Y, cv = kfold, scoring = "roc_auc") 
print("Mean of Roc_Auc of kfold cross validation results:%0.4f"%results.mean())
results = model_selection.cross_val_score(clf, X, Y, cv = kfold, scoring = "accuracy") 
print("Mean of Accuracy of kfold cross validation results:%0.4f"%results.mean())

# Train Decision Tree Classifer
clf = clf.fit(train_x,train_y)

#Predict the response for test dataset
y_pred_DTC = clf.predict(test_x)

print("Confusion Matrix:")
print(confusion_matrix(test_y, y_pred_DTC))
print("Accuracy: %0.4f"%metrics.accuracy_score(test_y, y_pred_DTC))
print ("AUC Score:%0.4f"%roc_auc_score(test_y, y_pred_DTC))
print ("Precision:%0.4f"%precision_score(test_y, y_pred_DTC))
print ("Recall:%0.4f"%recall_score(test_y, y_pred_DTC))
print ("F1 Score:%0.4f"%f1_score(test_y, y_pred_DTC))

feature_importances_DTC = pd.DataFrame(clf.feature_importances_,
                                   index = train_x.columns,
                                    columns=['importance']).sort_values('importance', ascending=False)
print(feature_importances_DTC.head(10))

Mean of Roc_Auc of kfold cross validation results:0.9549
Mean of Accuracy of kfold cross validation results:0.8965
Confusion Matrix:
[[69  2]
 [ 6 37]]
Accuracy: 0.9298
AUC Score:0.9161
Precision:0.9487
Recall:0.8605
F1 Score:0.9024
                      importance
area_worst              0.755866
concavity_worst         0.154771
concavity_mean          0.089363
radius_mean             0.000000
compactness_se          0.000000
symmetry_worst          0.000000
compactness_worst       0.000000
smoothness_worst        0.000000
texture_worst           0.000000
fractal_dimension_se    0.000000


In [261]:
# Create Decision Tree classifer object with max_depth = 4 and criterion = entropy
clf = DecisionTreeClassifier(random_state = seed_1,max_depth = 4,criterion = "entropy")

X = df
Y = labels
kfold = model_selection.KFold(n_splits = 10, random_state = seed)
results = model_selection.cross_val_score(clf, X, Y, cv = kfold, scoring = "roc_auc") 
print("Mean of Roc_Auc of kfold cross validation results:%0.4f"%results.mean())
results = model_selection.cross_val_score(clf, X, Y, cv = kfold, scoring = "accuracy") 
print("Mean of Accuracy of kfold cross validation results:%0.4f"%results.mean())

clf = clf.fit(train_x,train_y)

#Predict the response for test dataset
y_pred_DTC = clf.predict(test_x)

print("Confusion Matrix:")
print(confusion_matrix(test_y, y_pred_DTC))
print("Accuracy: %0.4f"%metrics.accuracy_score(test_y, y_pred_DTC))
print ("AUC Score:%0.4f"%roc_auc_score(test_y, y_pred_DTC))
print ("Precision:%0.4f"%precision_score(test_y, y_pred_DTC))
print ("Recall:%0.4f"%recall_score(test_y, y_pred_DTC))
print ("F1 Score:%0.4f"%f1_score(test_y, y_pred_DTC))

feature_importances_DTC = pd.DataFrame(clf.feature_importances_,
                                   index = train_x.columns,
                                    columns=['importance']).sort_values('importance', ascending=False)
print(feature_importances_DTC.head(10))

Mean of Roc_Auc of kfold cross validation results:0.9608
Mean of Accuracy of kfold cross validation results:0.9456
Confusion Matrix:
[[67  4]
 [ 3 40]]
Accuracy: 0.9386
AUC Score:0.9369
Precision:0.9091
Recall:0.9302
F1 Score:0.9195
                         importance
area_worst                 0.624179
concavity_worst            0.122762
concavity_mean             0.070881
smoothness_worst           0.043117
fractal_dimension_se       0.039194
concave points_se          0.032907
texture_mean               0.028292
texture_worst              0.014676
radius_mean                0.012819
fractal_dimension_worst    0.011171


In [262]:
# Create Decision Tree classifer object with max_depth = 8 and criterion = entropy
clf = DecisionTreeClassifier(random_state = seed_1,max_depth = 8,criterion = "entropy")

X = df
Y = labels
kfold = model_selection.KFold(n_splits = 10, random_state = seed)
results = model_selection.cross_val_score(clf, X, Y, cv = kfold, scoring = "roc_auc") 
print("Mean of Roc_Auc of kfold cross validation results:%0.4f"%results.mean())
results = model_selection.cross_val_score(clf, X, Y, cv = kfold, scoring = "accuracy") 
print("Mean of Accuracy of kfold cross validation results:%0.4f"%results.mean())
# Train Decision Tree Classifer
clf = clf.fit(train_x,train_y)

#Predict the response for test dataset
y_pred_DTC = clf.predict(test_x)

print("Confusion Matrix:")
print(confusion_matrix(test_y, y_pred_DTC))
print("Accuracy: %0.4f"%metrics.accuracy_score(test_y, y_pred_DTC))
print ("AUC Score:%0.4f"%roc_auc_score(test_y, y_pred_DTC))
print ("Precision:%0.4f"%precision_score(test_y, y_pred_DTC))
print ("Recall:%0.4f"%recall_score(test_y, y_pred_DTC))
print ("F1 Score:%0.4f"%f1_score(test_y, y_pred_DTC))

feature_importances_DTC = pd.DataFrame(clf.feature_importances_,
                                   index = train_x.columns,
                                    columns=['importance']).sort_values('importance', ascending=False)
print(feature_importances_DTC.head(10))



Mean of Roc_Auc of kfold cross validation results:0.9441
Mean of Accuracy of kfold cross validation results:0.9421
Confusion Matrix:
[[67  4]
 [ 3 40]]
Accuracy: 0.9386
AUC Score:0.9369
Precision:0.9091
Recall:0.9302
F1 Score:0.9195
                      importance
area_worst              0.571840
concavity_worst         0.114935
concavity_mean          0.066362
smoothness_worst        0.050827
fractal_dimension_se    0.036695
perimeter_mean          0.032303
concave points_se       0.030809
texture_worst           0.030236
texture_mean            0.026488
perimeter_se            0.013552


In [276]:
parameters = {"criterion" : ["entropy", "gini"], 
              "max_depth" : [2,4,8,16],
              "min_samples_split" : [2,3,4,5,6]
             }
#grid search when scoring is accuracy
grid_search = GridSearchCV(estimator = DecisionTreeClassifier(random_state = seed_1),param_grid = parameters,scoring = "accuracy",cv = 10,verbose = 1,n_jobs = -1)
grid_search = grid_search.fit(train_x,train_y)
print(grid_search.best_params_)
best_model = grid_search.best_estimator_
best_model = best_model.fit(train_x,train_y)

#Predict the response for test dataset
y_pred_DTC = best_model.predict(test_x)

print("Confusion Matrix:")
print(confusion_matrix(test_y, y_pred_DTC))
print("Accuracy: %0.4f"%metrics.accuracy_score(test_y, y_pred_DTC))
print ("AUC Score:%0.4f"%roc_auc_score(test_y, y_pred_DTC))
print ("Precision:%0.4f"%precision_score(test_y, y_pred_DTC))
print ("Recall:%0.4f"%recall_score(test_y, y_pred_DTC))
print ("F1 Score:%0.4f"%f1_score(test_y, y_pred_DTC))

feature_importances_DTC = pd.DataFrame(clf.feature_importances_,
                                   index = train_x.columns,
                                    columns=['importance']).sort_values('importance', ascending=False)
print(feature_importances_DTC.head(10))

kfold = model_selection.KFold(n_splits = 10, random_state = seed)
results = model_selection.cross_val_score(best_model, X, Y, cv = kfold, scoring = "roc_auc") 
print("Mean of Roc_Auc of kfold cross validation results:%0.4f"%results.mean())
results = model_selection.cross_val_score(best_model, X, Y, cv = kfold, scoring = "accuracy") 
print("Mean of Accuracy of kfold cross validation results:%0.4f"%results.mean())

Fitting 10 folds for each of 40 candidates, totalling 400 fits


[Parallel(n_jobs=-1)]: Done 400 out of 400 | elapsed:    7.0s finished


{'criterion': 'entropy', 'max_depth': 4, 'min_samples_split': 2}
Confusion Matrix:
[[67  4]
 [ 3 40]]
Accuracy: 0.9386
AUC Score:0.9369
Precision:0.9091
Recall:0.9302
F1 Score:0.9195
                      importance
area_worst              0.571840
concavity_worst         0.114935
concavity_mean          0.066362
smoothness_worst        0.050827
fractal_dimension_se    0.036695
perimeter_mean          0.032303
concave points_se       0.030809
texture_worst           0.030236
texture_mean            0.026488
perimeter_se            0.013552
Mean of Roc_Auc of kfold cross validation results:0.9608
Mean of Accuracy of kfold cross validation results:0.9456


In [277]:
parameters = {"criterion" : ["entropy", "gini"], 
              "max_depth" : [2,4,8,16],
              "min_samples_split" : [2,3,4,5,6]
             }
#grid search when scoring is roc_auc score
grid_search = GridSearchCV(estimator = DecisionTreeClassifier(random_state = seed_1),param_grid = parameters,scoring = "roc_auc",cv = 10,verbose = 1,n_jobs = -1)
grid_search = grid_search.fit(train_x,train_y)
print(grid_search.best_params_)
best_model = grid_search.best_estimator_
best_model = best_model.fit(train_x,train_y)

#Predict the response for test dataset
y_pred_DTC = best_model.predict(test_x)

print("Confusion Matrix:")
print(confusion_matrix(test_y, y_pred_DTC))
print("Accuracy: %0.4f"%metrics.accuracy_score(test_y, y_pred_DTC))
print ("AUC Score:%0.4f"%roc_auc_score(test_y, y_pred_DTC))
print ("Precision:%0.4f"%precision_score(test_y, y_pred_DTC))
print ("Recall:%0.4f"%recall_score(test_y, y_pred_DTC))
print ("F1 Score:%0.4f"%f1_score(test_y, y_pred_DTC))

feature_importances_DTC = pd.DataFrame(clf.feature_importances_,
                                   index = train_x.columns,
                                    columns=['importance']).sort_values('importance', ascending=False)
print(feature_importances_DTC.head(10))

kfold = model_selection.KFold(n_splits = 10, random_state = seed)
results = model_selection.cross_val_score(best_model, X, Y, cv = kfold, scoring = "roc_auc") 
print("Mean of Roc_Auc of kfold cross validation results:%0.4f"%results.mean())
results = model_selection.cross_val_score(best_model, X, Y, cv = kfold, scoring = "accuracy") 
print("Mean of Accuracy of kfold cross validation results:%0.4f"%results.mean())

Fitting 10 folds for each of 40 candidates, totalling 400 fits


[Parallel(n_jobs=-1)]: Done 119 tasks      | elapsed:    4.8s
[Parallel(n_jobs=-1)]: Done 400 out of 400 | elapsed:    5.3s finished


{'criterion': 'entropy', 'max_depth': 2, 'min_samples_split': 2}
Confusion Matrix:
[[69  2]
 [ 6 37]]
Accuracy: 0.9298
AUC Score:0.9161
Precision:0.9487
Recall:0.8605
F1 Score:0.9024
                      importance
area_worst              0.571840
concavity_worst         0.114935
concavity_mean          0.066362
smoothness_worst        0.050827
fractal_dimension_se    0.036695
perimeter_mean          0.032303
concave points_se       0.030809
texture_worst           0.030236
texture_mean            0.026488
perimeter_se            0.013552
Mean of Roc_Auc of kfold cross validation results:0.9549
Mean of Accuracy of kfold cross validation results:0.8965


# 2-Bagging Classifier

In [290]:
from sklearn import model_selection 

seed = 8
seed_1 = 15


X = df
Y = labels
# Create Decision Tree classifer object with max_depth = 4 and criterion = gini
clf2 = DecisionTreeClassifier(random_state = seed_1,max_depth = 4,criterion = "entropy")
bgc = BaggingClassifier(base_estimator=clf2, n_estimators=100, random_state=seed_1,bootstrap = True)


kfold = model_selection.KFold(n_splits = 10, random_state = seed)
results = model_selection.cross_val_score(bgc, X, Y, cv = kfold, scoring = "roc_auc") 
print("Mean of Roc_Auc of kfold cross validation results:%0.4f"%results.mean())
results = model_selection.cross_val_score(bgc, X, Y, cv = kfold, scoring = "accuracy") 
print("Mean of Accuracy of kfold cross validation results:%0.4f"%results.mean())
# Train Bagging Classifer
bgc = bgc.fit(train_x,train_y)

#Predict the response for test dataset
y_pred_BC = bgc.predict(test_x)

print("Confusion Matrix:")
print(confusion_matrix(test_y, y_pred_BC))
print("Accuracy: %0.4f"%metrics.accuracy_score(test_y, y_pred_BC))
print ("AUC Score:%0.4f"%roc_auc_score(test_y, y_pred_BC))
print ("Precision:%0.4f"%precision_score(test_y, y_pred_BC))
print ("Recall:%0.4f"%recall_score(test_y, y_pred_BC))
print ("F1 Score:%0.4f"%f1_score(test_y, y_pred_BC))

featimp = np.mean([
    tree.feature_importances_ for tree in bgc.estimators_
], axis=0)

feature_importances_BC = pd.DataFrame(featimp,
                                   index = train_x.columns,
                                    columns=['importance']).sort_values('importance', ascending=False)

print(feature_importances_BC.head(10))


Mean of Roc_Auc of kfold cross validation results:0.9904
Mean of Accuracy of kfold cross validation results:0.9561
Confusion Matrix:
[[70  1]
 [ 3 40]]
Accuracy: 0.9649
AUC Score:0.9581
Precision:0.9756
Recall:0.9302
F1 Score:0.9524
                   importance
area_worst           0.599467
concavity_mean       0.112651
concavity_worst      0.054494
texture_mean         0.041440
compactness_worst    0.037755
texture_worst        0.031168
smoothness_worst     0.022508
symmetry_worst       0.014221
smoothness_mean      0.012453
radius_se            0.010548


In [279]:
parameters = {"n_estimators" : [100,1000], 
              "max_samples" : [0.6,0.8,1],
              "bootstrap": [True,False],
              "bootstrap_features":[True,False]
             }
#grid search when scoring is roc_auc score
grid_search = GridSearchCV(estimator = bgc,param_grid = parameters,scoring = "accuracy",cv = 5,verbose = 1,n_jobs = -1)
grid_search = grid_search.fit(train_x,train_y)
print(grid_search.best_params_)
best_model = grid_search.best_estimator_
best_model = best_model.fit(train_x,train_y)

#Predict the response for test dataset
y_pred_BC = best_model.predict(test_x)

print("Confusion Matrix:")
print(confusion_matrix(test_y, y_pred_BC))
print("Accuracy: %0.4f"%metrics.accuracy_score(test_y, y_pred_BC))
print ("AUC Score:%0.4f"%roc_auc_score(test_y, y_pred_BC))
print ("Precision:%0.4f"%precision_score(test_y, y_pred_BC))
print ("Recall:%0.4f"%recall_score(test_y, y_pred_BC))
print ("F1 Score:%0.4f"%f1_score(test_y, y_pred_BC))

featimp = np.mean([
    tree.feature_importances_ for tree in best_model.estimators_
], axis=0)

feature_importances_BC = pd.DataFrame(featimp,
                                   index = train_x.columns,
                                    columns=['importance']).sort_values('importance', ascending=False)

print(feature_importances_BC.head())

kfold = model_selection.KFold(n_splits = 10, random_state = seed)
results = model_selection.cross_val_score(best_model, X, Y, cv = kfold, scoring = "roc_auc") 
print("Mean of Roc_Auc of kfold cross validation results:%0.4f"%results.mean())
results = model_selection.cross_val_score(best_model, X, Y, cv = kfold, scoring = "accuracy") 
print("Mean of Accuracy of kfold cross validation results:%0.4f"%results.mean())

Fitting 5 folds for each of 24 candidates, totalling 120 fits


[Parallel(n_jobs=-1)]: Done  42 tasks      | elapsed:   29.6s
[Parallel(n_jobs=-1)]: Done 120 out of 120 | elapsed:  1.5min finished


{'bootstrap': False, 'bootstrap_features': True, 'max_samples': 0.6, 'n_estimators': 100}
Confusion Matrix:
[[71  0]
 [ 3 40]]
Accuracy: 0.9737
AUC Score:0.9651
Precision:1.0000
Recall:0.9302
F1 Score:0.9639
                         importance
smoothness_worst           0.056529
smoothness_se              0.054568
fractal_dimension_mean     0.052405
concavity_se               0.051882
fractal_dimension_worst    0.046055
Mean of Roc_Auc of kfold cross validation results:0.9903
Mean of Accuracy of kfold cross validation results:0.9579


In [280]:
parameters = {"n_estimators" : [100,1000], 
              "max_samples" : [0.6,0.8,1],
              "bootstrap": [True,False],
              "bootstrap_features":[True,False]
             }
#grid search when scoring is roc_auc score
grid_search = GridSearchCV(estimator = bgc,param_grid = parameters,scoring = "roc_auc",cv = 5,verbose = 1,n_jobs = -1)
grid_search = grid_search.fit(train_x,train_y)
print(grid_search.best_params_)
best_model = grid_search.best_estimator_
best_model = best_model.fit(train_x,train_y)

#Predict the response for test dataset
y_pred_BC = best_model.predict(test_x)

print("Confusion Matrix:")
print(confusion_matrix(test_y, y_pred_BC))
print("Accuracy: %0.4f"%metrics.accuracy_score(test_y, y_pred_BC))
print ("AUC Score:%0.4f"%roc_auc_score(test_y, y_pred_BC))
print ("Precision:%0.4f"%precision_score(test_y, y_pred_BC))
print ("Recall:%0.4f"%recall_score(test_y, y_pred_BC))
print ("F1 Score:%0.4f"%f1_score(test_y, y_pred_BC))

featimp = np.mean([
    tree.feature_importances_ for tree in best_model.estimators_
], axis=0)

feature_importances_BC = pd.DataFrame(featimp,
                                   index = train_x.columns,
                                    columns=['importance']).sort_values('importance', ascending=False)

print(feature_importances_BC.head())

kfold = model_selection.KFold(n_splits = 10, random_state = seed)
results = model_selection.cross_val_score(best_model, X, Y, cv = kfold, scoring = "roc_auc") 
print("Mean of Roc_Auc of kfold cross validation results:%0.4f"%results.mean())
results = model_selection.cross_val_score(best_model, X, Y, cv = kfold, scoring = "accuracy") 
print("Mean of Accuracy of kfold cross validation results:%0.4f"%results.mean())

Fitting 5 folds for each of 24 candidates, totalling 120 fits


[Parallel(n_jobs=-1)]: Done  42 tasks      | elapsed:   30.5s
[Parallel(n_jobs=-1)]: Done 120 out of 120 | elapsed:  1.6min finished


{'bootstrap': True, 'bootstrap_features': True, 'max_samples': 0.8, 'n_estimators': 100}
Confusion Matrix:
[[70  1]
 [ 3 40]]
Accuracy: 0.9649
AUC Score:0.9581
Precision:0.9756
Recall:0.9302
F1 Score:0.9524
                         importance
smoothness_worst           0.065241
fractal_dimension_mean     0.062554
concavity_se               0.055346
concavity_worst            0.053217
fractal_dimension_worst    0.052543
Mean of Roc_Auc of kfold cross validation results:0.9905
Mean of Accuracy of kfold cross validation results:0.9579


# 3-Random Forest

In [281]:
seed = 8
seed_1 = 15

X = df
Y = labels

rfc = RandomForestClassifier(random_state=seed_1)

kfold = model_selection.KFold(n_splits = 10, random_state = seed)
results = model_selection.cross_val_score(rfc, X, Y, cv = kfold, scoring = "roc_auc") 
print("Mean of Roc_Auc of kfold cross validation results:%0.4f"%results.mean())
results = model_selection.cross_val_score(rfc, X, Y, cv = kfold, scoring = "accuracy") 
print("Mean of Accuracy of kfold cross validation results:%0.4f"%results.mean())
# Train Bagging Classifer
rfc = rfc.fit(train_x,train_y)

#Predict the response for test dataset
y_pred_RFC = rfc.predict(test_x)

print("Confusion Matrix:")
print(confusion_matrix(test_y, y_pred_RFC))
print("Accuracy: %0.4f"%metrics.accuracy_score(test_y, y_pred_RFC))
print ("AUC Score:%0.4f"%roc_auc_score(test_y, y_pred_RFC))
print ("Precision:%0.4f"%precision_score(test_y, y_pred_RFC))
print ("Recall:%0.4f"%recall_score(test_y, y_pred_RFC))
print ("F1 Score:%0.4f"%f1_score(test_y, y_pred_RFC))

feature_importances_RFC = pd.DataFrame(rfc.feature_importances_,
                                   index = train_x.columns,
                                    columns=['importance']).sort_values('importance', ascending=False)
print(feature_importances_RFC.head(10))

Mean of Roc_Auc of kfold cross validation results:0.9865
Mean of Accuracy of kfold cross validation results:0.9579
Confusion Matrix:
[[70  1]
 [ 3 40]]
Accuracy: 0.9649
AUC Score:0.9581
Precision:0.9756
Recall:0.9302
F1 Score:0.9524
                  importance
perimeter_mean      0.179203
area_se             0.172862
concavity_mean      0.126308
compactness_mean    0.085659
concavity_worst     0.077190
area_worst          0.060078
area_mean           0.039746
perimeter_se        0.035574
texture_mean        0.026540
symmetry_worst      0.025045


In [282]:
parameters = {"n_estimators" : [100,1000], 
              "criterion" : ["gini","entropy"],
              "max_features" : ["auto","log2","sqrt"],
              "max_depth":[4,8,None] 
             }
#grid search when scoring is roc_auc score
grid_search = GridSearchCV(estimator = rfc,param_grid = parameters,scoring = "accuracy",cv = 3,verbose = 1,n_jobs = -1)
grid_search = grid_search.fit(train_x,train_y)
print(grid_search.best_params_)
best_model = grid_search.best_estimator_
best_model = best_model.fit(train_x,train_y)

#Predict the response for test dataset
y_pred_RFC = best_model.predict(test_x)

print("Confusion Matrix:")
print(confusion_matrix(test_y, y_pred_RFC))
print("Accuracy: %0.4f"%metrics.accuracy_score(test_y, y_pred_RFC))
print ("AUC Score:%0.4f"%roc_auc_score(test_y, y_pred_RFC))
print ("Precision:%0.4f"%precision_score(test_y, y_pred_RFC))
print ("Recall:%0.4f"%recall_score(test_y, y_pred_RFC))
print ("F1 Score:%0.4f"%f1_score(test_y, y_pred_RFC))

feature_importances_RFC = pd.DataFrame(best_model.feature_importances_,
                                   index = train_x.columns,
                                    columns=['importance']).sort_values('importance', ascending=False)

print(feature_importances_BC.head(10))

kfold = model_selection.KFold(n_splits = 10, random_state = seed)
results = model_selection.cross_val_score(best_model, X, Y, cv = kfold, scoring = "roc_auc") 
print("Mean of Roc_Auc of kfold cross validation results:%0.4f"%results.mean())
results = model_selection.cross_val_score(best_model, X, Y, cv = kfold, scoring = "accuracy") 
print("Mean of Accuracy of kfold cross validation results:%0.4f"%results.mean())

Fitting 3 folds for each of 36 candidates, totalling 108 fits


[Parallel(n_jobs=-1)]: Done  42 tasks      | elapsed:   21.4s
[Parallel(n_jobs=-1)]: Done 108 out of 108 | elapsed:   53.5s finished


{'criterion': 'entropy', 'max_depth': 8, 'max_features': 'log2', 'n_estimators': 100}
Confusion Matrix:
[[70  1]
 [ 2 41]]
Accuracy: 0.9737
AUC Score:0.9697
Precision:0.9762
Recall:0.9535
F1 Score:0.9647
                         importance
smoothness_worst           0.065241
fractal_dimension_mean     0.062554
concavity_se               0.055346
concavity_worst            0.053217
fractal_dimension_worst    0.052543
texture_worst              0.046775
smoothness_se              0.046534
radius_mean                0.043660
compactness_mean           0.041512
area_worst                 0.040747
Mean of Roc_Auc of kfold cross validation results:0.9925
Mean of Accuracy of kfold cross validation results:0.9631


In [283]:
parameters = {"n_estimators" : [100,1000], 
              "criterion" : ["gini","entropy"],
              "max_features" : ["auto","log2","sqrt"],
              "max_depth":[4,8,None] 
             }
#grid search when scoring is roc_auc score
grid_search = GridSearchCV(estimator = rfc,param_grid = parameters,scoring = "roc_auc",cv = 3,verbose = 1,n_jobs = -1)
grid_search = grid_search.fit(train_x,train_y)
print(grid_search.best_params_)
best_model = grid_search.best_estimator_
best_model = best_model.fit(train_x,train_y)

#Predict the response for test dataset
y_pred_RFC = best_model.predict(test_x)

print("Confusion Matrix:")
print(confusion_matrix(test_y, y_pred_RFC))
print("Accuracy: %0.4f"%metrics.accuracy_score(test_y, y_pred_RFC))
print ("AUC Score:%0.4f"%roc_auc_score(test_y, y_pred_RFC))
print ("Precision:%0.4f"%precision_score(test_y, y_pred_RFC))
print ("Recall:%0.4f"%recall_score(test_y, y_pred_RFC))
print ("F1 Score:%0.4f"%f1_score(test_y, y_pred_RFC))

feature_importances_RFC = pd.DataFrame(best_model.feature_importances_,
                                   index = train_x.columns,
                                    columns=['importance']).sort_values('importance', ascending=False)

print(feature_importances_BC.head(10))

kfold = model_selection.KFold(n_splits = 10, random_state = seed)
results = model_selection.cross_val_score(best_model, X, Y, cv = kfold, scoring = "roc_auc") 
print("Mean of Roc_Auc of kfold cross validation results:%0.4f"%results.mean())
results = model_selection.cross_val_score(best_model, X, Y, cv = kfold, scoring = "accuracy") 
print("Mean of Accuracy of kfold cross validation results:%0.4f"%results.mean())

Fitting 3 folds for each of 36 candidates, totalling 108 fits


[Parallel(n_jobs=-1)]: Done  42 tasks      | elapsed:   22.0s
[Parallel(n_jobs=-1)]: Done 108 out of 108 | elapsed:   54.6s finished


{'criterion': 'gini', 'max_depth': 8, 'max_features': 'auto', 'n_estimators': 1000}
Confusion Matrix:
[[70  1]
 [ 2 41]]
Accuracy: 0.9737
AUC Score:0.9697
Precision:0.9762
Recall:0.9535
F1 Score:0.9647
                         importance
smoothness_worst           0.065241
fractal_dimension_mean     0.062554
concavity_se               0.055346
concavity_worst            0.053217
fractal_dimension_worst    0.052543
texture_worst              0.046775
smoothness_se              0.046534
radius_mean                0.043660
compactness_mean           0.041512
area_worst                 0.040747
Mean of Roc_Auc of kfold cross validation results:0.9923
Mean of Accuracy of kfold cross validation results:0.9596


# 4-AdaBoost

In [284]:
seed = 8
seed_1 = 15

X = df
Y = labels

ada = AdaBoostClassifier(random_state = seed_1)

kfold = model_selection.KFold(n_splits = 10, random_state = seed)
results = model_selection.cross_val_score(ada, X, Y, cv = kfold, scoring = "roc_auc") 
print("Mean of Roc_Auc of kfold cross validation results:%0.4f"%results.mean())
results = model_selection.cross_val_score(ada, X, Y, cv = kfold, scoring = "accuracy") 
print("Mean of Accuracy of kfold cross validation results:%0.4f"%results.mean())
# Train Bagging Classifer
ada = ada.fit(train_x,train_y)

#Predict the response for test dataset
y_pred_ada = rfc.predict(test_x)

print("Confusion Matrix:")
print(confusion_matrix(test_y, y_pred_ada))
print("Accuracy: %0.4f"%metrics.accuracy_score(test_y, y_pred_ada))
print ("AUC Score:%0.4f"%roc_auc_score(test_y, y_pred_ada))
print ("Precision:%0.4f"%precision_score(test_y, y_pred_ada))
print ("Recall:%0.4f"%recall_score(test_y, y_pred_ada))
print ("F1 Score:%0.4f"%f1_score(test_y, y_pred_ada))

feature_importances_ada = pd.DataFrame(ada.feature_importances_,
                                   index = train_x.columns,
                                    columns=['importance']).sort_values('importance', ascending=False)
print(feature_importances_ada.head(10))

Mean of Roc_Auc of kfold cross validation results:0.9924
Mean of Accuracy of kfold cross validation results:0.9701
Confusion Matrix:
[[70  1]
 [ 3 40]]
Accuracy: 0.9649
AUC Score:0.9581
Precision:0.9756
Recall:0.9302
F1 Score:0.9524
                   importance
area_worst               0.14
smoothness_worst         0.10
texture_mean             0.08
compactness_se           0.08
symmetry_worst           0.06
concavity_worst          0.06
concavity_mean           0.06
concave points_se        0.04
symmetry_mean            0.04
texture_worst            0.04


In [285]:
parameters = {"n_estimators" : [100,1000], 
              "learning_rate" : [0.01,0.1,0.3,0.5,0.7,1],
              "algorithm" : ["SAMME","SAMME.R"]
             }
#grid search when scoring is roc_auc score
grid_search = GridSearchCV(estimator = ada,param_grid = parameters,scoring = "accuracy",cv = 3,verbose = 1,n_jobs = -1)
grid_search = grid_search.fit(train_x,train_y)
print(grid_search.best_params_)
best_model = grid_search.best_estimator_
best_model = best_model.fit(train_x,train_y)

#Predict the response for test dataset
y_pred_ada = best_model.predict(test_x)

print("Confusion Matrix:")
print(confusion_matrix(test_y, y_pred_ada))
print("Accuracy: %0.4f"%metrics.accuracy_score(test_y, y_pred_ada))
print ("AUC Score:%0.4f"%roc_auc_score(test_y, y_pred_ada))
print ("Precision:%0.4f"%precision_score(test_y, y_pred_ada))
print ("Recall:%0.4f"%recall_score(test_y, y_pred_ada))
print ("F1 Score:%0.4f"%f1_score(test_y, y_pred_ada))

feature_importances_ada = pd.DataFrame(ada.feature_importances_,
                                   index = train_x.columns,
                                    columns=['importance']).sort_values('importance', ascending=False)
print(feature_importances_ada.head(10))

kfold = model_selection.KFold(n_splits = 10, random_state = seed)
results = model_selection.cross_val_score(best_model, X, Y, cv = kfold, scoring = "roc_auc") 
print("Mean of Roc_Auc of kfold cross validation results:%0.4f"%results.mean())
results = model_selection.cross_val_score(best_model, X, Y, cv = kfold, scoring = "accuracy") 
print("Mean of Accuracy of kfold cross validation results:%0.4f"%results.mean())

Fitting 3 folds for each of 24 candidates, totalling 72 fits


[Parallel(n_jobs=-1)]: Done  42 tasks      | elapsed:   25.2s
[Parallel(n_jobs=-1)]: Done  72 out of  72 | elapsed:   42.6s finished


{'algorithm': 'SAMME.R', 'learning_rate': 1, 'n_estimators': 1000}
Confusion Matrix:
[[71  0]
 [ 2 41]]
Accuracy: 0.9825
AUC Score:0.9767
Precision:1.0000
Recall:0.9535
F1 Score:0.9762
                   importance
area_worst               0.14
smoothness_worst         0.10
texture_mean             0.08
compactness_se           0.08
symmetry_worst           0.06
concavity_worst          0.06
concavity_mean           0.06
concave points_se        0.04
symmetry_mean            0.04
texture_worst            0.04
Mean of Roc_Auc of kfold cross validation results:0.9973
Mean of Accuracy of kfold cross validation results:0.9789


In [286]:
parameters = {"n_estimators" : [100,1000], 
              "learning_rate" : [0.01,0.1,0.3,0.5,0.7,1],
              "algorithm" : ["SAMME","SAMME.R"]
             }
#grid search when scoring is roc_auc score
grid_search = GridSearchCV(estimator = ada,param_grid = parameters,scoring = "roc_auc",cv = 3,verbose = 1,n_jobs = -1)
grid_search = grid_search.fit(train_x,train_y)
print(grid_search.best_params_)
best_model = grid_search.best_estimator_
best_model = best_model.fit(train_x,train_y)

#Predict the response for test dataset
y_pred_ada = best_model.predict(test_x)

print("Confusion Matrix:")
print(confusion_matrix(test_y, y_pred_ada))
print("Accuracy: %0.4f"%metrics.accuracy_score(test_y, y_pred_ada))
print ("AUC Score:%0.4f"%roc_auc_score(test_y, y_pred_ada))
print ("Precision:%0.4f"%precision_score(test_y, y_pred_ada))
print ("Recall:%0.4f"%recall_score(test_y, y_pred_ada))
print ("F1 Score:%0.4f"%f1_score(test_y, y_pred_ada))

feature_importances_ada = pd.DataFrame(ada.feature_importances_,
                                   index = train_x.columns,
                                    columns=['importance']).sort_values('importance', ascending=False)
print(feature_importances_ada.head(10))

kfold = model_selection.KFold(n_splits = 10, random_state = seed)
results = model_selection.cross_val_score(best_model, X, Y, cv = kfold, scoring = "roc_auc") 
print("Mean of Roc_Auc of kfold cross validation results:%0.4f"%results.mean())
results = model_selection.cross_val_score(best_model, X, Y, cv = kfold, scoring = "accuracy") 
print("Mean of Accuracy of kfold cross validation results:%0.4f"%results.mean())

Fitting 3 folds for each of 24 candidates, totalling 72 fits


[Parallel(n_jobs=-1)]: Done  42 tasks      | elapsed:   23.9s
[Parallel(n_jobs=-1)]: Done  72 out of  72 | elapsed:   40.3s finished


{'algorithm': 'SAMME.R', 'learning_rate': 1, 'n_estimators': 1000}
Confusion Matrix:
[[71  0]
 [ 2 41]]
Accuracy: 0.9825
AUC Score:0.9767
Precision:1.0000
Recall:0.9535
F1 Score:0.9762
                   importance
area_worst               0.14
smoothness_worst         0.10
texture_mean             0.08
compactness_se           0.08
symmetry_worst           0.06
concavity_worst          0.06
concavity_mean           0.06
concave points_se        0.04
symmetry_mean            0.04
texture_worst            0.04
Mean of Roc_Auc of kfold cross validation results:0.9973
Mean of Accuracy of kfold cross validation results:0.9789


## 5-Gradient Boosting

In [287]:
seed = 8
seed_1 = 15

X = df
Y = labels

gb = GradientBoostingClassifier(random_state = seed_1)

kfold = model_selection.KFold(n_splits = 10, random_state = seed)
results = model_selection.cross_val_score(gb, X, Y, cv = kfold, scoring = "roc_auc") 
print("Mean of Roc_Auc of kfold cross validation results:%0.4f"%results.mean())
results = model_selection.cross_val_score(gb, X, Y, cv = kfold, scoring = "accuracy") 
print("Mean of Accuracy of kfold cross validation results:%0.4f"%results.mean())
# Train Bagging Classifer
gb = gb.fit(train_x,train_y)

#Predict the response for test dataset
y_pred_gb = rfc.predict(test_x)

print("Confusion Matrix:")
print(confusion_matrix(test_y, y_pred_gb))
print("Accuracy: %0.4f"%metrics.accuracy_score(test_y, y_pred_gb))
print ("AUC Score:%0.4f"%roc_auc_score(test_y, y_pred_gb))
print ("Precision:%0.4f"%precision_score(test_y, y_pred_gb))
print ("Recall:%0.4f"%recall_score(test_y, y_pred_gb))
print ("F1 Score:%0.4f"%f1_score(test_y, y_pred_gb))

feature_importances_ada = pd.DataFrame(gb.feature_importances_,
                                   index = train_x.columns,
                                    columns=['importance']).sort_values('importance', ascending=False)
print(feature_importances_ada.head(10))

Mean of Roc_Auc of kfold cross validation results:0.9907
Mean of Accuracy of kfold cross validation results:0.9561
Confusion Matrix:
[[70  1]
 [ 3 40]]
Accuracy: 0.9649
AUC Score:0.9581
Precision:0.9756
Recall:0.9302
F1 Score:0.9524
                   importance
area_worst           0.272352
texture_worst        0.108591
concavity_worst      0.069642
texture_mean         0.051667
smoothness_worst     0.049074
concavity_mean       0.046772
compactness_mean     0.044862
perimeter_mean       0.041557
symmetry_worst       0.036497
compactness_worst    0.033946


In [288]:
parameters = {"n_estimators" : [100,1000], 
              "learning_rate" : [0.01,0.1,0.3,0.7,1],
              "loss" : ["deviance","exponential"],
              "min_samples_split" : [2,4,8],
              "max_depth" : [3,4,8]
              
             }
#grid search when scoring is roc_auc score
grid_search = GridSearchCV(estimator = gb,param_grid = parameters,scoring = "accuracy",cv = 3,verbose = 1,n_jobs = -1)
grid_search = grid_search.fit(train_x,train_y)
print(grid_search.best_params_)
best_model = grid_search.best_estimator_
best_model = best_model.fit(train_x,train_y)

#Predict the response for test dataset
y_pred_gb = best_model.predict(test_x)

print("Confusion Matrix:")
print(confusion_matrix(test_y, y_pred_gb))
print("Accuracy: %0.4f"%metrics.accuracy_score(test_y, y_pred_gb))
print ("AUC Score:%0.4f"%roc_auc_score(test_y, y_pred_gb))
print ("Precision:%0.4f"%precision_score(test_y, y_pred_gb))
print ("Recall:%0.4f"%recall_score(test_y, y_pred_gb))
print ("F1 Score:%0.4f"%f1_score(test_y, y_pred_gb))

feature_importances_gb = pd.DataFrame(best_model.feature_importances_,
                                   index = train_x.columns,
                                    columns=['importance']).sort_values('importance', ascending=False)
print(feature_importances_gb.head(10))

kfold = model_selection.KFold(n_splits = 10, random_state = seed)
results = model_selection.cross_val_score(best_model, X, Y, cv = kfold, scoring = "roc_auc") 
print("Mean of Roc_Auc of kfold cross validation results:%0.4f"%results.mean())
results = model_selection.cross_val_score(best_model, X, Y, cv = kfold, scoring = "accuracy") 
print("Mean of Accuracy of kfold cross validation results:%0.4f"%results.mean())

Fitting 3 folds for each of 180 candidates, totalling 540 fits


[Parallel(n_jobs=-1)]: Done  42 tasks      | elapsed:   17.3s
[Parallel(n_jobs=-1)]: Done 192 tasks      | elapsed:   47.7s
[Parallel(n_jobs=-1)]: Done 442 tasks      | elapsed:  1.1min
[Parallel(n_jobs=-1)]: Done 540 out of 540 | elapsed:  1.2min finished


{'learning_rate': 1, 'loss': 'exponential', 'max_depth': 3, 'min_samples_split': 4, 'n_estimators': 100}
Confusion Matrix:
[[71  0]
 [ 2 41]]
Accuracy: 0.9825
AUC Score:0.9767
Precision:1.0000
Recall:0.9535
F1 Score:0.9762
                      importance
area_worst              0.045348
texture_worst           0.030113
smoothness_worst        0.027925
texture_mean            0.025794
symmetry_worst          0.024639
compactness_se          0.023908
compactness_mean        0.022344
fractal_dimension_se    0.014514
concavity_se            0.014456
texture_se              0.014436
Mean of Roc_Auc of kfold cross validation results:0.9939
Mean of Accuracy of kfold cross validation results:0.9701


In [289]:
parameters = {"n_estimators" : [100,1000], 
              "learning_rate" : [0.01,0.1,0.3,0.7,1],
              "loss" : ["deviance","exponential"],
              "min_samples_split" : [2,4,8],
              "max_depth" : [3,4,8]
              
             }
#grid search when scoring is roc_auc score
grid_search = GridSearchCV(estimator = gb,param_grid = parameters,scoring = "roc_auc",cv = 3,verbose = 1,n_jobs = -1)
grid_search = grid_search.fit(train_x,train_y)
print(grid_search.best_params_)
best_model = grid_search.best_estimator_
best_model = best_model.fit(train_x,train_y)

#Predict the response for test dataset
y_pred_gb = best_model.predict(test_x)

print("Confusion Matrix:")
print(confusion_matrix(test_y, y_pred_gb))
print("Accuracy: %0.4f"%metrics.accuracy_score(test_y, y_pred_gb))
print ("AUC Score:%0.4f"%roc_auc_score(test_y, y_pred_gb))
print ("Precision:%0.4f"%precision_score(test_y, y_pred_gb))
print ("Recall:%0.4f"%recall_score(test_y, y_pred_gb))
print ("F1 Score:%0.4f"%f1_score(test_y, y_pred_gb))

feature_importances_gb = pd.DataFrame(best_model.feature_importances_,
                                   index = train_x.columns,
                                    columns=['importance']).sort_values('importance', ascending=False)
print(feature_importances_gb.head(10))

kfold = model_selection.KFold(n_splits = 10, random_state = seed)
results = model_selection.cross_val_score(best_model, X, Y, cv = kfold, scoring = "roc_auc") 
print("Mean of Roc_Auc of kfold cross validation results:%0.4f"%results.mean())
results = model_selection.cross_val_score(best_model, X, Y, cv = kfold, scoring = "accuracy") 
print("Mean of Accuracy of kfold cross validation results:%0.4f"%results.mean())

Fitting 3 folds for each of 180 candidates, totalling 540 fits


[Parallel(n_jobs=-1)]: Done  42 tasks      | elapsed:   17.2s
[Parallel(n_jobs=-1)]: Done 192 tasks      | elapsed:   48.7s
[Parallel(n_jobs=-1)]: Done 442 tasks      | elapsed:  1.1min
[Parallel(n_jobs=-1)]: Done 540 out of 540 | elapsed:  1.2min finished


{'learning_rate': 0.7, 'loss': 'exponential', 'max_depth': 3, 'min_samples_split': 4, 'n_estimators': 100}
Confusion Matrix:
[[71  0]
 [ 2 41]]
Accuracy: 0.9825
AUC Score:0.9767
Precision:1.0000
Recall:0.9535
F1 Score:0.9762
                        importance
area_worst                0.068738
texture_worst             0.055628
compactness_se            0.040899
symmetry_worst            0.028879
texture_mean              0.025220
fractal_dimension_mean    0.022094
radius_se                 0.021599
concavity_worst           0.021458
smoothness_worst          0.016654
concavity_mean            0.014798
Mean of Roc_Auc of kfold cross validation results:0.9927
Mean of Accuracy of kfold cross validation results:0.9666
