In [2]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np

In [3]:
from sklearn import datasets, metrics
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [4]:
# Load the Wisconsin diagnostic breast-cancer dataset that ships with scikit-learn.
cancer = datasets.load_breast_cancer()

In [5]:
# Show the dataset's bundled description (instance count, attribute list, etc.).
print(cancer.DESCR)

.. _breast_cancer_dataset:

Breast cancer wisconsin (diagnostic) dataset
--------------------------------------------

**Data Set Characteristics:**

    :Number of Instances: 569

    :Number of Attributes: 30 numeric, predictive attributes and the class

    :Attribute Information:
        - radius (mean of distances from center to points on the perimeter)
        - texture (standard deviation of gray-scale values)
        - perimeter
        - area
        - smoothness (local variation in radius lengths)
        - compactness (perimeter^2 / area - 1.0)
        - concavity (severity of concave portions of the contour)
        - concave points (number of concave portions of the contour)
        - symmetry 
        - fractal dimension ("coastline approximation" - 1)

        The mean, standard error, and "worst" or largest (mean of the three
        largest values) of these features were computed for each image,
        resulting in 30 features.  For instance, field 3 is Mean Radius, field
        13 is Radius SE, field 23 is Worst Radius.

In [24]:
# Feature matrix and class labels, taken straight from the loaded dataset.
x, y = cancer.data, cancer.target

In [7]:
# Sanity check: the features and labels must have the same number of rows.
(x.shape, y.shape)

((569, 30), (569,))

In [8]:
# Standardise every feature column (zero mean, unit variance).
# NOTE(review): the statistics are learned from *all* rows of `x`, including rows
# that may later be held out for testing; for model evaluation, prefer a scaler
# fitted on the training rows only.
scaler = StandardScaler().fit(x)
x_scaled = scaler.transform(x)

# Display the scaled matrix (numpy abbreviates it automatically).
x_scaled

array([[ 1.09706398, -2.07333501,  1.26993369, ...,  2.29607613,
         2.75062224,  1.93701461],
       [ 1.82982061, -0.35363241,  1.68595471, ...,  1.0870843 ,
        -0.24388967,  0.28118999],
       [ 1.57988811,  0.45618695,  1.56650313, ...,  1.95500035,
         1.152255  ,  0.20139121],
       ...,
       [ 0.70228425,  2.0455738 ,  0.67267578, ...,  0.41406869,
        -1.10454895, -0.31840916],
       [ 1.83834103,  2.33645719,  1.98252415, ...,  2.28998549,
         1.91908301,  2.21963528],
       [-1.80840125,  1.22179204, -1.81438851, ..., -1.74506282,
        -0.04813821, -0.75120669]])

In [14]:
from sklearn.ensemble import RandomForestClassifier , AdaBoostClassifier, GradientBoostingClassifier
import xgboost as xgb

In [15]:
# Split the *raw* features first so the held-out rows never influence any
# preprocessing step. The split indices depend only on the number of rows, `y`
# and `random_state`, so the same rows land in each partition as before.
# `stratify=y` keeps the class ratio identical in the train and test partitions.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=1,
    stratify=y,
)

# Learn the scaling statistics from the training rows only, then apply that same
# transform to the test rows. Fitting the scaler on the full dataset would leak
# test-set means/variances into the features the models are evaluated on.
split_scaler = StandardScaler().fit(X_train_raw)
X_train = split_scaler.transform(X_train_raw)
X_test = split_scaler.transform(X_test_raw)

In [17]:
# Four tree ensembles, each with 200 estimators and a fixed seed for reproducibility.

# Bagging-style ensemble.
rfc = RandomForestClassifier(
    n_estimators=200,
    random_state=1,
)

# Boosting ensembles; the learning rate shrinks each stage's contribution.
abc = AdaBoostClassifier(
    n_estimators=200,
    random_state=1,
    learning_rate=0.01,
)
gbc = GradientBoostingClassifier(
    n_estimators=200,
    random_state=1,
    learning_rate=0.01,
)
xgb_clf = xgb.XGBClassifier(
    n_estimators=200,
    random_state=1,
    learning_rate=0.1,
)

In [18]:
# Train the three scikit-learn ensembles on the training partition.
for estimator in (rfc, abc, gbc):
    estimator.fit(X_train, y_train)

# Fit XGBoost last and leave the call as the final expression so the cell still
# displays the fitted estimator's parameters.
xgb_clf.fit(X_train, y_train)

XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
              colsample_bynode=1, colsample_bytree=1, gamma=0,
              learning_rate=0.1, max_delta_step=0, max_depth=3,
              min_child_weight=1, missing=None, n_estimators=200, n_jobs=1,
              nthread=None, objective='binary:logistic', random_state=1,
              reg_alpha=0, reg_lambda=1, scale_pos_weight=1, seed=None,
              silent=None, subsample=1, verbosity=1)

In [20]:
# Report held-out accuracy (`score` on a classifier is mean accuracy) for each
# fitted model. The "Gradient Boost" label was previously misspelled "Gradiet Boost".
labelled_models = [
    ("Random Forest Score", rfc),
    ("Ada Boost", abc),
    ("Gradient Boost", gbc),
    ("XGBoost", xgb_clf),
]
for label, fitted_model in labelled_models:
    print(label, fitted_model.score(X_test, y_test))

Random Forest Score 0.9473684210526315
Ada Boost 0.9473684210526315
Gradiet Boost 0.9736842105263158
XGBoost 0.9736842105263158
