In [1]:
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
# Load the breast-cancer dataset that ships with scikit-learn.
cancer = load_breast_cancer()

# Split features and labels into train/test partitions; random_state is pinned
# so the same shuffle (and therefore the same split) is produced on every run.
X_train, X_test, y_train, y_test = train_test_split(
    cancer.data,
    cancer.target,
    random_state=1,
)


In [2]:
# List the fields available on the loaded dataset object.
cancer.keys()

dict_keys(['data', 'target', 'target_names', 'DESCR', 'feature_names', 'filename'])

In [4]:
# Print the dataset's bundled description text (instance/attribute summary).
print(cancer.DESCR)

.. _breast_cancer_dataset:

Breast cancer wisconsin (diagnostic) dataset
--------------------------------------------

**Data Set Characteristics:**

    :Number of Instances: 569

    :Number of Attributes: 30 numeric, predictive attributes and the class

    :Attribute Information:
        - radius (mean of distances from center to points on the perimeter)
        - texture (standard deviation of gray-scale values)
        - perimeter
        - area
        - smoothness (local variation in radius lengths)
        - compactness (perimeter^2 / area - 1.0)
        - concavity (severity of concave portions of the contour)
        - concave points (number of concave portions of the contour)
        - symmetry 
        - fractal dimension ("coastline approximation" - 1)

        The mean, standard error, and "worst" or largest (mean of the three
        largest values) of these features were computed for each image,
        resulting in 30 features.  For instance, field 3 is Mean Radius, f

In [7]:
# Show the class names stored alongside the dataset.
print(cancer.target_names)

['malignant' 'benign']


In [9]:
# Peek at the first two rows of the raw (unscaled) feature matrix.
print(cancer.data[:2])

[[1.799e+01 1.038e+01 1.228e+02 1.001e+03 1.184e-01 2.776e-01 3.001e-01
  1.471e-01 2.419e-01 7.871e-02 1.095e+00 9.053e-01 8.589e+00 1.534e+02
  6.399e-03 4.904e-02 5.373e-02 1.587e-02 3.003e-02 6.193e-03 2.538e+01
  1.733e+01 1.846e+02 2.019e+03 1.622e-01 6.656e-01 7.119e-01 2.654e-01
  4.601e-01 1.189e-01]
 [2.057e+01 1.777e+01 1.329e+02 1.326e+03 8.474e-02 7.864e-02 8.690e-02
  7.017e-02 1.812e-01 5.667e-02 5.435e-01 7.339e-01 3.398e+00 7.408e+01
  5.225e-03 1.308e-02 1.860e-02 1.340e-02 1.389e-02 3.532e-03 2.499e+01
  2.341e+01 1.588e+02 1.956e+03 1.238e-01 1.866e-01 2.416e-01 1.860e-01
  2.750e-01 8.902e-02]]


In [10]:
from sklearn.preprocessing import MinMaxScaler
# Learn per-feature min/max from the training split only and map each feature
# onto the range [0, 100]; fit() returns the estimator, so it can be chained.
scaler = MinMaxScaler(feature_range=(0, 100)).fit(X_train)

# Bare name as the last expression so the cell still echoes the fitted scaler.
scaler


MinMaxScaler(copy=True, feature_range=(0, 100))

In [11]:
# Apply the fitted scaler to the training split; the result is kept under a
# new name so the unscaled X_train stays available for comparison.
X_train_scaled = scaler.transform(X_train)

In [12]:
# Display the scaled training array (NumPy abbreviates the repr for large arrays).
X_train_scaled

array([[38.99379999, 70.71356104, 41.19273029, ..., 80.96219931,
        59.95722433, 91.75500589],
       [37.76326376, 31.75515725, 36.79082303, ..., 51.16838488,
        33.17490494, 31.59867223],
       [36.4380709 , 35.23841731, 35.20834773, ..., 55.29209622,
        34.76711027, 28.85747939],
       ...,
       [48.36480666, 50.08454515, 48.65593255, ..., 65.25773196,
        41.5161597 , 84.2595567 ],
       [33.36173032, 39.02603991, 31.78771336, ..., 27.36426117,
        15.68441065, 22.44351644],
       [28.62889867, 29.45552925, 26.82606592, ..., 17.22680412,
        10.00475285,  6.93864439]])

In [13]:
# Sanity-check the scaling on the training split: the shape must be unchanged,
# and every feature should now span exactly the scaler's target range.
# The shape is wrapped in a 1-tuple so %-formatting treats it as a single value.
print("transformed shape: %s" % (X_train_scaled.shape,))
print("per-feature minimum before scaling:\n %s" % X_train.min(axis=0))
print("per-feature maximum before scaling:\n %s" % X_train.max(axis=0))
print("per-feature minimum after scaling:\n %s" % X_train_scaled.min(axis=0))
print("per-feature maximum after scaling:\n %s" % X_train_scaled.max(axis=0))


transformedshape: (426, 30)
per-feature minimum before scaling:
 [6.981e+00 9.710e+00 4.379e+01 1.435e+02 5.263e-02 1.938e-02 0.000e+00
 0.000e+00 1.060e-01 5.024e-02 1.153e-01 3.602e-01 7.570e-01 6.802e+00
 1.713e-03 2.252e-03 0.000e+00 0.000e+00 9.539e-03 8.948e-04 7.930e+00
 1.202e+01 5.041e+01 1.852e+02 7.117e-02 2.729e-02 0.000e+00 0.000e+00
 1.566e-01 5.521e-02]
per-feature maximum before scaling:
 [2.811e+01 3.928e+01 1.885e+02 2.501e+03 1.634e-01 2.867e-01 4.268e-01
 2.012e-01 3.040e-01 9.575e-02 2.873e+00 4.885e+00 2.198e+01 5.422e+02
 3.113e-02 1.354e-01 3.960e-01 5.279e-02 6.146e-02 2.984e-02 3.604e+01
 4.954e+01 2.512e+02 4.254e+03 2.226e-01 9.379e-01 1.170e+00 2.910e-01
 5.774e-01 1.486e-01]
per-feature minimum after scaling:
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0.]
per-feature maximum after scaling:
 [100. 100. 100. 100. 100. 100. 100. 100. 100. 100. 100. 100. 100. 100.
 100. 100. 100. 100. 100. 100. 100. 100. 100. 100.

In [14]:
# Scale the test split with the scaler that was fitted on the training data
# (it is deliberately NOT refit here). Because the min/max come from X_train,
# test features may land slightly below 0 or above 100 after transformation.
X_test_scaled = scaler.transform(X_test)
print("per-feature min after scaling: %s" % X_test_scaled.min(axis=0))
print("per-feature max after scaling: %s" % X_test_scaled.max(axis=0))


per-feature min after scaling: [ 3.36031047  2.26580994  3.14421947  1.14103924 14.12837411  4.40670358
  0.          0.         15.4040404  -0.6152494  -0.13779599  0.59450141
  0.43066484  0.07956698  3.91950233  1.12205966  0.          0.
 -3.19138691  0.66401338  2.66097474  5.81023454  2.0319737   0.9437672
 10.9423496   2.63779225  0.          0.         -0.02376426 -0.18203234]
per-feature mac after scaling: [ 95.78777983  81.50152181  95.57736162  89.35312831  81.13207547
 121.95870118  87.95688847  93.33996024  93.23232323 103.71346957
  42.66961598  49.7657355   44.11723131  28.3710436   48.70313084
  73.8636705   76.71717172  62.92858496 133.68579188  39.05725302
  89.61223764  79.31769723  84.85980378  74.48879276  91.54724955
 113.18896125 107.00854701  92.37113402 120.53231939 163.06885105]
