### Importing Libraries

In [1]:
from sklearn.datasets import load_breast_cancer
from sklearn.preprocessing import StandardScaler

import pandas as pd 
import numpy as np

from keras.models import Sequential
from keras.layers import Dense

In [2]:
data = load_breast_cancer()                                                     # loading the dataset

In [3]:
data                                                                            # printing the dataset

 'data': array([[1.799e+01, 1.038e+01, 1.228e+02, ..., 2.654e-01, 4.601e-01,
         1.189e-01],
        [2.057e+01, 1.777e+01, 1.329e+02, ..., 1.860e-01, 2.750e-01,
         8.902e-02],
        [1.969e+01, 2.125e+01, 1.300e+02, ..., 2.430e-01, 3.613e-01,
         8.758e-02],
        ...,
        [1.660e+01, 2.808e+01, 1.083e+02, ..., 1.418e-01, 2.218e-01,
         7.820e-02],
        [2.060e+01, 2.933e+01, 1.401e+02, ..., 2.650e-01, 4.087e-01,
         1.240e-01],
        [7.760e+00, 2.454e+01, 4.792e+01, ..., 0.000e+00, 2.871e-01,
         7.039e-02]]),
 'feature_names': array(['mean radius', 'mean texture', 'mean perimeter', 'mean area',
        'mean smoothness', 'mean compactness', 'mean concavity',
        'mean concave points', 'mean symmetry', 'mean fractal dimension',
        'radius error', 'texture error', 'perimeter error', 'area error',
        'smoothness error', 'compactness error', 'concavity error',
        'concave points error', 'symmetry error',
        'fractal di

In [4]:
data.keys()                                                                     # printing the keys of dataset

dict_keys(['data', 'target', 'target_names', 'DESCR', 'feature_names', 'filename'])

### Printing the value of every key of the data dataset

In [5]:
print(data['DESCR'])                                                            # Reading out the DESCR key

.. _breast_cancer_dataset:

Breast cancer wisconsin (diagnostic) dataset
--------------------------------------------

**Data Set Characteristics:**

    :Number of Instances: 569

    :Number of Attributes: 30 numeric, predictive attributes and the class

    :Attribute Information:
        - radius (mean of distances from center to points on the perimeter)
        - texture (standard deviation of gray-scale values)
        - perimeter
        - area
        - smoothness (local variation in radius lengths)
        - compactness (perimeter^2 / area - 1.0)
        - concavity (severity of concave portions of the contour)
        - concave points (number of concave portions of the contour)
        - symmetry 
        - fractal dimension ("coastline approximation" - 1)

        The mean, standard error, and "worst" or largest (mean of the three
        largest values) of these features were computed for each image,
        resulting in 30 features.  For instance, field 3 is Mean Radius, f

In [6]:
print(data['data'])                                                             # Reading out the data key of the dataset
print(data['data'].shape)                                                       # Printing the shape of the data i.e 569 instances and 30 attributes of each instance

[[1.799e+01 1.038e+01 1.228e+02 ... 2.654e-01 4.601e-01 1.189e-01]
 [2.057e+01 1.777e+01 1.329e+02 ... 1.860e-01 2.750e-01 8.902e-02]
 [1.969e+01 2.125e+01 1.300e+02 ... 2.430e-01 3.613e-01 8.758e-02]
 ...
 [1.660e+01 2.808e+01 1.083e+02 ... 1.418e-01 2.218e-01 7.820e-02]
 [2.060e+01 2.933e+01 1.401e+02 ... 2.650e-01 4.087e-01 1.240e-01]
 [7.760e+00 2.454e+01 4.792e+01 ... 0.000e+00 2.871e-01 7.039e-02]]
(569, 30)


In [7]:
print(data['feature_names'])                                                    # Reading out the 30 features dataset
print(len(data['feature_names']))

['mean radius' 'mean texture' 'mean perimeter' 'mean area'
 'mean smoothness' 'mean compactness' 'mean concavity'
 'mean concave points' 'mean symmetry' 'mean fractal dimension'
 'radius error' 'texture error' 'perimeter error' 'area error'
 'smoothness error' 'compactness error' 'concavity error'
 'concave points error' 'symmetry error' 'fractal dimension error'
 'worst radius' 'worst texture' 'worst perimeter' 'worst area'
 'worst smoothness' 'worst compactness' 'worst concavity'
 'worst concave points' 'worst symmetry' 'worst fractal dimension']
30


In [8]:
print(data['target'])                                                           # printing the target of each instance 

[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 1 0 0 0 0 0 0 0 0 1 0 1 1 1 1 1 0 0 1 0 0 1 1 1 1 0 1 0 0 1 1 1 1 0 1 0 0
 1 0 1 0 0 1 1 1 0 0 1 0 0 0 1 1 1 0 1 1 0 0 1 1 1 0 0 1 1 1 1 0 1 1 0 1 1
 1 1 1 1 1 1 0 0 0 1 0 0 1 1 1 0 0 1 0 1 0 0 1 0 0 1 1 0 1 1 0 1 1 1 1 0 1
 1 1 1 1 1 1 1 1 0 1 1 1 1 0 0 1 0 1 1 0 0 1 1 0 0 1 1 1 1 0 1 1 0 0 0 1 0
 1 0 1 1 1 0 1 1 0 0 1 0 0 0 0 1 0 0 0 1 0 1 0 1 1 0 1 0 0 0 0 1 1 0 0 1 1
 1 0 1 1 1 1 1 0 0 1 1 0 1 1 0 0 1 0 1 1 1 1 0 1 1 1 1 1 0 1 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 1 1 1 1 1 1 0 1 0 1 1 0 1 1 0 1 0 0 1 1 1 1 1 1 1 1 1 1 1 1
 1 0 1 1 0 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1 0 1 0 1 1 1 1 0 0 0 1 1
 1 1 0 1 0 1 0 1 1 1 0 1 1 1 1 1 1 1 0 0 0 1 1 1 1 1 1 1 1 1 1 1 0 0 1 0 0
 0 1 0 0 1 1 1 1 1 0 1 1 1 1 1 0 1 1 1 0 1 1 0 0 1 1 1 1 1 1 0 1 1 1 1 1 1
 1 0 1 1 1 1 1 0 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 0 1 0 0 1 0 1 1 1 1 1 0 1 1
 0 1 0 1 1 0 1 0 1 1 1 1 1 1 1 1 0 0 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 0 1
 1 1 1 1 1 1 0 1 0 1 1 0 

In [9]:
print(data['target_names'])                                                     # Reading out the target names 
                                                                                # 0 for malignant and 1 for benign

['malignant' 'benign']


In [10]:
print(data['filename'])                                                         # finding out the path of the library used

/usr/local/lib/python3.7/dist-packages/sklearn/datasets/data/breast_cancer.csv


In [11]:
j=0                                                                             # printing the original values of the first instance with their corresponding feature names 
for i in data['feature_names']:
  print(i , data['data'][0][j])
  j += 1

mean radius 17.99
mean texture 10.38
mean perimeter 122.8
mean area 1001.0
mean smoothness 0.1184
mean compactness 0.2776
mean concavity 0.3001
mean concave points 0.1471
mean symmetry 0.2419
mean fractal dimension 0.07871
radius error 1.095
texture error 0.9053
perimeter error 8.589
area error 153.4
smoothness error 0.006399
compactness error 0.04904
concavity error 0.05373
concave points error 0.01587
symmetry error 0.03003
fractal dimension error 0.006193
worst radius 25.38
worst texture 17.33
worst perimeter 184.6
worst area 2019.0
worst smoothness 0.1622
worst compactness 0.6656
worst concavity 0.7119
worst concave points 0.2654
worst symmetry 0.4601
worst fractal dimension 0.1189


In [12]:
feature = data['data']                                                          # naming the data as the features of the model
label = data['target']                                                          # naming the target as the labels of the model

In [13]:
df1 = pd.DataFrame(feature , columns=data['feature_names'])                     
df2 = pd.DataFrame(label , columns=['target'])
df = pd.concat([df1,df2] , axis = 1)                                            # joining the two dataframes , one of feature and other of label
df = df.sample(frac = 1)                                                        # shuffling the dataframe 
df                                                                              # printing the dataframe 

Unnamed: 0,mean radius,mean texture,mean perimeter,mean area,mean smoothness,mean compactness,mean concavity,mean concave points,mean symmetry,mean fractal dimension,radius error,texture error,perimeter error,area error,smoothness error,compactness error,concavity error,concave points error,symmetry error,fractal dimension error,worst radius,worst texture,worst perimeter,worst area,worst smoothness,worst compactness,worst concavity,worst concave points,worst symmetry,worst fractal dimension,target
388,11.27,15.50,73.38,392.0,0.08365,0.11140,0.10070,0.02757,0.1810,0.07252,0.3305,1.0670,2.569,22.97,0.010380,0.06669,0.094720,0.020470,0.01219,0.012330,12.04,18.93,79.73,450.0,0.1102,0.2809,0.30210,0.08272,0.2157,0.10430,1
115,11.93,21.53,76.53,438.6,0.09768,0.07849,0.03328,0.02008,0.1688,0.06194,0.3118,0.9227,2.000,24.79,0.007803,0.02507,0.018350,0.007711,0.01278,0.003856,13.67,26.15,87.54,583.0,0.1500,0.2399,0.15030,0.07247,0.2438,0.08541,1
438,13.85,19.60,88.68,592.6,0.08684,0.06330,0.01342,0.02293,0.1555,0.05673,0.3419,1.6780,2.331,29.63,0.005836,0.01095,0.005812,0.007039,0.02014,0.002326,15.63,28.01,100.90,749.1,0.1118,0.1141,0.04753,0.05890,0.2513,0.06911,1
552,12.77,29.43,81.35,507.9,0.08276,0.04234,0.01997,0.01499,0.1539,0.05637,0.2409,1.3670,1.477,18.76,0.008835,0.01233,0.013280,0.009305,0.01897,0.001726,13.87,36.00,88.10,594.7,0.1234,0.1064,0.08653,0.06498,0.2407,0.06484,1
91,15.37,22.76,100.20,728.2,0.09200,0.10360,0.11220,0.07483,0.1717,0.06097,0.3129,0.8413,2.075,29.44,0.009882,0.02444,0.045310,0.017630,0.02471,0.002142,16.43,25.84,107.50,830.9,0.1257,0.1997,0.28460,0.14760,0.2556,0.06828,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
498,18.49,17.52,121.30,1068.0,0.10120,0.13170,0.14910,0.09183,0.1832,0.06697,0.7923,1.0450,4.851,95.77,0.007974,0.03214,0.044350,0.015730,0.01617,0.005255,22.75,22.88,146.40,1600.0,0.1412,0.3089,0.35330,0.16630,0.2510,0.09445,0
31,11.84,18.70,77.93,440.6,0.11090,0.15160,0.12180,0.05182,0.2301,0.07799,0.4825,1.0300,3.475,41.00,0.005551,0.03414,0.042050,0.010440,0.02273,0.005667,16.82,28.12,119.40,888.7,0.1637,0.5775,0.69560,0.15460,0.4761,0.14020,0
384,13.28,13.72,85.79,541.8,0.08363,0.08575,0.05077,0.02864,0.1617,0.05594,0.1833,0.5308,1.592,15.26,0.004271,0.02073,0.028280,0.008468,0.01461,0.002613,14.24,17.37,96.59,623.7,0.1166,0.2685,0.28660,0.09173,0.2736,0.07320,1
377,13.46,28.21,85.89,562.1,0.07517,0.04726,0.01271,0.01117,0.1421,0.05763,0.1689,1.1500,1.400,14.91,0.004942,0.01203,0.007508,0.005179,0.01442,0.001684,14.69,35.63,97.11,680.6,0.1108,0.1457,0.07934,0.05781,0.2694,0.07061,1


In [14]:
feature = df.values[:,:30]                                                      # changing the feature to shuffled feature
label = df.values[:,30:]                                                        # changing the label to shuffled label

### Changing the scale of the data features

In [15]:
scale = StandardScaler()                                                        # instantiating the StandardScaler function as scale
feature = scale.fit_transform(feature)                                          # changing the values of the features to a particular range

In [16]:
j=0                                                                             # printing the processed values of the first instance with their corresponding feature names 
for i in data['feature_names']:
  print(i,":" , feature[0][j])
  j += 1

mean radius : -0.8115114166826678
mean texture : -0.8818766459501768
mean perimeter : -0.7656860076570489
mean area : -0.747683586904543
mean smoothness : -0.9045327785782015
mean compactness : 0.13377879292026043
mean concavity : 0.1494127460085422
mean concave points : -0.5506794912926888
mean symmetry : -0.005909523496798196
mean fractal dimension : 1.378249973695418
radius error : -0.269507133596133
texture error : -0.2718856508801749
perimeter error : -0.14705341040655623
area error : -0.382105431342041
smoothness error : 1.1130521577922077
compactness error : 2.3033119393758033
concavity error : 2.0831325283710678
concave points error : 1.4069844030723981
symmetry error : -1.0112838363350205
fractal dimension error : 3.2284115674408445
worst radius : -0.8757913250155224
worst texture : -1.0987434074391706
worst perimeter : -0.8200402516581248
worst area : -0.75692755442657
worst smoothness : -0.9717800789826759
worst compactness : 0.1694355340002532
worst concavity : 0.1435012050

### Dividing the data for training , testing and validation

In [17]:
# 500 training samples
X_train = feature[:500]                                                         
Y_train = label[:500]

# 35 Validation samples
X_val = feature[500:535]
Y_val = label[500:535]

# 34 Testing samples
X_test = feature[535:]
Y_test = label[535:]

### Training the model

In [32]:
model = Sequential()

model.add(Dense(64 , activation = 'relu' , input_dim =30))
model.add(Dense(32 , activation = 'relu'))
model.add(Dense(16 , activation = 'relu'))
model.add(Dense(1 , activation = 'sigmoid'))

model.compile(loss='binary_crossentropy' , optimizer='adam' , metrics=['accuracy'])

In [33]:
model.fit( X_train , Y_train , batch_size = 1 , epochs = 10 , validation_data = (X_val , Y_val))

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7f18cc2e8a10>

In [34]:
model.evaluate(X_test,Y_test)                                                   # printing out the loss and accuracy of testing data



[0.007466648239642382, 1.0]

In [35]:
model.evaluate(X_val,Y_val)                                                     # printing out the loss and accuracy of validation data



[0.5503902435302734, 0.9142857193946838]

### Testing whether the actual and the predicted values are same or not

In [22]:
for i in range(34):
  sample = X_val[i]
  sample = np.reshape(sample , (1,30))
  if model.predict(sample)[0][0] > 0.5:
    print('Predicted tumor : Benign')
  else:
    print('Predicted tumor : Malignant')

  if Y_val[i] == 0:
    print('Actual tumor : Malignant')
  else:
    print('Actual tumor : Benign')
  print('-----------------------')

Predicted tumor : Benign
Actual tumor : Benign
-----------------------
Predicted tumor : Benign
Actual tumor : Benign
-----------------------
Predicted tumor : Benign
Actual tumor : Benign
-----------------------
Predicted tumor : Benign
Actual tumor : Benign
-----------------------
Predicted tumor : Benign
Actual tumor : Benign
-----------------------
Predicted tumor : Benign
Actual tumor : Benign
-----------------------
Predicted tumor : Benign
Actual tumor : Benign
-----------------------
Predicted tumor : Benign
Actual tumor : Benign
-----------------------
Predicted tumor : Malignant
Actual tumor : Malignant
-----------------------
Predicted tumor : Malignant
Actual tumor : Benign
-----------------------
Predicted tumor : Malignant
Actual tumor : Benign
-----------------------
Predicted tumor : Benign
Actual tumor : Benign
-----------------------
Predicted tumor : Benign
Actual tumor : Benign
-----------------------
Predicted tumor : Malignant
Actual tumor : Malignant
------------

### Checking how many predicted values are same as of the actual values in case of training , validation and testing

In [24]:
# Training dataset checking
j = 0
for i in range(500):
  sample = X_train[i]
  sample = np.reshape(sample , (1,30))
  if ((model.predict(sample)[0][0]) > 0.5 and (Y_train[i] == 1) or (model.predict(sample)[0][0]) <= 0.5 and (Y_train[i] == 0)):
    j += 1
print(j,'/',len(Y_train))

498 / 500


In [25]:
# Validation dataset checking
j = 0
for i in range(35):
  sample = X_val[i]
  sample = np.reshape(sample , (1,30))
  if ((model.predict(sample)[0][0]) > 0.5 and (Y_val[i] == 1) or (model.predict(sample)[0][0]) <= 0.5 and (Y_val[i] == 0)):
    j += 1
print(j,'/',len(Y_val))

31 / 35


In [27]:
# Testing dataset checking
j = 0
for i in range(34):
  sample = X_test[i]
  sample = np.reshape(sample , (1,30))
  if ((model.predict(sample)[0][0]) > 0.5 and (Y_test[i] == 1) or (model.predict(sample)[0][0]) <= 0.5 and (Y_test[i] == 0)):
    j += 1
print(j,'/',len(Y_test))

34 / 34
