In [None]:
import pandas as pd
import numpy as np

from sklearn.datasets import load_breast_cancer         #load dataset from sklearn library
from sklearn.preprocessing import StandardScaler        #import standerdScaler function from sklearn

from keras.models import Sequential                     #import keras for making neural network
from keras.layers import Dense

In [None]:
# Load the breast-cancer dataset that ships with scikit-learn.
data = load_breast_cancer()

In [None]:
data  # display the loaded dataset object: a dictionary-like container of arrays and metadata

 'data': array([[1.799e+01, 1.038e+01, 1.228e+02, ..., 2.654e-01, 4.601e-01,
         1.189e-01],
        [2.057e+01, 1.777e+01, 1.329e+02, ..., 1.860e-01, 2.750e-01,
         8.902e-02],
        [1.969e+01, 2.125e+01, 1.300e+02, ..., 2.430e-01, 3.613e-01,
         8.758e-02],
        ...,
        [1.660e+01, 2.808e+01, 1.083e+02, ..., 1.418e-01, 2.218e-01,
         7.820e-02],
        [2.060e+01, 2.933e+01, 1.401e+02, ..., 2.650e-01, 4.087e-01,
         1.240e-01],
        [7.760e+00, 2.454e+01, 4.792e+01, ..., 0.000e+00, 2.871e-01,
         7.039e-02]]),
 'feature_names': array(['mean radius', 'mean texture', 'mean perimeter', 'mean area',
        'mean smoothness', 'mean compactness', 'mean concavity',
        'mean concave points', 'mean symmetry', 'mean fractal dimension',
        'radius error', 'texture error', 'perimeter error', 'area error',
        'smoothness error', 'compactness error', 'concavity error',
        'concave points error', 'symmetry error',
        'fractal di

In [None]:
data.keys()  # list every key available in the dataset object

dict_keys(['data', 'target', 'target_names', 'DESCR', 'feature_names', 'filename'])

In [None]:
# Read the free-text description stored under the 'DESCR' key and print it.
dscr = data['DESCR']
print(dscr)

.. _breast_cancer_dataset:

Breast cancer wisconsin (diagnostic) dataset
--------------------------------------------

**Data Set Characteristics:**

    :Number of Instances: 569

    :Number of Attributes: 30 numeric, predictive attributes and the class

    :Attribute Information:
        - radius (mean of distances from center to points on the perimeter)
        - texture (standard deviation of gray-scale values)
        - perimeter
        - area
        - smoothness (local variation in radius lengths)
        - compactness (perimeter^2 / area - 1.0)
        - concavity (severity of concave portions of the contour)
        - concave points (number of concave portions of the contour)
        - symmetry 
        - fractal dimension ("coastline approximation" - 1)

        The mean, standard error, and "worst" or largest (mean of the three
        largest values) of these features were computed for each image,
        resulting in 30 features.  For instance, field 3 is Mean Radius, f

In [None]:
data['data'].shape  # (number of samples, number of features)

(569, 30)

In [None]:
data['feature_names']  # names of all feature columns

array(['mean radius', 'mean texture', 'mean perimeter', 'mean area',
       'mean smoothness', 'mean compactness', 'mean concavity',
       'mean concave points', 'mean symmetry', 'mean fractal dimension',
       'radius error', 'texture error', 'perimeter error', 'area error',
       'smoothness error', 'compactness error', 'concavity error',
       'concave points error', 'symmetry error',
       'fractal dimension error', 'worst radius', 'worst texture',
       'worst perimeter', 'worst area', 'worst smoothness',
       'worst compactness', 'worst concavity', 'worst concave points',
       'worst symmetry', 'worst fractal dimension'], dtype='<U23')

In [None]:
data['data'][0]  # all feature values of one sample (here: the first row)

array([1.799e+01, 1.038e+01, 1.228e+02, 1.001e+03, 1.184e-01, 2.776e-01,
       3.001e-01, 1.471e-01, 2.419e-01, 7.871e-02, 1.095e+00, 9.053e-01,
       8.589e+00, 1.534e+02, 6.399e-03, 4.904e-02, 5.373e-02, 1.587e-02,
       3.003e-02, 6.193e-03, 2.538e+01, 1.733e+01, 1.846e+02, 2.019e+03,
       1.622e-01, 6.656e-01, 7.119e-01, 2.654e-01, 4.601e-01, 1.189e-01])

In [None]:
# Print each feature name next to its value for the first sample (row 0).
# zip pairs the names with the row's values directly, so no hand-maintained
# index counter is needed to keep the two sequences in step.
for name, value in zip(data['feature_names'], data['data'][0]):
    print(name, ':', value)

mean radius : 17.99
mean texture : 10.38
mean perimeter : 122.8
mean area : 1001.0
mean smoothness : 0.1184
mean compactness : 0.2776
mean concavity : 0.3001
mean concave points : 0.1471
mean symmetry : 0.2419
mean fractal dimension : 0.07871
radius error : 1.095
texture error : 0.9053
perimeter error : 8.589
area error : 153.4
smoothness error : 0.006399
compactness error : 0.04904
concavity error : 0.05373
concave points error : 0.01587
symmetry error : 0.03003
fractal dimension error : 0.006193
worst radius : 25.38
worst texture : 17.33
worst perimeter : 184.6
worst area : 2019.0
worst smoothness : 0.1622
worst compactness : 0.6656
worst concavity : 0.7119
worst concave points : 0.2654
worst symmetry : 0.4601
worst fractal dimension : 0.1189


In [None]:
data['target']  # class label (0 or 1) of every sample

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,
       0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0,
       1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0,
       1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1,
       1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0,
       0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1,
       1, 0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0,
       0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0,
       1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1,
       1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0,

In [None]:
data['target_names']  # class names; the position in this array is the numeric label

array(['malignant', 'benign'], dtype='<U9')

In [None]:
# Keep the feature matrix and the label vector under short working names.
feature, label = data['data'], data['target']

In [None]:
feature.shape  # sanity check: (samples, features)

(569, 30)

In [None]:
label.shape  # sanity check: one label per sample

(569,)

In [None]:
# Standardize all feature values as a preprocessing step.
# NOTE(review): the scaler is fitted on every sample here, before the
# train/validation/test split made later in the notebook, so the held-out rows
# contribute to the scaling statistics (data leakage). Consider fitting on the
# training rows only and applying transform() to the validation/test rows.
scale = StandardScaler()
feature = scale.fit_transform(feature)

In [None]:
feature[0]  # first sample after standardization

array([ 1.09706398, -2.07333501,  1.26993369,  0.9843749 ,  1.56846633,
        3.28351467,  2.65287398,  2.53247522,  2.21751501,  2.25574689,
        2.48973393, -0.56526506,  2.83303087,  2.48757756, -0.21400165,
        1.31686157,  0.72402616,  0.66081994,  1.14875667,  0.90708308,
        1.88668963, -1.35929347,  2.30360062,  2.00123749,  1.30768627,
        2.61666502,  2.10952635,  2.29607613,  2.75062224,  1.93701461])

In [None]:
label[0]  # numeric class label of the first sample

0

In [None]:
# Show the standardized feature vector of sample 200 together with its class
# name. The single label is looked up first and then mapped to a name, instead
# of mapping the whole label array to names just to read one entry.
# NOTE: `label` is used directly as an index into `target_names`, so it must
# still be the integer target array when this cell runs.
print(feature[200])
print(data['target_names'][label[200]])

[-0.53885779  0.06291261 -0.55314457 -0.55144051 -0.03560273 -0.44480987
 -0.5891962  -0.20246073  0.61109991 -0.37816145 -0.18685622  0.19802942
 -0.2762564  -0.28815813  0.15768009 -0.42968654 -0.59297079 -0.06425726
 -0.66742089 -0.1728245  -0.37879325  0.43687355 -0.45010014 -0.4257368
  0.46165357 -0.31848363 -0.64521166 -0.1005903  -0.37654768 -0.12223716]
benign


In [None]:
# Shuffle features and labels together so that rows stay aligned.
# A fixed seed makes the shuffle (and therefore the train/validation/test
# split and every reported metric) reproducible under Restart & Run All.
SHUFFLE_SEED = 42

df_frt = pd.DataFrame(feature, columns=data['feature_names'])   # features as a DataFrame
df_lbl = pd.DataFrame(label, columns=['label'])                 # labels as a one-column DataFrame
df = pd.concat([df_frt, df_lbl], axis=1)                        # put the label next to its features
df = df.sample(frac=1, random_state=SHUFFLE_SEED)               # frac=1 returns every row, in shuffled order

# Split the shuffled table back into arrays. The column count is taken from
# the feature frame instead of a hard-coded 30.
n_features = df_frt.shape[1]
shuffled = df.values
feature = shuffled[:, :n_features]    # all feature columns
label = shuffled[:, n_features:]      # trailing label column, kept 2-D: (n_samples, 1)

In [None]:
df.head()  # preview the first rows of the shuffled table (features plus label column)

Unnamed: 0,mean radius,mean texture,mean perimeter,mean area,mean smoothness,mean compactness,mean concavity,mean concave points,mean symmetry,mean fractal dimension,radius error,texture error,perimeter error,area error,smoothness error,compactness error,concavity error,concave points error,symmetry error,fractal dimension error,worst radius,worst texture,worst perimeter,worst area,worst smoothness,worst compactness,worst concavity,worst concave points,worst symmetry,worst fractal dimension,label
305,-0.717787,1.210157,-0.730674,-0.676012,-1.538617,-0.899456,-0.867038,-0.923145,0.450458,-0.569538,-0.555717,1.032628,-0.448032,-0.486834,-0.306339,-0.117823,-0.528315,-0.778304,1.380017,-0.599114,-0.792958,0.967744,-0.770596,-0.710343,-1.618359,-0.751695,-0.958538,-1.011902,0.555294,-0.914129,1
54,0.276263,0.635371,0.217936,0.164705,-0.41278,-0.635462,-0.45536,-0.401848,-0.714193,-0.844553,-0.341691,-0.694809,-0.380708,-0.229414,-0.788692,-0.848299,-0.525994,-0.41755,-1.161421,-0.819256,0.379129,0.979143,0.310928,0.262662,0.28631,-0.308942,-0.004742,0.584607,-0.365223,-0.289039,0
53,1.162387,-0.137215,1.166958,1.075386,1.312271,0.836879,1.109868,1.472343,1.023657,0.042867,1.546647,0.682459,0.995477,1.286504,-1.047369,0.230928,-0.123467,-0.41755,0.750408,0.503489,0.896835,-0.251956,0.829202,0.774214,-0.191501,-0.156268,-0.04744,0.272461,0.194529,-0.225865,0
230,0.830091,-0.048787,0.882746,0.6829,1.262455,1.001757,1.283127,1.549724,1.166043,0.064131,-0.394386,-0.975851,-0.352986,-0.18387,-0.503013,-0.301699,-0.044554,0.331534,-1.062137,-0.551832,0.687682,-0.128194,0.781544,0.54217,1.662757,0.885093,1.101567,2.127061,0.336894,0.369301,0
241,-0.484895,-0.988922,-0.550261,-0.507357,-1.216949,-1.334392,-0.982669,-0.976022,-0.969759,-0.745321,-1.046209,-0.984016,-1.044049,-0.689337,-1.258711,-1.148426,-0.842212,-1.303215,-0.404681,-0.921006,-0.635576,-0.864248,-0.697323,-0.592739,-1.256713,-1.122819,-1.006321,-1.128081,0.000395,-0.893071,1


In [None]:
# deviding the dataset into three parts training testing validation

# 500 samples for training
# Positional split of the shuffled rows into three consecutive parts.
x_train, y_train = feature[:500], label[:500]          # rows 0-499   -> training
x_val, y_val = feature[500:535], label[500:535]        # rows 500-534 -> validation
x_test, y_test = feature[535:], label[535:]            # rows 535-end -> testing

In [None]:
# Fully connected binary classifier: 30 input features, five ReLU hidden
# layers (32 -> 64 -> 128 -> 64 -> 32 units) and a single sigmoid output unit.
model = Sequential([
    Dense(32, activation='relu', input_dim=30),    # first hidden layer; also declares the input width
    Dense(64, activation='relu'),
    Dense(128, activation='relu'),
    Dense(64, activation='relu'),
    Dense(32, activation='relu'),
    Dense(1, activation='sigmoid'),                # output layer
])

# Binary cross-entropy loss with the Adam optimizer; accuracy is tracked as a metric.
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

In [None]:
# Train on the training rows for 10 epochs in mini-batches of 5 samples,
# passing the validation rows as validation data.
model.fit(
    x_train,
    y_train,
    batch_size=5,
    epochs=10,
    validation_data=(x_val, y_val),
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7f609c431850>

In [None]:
model.evaluate(x_test,y_test)  # score the held-out test rows; the model was compiled with loss plus an accuracy metric



[0.0014445792185142636, 1.0]

In [None]:
# Score the validation rows with the same call used for the test rows.
model.evaluate(x_val,y_val)    



[0.14387299120426178, 0.9428571462631226]

In [None]:
# Compare predicted and actual classes for the first test samples.
# All samples are scored with ONE predict call instead of one call per
# sample, and slicing (rather than indexing 0..9) keeps the cell working when
# fewer than `n_show` test rows exist.
n_show = 10
predicted_scores = model.predict(x_test[:n_show]).ravel()   # one sigmoid output per sample
actual_labels = np.ravel(y_test[:n_show])                   # flatten (n, 1) labels to (n,)

for score, actual in zip(predicted_scores, actual_labels):
    # predicted value: an output above 0.5 is reported as class 1 (benign)
    print("-Benign" if score > 0.5 else "-malignant")
    # actual value
    print("*Benign" if actual == 1 else "*malignant")

    print("********")

-malignant
*malignant
********
-Benign
*Benign
********
-Benign
*Benign
********
-malignant
*malignant
********
-Benign
*Benign
********
-malignant
*malignant
********
-malignant
*malignant
********
-malignant
*malignant
********
-Benign
*Benign
********
-Benign
*Benign
********
