## Data Collection


In [2]:
from sklearn.datasets import load_breast_cancer

In [3]:
dataset=load_breast_cancer()

In [4]:
dataset.keys()

dict_keys(['data', 'target', 'frame', 'target_names', 'DESCR', 'feature_names', 'filename'])

## Description of Dataset

In [5]:
print(dataset['DESCR'])

.. _breast_cancer_dataset:

Breast cancer wisconsin (diagnostic) dataset
--------------------------------------------

**Data Set Characteristics:**

    :Number of Instances: 569

    :Number of Attributes: 30 numeric, predictive attributes and the class

    :Attribute Information:
        - radius (mean of distances from center to points on the perimeter)
        - texture (standard deviation of gray-scale values)
        - perimeter
        - area
        - smoothness (local variation in radius lengths)
        - compactness (perimeter^2 / area - 1.0)
        - concavity (severity of concave portions of the contour)
        - concave points (number of concave portions of the contour)
        - symmetry
        - fractal dimension ("coastline approximation" - 1)

        The mean, standard error, and "worst" or largest (mean of the three
        worst/largest values) of these features were computed for each image,
        resulting in 30 features.  For instance, field 0 is Mean Radi

In [6]:
dataset['data'].shape

(569, 30)

## Attributes or properties of a cancerous cell

In [7]:
dataset['feature_names']

array(['mean radius', 'mean texture', 'mean perimeter', 'mean area',
       'mean smoothness', 'mean compactness', 'mean concavity',
       'mean concave points', 'mean symmetry', 'mean fractal dimension',
       'radius error', 'texture error', 'perimeter error', 'area error',
       'smoothness error', 'compactness error', 'concavity error',
       'concave points error', 'symmetry error',
       'fractal dimension error', 'worst radius', 'worst texture',
       'worst perimeter', 'worst area', 'worst smoothness',
       'worst compactness', 'worst concavity', 'worst concave points',
       'worst symmetry', 'worst fractal dimension'], dtype='<U23')

## Data of one breast cancer cell

In [8]:
j=0
for i in dataset['feature_names']:
    print(i,"=",dataset['data'][1][j])
    j+=1

mean radius = 20.57
mean texture = 17.77
mean perimeter = 132.9
mean area = 1326.0
mean smoothness = 0.08474
mean compactness = 0.07864
mean concavity = 0.0869
mean concave points = 0.07017
mean symmetry = 0.1812
mean fractal dimension = 0.05667
radius error = 0.5435
texture error = 0.7339
perimeter error = 3.398
area error = 74.08
smoothness error = 0.005225
compactness error = 0.01308
concavity error = 0.0186
concave points error = 0.0134
symmetry error = 0.01389
fractal dimension error = 0.003532
worst radius = 24.99
worst texture = 23.41
worst perimeter = 158.8
worst area = 1956.0
worst smoothness = 0.1238
worst compactness = 0.1866
worst concavity = 0.2416
worst concave points = 0.186
worst symmetry = 0.275
worst fractal dimension = 0.08902


In [9]:
features=dataset['data']

In [10]:
features

array([[1.799e+01, 1.038e+01, 1.228e+02, ..., 2.654e-01, 4.601e-01,
        1.189e-01],
       [2.057e+01, 1.777e+01, 1.329e+02, ..., 1.860e-01, 2.750e-01,
        8.902e-02],
       [1.969e+01, 2.125e+01, 1.300e+02, ..., 2.430e-01, 3.613e-01,
        8.758e-02],
       ...,
       [1.660e+01, 2.808e+01, 1.083e+02, ..., 1.418e-01, 2.218e-01,
        7.820e-02],
       [2.060e+01, 2.933e+01, 1.401e+02, ..., 2.650e-01, 4.087e-01,
        1.240e-01],
       [7.760e+00, 2.454e+01, 4.792e+01, ..., 0.000e+00, 2.871e-01,
        7.039e-02]])

## Type of label of 30 cell data
### 0 represents malignant and 1 represents benign cell type 

In [12]:
labels=dataset['target']
labels 

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,
       0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0,
       1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0,
       1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1,
       1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0,
       0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1,
       1, 0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0,
       0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0,
       1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1,
       1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0,

In [13]:
dataset['target_names']

array(['malignant', 'benign'], dtype='<U9')

In [14]:
labels.shape

(569,)

In [34]:
dataset['target_names']

array(['malignant', 'benign'], dtype='<U9')

In [15]:
print(features[250])
print(dataset['target_names'][labels[250]])

[2.094e+01 2.356e+01 1.389e+02 1.364e+03 1.007e-01 1.606e-01 2.712e-01
 1.310e-01 2.205e-01 5.898e-02 1.004e+00 8.208e-01 6.372e+00 1.379e+02
 5.283e-03 3.908e-02 9.518e-02 1.864e-02 2.401e-02 5.002e-03 2.558e+01
 2.700e+01 1.653e+02 2.010e+03 1.211e-01 3.172e-01 6.991e-01 2.105e-01
 3.126e-01 7.849e-02]
malignant


## Representation of dataset in form of DATAFRAME and shuffle data

In [16]:
import numpy as np
import pandas as pd

In [19]:
df_feat=pd.DataFrame(features, columns=dataset['feature_names'])
df_lab=pd.DataFrame(labels,columns=['labels'])
df = pd.concat([df_feat, df_lab], axis=1)  ## concat features and label in a single data frame
df = df.sample(frac = 1)  ## shuffling of data

features = df.values[ : , : 30]
labels = df.values[ : ,30: ]

In [20]:
df

Unnamed: 0,mean radius,mean texture,mean perimeter,mean area,mean smoothness,mean compactness,mean concavity,mean concave points,mean symmetry,mean fractal dimension,...,worst texture,worst perimeter,worst area,worst smoothness,worst compactness,worst concavity,worst concave points,worst symmetry,worst fractal dimension,labels
178,13.010,22.22,82.01,526.4,0.06251,0.01938,0.001595,0.001852,0.1395,0.05234,...,29.02,88.18,608.8,0.08125,0.03432,0.007977,0.009259,0.2295,0.05843,1
233,20.510,27.81,134.40,1319.0,0.09159,0.10740,0.155400,0.083400,0.1448,0.05592,...,37.38,162.70,1872.0,0.12230,0.27610,0.414600,0.156300,0.2437,0.08328,0
187,11.710,17.19,74.68,420.3,0.09774,0.06141,0.038090,0.032390,0.1516,0.06095,...,21.39,84.42,521.5,0.13230,0.10400,0.152100,0.109900,0.2572,0.07097,1
38,14.990,25.20,95.54,698.8,0.09387,0.05131,0.023980,0.028990,0.1565,0.05504,...,25.20,95.54,698.8,0.09387,0.05131,0.023980,0.028990,0.1565,0.05504,0
97,9.787,19.94,62.11,294.5,0.10240,0.05301,0.006829,0.007937,0.1350,0.06890,...,26.29,68.81,366.1,0.13160,0.09473,0.020490,0.023810,0.1934,0.08988,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13,15.850,23.95,103.70,782.7,0.08401,0.10020,0.099380,0.053640,0.1847,0.05338,...,27.66,112.00,876.5,0.11310,0.19240,0.232200,0.111900,0.2809,0.06287,0
125,13.850,17.21,88.44,588.7,0.08785,0.06136,0.014200,0.011410,0.1614,0.05890,...,23.58,100.30,725.9,0.11570,0.13500,0.081150,0.051040,0.2364,0.07182,1
338,10.050,17.53,64.41,310.8,0.10070,0.07326,0.025110,0.017750,0.1890,0.06331,...,26.84,71.98,384.0,0.14020,0.14020,0.105500,0.064990,0.2894,0.07664,1
99,14.420,19.77,94.48,642.5,0.09752,0.11410,0.093880,0.058390,0.1879,0.06390,...,30.86,109.50,826.4,0.14310,0.30260,0.319400,0.156500,0.2718,0.09353,0


In [21]:
df_lab

Unnamed: 0,labels
0,0
1,0
2,0
3,0
4,0
...,...
564,0
565,0
566,0
567,0


## Standardization of dataset

In [22]:
from sklearn.preprocessing import StandardScaler
scale = StandardScaler()
features = scale.fit_transform(features)

In [23]:
features

array([[-3.17326715e-01,  6.81912463e-01, -4.10214580e-01, ...,
        -1.60407975e+00, -9.79979846e-01, -1.41397987e+00],
       [ 1.81277976e+00,  1.98274299e+00,  1.74774001e+00, ...,
         6.34854353e-01, -7.50254999e-01, -3.68968057e-02],
       [-6.86545170e-01, -4.88602302e-01, -7.12138747e-01, ...,
        -7.16597659e-02, -5.31854616e-01, -7.19065519e-01],
       ...,
       [-1.15800874e+00, -4.09482019e-01, -1.13516210e+00, ...,
        -7.55486254e-01, -1.09292595e-02, -4.04857834e-01],
       [ 8.31333012e-02,  1.11781017e-01,  1.03427217e-01, ...,
         6.37899672e-01, -2.95658647e-01,  5.31115323e-01],
       [ 2.25140399e-01, -1.01451947e+00,  1.84571912e-01, ...,
         1.42790160e-03, -3.44192065e-01, -6.73755053e-02]])

## Data Division

In [24]:
#450 Training
X_train = features[:450]
Y_train = labels[:450]

#60 Validation
X_val = features[450:510]
Y_val = labels[450:510]

#59 Testing
X_test = features[510:]
Y_test = labels[510:]

In [26]:
from keras.models import Sequential
from keras.layers import Dense

## Creating Neural Network (Deep Learning Model)

In [27]:
model = Sequential()

model.add(Dense(45, activation = 'relu', input_dim = 30))
model.add(Dense(30, activation = 'relu'))
model.add(Dense(15, activation = 'relu'))
model.add(Dense(8, activation = 'sigmoid'))
model.add(Dense(1, activation = 'sigmoid'))

model.compile( loss = 'binary_crossentropy' , optimizer = 'adam' , metrics = ['accuracy'])

In [29]:
model.fit( X_train , Y_train, epochs = 5, batch_size = 2, validation_data = (X_val, Y_val))

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x20e72376790>

## Testing Accuracy and loss of testing data

In [30]:
model.evaluate(X_test , Y_test)



[0.08987893909215927, 0.9830508232116699]

## Testing Accuracy and loss of validation data

In [31]:
model.evaluate(X_val , Y_val)



[0.13454142212867737, 0.9666666388511658]

## Testing model using 10 input test data

In [33]:
for i in range(10):
  sample = X_test[i]
  sample = np.reshape(sample, (1,30))

  if (model.predict(sample)[0][0] > 0.5):
    print("Prediction : Banign")
  else:
    print("Prediction : Malignant")

  if (Y_test[i] == 1):
    print("Actual : Banign")
  else:
    print("Actual : Melignant")
  print("***********************")

Prediction : Malignant
Actual : Melignant
***********************
Prediction : Banign
Actual : Banign
***********************
Prediction : Banign
Actual : Banign
***********************
Prediction : Banign
Actual : Banign
***********************
Prediction : Banign
Actual : Banign
***********************
Prediction : Banign
Actual : Banign
***********************
Prediction : Banign
Actual : Banign
***********************
Prediction : Malignant
Actual : Melignant
***********************
Prediction : Malignant
Actual : Melignant
***********************
Prediction : Malignant
Actual : Melignant
***********************
