## Artifical Neural Network

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

#### Breast Cancer Dataset 

<img src="Images/Large14.jpg" width="300" height="60" />


Attribute Information:

1. Sample code number: id number
2. Clump Thickness: 1 - 10
3. Uniformity of Cell Size: 1 - 10
4. Uniformity of Cell Shape: 1 - 10
5. Marginal Adhesion: 1 - 10
6. Single Epithelial Cell Size: 1 - 10
7. Bare Nuclei: 1 - 10
8. Bland Chromatin: 1 - 10
9. Normal Nucleoli: 1 - 10
10. Mitoses: 1 - 10
11. Class: (2 for benign, 4 for malignant)

#### Data to csv

In [2]:
with open("breast-cancer-wisconsin.data") as f: #reading the data
    text = f.readlines()

In [3]:
## Data cleaning
breast_cancer = []
for l in text:
    breast_cancer.append(l.strip())

In [4]:
with open("breast_cancer.csv", "w") as f:
    f.write("ID_Num,Clump_Thickness,Cell_Size,Cell_Shape,"\
               "Marginal_Adhesion,Epithelial_Cell_Size,Bare_Nuclei,"\
               "Bland_Chromatin,Normal_Nucleoli,Mitoses,Class\n")
    for line in breast_cancer:
        f.write(line + "\n")
        

#### Data Preprocessing

In [5]:
dataset = pd.read_csv("breast_cancer.csv", na_values = '?')
#dataset = dataset.replace('?', np.nan)
#dataset = dataset.fillna(dataset.mean())
dataset = dataset.dropna()
X = dataset.iloc[:, 1:10].values #select all Except ID numbers and Class
Y = dataset.iloc[:, 10].values
Y = np.where(Y==2, 0, 1) #Class 2 for Benign = 0 else 1 (malignant)


In [6]:
dataset

Unnamed: 0,ID_Num,Clump_Thickness,Cell_Size,Cell_Shape,Marginal_Adhesion,Epithelial_Cell_Size,Bare_Nuclei,Bland_Chromatin,Normal_Nucleoli,Mitoses,Class
0,1000025,5,1,1,1,2,1.0,3,1,1,2
1,1002945,5,4,4,5,7,10.0,3,2,1,2
2,1015425,3,1,1,1,2,2.0,3,1,1,2
3,1016277,6,8,8,1,3,4.0,3,7,1,2
4,1017023,4,1,1,3,2,1.0,3,1,1,2
...,...,...,...,...,...,...,...,...,...,...,...
694,776715,3,1,1,1,3,2.0,1,1,1,2
695,841769,2,1,1,1,2,1.0,1,1,1,2
696,888820,5,10,10,3,7,3.0,8,10,2,4
697,897471,4,8,6,4,3,4.0,10,6,1,4


In [7]:
from sklearn.model_selection import train_test_split

In [8]:
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size = .15, random_state=0)

In [9]:
# Feature Scaling
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

In [10]:
X_train, 

(array([[ 1.9967197 ,  2.20519765,  0.93821373, ...,  0.22275834,
          0.01546113,  0.23858226],
        [-0.14064867, -0.69725614, -0.73613693, ..., -0.18353046,
         -0.6250714 , -0.33799154],
        [-1.20933285, -0.69725614, -0.73613693, ..., -0.99610807,
         -0.6250714 , -0.33799154],
        ...,
        [-1.20933285, -0.69725614, -0.73613693, ..., -0.99610807,
         -0.6250714 , -0.33799154],
        [-0.49687673, -0.69725614, -0.73613693, ..., -0.58981926,
         -0.6250714 , -0.33799154],
        [ 1.9967197 ,  1.88270278,  1.27308387, ...,  1.44162475,
          1.2965262 , -0.33799154]]),)

In [11]:
import keras
from keras.models import Sequential
from keras.layers import Dense

Using TensorFlow backend.


#### Artifical neural network with two hidden layers

<img src="Images/ANNDiagram.png" width="300" height="60" />


In [12]:
model = Sequential()

In [13]:
#First input layer and hidden layer
## Add picture of rectifier function


In [14]:
##Output layer is sigmoid function
#Stocastic gradient descent 

In [15]:
model.add(Dense(units = 5, kernel_initializer = 'uniform', activation = 'relu', input_dim = 9))

In [16]:
model.add(Dense(units = 5,  kernel_initializer = 'uniform', activation = 'relu'))

In [17]:
model.add(Dense(units = 1,  kernel_initializer = 'uniform', activation = 'sigmoid'))

In [18]:
#Stocastic gradient descent 
#Logarithmic Loss 235 of Udemy
model.compile(optimizer = 'adam', loss = 'binary_crossentropy', metrics = ['accuracy'])



In [22]:
model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_1 (Dense)              (None, 5)                 50        
_________________________________________________________________
dense_2 (Dense)              (None, 5)                 30        
_________________________________________________________________
dense_3 (Dense)              (None, 1)                 6         
Total params: 86
Trainable params: 86
Non-trainable params: 0
_________________________________________________________________


In [19]:
history = model.fit(X_train, y_train, batch_size = 5, epochs = 100, validation_data=(X_test, y_test))

Train on 580 samples, validate on 103 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100


Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 79/100
Epoch 80/100
Epoch 81/100
Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100


From our results above, we were able to achieve 97% of validation score. 
I am fairly confident our model can accurately classify between a benign 
or malignant cases given a new data. 

In [20]:
X_train

array([[ 1.9967197 ,  2.20519765,  0.93821373, ...,  0.22275834,
         0.01546113,  0.23858226],
       [-0.14064867, -0.69725614, -0.73613693, ..., -0.18353046,
        -0.6250714 , -0.33799154],
       [-1.20933285, -0.69725614, -0.73613693, ..., -0.99610807,
        -0.6250714 , -0.33799154],
       ...,
       [-1.20933285, -0.69725614, -0.73613693, ..., -0.99610807,
        -0.6250714 , -0.33799154],
       [-0.49687673, -0.69725614, -0.73613693, ..., -0.58981926,
        -0.6250714 , -0.33799154],
       [ 1.9967197 ,  1.88270278,  1.27308387, ...,  1.44162475,
         1.2965262 , -0.33799154]])

In [21]:
y_train

array([1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1,
       1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1,
       0, 1, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0,
       0, 0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0,
       0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 1, 1, 0, 0, 0, 1,
       0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1,
       0, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0,
       0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1,
       0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1,
       1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0,
       0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0,
       0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0,