# Breast Cancer Classification
## Comparing self built model with library models
In this Jupyter Notebook a self built Neural Network, used for classification is compared to standard library models.

### Import Data
All imported data was downloaded from https://archive.ics.uci.edu/ml/datasets/Breast+Cancer+Wisconsin+%28Original%29.

In [37]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import load_breast_cancer

data = np.genfromtxt('BreastCancer.csv', delimiter=',')
data[np.isnan(data) == True] = 0
Data = data[:,1:10]/10
Tg = data[:,10]

Target = np.zeros((Tg.shape[0],2))

for i in range(len(Tg)):
    if Tg[i] == 4:
        Target[i,1] = 1
    else: Target[i,0] = 1

Train_Features = Data[0:400,:]
Train_Target = Target[0:400,:]
Test_Features = Data[401:,:]
Test_Target = Target[401:,:]


## Self developed Neural Network
Neural Network with variable architecture. Sigmoid as activation function.

In [38]:
def sigmoid(x):
    return 1/(1+np.e ** -x)
def d_sigmoid(x):
    return x * (1-x)

activation_func = sigmoid
d_activation_func = d_sigmoid

NN_architecture = [
    {"inputs":9, "neurons":5},
    {"inputs":5, "neurons":2}
]

def forward_single_layer(out_prev, w_curr, b_curr):
        return activation_func(np.dot(w_curr, out_prev)+b_curr)

class NeuralNetwork:
    def __init__(self, architecture):
        self.architecture = architecture
        self.parameters = {}
        for idx, layer in enumerate(architecture):
            layer_idx = idx+1
            layer_input = layer["inputs"]
            layer_output = layer["neurons"]
            self.parameters['W'+str(layer_idx)] = np.random.randn(layer_output,layer_input)
            self.parameters['B'+str(layer_idx)] = np.random.randn(layer_output,1)

    def forward_propagation(self, inputs):
        inputs = np.array(inputs, ndmin=2).T
        out_curr = inputs
        memory = {}
        for idx, layer in enumerate(self.architecture):
            layer_idx = idx+1
            w_curr = self.parameters["W"+str(layer_idx)]
            b_curr = self.parameters["B"+str(layer_idx)]
            out_curr = forward_single_layer(out_prev=out_curr,w_curr=w_curr,b_curr=b_curr)
            memory[idx] = out_curr
        return out_curr, memory


    def train_iteration(self, inputs, target, learn_rate):
        target = np.array(target, ndmin=2).T
        output, memory = self.forward_propagation(inputs)
        inputs = np.array(inputs, ndmin=2).T
        loss = 0
        for idx, layer in reversed(list(enumerate(self.architecture))):
            layer_idx = idx+1
            if layer_idx == len(self.architecture):
                loss += target - output
                adjust_w = learn_rate*1/layer["neurons"]*(loss*d_activation_func(memory[idx])).dot(memory[idx-1].T)
                adjust_b = learn_rate*1/layer["neurons"]*loss*d_activation_func(memory[idx])
                self.parameters["W"+str(layer_idx)] += (adjust_w/learn_rate - adjust_b)*learn_rate
                self.parameters["B"+str(layer_idx)] += adjust_b
            elif layer_idx == 1:
                loss = self.parameters["W"+str(layer_idx+1)].T.dot(loss)
                adjust_w = learn_rate*1/layer["neurons"]*(loss*d_activation_func(memory[idx])).dot(inputs.T)
                adjust_b = learn_rate*1/layer["neurons"]*loss*d_activation_func(memory[idx])
                self.parameters["W"+str(layer_idx)] += (adjust_w/learn_rate - adjust_b)*learn_rate
                self.parameters["B"+str(layer_idx)] += adjust_b
            else:
                loss = self.parameters["W"+str(layer_idx+1)].T.dot(loss)
                adjust_w = learn_rate*1/layer["neurons"]*(loss*d_activation_func(memory[idx])).dot(memory[idx-1].T)
                adjust_b = learn_rate*1/layer["neurons"]*loss*d_activation_func(memory[idx])
                self.parameters["W"+str(layer_idx)] += (adjust_w/learn_rate - adjust_b)*learn_rate
                self.parameters["B"+str(layer_idx)] += adjust_b

    def train(self, inputs, targets, epoches):
        for epoch in range(epoches):
            shuffler = np.random.permutation(len(targets))
            inputs = inputs[shuffler]
            targets = targets[shuffler]
            for i in range(0,400):
                inputdata = inputs[i,:]
                target = targets[i,:]
                self.train_iteration(inputdata, target, 0.1)
            if epoch/epoches * 100 % 10 == 0:
                print('Training Processed: '+str(epoch/epoches*100)+'%')
NN = NeuralNetwork(NN_architecture)

### Train Neural Network with Training Data

In [39]:
NN.train(Train_Features,Train_Target,10)

Training Processed: 0.0%
Training Processed: 10.0%
Training Processed: 20.0%
Training Processed: 30.0%
Training Processed: 40.0%
Training Processed: 50.0%
Training Processed: 60.0%
Training Processed: 70.0%
Training Processed: 80.0%
Training Processed: 90.0%


### Test Neural Network on Testing Data

In [40]:
count = 0
predict = np.empty((298,2))
for i in range(298):
    out, mem = NN.forward_propagation(Test_Features[i,:])
    predict[[i],:] = out.T
    if np.argmax(out) == np.argmax(Test_Target[i,:]):
        count += 1
performance = (count/298)*100
print('Performance of prediction: ', performance, '%')

Performance of prediction:  98.99328859060402 %


## SKLEARN Toolbox Neural Network

In [41]:
from sklearn.neural_network import MLPClassifier
clf = MLPClassifier(solver='sgd',hidden_layer_sizes=(5,), random_state=1, learning_rate_init=0.1, )
clf.fit(Train_Features, Train_Target)

count = 0
predict = np.empty((298,2))
for i in range(298):
    out = clf.predict(Test_Features[i,:].reshape(1,-1))
    predict[[i],:] = out
    if np.argmax(out) == np.argmax(Test_Target[i,:]):
        count += 1
performance = (count/298)*100
print('Performance of prediction: ', performance, '%')

Performance of prediction:  98.65771812080537 %


## NEUROLAB Toolbox Neural Network

In [42]:
import neurolab as nl
net = nl.net.newff([[0,1],[0,1],[0,1],[0,1],[0,1],[0,1],[0,1],[0,1],[0,1]],[5, 2])
error = net.train(Train_Features,Train_Target,epochs=20)

count = 0
predict = np.empty((298,2))
for i in range(298):
    out = net.sim(Test_Features[i,:].reshape(1,-1))
    predict[[i],:] = out
    if np.argmax(out) == np.argmax(Test_Target[i,:]):
        count += 1
performance = (count/298)*100
print('Performance of prediction: ', performance, '%')





The maximum number of train epochs is reached
Performance of prediction:  98.65771812080537 %
