# Breast Cancer Diagnostic with Adaline

In [1]:
import pandas as pd # for loading data csv into dataframe and cleaning data
import os # for building url path
import numpy as np 
import matplotlib.colors # will use ListedColorMap to plot results
import matplotlib.pyplot as pp

## Read the breast cancer data from the UCI Machine Learning Repository

In [2]:
# Build the dataset URL with explicit '/' separators. os.path.join uses the
# host OS path separator (a backslash on Windows), which would corrupt a URL.
cancer_data_url = '/'.join(['https://archive.ics.uci.edu', 'ml',
                            'machine-learning-databases',
                            'breast-cancer-wisconsin',
                            'breast-cancer-wisconsin.data'])
# header=None: the first row of the file is treated as data, not column names;
# names are assigned explicitly below.
cancer_data = pd.read_csv(cancer_data_url, header=None, encoding='utf-8')
cancer_data.columns = ['id', 'thickness', 'size uniformity', # assign columns more useful names
                       'shape uniformity', 'marginal adhesion',
                       'epi cell size', 'bare nuclei', 'bland chromatin',
                       'normal nucleoi', 'mitoses', 'class']
cancer_data.head()

Unnamed: 0,id,thickness,size uniformity,shape uniformity,marginal adhesion,epi cell size,bare nuclei,bland chromatin,normal nucleoi,mitoses,class
0,1000025,5,1,1,1,2,1,3,1,1,2
1,1002945,5,4,4,5,7,10,3,2,1,2
2,1015425,3,1,1,1,2,2,3,1,1,2
3,1016277,6,8,8,1,3,4,3,7,1,2
4,1017023,4,1,1,3,2,1,3,1,1,2


In [None]:
def every_unique_pair(mylist):
    """Return every positionally unique pair of elements from ``mylist``.

    Pairs are formed by position: each element is paired with every element
    that comes after it, so ``[a, b, c]`` yields ``[[a, b], [a, c], [b, c]]``.
    If the input contains no repeated elements, no pair is repeated either.

    Parameters
    ----------
    mylist : iterable
        The elements to pair up (a list, tuple, pandas Index, ...).

    Returns
    -------
    list of list
        Two-element lists ``[first, second]`` in positional order. Empty when
        the input has fewer than two elements.
    """
    # Local import keeps this cell self-contained.
    from itertools import combinations

    # combinations() emits pairs in positional order, which matches the order
    # produced by pairing each element with all of its successors.
    return [list(pair) for pair in combinations(mylist, 2)]

# figure,subplotx = pp.subplots(len(every_unique_pair(cancer_data.columns[1:len(cancer_data.columns)-1])))
# figure.suptitle('Visual Comparison of Linear Separability')
# figure.set_size_inches(20, 40)
# for i, pair in enumerate(every_unique_pair(cancer_data.columns[1:len(cancer_data.columns) - 1])):
#     c1,c2 = pair
#     plot_classes_by_columns(subplotx[i], cancer_data, c1, c2)

## Adaline SGD Classifier Class

### As implemented in Ch02

In [4]:
class AdalineSGD(object):
    """ADAptive LInear NEuron classifier trained with stochastic gradient descent.

    Parameters
    ----------
    learning_rate : float
        Step size that scales every weight update.
    epochs : int
        Number of passes over the training data performed by ``fit``.
    shuffle : bool
        If True, ``fit`` shuffles the training data at the start of every epoch.
    random_seed : int or None
        Seed for the random generator used for weight initialization and
        shuffling.

    Attributes
    ----------
    weights : 1d-array
        Weights after fitting; ``weights[0]`` is the bias term and
        ``weights[1:]`` are the per-feature weights. Also readable as ``w_``.
    cost_ : list
        Average per-sample cost (half the squared error) for each epoch of
        the most recent ``fit`` call.
    weights_initialized : bool
        Whether the weights have been initialized yet.
    """
    def __init__(self, learning_rate=0.01, epochs=10, shuffle=True, random_seed=None):
        self.learning_rate = learning_rate
        self.epochs = epochs
        self.weights_initialized = False
        self.shuffle = shuffle
        self.random_seed = random_seed

    @property
    def w_(self):
        """Alias for ``weights``, kept for code that uses the ``w_`` name."""
        return self.weights

    @w_.setter
    def w_(self, value):
        self.weights = value

    def fit(self, X, y):
        """Fit the model to the training data.

        The weights are re-initialized on every call; use ``partial_fit`` to
        continue training with the existing weights.

        Parameters
        ----------
        X : array, shape = [n_samples, n_features]
            Training samples.
        y : array, shape = [n_samples]
            Target values.

        Returns
        -------
        self : AdalineSGD
        """
        self._initialize_weights(X.shape[1])
        self.cost_ = []
        # The number of passes is stored as ``epochs`` by __init__.
        for _ in range(self.epochs):
            if self.shuffle:
                X, y = self._shuffle(X, y)
            cost = []
            for xi, target in zip(X, y):
                cost.append(self._update_weights(xi, target))
            avg_cost = sum(cost) / len(y)
            self.cost_.append(avg_cost)
        return self

    def partial_fit(self, X, y):
        """Fit training data without reinitializing the weights.

        Weights are initialized only if they have not been initialized yet.
        When ``y`` holds more than one target, each (sample, target) pair is
        applied in turn; otherwise ``X`` and ``y`` are treated as one sample.

        Returns
        -------
        self : AdalineSGD
        """
        if not self.weights_initialized:
            self._initialize_weights(X.shape[1])
        # np.asarray lets a plain Python scalar target through the size check.
        if np.asarray(y).ravel().shape[0] > 1:
            for xi, target in zip(X, y):
                self._update_weights(xi, target)
        else:
            self._update_weights(X, y)
        return self

    def _shuffle(self, X, y):
        """Shuffle training data, applying the same permutation to X and y."""
        r = self.rgen.permutation(len(y))
        return X[r], y[r]

    def _initialize_weights(self, m):
        """Initialize the bias and ``m`` feature weights to small random numbers."""
        self.rgen = np.random.RandomState(self.random_seed)
        # Stored as ``weights``, the name every other method reads.
        self.weights = self.rgen.normal(loc=0.0, scale=0.01, size=1 + m)
        self.weights_initialized = True

    def _update_weights(self, xi, target):
        """Apply the Adaline learning rule for one sample and return its cost."""
        output = self.activation(self.net_input(xi))
        error = (target - output)
        self.weights[1:] += self.learning_rate * xi.dot(error)
        self.weights[0] += self.learning_rate * error
        cost = 0.5 * error**2
        return cost

    def net_input(self, X):
        """Calculate net input: the weighted sum of the features plus the bias."""
        return np.dot(X, self.weights[1:]) + self.weights[0]

    def activation(self, X):
        """Compute linear activation (the identity function)."""
        return X

    def predict(self, X):
        """Return class label after unit step: 1 if net input >= 0, else -1."""
        return np.where(self.activation(self.net_input(X)) >= 0.0, 1, -1)