### SINGLE LAYER PERCEPTRON (breast cancer dataset)

#### importing libraries

In [46]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt

#### loading dataset and setting X and y

In [93]:
from sklearn.datasets import load_breast_cancer
cancer_data = load_breast_cancer()
X = cancer_data.data
y = cancer_data.target


In [94]:
print(X.shape)
print(X)

(569, 30)
[[1.799e+01 1.038e+01 1.228e+02 ... 2.654e-01 4.601e-01 1.189e-01]
 [2.057e+01 1.777e+01 1.329e+02 ... 1.860e-01 2.750e-01 8.902e-02]
 [1.969e+01 2.125e+01 1.300e+02 ... 2.430e-01 3.613e-01 8.758e-02]
 ...
 [1.660e+01 2.808e+01 1.083e+02 ... 1.418e-01 2.218e-01 7.820e-02]
 [2.060e+01 2.933e+01 1.401e+02 ... 2.650e-01 4.087e-01 1.240e-01]
 [7.760e+00 2.454e+01 4.792e+01 ... 0.000e+00 2.871e-01 7.039e-02]]


#### label encoding for y (M or B)

In [95]:
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
y = le.fit_transform(y)
print(y.shape)

(569,)


#### reshaping X and y to form matrices using closed form

In [96]:
rows = X.shape[0]
columns = X.shape[1]
#y has a shape of (569,) i.e 569 rows. to make it explicity a 1d np array, we'll reshape it with cols=1
y = y.reshape(y.shape[0], 1)
print("no. of samples/rows: {}\nno. of input features: {}, namely: {}".format(rows, columns, cancer_data.feature_names))
print("X.shape: {}".format(X.shape))
print("y.shape: {}\n".format(y.shape))

ones = np.ones((rows, 1))

#adding this ones column to X
X = np.concatenate((ones, X), axis=1)
#for updated X, first col of 1s represents x0 i.e 1(coef of intercept), 
#other columns represent other attributes
#no. columns gets updated by 1
columns = X.shape[1]
print('after adding one column, updated shape for X: {}'.format(X.shape[1]))

no. of samples/rows: 569
no. of input features: 30, namely: ['mean radius' 'mean texture' 'mean perimeter' 'mean area'
 'mean smoothness' 'mean compactness' 'mean concavity'
 'mean concave points' 'mean symmetry' 'mean fractal dimension'
 'radius error' 'texture error' 'perimeter error' 'area error'
 'smoothness error' 'compactness error' 'concavity error'
 'concave points error' 'symmetry error' 'fractal dimension error'
 'worst radius' 'worst texture' 'worst perimeter' 'worst area'
 'worst smoothness' 'worst compactness' 'worst concavity'
 'worst concave points' 'worst symmetry' 'worst fractal dimension']
X.shape: (569, 30)
y.shape: (569, 1)

after adding one column, updated shape for X: 31


#### train test split of (80/20)

In [97]:

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 10)
print(X_train.shape, y_train.shape)
print(X_test.shape, y_test.shape)

(455, 31) (455, 1)
(114, 31) (114, 1)


#### feature scaling

In [98]:
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
#always fit_transform on training set and as testing set is unseen, we'll use transform to normalise
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

#### defining learning rate and #iterations

In [99]:
l_rate = 0.1
n_epochs = 1000

#### estimating perceptron weights using stochastic gradient

In [100]:
def predict(X, weights):
    activation = X.dot(weights)
    prediction = np.where(activation >= 0.0, 1.0, 0.0)
    return prediction

In [101]:
def train_weights(X, y, l_rate, n_epochs):
    weights = np.zeros((columns, 1))
    for i in range(n_epochs):
        prediction = predict(X, weights)
        error = y - prediction
        weights[0] = weights[0] + l_rate*error[0]
        for i in range(X.shape[0]):
            for j in range(1, columns):
                weights[j] = weights[j] + l_rate*error[j]*X[i][j]
        #weights = weights + l_rate*(((error.T).dot(X)/rows).T)
    return weights

#### training our model and adjusting weights

In [102]:
weights = train_weights(X_train, y_train, l_rate, n_epochs)
print(weights.T)

[[-1.00000000e+02 -2.30698793e-11  2.12274753e-10 -4.26048086e-14
  -4.55191440e-15 -3.46044964e-11  7.09803744e-12 -2.26484873e-11
   0.00000000e+00  4.45421505e-10  2.26259081e-10  0.00000000e+00
  -4.63656891e-14  2.70896916e-11 -2.39328557e-11  4.96880037e-11
   7.09840520e-12  1.24900090e-13  3.68108322e-14 -8.69549294e-11
   2.17385415e-11 -1.38491441e-11  0.00000000e+00 -1.27009625e-10
  -9.75041714e-11 -8.59659566e-14 -9.18344394e-11  2.22807883e-14
  -7.56478213e-14 -9.22781998e-11  0.00000000e+00]]


#### predicting label for test set using the weights obtained during training

In [103]:
pred_label = predict(X_test, weights)

#### getting F1 score, accuracy and ROC curve

In [104]:
from sklearn.metrics import accuracy_score, f1_score
acc = accuracy_score(y_test,pred_label).round(5)
f1 = f1_score(y_test, pred_label, average='weighted').round(5)
print("\nf1 score : {}".format(f1))
print("accuracy score : {}".format(acc))


f1 score : 0.55552
accuracy score : 0.54386
