### SINGLE LAYER PERCEPTRON (breast cancer dataset)

#### importing libraries

In [7]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt

#### loading dataset and setting X and y

In [8]:
from sklearn.datasets import load_breast_cancer
cancer_data = load_breast_cancer()
X = cancer_data.data
y = cancer_data.target


In [9]:
print(X.shape)
print(X)

(569, 30)
[[1.799e+01 1.038e+01 1.228e+02 ... 2.654e-01 4.601e-01 1.189e-01]
 [2.057e+01 1.777e+01 1.329e+02 ... 1.860e-01 2.750e-01 8.902e-02]
 [1.969e+01 2.125e+01 1.300e+02 ... 2.430e-01 3.613e-01 8.758e-02]
 ...
 [1.660e+01 2.808e+01 1.083e+02 ... 1.418e-01 2.218e-01 7.820e-02]
 [2.060e+01 2.933e+01 1.401e+02 ... 2.650e-01 4.087e-01 1.240e-01]
 [7.760e+00 2.454e+01 4.792e+01 ... 0.000e+00 2.871e-01 7.039e-02]]


#### label encoding for y (M or B)

In [10]:
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
y = le.fit_transform(y)
print(y.shape)

(569,)


#### train test split of (80/20)

In [11]:

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 10)
print(X_train.shape, y_train.shape)
print(X_test.shape, y_test.shape)

(455, 30) (455,)
(114, 30) (114,)


In [23]:
print(X)

[[1.799e+01 1.038e+01 1.228e+02 ... 2.654e-01 4.601e-01 1.189e-01]
 [2.057e+01 1.777e+01 1.329e+02 ... 1.860e-01 2.750e-01 8.902e-02]
 [1.969e+01 2.125e+01 1.300e+02 ... 2.430e-01 3.613e-01 8.758e-02]
 ...
 [1.660e+01 2.808e+01 1.083e+02 ... 1.418e-01 2.218e-01 7.820e-02]
 [2.060e+01 2.933e+01 1.401e+02 ... 2.650e-01 4.087e-01 1.240e-01]
 [7.760e+00 2.454e+01 4.792e+01 ... 0.000e+00 2.871e-01 7.039e-02]]


#### feature scaling

In [12]:
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
#always fit_transform on training set and as testing set is unseen, we'll use transform to normalise
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

#### defining learning rate and #iterations

In [13]:
l_rate = 0.1
n_epochs = 1000

#### estimating perceptron weights using stochastic gradient

In [14]:
def predict(row, weights):
    activation = weights[0]
    for i in range(row.shape[0]):
        activation += weights[i+1]*row[i]
    return 1.0 if activation >= 0.0 else 0.0

In [17]:
def train_weights(X, y, l_rate, n_epochs):
    rows = X.shape[0]
    columns = X.shape[1]
    weights = np.zeros((columns+1, 1))

    for epoch in range(n_epochs):
        for i in range(rows):
            prediction = predict(X[i], weights)
            error = y[i] - prediction
            weights[0] = weights[0] + l_rate*error
            for j in range(1, columns+1):
                weights[j] = weights[j] + l_rate*error*X[i][j-1]
        
    return weights

#### training our model and adjusting weights

In [18]:
weights = train_weights(X_train, y_train, l_rate, n_epochs)
print(weights.T)

[[-1.17478026e+06 -1.43714825e+04 -9.03183763e+03 -1.46680200e+04
  -1.41876641e+04 -7.02024369e+03 -1.19817827e+04 -1.45987519e+04
  -1.55516292e+04 -7.08186554e+03  2.03781276e+02 -1.15745140e+04
  -6.40434803e+02 -1.17427185e+04 -1.09382261e+04  2.19788179e+02
  -5.83221904e+03 -7.34814259e+03 -8.31574421e+03  9.48984223e+01
  -2.15176149e+03 -1.52363620e+04 -9.26735076e+03 -1.55845250e+04
  -1.46302127e+04 -8.39691867e+03 -1.24257924e+04 -1.45207051e+04
  -1.59182442e+04 -8.48620822e+03 -7.27095077e+03]]


#### predicting label for test set using the weights obtained during training

In [21]:
predicted = []
for i in range(X_test.shape[0]):
    pred_value = predict(X_test[i], weights)
    predicted.append(pred_value)

#### getting F1 score, accuracy and ROC curve

In [22]:
from sklearn.metrics import accuracy_score, f1_score
acc = accuracy_score(y_test,predicted).round(5)
f1 = f1_score(y_test, predicted, average='weighted').round(5)
print("\nf1 score : {}".format(f1))
print("accuracy score : {}".format(acc))


f1 score : 0.60855
accuracy score : 0.60526
