In [92]:
#The following implementation demonstrates a simple Perceptron model used for binary classification on 2 types of datasets:
#1. The breast_cancer dataset (a well-known toy dataset). I did not know beforehand whether it is linearly separable, so this was a completely naive run. However, it produced good results.
#2. A sample dataset generated by ChatGPT for the Perceptron model. This gave about 99% accuracy, as it was generated to be linearly separable.

import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.datasets import load_breast_cancer
from sklearn.datasets import make_classification
#Imported all datasets and required libraries.

In [93]:
#1. Implementation on the breast_cancer dataset.

data1 = load_breast_cancer()
#Fetching the dataset bundle.
X_raw_1 = data1.data
#Raw feature matrix.
Y_1 = 2 * data1.target.reshape(-1, 1) - 1
#Labels as a column vector, mapped from the original {0, 1} range to the {-1, 1} range used by the perceptron update rule.
print(X_raw_1.shape, Y_1.shape, sep="\n")
#Verifying the shapes of the 2 matrices.

(569, 30)
(569, 1)


In [94]:
scaler = StandardScaler()
#Standardiser that rescales every feature to zero mean and unit variance.
X_scaled_1 = scaler.fit_transform(X_raw_1)
#Standardised features, which keeps the scores and weights at a manageable magnitude (no overflows/underflows).
one = np.ones((len(X_scaled_1), 1))
#Column of ones, one entry per sample.
X_1 = np.concatenate((one, X_scaled_1), axis=1)
#Prepending the ones column so that the first weight acts as the bias term.
print(X_1.shape, Y_1.shape, sep="\n")
#Verifying the shapes.

(569, 31)
(569, 1)


In [100]:
Q_1 = np.zeros((X_1.shape[1], 1))
#Initialising the theta (weight) column vector at zero, one entry per column of X_1 (bias included).
epochs = 10
#Arbitrarily chosen maximum number of passes over the dataset.
for i in range(epochs):
    mistakes = 0
    #Number of weight updates made during the current pass.
    for j in range(X_1.shape[0]):
        Xj = X_1[[j], :].T
        #The jth sample as a column vector.
        Zi = (Q_1.T @ Xj).item()
        #Linear score theta^T x for this sample, extracted as a plain scalar so the comparison below does not rely on the truth value of a 1x1 array.
        Yi = Y_1[j, 0]
        #True label of the jth sample, in {-1, 1}.
        Hi = 1 if Zi >= 0 else -1
        #Predicted label in {-1, 1}.
        if Hi != Yi:
            Q_1 += (Yi - Hi) * Xj
            #Perceptron update rule: translate/rotate the hyperplane towards the misclassified sample.
            mistakes += 1
        #Correctly classified samples give (Yi - Hi) = 0, so no update is needed for them.
    if mistakes == 0:
        break
        #A full pass without any update means theta can no longer change, so further passes are pointless.
#Looping through the dataset for our algorithm.

In [101]:
H = X_1 @ Q_1
#Fetching the linear scores for every sample.
H = np.where(H >= 0, 1, -1)
#Thresholding the scores into the predicted labels in {-1, 1}.
acc = (H == Y_1)
#Boolean accuracy vector: True indicates a correct prediction, False a wrong one.
k = int(np.count_nonzero(acc))
#Total number of correct predictions, counted in one vectorized call.
print((k / Y_1.shape[0])*100)
#Printing the percentage of accuracy. Note that it is measured on the same samples the model was trained on.

98.24253075571178


In [102]:
#2. Implementation on our own synthetic dataset (generated to be linearly separable).

X_raw_2, Y_2 = make_classification(
    n_samples=500,
    n_features=2,
    n_informative=2,
    n_redundant=0,
    n_clusters_per_class=1,
    class_sep=2.0,
    flip_y=0,
    random_state=42,
)
#Generating 500 samples with 2 informative features; the fixed random_state makes the dataset reproducible.
Y_2 = 2 * Y_2.reshape(-1, 1) - 1
#Labels as a column vector, mapped from the {0, 1} range to the {-1, 1} range.
print(X_raw_2.shape, Y_2.shape, sep="\n")
#Verifying the shapes.

(500, 2)
(500, 1)


In [103]:
scaler = StandardScaler()
#Fresh standardiser for the second dataset.
X_scaled_2 = scaler.fit_transform(X_raw_2)
#Standardised copy of the feature matrix.
one = np.ones((len(X_scaled_2), 1))
#Column of ones, one entry per sample.
X_2 = np.concatenate((one, X_scaled_2), axis=1)
#Prepending the ones column so that the first weight acts as the bias term.
print(X_2.shape, Y_2.shape, sep="\n")
#Verifying respective shapes.

(500, 3)
(500, 1)


In [105]:
Q_2 = np.zeros((X_2.shape[1], 1))
#Initialising the theta (weight) column vector at zero, one entry per column of X_2 (bias included).
epochs = 10
#Arbitrary maximum no. of passes over the dataset.
for i in range(epochs):
    mistakes = 0
    #Number of weight updates made during the current pass.
    for j in range(X_2.shape[0]):
        Xj = X_2[[j], :].T
        #The jth sample as a column vector.
        Zi = (Q_2.T @ Xj).item()
        #Linear score theta^T x for this sample, extracted as a plain scalar so the comparison below does not rely on the truth value of a 1x1 array.
        Yi = Y_2[j, 0]
        #True label of the jth sample, in {-1, 1}.
        Hi = 1 if Zi >= 0 else -1
        #Getting our prediction value in {-1, 1} space.
        if Hi != Yi:
            Q_2 += (Yi - Hi) * Xj
            #Updating theta only on a misclassification.
            mistakes += 1
        #Correctly classified samples give (Yi - Hi) = 0, so no update is needed for them.
    if mistakes == 0:
        break
        #A full pass without any update means theta can no longer change, so further passes are pointless.
#Completing our training loop.

In [106]:
H = X_2 @ Q_2
#Making our linear score/hypothesis matrix.
H = np.where(H >= 0, 1, -1)
#Converting the hypothesis matrix into the predicted labels in {-1, 1}.
acc = (H == Y_2)
#Boolean vector: True indicates a correct prediction.
k = int(np.count_nonzero(acc))
#Total no. of correct predictions, counted in one vectorized call.
print((k / Y_2.shape[0]) * 100)
#Printing accuracy as a percentage. Note that it is measured on the same samples the model was trained on.

99.0


# Overview
I would like to share my own implementation of the perceptron model. I made a few mistakes along the way, which actually led to a few ideas I would love to implement. The demonstration shows how the perceptron performs binary classification on two datasets: the breast_cancer toy dataset available from the scikit-learn library, and a custom binary dataset, to show how the Perceptron model works on these sets.
# Mistakes, doubts, ideas:
1. Why do we need the bias term when computing $Z^{i} = X^{i} \theta$? What will be the effect on the decision boundary if I leave it out? (Found that the decision boundary will then always pass through the origin no matter what, which hurts the accuracy of the algorithm.)
2. Learned a new vectorized operation (`np.where`) to convert $X^{i} \theta$ into our main hypothesis matrix.
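3. Had flipped the signs of $Y^{i}$ and $H_{\theta}(X^{i})$ in the update rule (the correct form is $\theta \leftarrow \theta + (Y^{i} - H_{\theta}(X^{i}))\,X^{i}$), which earlier gave trash accuracies.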
4. Had assumed the {0, 1} label range for the perceptron model. Changed that to {-1, 1}, as is the usual convention.
5. In logistic regression, we maximise the log-likelihood function, which comes from a probabilistic interpretation of the model. Can we use our basic accuracy vector to derive an update rule instead? Will try this in the future.

Thank you for sticking around :)