In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split

Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


In [2]:
bc = load_breast_cancer()
x = bc.data
y = bc.target

In [3]:
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3, random_state=42, stratify=y)

* **stratify** parameter in train_test_split is used to ensure that the proportion of classes in the split datasets (training and testing sets) is the same as the original dataset. This is particularly useful in cases where the target variable y is imbalanced, meaning that certain classes occur more frequently than others. The stratify parameter helps maintain the same distribution of classes in both the training and testing sets, reducing the risk of skewed datasets and improving the reliability of model evaluation.

In [4]:
class CustomPerceptron(object):
    def __init__(self,n_iterations=100,random_state=1,learning_rate=0.01):
        self.n_iterations = n_iterations
        self.random_state = random_state
        self.learning_rate = learning_rate

    def fit(self,x,y):
        regn = np.random.RandomState(self.random_state)
        self.coef_ = regn.normal(loc=0.0, scale=0.01, size=1+x.shape[1])
        for _ in range(self.n_iterations):
            for xi, expected_value in zip(x,y):
                predicted_value = self.predict(xi)
                self.coef_[1:] += self.learning_rate * (expected_value - predicted_value) * xi
                self.coef_[0] += self.learning_rate * (expected_value - predicted_value) * 1

    def activation(self,x):
            return np.dot(x,self.coef_[1:] + self.coef_[0])

    def predict(self,x):
        output = self.activation(x)
        return np.where(output >= 0.0,1,0)

    def score(self,x,y):
        misclassified_data_count = 0
        for xi, target in zip(x,y):
            output = self.predict(xi)
            if (target != output):
                misclassified_data_count += 1
        total_data_count = len(x)
        self.score_ = (total_data_count - misclassified_data_count) / total_data_count
        return self.score_
            

In [5]:
n_iterations=100
learning_rate=0.01

In [6]:
per = CustomPerceptron()
per.fit(x_train,y_train)

In [8]:
print(per.score(x_test,y_test))
print(per.score(x_train,y_train))

0.8947368421052632
0.914572864321608
