In [2]:
import numpy as np
from scipy.stats import mode
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_breast_cancer

In [3]:
data = load_breast_cancer()

X = data.data
y = data.target

In [4]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, stratify=y, shuffle=True, random_state=42)

In [5]:
X_train.shape, X_test.shape

((455, 30), (114, 30))

In [33]:
import numpy as np
from scipy.stats import mode

"""
KNN (K-Nearest Neighbors) is a simple machine learning algorithm that can be used for both regression and classification problems.
It's a non-parametric and instance-based method, meaning that the model does not explicitly learn from the training data, 
but instead makes predictions based on the closest training instances to a given test sample.
The prediction for a test sample is based on the majority vote of its k nearest neighbors in the training data.
The value of k determines the number of neighbors that will be considered, and the distance metric used to find the nearest neighbors.
"""

class KNNClassifier(object):
    
    def __init__(self, n_neighbors: int=3):
        self.n_neighbors=n_neighbors
        
    def __repr__(self):
        return f"KNNClassifier(n_neighbors={self.n_neighbors})"
    
    #to calculate distance between two points in a multidimensional space
    def _euclidean_distance(self, x1: np.ndarray, x2: np.ndarray) -> np.ndarray:
        return np.sqrt(np.sum((x1-x2)**2))
    
    #fit the the model
    def fit(self, x: np.ndarray, y: np.ndarray) -> np.ndarray:
        self.X=x
        self.y=y
        return self
    
    def predict(self, x: np.ndarray) -> np.ndarray:
        return np.array([self._predict(points) for points in x]).flatten()
    
    def _predict(self, test: np.ndarray):
        self.distances = np.argsort([self._euclidean_distance(x, test) for x in self.X])
        preds = self.y[self.distances[self.n_neighbors]]
        preds = mode(preds, axis=0)[0]
        return preds
    

In [34]:
model = KNNClassifier(n_neighbors=10)

In [35]:
model.fit(X_train, y_train)

KNNClassifier(n_neighbors=10)

In [36]:
predicted = model.predict(X_test)

In [37]:
np.sum(y_test==predicted)/y_test.shape[0] * 100

93.85964912280701