<a href="https://colab.research.google.com/github/PalmerTurley34/K-Nearest-Neighbors/blob/main/Nearest_Neighbors.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
class KNearestNeighbors:
    """Brute-force k-nearest-neighbours classifier using Euclidean distance.

    Training data is stored verbatim by ``fit``; each prediction measures the
    distance from the query to every stored sample and takes a majority vote
    among the ``k`` closest ones.

    Attributes set by ``fit``:
        x, y        -- training samples and labels as NumPy arrays
        n_classes   -- number of distinct labels
        i_to_label  -- maps encoded class index -> original label
        label_to_i  -- maps original label -> encoded class index
        y_enc       -- training labels encoded as class indices
    """

    def __init__(self, n_neighbors=5):
        """Store the number of neighbours that take part in each vote.

        Raises:
            ValueError: if ``n_neighbors`` is smaller than 1 (no neighbour
                could vote, so every prediction would be meaningless).
        """
        if n_neighbors < 1:
            raise ValueError(
                f"n_neighbors must be at least 1, got {n_neighbors!r}"
            )
        self.k = n_neighbors

    def fit(self, x, y):
        """Memorise the training samples and encode their labels.

        Args:
            x: array-like of training samples, one sample per row.
            y: array-like of labels, one per training sample.

        Raises:
            ValueError: if ``x`` holds no samples, or ``x`` and ``y`` do not
                contain the same number of entries.
        """
        samples = np.array(x)
        targets = np.array(y)
        if samples.ndim == 0 or samples.shape[0] == 0:
            raise ValueError("x must contain at least one training sample")
        if targets.shape[:1] != samples.shape[:1]:
            raise ValueError(
                "x and y must contain the same number of samples, got "
                f"{samples.shape[0]} and {targets.shape[0] if targets.ndim else 0}"
            )
        self.x = samples
        self.y = targets
        # Encode labels as consecutive integers (in sorted label order) so the
        # votes can be tallied in a flat count array.
        labels = np.unique(self.y)
        self.n_classes = len(labels)
        self.i_to_label = dict(enumerate(labels))
        self.label_to_i = {label: i for i, label in enumerate(labels)}
        self.y_enc = np.array([self.label_to_i[label] for label in self.y])

    def single_predict(self, pred):
        """Return the predicted label for one query sample.

        Ties are resolved deterministically: equally distant samples are
        ranked in training order, and equally voted classes resolve to the
        lowest class index (i.e. the smallest label in sorted order).
        """
        # One row per training sample, so all distances come from one call.
        train = self.x.reshape(self.x.shape[0], -1)
        distances = np.linalg.norm(train - np.asarray(pred).reshape(-1), axis=1)
        # A stable sort keeps training order among equal distances.
        nearest = np.argsort(distances, kind='stable')[:self.k]
        votes = np.bincount(self.y_enc[nearest], minlength=self.n_classes)
        # argmax returns the first maximum -> lowest class index wins a tie.
        return self.i_to_label[int(np.argmax(votes))]

    def predict(self, pred):
        """Predict labels for one sample (1-D input) or many (2-D input).

        Returns:
            A single label for 1-D input, or a NumPy array with one label
            per row for 2-D input.

        Raises:
            ValueError: if ``pred`` is neither 1-D nor 2-D.
        """
        pred = np.array(pred)
        if pred.ndim == 1:
            return self.single_predict(pred)
        if pred.ndim == 2:
            return np.array([self.single_predict(row) for row in pred])
        raise ValueError(
            f"pred must be a 1-D sample or a 2-D array of samples, "
            f"got {pred.ndim}-D input"
        )

In [None]:
import pandas as pd

# Load the iris measurements. Column names are passed explicitly, so pandas
# treats every line of the file (including the first) as data.
df = pd.read_csv(
    '/content/iris.data',
    names=[
        'sepal_length',
        'sepal_width',
        'petal_length',
        'petal_width',
        'class',
    ],
)


In [None]:
from sklearn.model_selection import train_test_split

# Stratify on the label so both splits keep the class proportions, and pin the
# seed so the split (and every result derived from it) is reproducible.
train, test = train_test_split(df, stratify=df['class'], random_state=42)

In [None]:
# Training labels and training features (everything except the label column).
y = train['class']
x = train.drop(columns='class')
# Keep only the feature columns of the held-out split; its labels are discarded.
test = test.drop(columns='class')

In [None]:
# Instantiate the hand-written classifier with its default of 5 neighbours.
knn = KNearestNeighbors()

In [None]:
# Store the training features and labels inside the hand-written classifier.
knn.fit(x,y)

In [None]:
from sklearn.neighbors import KNeighborsClassifier
# Reference model: scikit-learn's k-NN classifier with default settings,
# fitted on the same training data for comparison.
kn = KNeighborsClassifier()
kn.fit(x,y)

KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
                     metric_params=None, n_jobs=None, n_neighbors=5, p=2,
                     weights='uniform')

In [None]:
# Predictions of the scikit-learn classifier on the held-out features.
y_pred = kn.predict(test)

In [None]:
# Predictions of the hand-written classifier on the held-out features.
# NOTE: despite its name, y_test holds predictions, not true test labels.
y_test = knn.predict(test)

In [None]:
# First ten predictions from the hand-written KNearestNeighbors classifier.
y_test[:10]

array(['Iris-setosa', 'Iris-versicolor', 'Iris-versicolor',
       'Iris-virginica', 'Iris-versicolor', 'Iris-versicolor',
       'Iris-versicolor', 'Iris-setosa', 'Iris-setosa', 'Iris-virginica'],
      dtype='<U15')

In [None]:
# First ten predictions from scikit-learn's KNeighborsClassifier.
# Author's note: the results were exactly the same as the hand-written
# classifier's for the entire test data (not checked programmatically here).
y_pred[:10]

array(['Iris-setosa', 'Iris-versicolor', 'Iris-versicolor',
       'Iris-virginica', 'Iris-versicolor', 'Iris-versicolor',
       'Iris-versicolor', 'Iris-setosa', 'Iris-setosa', 'Iris-virginica'],
      dtype=object)

In [None]:
import plotly.graph_objects as go
import plotly.express as px

# Scatter of sepal length vs. sepal width for the full dataset, coloured by
# class, with marker size driven by petal length.
scat = px.scatter(
    df,
    x='sepal_length',
    y='sepal_width',
    color='class',
    size='petal_length',
)
scat.show()

In [None]:
# Overlay a large black marker at (5.8, 3) on the existing figure.
scat.add_trace(
    go.Scatter(
        x=[5.8],
        y=[3],
        marker=dict(size=20, color='black'),
        showlegend=False,
    )
)
scat.show()

In [None]:
# Draw one blue segment from each listed point to the marker at (5.8, 3).
# NOTE(review): the five points are presumably the marker's nearest
# neighbours in this 2-D view -- confirm against the data.
for neighbor_x, neighbor_y in [(5.7, 2.9), (5.7, 3), (5.8, 2.8), (5.9, 3), (5.9, 3.2)]:
    scat.add_trace(go.Scatter(
        x=[neighbor_x, 5.8],
        y=[neighbor_y, 3],
        mode='lines',
        line=go.scatter.Line(color='dodgerblue', width=4),
        showlegend=False,
    ))
scat.show()