### Creating a KNN model manually (without using scikit-learn)

In [None]:
import numpy as np 
from collections import Counter
import pandas as pd

def euclidean_distance(point1, point2):
    """Return the Euclidean (straight-line) distance between two points.

    Parameters
    ----------
    point1, point2 : sequence of numbers
        Coordinates of the two points. Both must have the same number of
        coordinates; any dimensionality is accepted, not only 2-D.

    Returns
    -------
    numpy.float64
        Square root of the sum of squared coordinate differences.

    Raises
    ------
    ValueError
        If the two points do not have the same shape.
    """
    first = np.asarray(point1, dtype=float)
    second = np.asarray(point2, dtype=float)
    # Fail loudly instead of letting NumPy broadcast mismatched points.
    if first.shape != second.shape:
        raise ValueError(
            f"points must have the same shape, got {first.shape} and {second.shape}"
        )
    return np.sqrt(np.sum((first - second) ** 2))

def knn_predict(dataset, labels, test, k):
    """Predict the label of `test` by majority vote of its k nearest neighbours.

    Parameters
    ----------
    dataset : sequence of points
        Training points; each is passed to `euclidean_distance` together
        with `test`.
    labels : sequence
        Label of each training point, in the same order as `dataset`.
    test : point
        The point to classify.
    k : int
        Number of nearest neighbours that take part in the vote. If `k`
        exceeds the number of training points, all of them vote.

    Returns
    -------
    The most common label among the k nearest training points. On a tie,
    `Counter.most_common` keeps the label encountered first, i.e. the one
    belonging to the nearer neighbour.

    Raises
    ------
    ValueError
        If `dataset` is empty, if `dataset` and `labels` differ in length,
        or if `k` is smaller than 1.
    """
    if len(dataset) != len(labels):
        raise ValueError(
            f"dataset and labels must have the same length, "
            f"got {len(dataset)} and {len(labels)}"
        )
    if len(dataset) == 0:
        raise ValueError("dataset must contain at least one training point")
    # A negative k would slice from the wrong end and silently vote over the
    # wrong neighbours; k == 0 would leave nothing to vote on.
    if k < 1:
        raise ValueError(f"k must be at least 1, got {k}")

    distances = [
        (euclidean_distance(point, test), label)
        for point, label in zip(dataset, labels)
    ]
    # Sort on the distance only, so labels never need to be comparable.
    distances.sort(key=lambda pair: pair[0])
    nearest_labels = [label for _, label in distances[:k]]
    return Counter(nearest_labels).most_common(1)[0][0]

# Load the sample data and classify a single query point with the manual KNN.
student_json_path = r"D:\PYTON PROGRAMMING\PYTHON FILES\Scikit-Learn\Data\Student Performance.json"
df = pd.read_json(student_json_path)
print(df)
print()

# Convert the two columns to plain Python lists before handing them to knn_predict.
training_points = df['training_data'].tolist()
training_labels = df['training_labels'].tolist()
query_point = [3, 5]
prediction = knn_predict(training_points, training_labels, query_point, 1)
print(prediction)


## Using scikit-learn

In [1]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import confusion_matrix, accuracy_score, f1_score

In [None]:
df = pd.read_csv(r"D:\PYTON PROGRAMMING\PYTHON FILES\Scikit-Learn\Data\diabetes.csv")

# In these columns a 0 is treated as "value not recorded", so mark zeros as
# missing. The gaps are filled *after* the split (below) so that statistics
# from the test rows never influence the training data.
zero_not_accepted = ['Glucose', 'BloodPressure', 'SkinThickness', 'BMI', 'Insulin']
for column in zero_not_accepted:
    df[column] = df[column].replace(0, np.nan)

# Split dataset: the first eight columns are used as features and the ninth
# as the target.
x = df.iloc[:, 0:8]
y = df.iloc[:, 8]
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=0)

# Impute missing values with the mean of the TRAINING rows only, and reuse
# those same means for the test rows. The mean is kept as a float rather than
# truncated to an int, so fractional columns are not biased downwards.
impute_columns = [column for column in zero_not_accepted if column in x_train.columns]
train_means = x_train[impute_columns].mean()
x_train = x_train.fillna(train_means)
x_test = x_test.fillna(train_means)

# Feature Scaling: fit on the training set only, then apply to both sets.
sc_x = StandardScaler()
x_train = sc_x.fit_transform(x_train)
x_test = sc_x.transform(x_test)

# Define the model. `p` is only the power parameter of the Minkowski metric,
# so it is omitted here because the metric is given explicitly as 'euclidean'.
classifier = KNeighborsClassifier(n_neighbors=5, metric='euclidean')
classifier.fit(x_train, y_train)

# Predict the test set results
y_pred = classifier.predict(x_test)
cm = confusion_matrix(y_test, y_pred)
print(cm)

print(f1_score(y_test, y_pred))
print(accuracy_score(y_test, y_pred))

In [16]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier

# Load the iris dataset and separate the feature matrix from the targets.
iris = load_iris()
x, y = iris.data, iris.target

# Hold out 20% of the samples for evaluation; the seed is fixed for repeatability.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=42,
)

# fit() returns the estimator itself, so construction and training can be chained.
knn = KNeighborsClassifier(n_neighbors=3).fit(x_train, y_train)

predictions = knn.predict(x_test)
accuracy = knn.score(x_test, y_test)
print("Accuracy: ", accuracy)

Accuracy:  1.0
