In [None]:
# 1. Now, we're going to implement the KNN classification algorithm from scratch using Python OOP concepts and the breast cancer dataset

In [None]:
# Step 1: Importing libraries

import math
from collections import Counter
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split

In [None]:
# Step 2: Loading and splitting the dataset

# Load the breast-cancer dataset, then expose its feature matrix and targets
# under the short names used by the rest of the notebook.
data = load_breast_cancer()
X = data.data
y = data.target

In [None]:
# Hold out 30% of the samples for testing; the fixed random_state keeps the
# split identical across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

In [None]:
# Step 3: Defining the class and its methods

class KNNClassifier:
  """k-nearest-neighbours classifier written with plain Python loops.

  A query sample receives the label that occurs most often among the ``k``
  training samples closest to it in Euclidean distance.
  """

  def __init__(self, k=5):
    """Remember how many neighbours take part in each vote (default 5)."""
    self.k = k

  def fit(self, X, y):
    """Keep references to the training samples ``X`` and their labels ``y``.

    Nothing is computed here; ``predict`` indexes both objects later.
    """
    self.X_train = X
    self.y_train = y

  def euclidean(self, a, b):
    """Return the Euclidean distance between coordinate sequences ``a`` and ``b``.

    Coordinates are paired with ``zip``, so extra trailing coordinates of the
    longer sequence are ignored.
    """
    squared_total = 0
    for p, q in zip(a, b):
      squared_total += (p - q) ** 2
    return math.sqrt(squared_total)

  def predict(self, X):
    """Return a list with one predicted label per sample in ``X``."""
    preds = []

    for query in X:
      # Pair every training sample's distance to the query with its label.
      scored = []
      for idx in range(len(self.X_train)):
        scored.append((self.euclidean(query, self.X_train[idx]), self.y_train[idx]))

      # Stable sort by distance only, then keep the k closest entries.
      nearest = sorted(scored, key=lambda pair: pair[0])[:self.k]

      # Majority vote among the neighbours' labels.
      votes = Counter(label for _, label in nearest)
      winner, _count = votes.most_common(1)[0]
      preds.append(winner)

    return preds

In [None]:
# Step 4: Model initialization, training, and prediction

# Build a 5-neighbour classifier, hand it the training split, and label the
# held-out samples.
knn = KNNClassifier(k=5)
knn.fit(X=X_train, y=y_train)
y_pred = knn.predict(X=X_test)

In [None]:
# Step 5: Model evaluation

# Accuracy = share of held-out samples whose predicted label equals the true one.
accuracy = sum(y_pred[idx] == actual for idx, actual in enumerate(y_test)) / len(y_test)
print("From-scratch Classification Accuracy:", accuracy)


From-scratch Classification Accuracy: 0.9590643274853801


In [None]:
# 2. Now, we're going to implement the KNN regression algorithm from scratch using the diabetes dataset

In [None]:
# step 1: Importing the libraries

import math
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split

In [None]:
# Step 2: Loading and splitting the dataset

# Load the diabetes dataset and unpack its feature matrix and targets.
data = load_diabetes()
X, y = data.data, data.target

# Hold out 30% of the samples for testing, with a fixed seed for a repeatable split.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

In [None]:
# Step 3: Implementing the methods and attributes

class KNNRegressor:
  """k-nearest-neighbours regressor implemented with plain Python loops.

  A prediction is the arithmetic mean of the targets of the (up to) ``k``
  training samples closest to the query in Euclidean distance.
  """

  def __init__(self, k=5):
    """Store the number of neighbours ``k`` to average over (default 5)."""
    self.k = k

  def fit(self, X, y):
    """Keep references to the training samples ``X`` and their targets ``y``.

    No computation happens here; ``predict`` indexes both objects later.
    """
    self.X_train = X
    self.y_train = y

  def euclidean(self, a, b):
    """Return the Euclidean distance between coordinate sequences ``a`` and ``b``.

    Coordinates are paired with ``zip``, so extra trailing coordinates of the
    longer sequence are ignored.
    """
    return math.sqrt(sum((p - q) ** 2 for p, q in zip(a, b)))

  def predict(self, X):
    """Predict a target for every sample in ``X``.

    Returns:
      A list holding one averaged target per sample of ``X``, in order.

    Raises:
      ValueError: if no neighbour was selected for a sample (for example
        ``k == 0`` or an empty training set), since a mean is undefined then.
    """
    preds = []
    for x in X:
      # Pair every training sample's distance to the query with its target.
      distances = [(self.euclidean(x, self.X_train[i]), self.y_train[i]) for i in range(len(self.X_train))]
      # Sort by distance only; the sort is stable, so ties keep training order.
      distances.sort(key=lambda pair: pair[0])
      values = [val for _, val in distances[:self.k]]

      if not values:
        raise ValueError("KNNRegressor.predict found no neighbours; check k and the training data")

      # Divide by the number of neighbours actually collected: the slice holds
      # fewer than k entries when the training set is smaller than k.
      preds.append(sum(values) / len(values))

    return preds

In [None]:
# Step 4: Model training and prediction

knn = KNNRegressor(k=5)
knn.fit(X_train, y_train)

# Store the predictions in y_pred: the evaluation cell reads that name, and
# without this assignment it would still hold the classifier's predictions
# from earlier in the notebook.
y_pred = knn.predict(X_test)

# Show only the first few predictions instead of dumping the whole list.
y_pred[:5]

[np.float64(125.6),
 np.float64(160.2),
 np.float64(153.0),
 np.float64(238.0),
 np.float64(153.4),
 np.float64(150.4),
 np.float64(246.2),
 np.float64(170.0),
 np.float64(106.6),
 np.float64(104.6),
 np.float64(93.2),
 np.float64(151.6),
 np.float64(98.0),
 np.float64(166.6),
 np.float64(61.4),
 np.float64(103.2),
 np.float64(251.0),
 np.float64(252.0),
 np.float64(173.6),
 np.float64(242.0),
 np.float64(172.8),
 np.float64(86.6),
 np.float64(106.6),
 np.float64(207.4),
 np.float64(158.2),
 np.float64(150.0),
 np.float64(225.2),
 np.float64(139.2),
 np.float64(70.8),
 np.float64(117.2),
 np.float64(135.6),
 np.float64(166.6),
 np.float64(89.0),
 np.float64(158.4),
 np.float64(166.4),
 np.float64(237.6),
 np.float64(73.8),
 np.float64(136.8),
 np.float64(148.6),
 np.float64(109.8),
 np.float64(83.2),
 np.float64(99.4),
 np.float64(140.2),
 np.float64(143.4),
 np.float64(193.4),
 np.float64(81.6),
 np.float64(83.6),
 np.float64(108.6),
 np.float64(84.4),
 np.float64(142.6),
 np.float64(

In [None]:
# Step 5: Model evaluation

# Mean squared error over the held-out samples. The squared residuals are
# summed over len(y_test) samples, so the average must use that same count
# (not the size of the full dataset).
mse = sum((y_pred[i] - y_test[i]) ** 2 for i in range(len(y_test))) / len(y_test)
print("From-scratch Regression MSE:", mse)

From-scratch Regression MSE: 8159.309954751131


In [None]:
# 3. KNN classification implementation using scikit-learn

In [None]:
# Step1: Importing libraries

from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score

In [None]:
# Step 2: Load and split the dataset

# Reload the breast-cancer dataset and unpack its feature matrix and targets.
data = load_breast_cancer()
X, y = data.data, data.target

# Hold out 30% of the samples for testing, with a fixed seed for a repeatable split.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

In [None]:
# Step 3: Model definition

# Two-step pipeline: standardise the features, then pass them to a
# 5-neighbour KNN classifier.
model = Pipeline(
    steps=[
        ("scaler", StandardScaler()),
        ("knn", KNeighborsClassifier(n_neighbors=5)),
    ]
)

In [None]:
# Step 4: Model training and prediction

# fit() returns the fitted pipeline itself, so training and prediction chain
# into a single expression.
y_pred = model.fit(X_train, y_train).predict(X_test)

In [None]:
# Step 5: Model evaluation

# Report the share of test samples the pipeline labelled correctly.
print(
    "sklearn Classification Accuracy:",
    accuracy_score(y_true=y_test, y_pred=y_pred),
)

sklearn Classification Accuracy: 0.9590643274853801


In [None]:
# 4. Sklearn KNN - regression

In [None]:
# Step 1: Importing libraries

from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.metrics import mean_squared_error

In [None]:
# Step 2: Loading dataset and splitting it

# Reload the diabetes dataset and unpack its feature matrix and targets.
data = load_diabetes()
X, y = data.data, data.target

# Hold out 30% of the samples for testing, with a fixed seed for a repeatable split.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

In [None]:
# Step 3: Defining the model

# Two-step pipeline: standardise the features, then pass them to a
# 5-neighbour KNN regressor. Every step has to be a (name, estimator) tuple;
# a set literal ({...}) has no defined order, so the name and the estimator
# could be unpacked the wrong way round.
model = Pipeline([
    ('scaler', StandardScaler()),
    ('knn', KNeighborsRegressor(n_neighbors=5))
])

In [None]:
# Step 4: Model training and prediction

# fit() returns the fitted pipeline itself, so training and prediction chain
# into a single expression.
y_pred = model.fit(X_train, y_train).predict(X_test)


In [None]:
# Step 5: Model evaluation

# Report the mean squared error of the pipeline on the held-out samples.
print(
    "sklearn Regression MSE:",
    mean_squared_error(y_true=y_test, y_pred=y_pred),
)

sklearn Regression MSE: 3277.7368421052633
