In [16]:
import numpy as np

# Libraies For Unsupervises KNN
from sklearn.neighbors import NearestNeighbors

# Load and Extracting Scikit-learn Dataset
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Library For Supervised KNN
from sklearn.neighbors import KNeighborsRegressor
from sklearn.neighbors import RadiusNeighborsRegressor

## Unsupervised KNN
___

In [17]:
Input_data = np.array([[-1, 1], [-2, 2], [-3, 3], [1, 2], [2, 3], [3, 4],[4, 5]])
nrst_neigh = NearestNeighbors(n_neighbors = 3, algorithm = 'ball_tree')
nrst_neigh.fit(Input_data)

In [24]:
distances, indices = nrst_neigh.kneighbors(Input_data)
print(f'Distances:\n {distances} \nIndices:\n {indices}')

Distances:
 [[0.         1.41421356 2.23606798]
 [0.         1.41421356 1.41421356]
 [0.         1.41421356 2.82842712]
 [0.         1.41421356 2.23606798]
 [0.         1.41421356 1.41421356]
 [0.         1.41421356 1.41421356]
 [0.         1.41421356 2.82842712]] 
Indices:
 [[0 1 3]
 [1 2 0]
 [2 1 0]
 [3 4 0]
 [4 5 3]
 [5 6 4]
 [6 5 4]]


In [19]:
nrst_neigh.kneighbors_graph(Input_data).toarray()

array([[1., 1., 0., 1., 0., 0., 0.],
       [1., 1., 1., 0., 0., 0., 0.],
       [1., 1., 1., 0., 0., 0., 0.],
       [1., 0., 0., 1., 1., 0., 0.],
       [0., 0., 0., 1., 1., 1., 0.],
       [0., 0., 0., 0., 1., 1., 1.],
       [0., 0., 0., 0., 1., 1., 1.]])

## Supervised KNN Learning
**KNeighborsRegressor**:

Uses a specified number of nearest neighbor points (k) to make predictions on new data points.

Makes predictions by taking the average of the target values of the k nearest neighbors.

Requires choosing a good value for k - lower values lead to overfitting, higher values underfit.

Computes distances using the Minkowski metric to find nearest neighbors. Common choices are Euclidean (l2) and Manhattan (l1) distances.

Can weigh nearer neighbors more heavily than farther ones using kernel weighting.

Simple and intuitive approach, but doesn't generalize as well as more complex regressors.

**RadiusNeighborsClassifier**:

Finds all neighbors within a fixed radius r around a point, rather than finding k nearest neighbors.

Radio can be tuned as a hyperparameter. Lower radius risks overfitting, higher risks underfitting.

Weights nearby points higher than distant points within the radius.

Scaling features is important so distances are measured uniformly.

Has advantages over KNN when data is not uniformly dense.

Can be more effective with proper radius tuning, but harder to pick a good parameter.
___

In [28]:
"""
This codeblock demonstrates a scikit-learning workflow for regression using the KNeighborsRegressor on the Iris dataset.
"""

iris = load_iris()

"""
Load the Iris flower dataset and extract the features (X) and targets (y).
"""

X = iris.data[:, :4]  
y = iris.target

"""
Split the data into 80% train and 20% test sets.
"""

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20)

"""
Scale the feature data using StandardScaler.
"""

scaler = StandardScaler() 
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

"""
Train a KNeighborsRegressor model on the training data.
"""

knnr = KNeighborsRegressor(n_neighbors=8)
knnr.fit(X_train, y_train)

"""
Evaluate model performance using mean squared error on test set.
"""

print ("The MSE is:",format(np.power(y-knnr.predict(X),4).mean()))

"""
Create a simple dummy regression dataset.
"""

X = [[0], [1], [2], [3]]
y = [0, 0, 1, 1]

"""
Train another KNeighborsRegressor on the dummy data.
"""

knnr = KNeighborsRegressor(n_neighbors=3) 
knnr.fit(X, y)

"""
Make a prediction on a new data point using the fitted model.
"""

print(knnr.predict([[2.5]]))

The MSE is: 4.4333349609375
[0.66666667]


### RadiusNeighborsRegressor
____

In [29]:
"""
This codeblock demonstrates a scikit-learning workflow for regression using RadiusNeighborsRegressor on the Iris dataset.
"""

"""
Load the Iris dataset.
"""

iris = load_iris()

X = iris.data[:, :4]
y = iris.target

"""
Split data into 80% train and 20% test sets. 
"""

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.20)

"""
Scale the features using StandardScaler.
"""

scaler = StandardScaler()
scaler.fit(X_train)

X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

"""
Train a RadiusNeighborsRegressor model.
"""

knnr_r = RadiusNeighborsRegressor(radius = 1)
knnr_r.fit(X_train, y_train)

"""
Evaluate model performance.
"""

print ("The MSE is:",format(np.power(y-knnr_r.predict(X),4).mean()))

"""
Create a simple dummy regression dataset.  
"""

X = [[0], [1], [2], [3]]
y = [0, 0, 1, 1]

"""
Train a RadiusNeighborsRegressor on the dummy data.
"""

knnr_r = RadiusNeighborsRegressor(radius = 1)
knnr_r.fit(X, y)

"""
Make a prediction on new data point.
"""

print(knnr_r.predict([[2.5]]))

The MSE is: 5.666666666666667
[1.]


