In [None]:
'''
    K Nearest Neighbours -->
    
    K-Nearest Neighbors (KNN) is a supervised machine learning algorithm that is used
    for both classification and regression tasks. It classifies or predicts a data point's
    output based on the output of its closest neighbors in the training dataset.
    The "K" in KNN refers to the number of nearest neighbors considered to make a decision.
'''

<img src='Source/Graph.png' alt='Graph' style="width:700px; height:auto; margin-left:40px;">

In [None]:
'''
    K (Number of Neighbors) -->

    The value of K determines how many neighbors influence the decision.
    K=1 : The prediction is based solely on the nearest neighbor (can lead to overfitting).
    K>1 : The prediction considers a broader context (less sensitive to noise).
    
    Distance Metrics: KNN relies on measuring the distance between data points to identify neighbors
    
    Weighting Neighbors:

    Assign more weight to closer neighbors, as they are likely to have more influence.
    Example: Use a weighting scheme like 1/distance.
    
    Decision Rule:

    Classification : Predict the majority class label among the K nearest neighbors.
    Regression : Predict the average (or weighted average) value of the K nearest neighbors.
'''

<img src='Source/Distance.png' alt='Distance' style="width:500px; height:auto; margin-left:40px;">

In [None]:
'''
    How KNN Works -->
    
    Choose a Value for K :
    K is the number of neighbors to consider (e.g., K=3 means the 3 nearest points will be considered).
    
    Measure Distance :
    Calculate the distance between the test point and all training points using a distance metric.
    
    Find the K Nearest Neighbors :
    Identify the K training samples closest to the test sample.
    
    Make Predictions :

    For Classification :
    Assign the class that is most frequent among the K neighbors (majority vote).

    For Regression :
    Predict the average (or weighted average) of the values of the K neighbors.
'''

In [None]:
'''
    Advantages of KNN -->
    
    Simple to Implement: Easy to understand and directly applicable.
    No Training Phase: No model is trained, making it fast for small datasets.
    Adaptable: Works for both classification and regression tasks.
    Non-Parametric: No assumptions about the data distribution.
    
    Disadvantages of KNN -->
    
    Computationally Expensive : High memory and time consumption as it calculates the distance for all points
    Curse of Dimensionality : Performance degrades with high-dimensional data because distances become less meaningful.
    Sensitive to Noise : Outliers can significantly impact results.
'''

In [None]:
'''
    Tips for KNN -->
    
    Choosing K : Use cross-validation to select the best K.
    Smaller K : Sensitive to noise.
    Larger K : Can smooth out noise but may overlook finer patterns.

    Scaling Features :
    Normalize or standardize features since KNN relies on distances.
    
    Use Weighted KNN :
    Assign weights to neighbors based on their distances (closer neighbors have more influence).

    Dimensionality Reduction :
    Apply PCA or feature selection to reduce dimensionality for better performance.
'''

In [15]:
#   Importing Libraries -->

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

In [None]:
#   Importing Dataset -->

data = pd.read_csv('Data/Social_Network_Ads.csv')
data.head(10)

Unnamed: 0,Age,EstimatedSalary,Purchased
0,19,19000,0
1,35,20000,0
2,26,43000,0
3,27,57000,0
4,19,76000,0
5,27,58000,0
6,27,84000,0
7,32,150000,1
8,25,33000,0
9,35,65000,0


In [3]:
x_data = data.iloc[:, :-1].values
y_data = data.iloc[:, -1].values

In [None]:
#   Splitting Data -->

x_train, x_test, y_train, y_test = train_test_split(x_data, y_data, test_size=0.25, random_state=42)

In [5]:
x_train

array([[    57, 122000],
       [    39,  71000],
       [    47,  25000],
       [    24,  19000],
       [    36,  50000],
       [    32, 150000],
       [    48,  29000],
       [    30, 107000],
       [    60,  34000],
       [    38,  61000],
       [    33,  31000],
       [    39,  71000],
       [    55,  39000],
       [    49,  39000],
       [    43, 112000],
       [    27,  20000],
       [    26,  17000],
       [    37,  93000],
       [    42,  54000],
       [    35,  61000],
       [    29,  75000],
       [    38,  80000],
       [    45,  26000],
       [    54, 108000],
       [    46,  23000],
       [    23,  28000],
       [    37,  75000],
       [    42,  65000],
       [    35,  71000],
       [    51, 146000],
       [    39,  96000],
       [    24,  89000],
       [    58,  95000],
       [    25,  22000],
       [    41,  59000],
       [    28,  89000],
       [    42,  80000],
       [    42, 108000],
       [    46,  96000],
       [    47, 113000],


In [11]:
y_train

array([1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0,
       1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1,
       0, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0,
       1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0,
       0, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1,
       0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0,
       0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0,
       1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0,
       0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0,
       0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0], d

In [8]:
#   Scaling Features -->

sc = StandardScaler()
x_train = sc.fit_transform(x_train)
x_test = sc.transform(x_test)

In [9]:
x_train

array([[ 1.8925893 ,  1.52189404],
       [ 0.1250379 ,  0.03213212],
       [ 0.9106163 , -1.31157471],
       [-1.34792161, -1.48684082],
       [-0.169554  , -0.58129926],
       [-0.56234321,  2.33980255],
       [ 1.0088136 , -1.19473064],
       [-0.75873781,  1.08372877],
       [ 2.1871812 , -1.04867555],
       [ 0.0268406 , -0.25997806],
       [-0.46414591, -1.1363086 ],
       [ 0.1250379 ,  0.03213212],
       [ 1.6961947 , -0.90262046],
       [ 1.1070109 , -0.90262046],
       [ 0.5178271 ,  1.22978386],
       [-1.05332971, -1.4576298 ],
       [-1.15152701, -1.54526286],
       [-0.0713567 ,  0.67477452],
       [ 0.4196298 , -0.46445519],
       [-0.2677513 , -0.25997806],
       [-0.85693511,  0.14897619],
       [ 0.0268406 ,  0.29503128],
       [ 0.7142217 , -1.28236369],
       [ 1.5979974 ,  1.11293979],
       [ 0.812419  , -1.36999675],
       [-1.44611891, -1.22394166],
       [-0.0713567 ,  0.14897619],
       [ 0.4196298 , -0.14313399],
       [-0.2677513 ,

In [10]:
x_test

array([[ 0.812419  , -1.39920777],
       [ 2.0889839 ,  0.52871943],
       [-0.95513241, -0.75656537],
       [ 1.0088136 ,  0.76240757],
       [-0.85693511, -1.22394166],
       [-0.75873781, -0.23076704],
       [ 0.9106163 ,  1.08372877],
       [-0.85693511,  0.38266434],
       [ 0.2232352 ,  0.14897619],
       [ 0.4196298 , -0.14313399],
       [-0.2677513 , -0.14313399],
       [ 1.4998001 , -1.04867555],
       [-1.44611891, -0.6397213 ],
       [-1.74071081, -1.36999675],
       [-0.75873781,  0.49950841],
       [-0.2677513 ,  1.11293979],
       [ 1.4016028 , -0.93183148],
       [ 0.812419  ,  0.11976517],
       [ 0.1250379 , -0.8149874 ],
       [ 1.794392  , -0.28918908],
       [-1.54431621, -1.25315268],
       [-0.85693511,  0.29503128],
       [ 0.9106163 , -1.36999675],
       [ 2.0889839 ,  0.17818721],
       [-1.83890811, -1.48684082],
       [ 1.3034055 , -1.36999675],
       [ 0.4196298 ,  0.29503128],
       [-0.0713567 , -0.49366621],
       [ 1.6961947 ,

In [None]:
#   Building Model -->

model = KNeighborsClassifier(n_neighbors=5, metric="minkowski", p=2)
model.fit(x_train, y_train)

In [None]:
#   Predicting Results -->

y_pred = model.predict(x_test)
y_pred

array([1, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0,
       1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1,
       0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1,
       1, 1, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0,
       0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 1], dtype=int64)

In [16]:
#   Accuracy Score, Confusion Matrix and Classification Report

acc_score = accuracy_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)
class_report = classification_report(y_test, y_pred)

In [17]:
print(acc_score)

0.93


In [18]:
print(conf_matrix)

[[59  4]
 [ 3 34]]


In [19]:
print(class_report)

              precision    recall  f1-score   support

           0       0.95      0.94      0.94        63
           1       0.89      0.92      0.91        37

    accuracy                           0.93       100
   macro avg       0.92      0.93      0.93       100
weighted avg       0.93      0.93      0.93       100

