# **Importing necessary libraries**

In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import warnings as w
w.filterwarnings('ignore')
import matplotlib.pyplot as plt

># Q1. Write a Python code to implement the KNN classifier algorithm on load_iris dataset in sklearn.datasets.

In [2]:
from sklearn.datasets import load_iris
# Load the Iris dataset that ships with scikit-learn.
iris = load_iris()
# Print the dataset's bundled description text.
print(iris.DESCR)

.. _iris_dataset:

Iris plants dataset
--------------------

**Data Set Characteristics:**

    :Number of Instances: 150 (50 in each of three classes)
    :Number of Attributes: 4 numeric, predictive attributes and the class
    :Attribute Information:
        - sepal length in cm
        - sepal width in cm
        - petal length in cm
        - petal width in cm
        - class:
                - Iris-Setosa
                - Iris-Versicolour
                - Iris-Virginica
                
    :Summary Statistics:

                    Min  Max   Mean    SD   Class Correlation
    sepal length:   4.3  7.9   5.84   0.83    0.7826
    sepal width:    2.0  4.4   3.05   0.43   -0.4194
    petal length:   1.0  6.9   3.76   1.76    0.9490  (high!)
    petal width:    0.1  2.5   1.20   0.76    0.9565  (high!)

    :Missing Attribute Values: None
    :Class Distribution: 33.3% for each of 3 classes.
    :Creator: R.A. Fisher
    :Donor: Michael Marshall (MARSHALL%PLU@io.arc.nasa.gov)
    :

In [3]:
iris.feature_names,iris.target_names

(['sepal length (cm)',
  'sepal width (cm)',
  'petal length (cm)',
  'petal width (cm)'],
 array(['setosa', 'versicolor', 'virginica'], dtype='<U10'))

In [4]:
iris.data[0:5],iris.target[0:5]

(array([[5.1, 3.5, 1.4, 0.2],
        [4.9, 3. , 1.4, 0.2],
        [4.7, 3.2, 1.3, 0.2],
        [4.6, 3.1, 1.5, 0.2],
        [5. , 3.6, 1.4, 0.2]]),
 array([0, 0, 0, 0, 0]))

In [21]:
# Feature matrix and class labels taken from the loaded Iris dataset.
X, y = iris.data, iris.target

In [174]:
from sklearn.model_selection import train_test_split

# Hold out 30% of the samples for evaluation; the seed is fixed so the
# partition is the same on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=0,
    test_size=0.3,
)

In [175]:
from sklearn.neighbors import KNeighborsClassifier
# KNN classifier built with the library's default hyper-parameters (none are overridden here).
model = KNeighborsClassifier()

In [176]:
model.fit(X_train,y_train)

In [177]:
y_pred = model.predict(X_test)

In [178]:
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

# Emit, one per line: overall accuracy, the confusion matrix, and the
# per-class precision/recall/F1 report for the test-set predictions.
print(
    accuracy_score(y_test, y_pred),
    confusion_matrix(y_test, y_pred),
    classification_report(y_test, y_pred),
    sep='\n',
)

0.9777777777777777
[[16  0  0]
 [ 0 17  1]
 [ 0  0 11]]
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        16
           1       1.00      0.94      0.97        18
           2       0.92      1.00      0.96        11

    accuracy                           0.98        45
   macro avg       0.97      0.98      0.98        45
weighted avg       0.98      0.98      0.98        45



# 3x3 confusion matrix basic overview
<img width = '400' src ='https://www.researchgate.net/publication/365099641/figure/fig3/AS:11431281096367166@1668121079561/Confusion-Matrix-3x3.png'>

# Q2. Write a Python code snippet to find the optimal value of K for the KNN classifier algorithm using cross-validation on load_iris dataset in sklearn.datasets.

In [179]:
from sklearn.model_selection import GridSearchCV

# Hyper-parameter grid handed to the cross-validated search.
# NOTE(review): only K = 5..10 is listed, so smaller neighbourhood sizes are
# never evaluated by the search.
parameters = dict(
    n_neighbors=list(range(5, 11)),
    weights=['uniform', 'distance'],
    algorithm=['ball_tree', 'kd_tree', 'brute'],
)

In [180]:
# 5-fold cross-validated search over `parameters`, ranking candidates by accuracy.
grid_model = GridSearchCV(
    estimator=model,
    param_grid=parameters,
    cv=5,
    scoring='accuracy',
)
grid_model.fit(X_train, y_train)

In [181]:
grid_model.best_params_

{'algorithm': 'brute', 'n_neighbors': 5, 'weights': 'uniform'}

In [182]:
grid_model.score(X_test,y_test)

0.9777777777777777

# Q3. Write a Python code snippet to implement the KNN classifier algorithm with weighted voting on load_iris dataset in sklearn.datasets.

In [183]:
# Feature matrix and class labels taken from the loaded Iris dataset.
X, y = iris.data, iris.target

In [203]:
from sklearn.model_selection import train_test_split

# 30% of the samples are held out for testing; the seed is fixed for repeatability.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=5,
    test_size=0.3,
)

In [204]:
from sklearn.neighbors import KNeighborsClassifier
# weights='distance' switches the classifier from uniform to distance-weighted voting.
model = KNeighborsClassifier(weights='distance')
# Fit on the training split; as the cell's last expression, the fitted estimator is displayed.
model.fit(X_train,y_train)

In [205]:
y_pred = model.predict(X_test)

In [206]:
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

# Emit, one per line: overall accuracy, the confusion matrix, and the
# per-class precision/recall/F1 report for the test-set predictions.
print(
    accuracy_score(y_test, y_pred),
    confusion_matrix(y_test, y_pred),
    classification_report(y_test, y_pred),
    sep='\n',
)

0.9555555555555556
[[15  0  0]
 [ 0 14  2]
 [ 0  0 14]]
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        15
           1       1.00      0.88      0.93        16
           2       0.88      1.00      0.93        14

    accuracy                           0.96        45
   macro avg       0.96      0.96      0.96        45
weighted avg       0.96      0.96      0.96        45



# Q4. Implement a function to standardise the features before applying KNN classifier.

>## Train-Test Split

In [207]:
# Feature matrix and class labels taken from the loaded Iris dataset.
X, y = iris.data, iris.target

In [222]:
from sklearn.model_selection import train_test_split

# 30% of the samples are held out for testing; the seed is fixed for repeatability.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=7,
    test_size=0.3,
)

>## Feature Scaling

In [223]:
from sklearn.preprocessing import StandardScaler
# Create a StandardScaler instance; it is not fitted to any data at this point.
scaler = StandardScaler()

In [224]:
def scaling(train, test, scaler=None):
    """Standardise a train/test pair of feature matrices.

    The scaler is fitted on ``train`` only and the resulting transformation
    is then applied to ``test``, so no information from the test split is
    used when fitting.

    Parameters
    ----------
    train : array-like
        Training features; used both to fit the scaler and to be transformed.
    test : array-like
        Test features; transformed with the scaler fitted on ``train``.
    scaler : object, optional
        Any object exposing ``fit_transform`` and ``transform``. When omitted,
        a fresh ``StandardScaler`` is created, so the function no longer
        depends on a module-level ``scaler`` having been defined by an
        earlier cell.

    Returns
    -------
    tuple
        ``(train_scaled, test_scaled)``.
    """
    if scaler is None:
        # Imported lazily so callers that pass their own scaler need no sklearn.
        from sklearn.preprocessing import StandardScaler
        scaler = StandardScaler()
    train_scaled = scaler.fit_transform(train)
    test_scaled = scaler.transform(test)
    return train_scaled, test_scaled

In [225]:
X_train,X_test = scaling(X_train,X_test)

>## Model Training

In [226]:
from sklearn.neighbors import KNeighborsClassifier
# KNN classifier built with the library's default hyper-parameters (none are overridden here).
model = KNeighborsClassifier()
# Fit on the training split; as the cell's last expression, the fitted estimator is displayed.
model.fit(X_train,y_train)

In [227]:
y_pred = model.predict(X_test)

In [228]:
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

# Emit, one per line: overall accuracy, the confusion matrix, and the
# per-class precision/recall/F1 report for the test-set predictions.
print(
    accuracy_score(y_test, y_pred),
    confusion_matrix(y_test, y_pred),
    classification_report(y_test, y_pred),
    sep='\n',
)

0.9111111111111111
[[12  0  0]
 [ 0 16  0]
 [ 0  4 13]]
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        12
           1       0.80      1.00      0.89        16
           2       1.00      0.76      0.87        17

    accuracy                           0.91        45
   macro avg       0.93      0.92      0.92        45
weighted avg       0.93      0.91      0.91        45



># Q5. Write a Python code to implement the KNN regressor algorithm on boston dataset in sklearn.datasets.

In [12]:
from sklearn.datasets import fetch_openml

# Download the Boston housing data from OpenML.
# The version is pinned explicitly: a bare name lookup leaves the choice of
# dataset version to the library, which scikit-learn's documentation advises
# against because versions of one dataset name can differ substantially.
boston = fetch_openml(name="boston", version=1)

In [48]:
print(boston.feature_names,boston.target_names)

['CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE', 'DIS', 'RAD', 'TAX', 'PTRATIO', 'B', 'LSTAT'] ['MEDV']


In [49]:
print(boston.data[0:5],boston.target[0:5])

      CRIM    ZN  INDUS CHAS    NOX     RM   AGE     DIS RAD    TAX  PTRATIO   
0  0.00632  18.0   2.31    0  0.538  6.575  65.2  4.0900   1  296.0     15.3  \
1  0.02731   0.0   7.07    0  0.469  6.421  78.9  4.9671   2  242.0     17.8   
2  0.02729   0.0   7.07    0  0.469  7.185  61.1  4.9671   2  242.0     17.8   
3  0.03237   0.0   2.18    0  0.458  6.998  45.8  6.0622   3  222.0     18.7   
4  0.06905   0.0   2.18    0  0.458  7.147  54.2  6.0622   3  222.0     18.7   

        B  LSTAT  
0  396.90   4.98  
1  396.90   9.14  
2  392.83   4.03  
3  394.63   2.94  
4  396.90   5.33   0    24.0
1    21.6
2    34.7
3    33.4
4    36.2
Name: MEDV, dtype: float64


In [50]:
# Predictor columns and the regression target from the fetched dataset.
X, y = boston.data, boston.target

In [51]:
from sklearn.model_selection import train_test_split

# 20% of the rows are held out for testing; the seed is fixed for repeatability.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=5646,
    test_size=0.2,
)

In [52]:
from sklearn.neighbors import KNeighborsRegressor
# KNN regressor built with the library's default hyper-parameters (none are overridden here).
model = KNeighborsRegressor()

In [53]:
model.fit(X_train,y_train)

In [54]:
model.score(X_test,y_test)

0.7560718080579734

# Q6. Implement the KNN regressor algorithm with feature scaling on boston dataset in sklearn.datasets.

In [40]:
from sklearn.datasets import fetch_openml

# Download the Boston housing data from OpenML.
# The version is pinned explicitly: a bare name lookup leaves the choice of
# dataset version to the library, which scikit-learn's documentation advises
# against because versions of one dataset name can differ substantially.
df = fetch_openml(name='boston', version=1)

In [41]:
# Predictor columns and the regression target from the fetched dataset.
X, y = df.data, df.target

In [43]:
from sklearn.model_selection import train_test_split

# 20% of the rows are held out for testing; the seed is fixed for repeatability.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=5646,
    test_size=0.2,
)

>## Feature Scaling

In [44]:
from sklearn.preprocessing import StandardScaler
# Create a StandardScaler instance; it is not fitted to any data at this point.
scaler = StandardScaler()

In [45]:
# The scaler is fitted on the training split only; the test split is then
# transformed with the fitted scaler. The right-hand side is evaluated
# left to right, so the fit happens before the test transform.
X_train, X_test = scaler.fit_transform(X_train), scaler.transform(X_test)

In [46]:
from sklearn.neighbors import KNeighborsRegressor
# KNN regressor built with the library's default hyper-parameters (none are overridden here).
model = KNeighborsRegressor()
# Fit on the scaled training split; as the cell's last expression, the fitted estimator is displayed.
model.fit(X_train,y_train)

In [47]:
model.score(X_test,y_test)

0.7777543073271829

# Q7. Write a Python function to calculate the euclidean distance between two points.
## The Euclidean distance between two points
# $$(x_1 , y_1)$$ and $$(x_2 , y_2)$$ is given by $$d = \sqrt{(x_2 - x_1)^2 + (y_2 - y_1)^2}$$

In [56]:
from math import sqrt

In [57]:
def euclidean(a, b):
    """Return the Euclidean (straight-line) distance between two points.

    Works for points of any dimensionality, not only 2-D: the result is the
    square root of the sum of squared coordinate differences.

    Parameters
    ----------
    a, b : sequence of numbers
        Coordinates of the two points; both must have the same length.

    Returns
    -------
    float
        The distance between ``a`` and ``b``.

    Raises
    ------
    ValueError
        If the two points do not have the same number of coordinates.
    """
    if len(a) != len(b):
        # zip() would silently drop the extra coordinates of the longer point.
        raise ValueError(
            f'points must have the same number of coordinates, got {len(a)} and {len(b)}'
        )
    return sqrt(sum((q - p) ** 2 for p, q in zip(a, b)))

In [87]:
point_A,point_B = (0,0),(5,12)

In [88]:
print(f'Euclidean distance: {euclidean(point_A,point_B)}')

Euclidean distance: 13.0


<img width ='700' src ='https://www.researchgate.net/profile/Jose-Martinez-245/publication/343237167/figure/fig24/AS:917936684404746@1595864568479/Euclidean-and-Manhattan-distance-comparison-3235-Optimizations-The-first-optimization.png'>

# Q8. Write a Python function to calculate the manhattan distance between two points.
## The Manhattan distance between two points
# $$(x_1 , y_1)$$ and $$(x_2 , y_2)$$ is given by $$d = |x_1 - x_2| + |y_1 - y_2|$$

In [77]:
def manhattan(a, b):
    """Return the Manhattan (city-block) distance between two points.

    Works for points of any dimensionality, not only 2-D: the result is the
    sum of the absolute coordinate differences.

    Parameters
    ----------
    a, b : sequence of numbers
        Coordinates of the two points; both must have the same length.

    Returns
    -------
    number
        The distance between ``a`` and ``b`` (an int when all coordinates
        are ints).

    Raises
    ------
    ValueError
        If the two points do not have the same number of coordinates.
    """
    if len(a) != len(b):
        # zip() would silently drop the extra coordinates of the longer point.
        raise ValueError(
            f'points must have the same number of coordinates, got {len(a)} and {len(b)}'
        )
    return sum(abs(p - q) for p, q in zip(a, b))

In [89]:
point_A,point_B = (0,0),(5,12)

In [86]:
print(f'Manhattan distance: {manhattan(point_A,point_B)}')

Manhattan distance: 17
