Q1. Write a Python code to implement the KNN classifier algorithm on load_iris dataset in sklearn.datasets.

In [1]:
from sklearn.datasets import load_iris
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

data = load_iris()
data.keys()

# lets divide the data set 
x = pd.DataFrame(data.data, columns=data.feature_names) 
y = data.target

# lets splite the data points 
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=0)

# model traing 
k_clf = KNeighborsClassifier() 
k_clf.fit(x_train, y_train)

# prediction to test value 
y_pred = k_clf.predict(x_test)

# Evaluation 
print('Accuracy Score : ', accuracy_score(y_test, y_pred))

Accuracy Score :  0.9666666666666667


Q3. Write a Python code snippet to find the optimal value of K for the KNN classifier algorithm using cross-validation on load_iris dataset in sklearn.datasets.

In [2]:
# performing Cross-Validation 

# defining parameters
parameters = {'n_neighbors': [1,2,3,4,5,6,7,8,9,10],
              'weights':['uniform','distance'],
                'p':[1,2]}

from sklearn.model_selection import GridSearchCV

tunned_model = GridSearchCV(k_clf, param_grid=parameters, cv = 5,scoring = 'accuracy')

tunned_model.fit(x_train, y_train)
print( 'Best Parametrs :', tunned_model.best_params_)

# Evaluation 
y_pred = k_clf.predict(x_test)

# Evaluation 
print('Accuracy Score : ', accuracy_score(y_test, y_pred))

Best Parametrs : {'n_neighbors': 8, 'p': 2, 'weights': 'uniform'}
Accuracy Score :  0.9666666666666667


Q2. Write a Python code to implement the KNN regressor algorithm on load_boston dataset in sklearn.datasets.

In [3]:
import pandas as pd
import numpy as np
from sklearn.neighbors import KNeighborsRegressor
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error

data_url = "http://lib.stat.cmu.edu/datasets/boston"
raw_df = pd.read_csv(data_url, sep="\s+", skiprows=22, header=None)
data = np.hstack([raw_df.values[::2, :], raw_df.values[1::2, :2]])
target = raw_df.values[1::2, 2]

# Define column names for the DataFrame
column_names = ["CRIM", "ZN", "INDUS", "CHAS", "NOX", "RM", "AGE", "DIS", "RAD", "TAX", "PTRATIO", "B", "LSTAT"]

# Create a DataFrame with column names
boston_df = pd.DataFrame(data, columns=column_names)

# Add the 'target' column to the DataFrame
boston_df["MEDV"] = target

# lets divide the data set 
x = boston_df.drop(['MEDV'], axis = 1)
y = target

# lets split the data points 
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)

# model traiing 
k_reg = KNeighborsRegressor()
k_reg.fit(x_train, y_train)

# prediction for test data
y_pred = k_reg.predict(x_test)

# Evaluation
print('r2_score :', r2_score(y_test, y_pred))
print('MSE', mean_squared_error(y_test, y_pred))
print('MAE', mean_absolute_error(y_test, y_pred))

r2_score : 0.6473640882039258
MSE 25.860125490196076
MAE 3.6639215686274507


Q4. Implement the KNN regressor algorithm with feature scaling on load_boston dataset in sklearn.datasets.

In [4]:
from sklearn.preprocessing import StandardScaler

# iniciating standersclare 
scaler = StandardScaler()
x_train_scaled = scaler.fit_transform(x_train)
x_test_scaled = scaler.transform(x_test)

# model traiing 
k_reg = KNeighborsRegressor()
k_reg.fit(x_train_scaled, y_train)

# prediction for test data
y_pred = k_reg.predict(x_test_scaled)

# Evaluation
print('r2_score :', r2_score(y_test, y_pred))
print('MSE', mean_squared_error(y_test, y_pred))
print('MAE', mean_absolute_error(y_test, y_pred))

r2_score : 0.7190172315709293
MSE 20.60552941176471
MAE 2.5921568627450977


Q5. Write a Python code snippet to implement the KNN classifier algorithm with weighted voting on load_iris dataset in sklearn.datasets.

In [5]:
# Load the Iris dataset
iris = load_iris()
X = iris.data  # Features
y = iris.target  # Target variable (class labels)

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Create a KNN classifier with weighted voting (using 'distance' as the weights)
k = 3  # You can choose the number of neighbors (k) here
knn_classifier = KNeighborsClassifier(n_neighbors=k, weights='distance')

# Fit the classifier to the training data
knn_classifier.fit(X_train, y_train)

# Make predictions on the test data
y_pred = knn_classifier.predict(X_test)

# Calculate and print the accuracy score
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy * 100:.2f}%')


Accuracy: 100.00%


Q6. Implement a function to standardise the features before applying KNN classifier.


In [6]:
def standardize(x_train, x_test):
    scaler = StandardScaler()
    x_train_scaled = scaler.fit_transform(x_train)
    x_test_scaled = scaler.transform(x_test)
    
    return x_test_scaled, x_test_scaled
    

In [7]:
x_train_scaled, x_test_scaled = standardize(x_train, x_test)
x_train_scaled, x_test_scaled

(array([[-0.39680933, -0.50032012, -1.00711147, ..., -0.77195078,
          0.42887165, -0.48121032],
        [-0.40079621,  1.2294987 , -0.66439105, ..., -0.32270256,
          0.44418004, -1.25709448],
        [-0.39523443, -0.50032012,  2.43316256, ...,  0.80041799,
          0.36993437,  0.79033849],
        ...,
        [ 1.1779666 , -0.50032012,  1.03323679, ...,  0.84534281,
         -3.5124912 ,  1.23390203],
        [-0.40131065, -0.50032012, -0.94321444, ...,  0.08162084,
          0.43422959,  0.05669848],
        [-0.39643252, -0.50032012,  0.26792466, ...,  0.17147048,
          0.29470458, -0.49529171]]),
 array([[-0.39680933, -0.50032012, -1.00711147, ..., -0.77195078,
          0.42887165, -0.48121032],
        [-0.40079621,  1.2294987 , -0.66439105, ..., -0.32270256,
          0.44418004, -1.25709448],
        [-0.39523443, -0.50032012,  2.43316256, ...,  0.80041799,
          0.36993437,  0.79033849],
        ...,
        [ 1.1779666 , -0.50032012,  1.03323679, ...,  

Q7. Write a Python function to calculate the euclidean distance between two points.

In [11]:
import numpy as np 
def cal_euclidean(x1, y1, x2, y2):
    distance = np.sqrt((x2 - x1)**2 + (y2 - y1)**2)
    return distance

In [12]:
cal_euclidean(5,6,0,1)

7.0710678118654755

Q8. Write a Python function to calculate the manhattan distance between two points.

In [13]:
import numpy as np 
def cal_manhattan(x1, y1, x2, y2):
    distance = abs(x2 - x1) + abs(y2 - y1)
    return distance

In [14]:
cal_manhattan(5,6,0,1)

10