### Q1. Write Python code to implement the KNN classifier algorithm on the load_iris dataset in sklearn.datasets.

In [1]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

# Fetch the iris data set that ships with scikit-learn
iris = load_iris()

# Hold out 30% of the samples for evaluation; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    iris.data,
    iris.target,
    test_size=0.3,
    random_state=42,
)

# Build a 3-nearest-neighbours classifier and fit it on the training portion
knn = KNeighborsClassifier(n_neighbors=3).fit(X_train, y_train)

# Label the held-out samples and measure the fraction classified correctly
y_pred = knn.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)

print(f"Accuracy: {accuracy}")


Accuracy: 1.0


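### Q2. Write Python code to implement the KNN regressor algorithm on the load_boston dataset in sklearn.datasets.

**Note:** `load_boston` was removed in scikit-learn 1.2, so the solution below uses `fetch_california_housing` as the regression dataset instead.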

In [4]:
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsRegressor
from sklearn.metrics import mean_squared_error

# Fetch the California housing data (used here as the regression data set)
california = fetch_california_housing()

# Reserve 30% of the rows for testing; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    california.data,
    california.target,
    test_size=0.3,
    random_state=42,
)

# Build a 3-nearest-neighbours regressor and fit it on the (unscaled) training rows
knn = KNeighborsRegressor(n_neighbors=3).fit(X_train, y_train)

# Predict the target for the held-out rows and score with mean squared error
y_pred = knn.predict(X_test)
mse = mean_squared_error(y_test, y_pred)

print(f"Mean Squared Error: {mse}")


Mean Squared Error: 1.1982342607232825


### Q3. Write a Python code snippet to find the optimal value of K for the KNN classifier algorithm using cross-validation on the load_iris dataset in sklearn.datasets.

In [5]:
from sklearn.datasets import load_iris
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.neighbors import KNeighborsClassifier

# Fetch the iris data set
iris = load_iris()

# Reserve 30% of the samples for testing; only the training part is used for the search
X_train, X_test, y_train, y_test = train_test_split(
    iris.data,
    iris.target,
    test_size=0.3,
    random_state=42,
)

# Candidate neighbourhood sizes: every K from 1 through 30
k_range = [k for k in range(1, 31)]
param_grid = {'n_neighbors': k_range}

# Base estimator whose n_neighbors is tuned by the search
knn = KNeighborsClassifier()

# Exhaustive search over K, scored by 10-fold cross-validated accuracy
grid = GridSearchCV(estimator=knn, param_grid=param_grid, cv=10, scoring='accuracy')
grid.fit(X_train, y_train)

# Report the winning K together with its mean cross-validated accuracy
print("Best value of K: ", grid.best_params_['n_neighbors'])
print("Accuracy score: ", grid.best_score_)


Best value of K:  11
Accuracy score:  0.9627272727272727


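### Q4. Implement the KNN regressor algorithm with feature scaling on the load_boston dataset in sklearn.datasets.

**Note:** `load_boston` was removed in scikit-learn 1.2, so the solution below uses `fetch_california_housing` as the regression dataset instead.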

In [2]:
from sklearn.datasets import fetch_california_housing
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

# Fetch the California housing data
data = fetch_california_housing()

# Reserve 20% of the rows for testing; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    data.data,
    data.target,
    test_size=0.2,
    random_state=42,
)

# Learn the scaling statistics from the training rows only, then apply them to both sets
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Fit a 5-nearest-neighbours regressor on the standardized training features
knn = KNeighborsRegressor(n_neighbors=5)
knn.fit(X_train_scaled, y_train)

# Predict on the standardized test features and score with mean squared error
y_pred_scaled = knn.predict(X_test_scaled)
mse_scaled = mean_squared_error(y_test, y_pred_scaled)

print("Mean squared error on scaled test data:", mse_scaled)


Mean squared error on scaled test data: 0.4324216146043236


### Q5. Write a Python code snippet to implement the KNN classifier algorithm with weighted voting on the load_iris dataset in sklearn.datasets.

In [3]:
from sklearn.datasets import load_iris
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier

# Load iris dataset
iris = load_iris()

# Split data into training and testing sets.
# The fixed seed makes the split, and therefore the reported accuracy,
# reproducible from run to run.
X_train, X_test, y_train, y_test = train_test_split(iris.data, iris.target, test_size=0.2, random_state=42)

# Instantiate KNN classifier with weighted voting and fit to training data.
# weights='distance' makes closer neighbours count more than distant ones.
knn = KNeighborsClassifier(weights='distance')
knn.fit(X_train, y_train)

# Predict classes for testing data
y_pred = knn.predict(X_test)

# Calculate accuracy from the predictions made above (avoids predicting twice)
accuracy = accuracy_score(y_test, y_pred)

print("Accuracy:", accuracy)


Accuracy: 0.9666666666666667


### Q6. Implement a function to standardise the features before applying KNN classifier.

In [11]:
import numpy as np
from sklearn.datasets import load_iris
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

def euclidean_distance(x1, x2):
    """
    Return the straight-line (L2) distance separating two points.

    Args:
    - x1: numpy array, coordinates of the first point
    - x2: numpy array, coordinates of the second point

    Returns:
    - float, square root of the summed squared coordinate differences
    """
    offset = x1 - x2
    squared_total = np.sum(offset ** 2)
    return np.sqrt(squared_total)

def manhattan_distance(x1, x2):
    """
    Return the city-block (L1) distance separating two points.

    Args:
    - x1: numpy array, coordinates of the first point
    - x2: numpy array, coordinates of the second point

    Returns:
    - sum of the absolute coordinate differences between x1 and x2
    """
    offset = x1 - x2
    return np.sum(np.abs(offset))

def knn_classifier(X_train, y_train, X_test, k, distance_func=euclidean_distance):
    """
    Perform distance-weighted KNN classification on standardized features.

    A StandardScaler is fitted on X_train only and then applied to both
    X_train and X_test, so no statistics of the test data enter the scaling.
    Each test point is labelled by an inverse-distance weighted vote among its
    k nearest training points. If one or more of those neighbours coincide
    exactly with the test point (distance 0), only the coinciding neighbours
    vote, which avoids dividing by zero.

    Args:
    - X_train: numpy array, training data features
    - y_train: numpy array, training data labels (non-negative integers, as
      required by np.bincount)
    - X_test: numpy array, test data features
    - k: int, number of nearest neighbors to consider (at least 1; a value
      above the number of training samples uses every training sample)
    - distance_func: function, distance function to use for computing distances

    Returns:
    - y_pred: numpy array of floats, predicted labels for test data

    Raises:
    - ValueError: if k is smaller than 1
    """

    if k < 1:
        raise ValueError("k must be at least 1, got {}".format(k))

    # Labels are fancy-indexed below, so make sure they are an array
    y_train = np.asarray(y_train)

    # Standardize features in training and test data
    scaler = StandardScaler()
    X_train_std = scaler.fit_transform(X_train)
    X_test_std = scaler.transform(X_test)

    # Calculate pairwise distances between test data and training data
    num_test = X_test_std.shape[0]
    num_train = X_train_std.shape[0]
    distances = np.zeros((num_test, num_train))
    for i in range(num_test):
        for j in range(num_train):
            distances[i, j] = distance_func(X_test_std[i], X_train_std[j])

    # Sort distances and get indices of k nearest neighbors for each test point
    indices = np.argsort(distances, axis=1)[:, :k]

    # Predict labels for test data using weighted voting
    y_pred = np.zeros(num_test)
    for i in range(num_test):
        neighbor_idx = indices[i]
        k_nearest_labels = y_train[neighbor_idx]
        neighbor_dist = distances[i, neighbor_idx]

        exact_match = neighbor_dist == 0
        if exact_match.any():
            # 1 / 0 would yield inf weights and a RuntimeWarning; instead let
            # only the exactly matching neighbours vote, each with weight 1.
            weights = exact_match.astype(float)
        else:
            weights = 1.0 / neighbor_dist

        y_pred[i] = np.bincount(k_nearest_labels, weights=weights).argmax()

    return y_pred

# Fetch iris and reserve 20% of the samples for testing (fixed seed for a reproducible split)
iris = load_iris()
X_train, X_test, y_train, y_test = train_test_split(
    iris.data,
    iris.target,
    test_size=0.2,
    random_state=42,
)

# Classify the held-out samples with the hand-written KNN (k=5, Euclidean distance)
y_pred = knn_classifier(X_train, y_train, X_test, k=5, distance_func=euclidean_distance)

# Accuracy is the share of held-out samples whose predicted label matches the true one
num_correct = np.count_nonzero(y_pred == y_test)
accuracy = num_correct / len(y_test)
print("Accuracy of KNN classifier with k=5 and Euclidean distance: {:.2f}%".format(accuracy*100))


Accuracy of KNN classifier with k=5 and Euclidean distance: 100.00%


### Q7. Write a Python function to calculate the euclidean distance between two points.

In [5]:
import math

def euclidean_distance(point1, point2):
    """Calculate the Euclidean distance between two points.

    Args:
        point1 (list or tuple): The first point, represented as a list or tuple of coordinates.
        point2 (list or tuple): The second point, represented as a list or tuple of coordinates.
            Must have the same number of coordinates as point1.

    Returns:
        float: The Euclidean distance between the two points.

    Raises:
        ValueError: If the two points do not have the same number of coordinates.
    """
    # Without this check, extra coordinates of a longer point2 would be ignored
    # silently and a shorter point2 would fail with an unhelpful IndexError.
    if len(point1) != len(point2):
        raise ValueError(
            "points must have the same number of coordinates, got {} and {}".format(
                len(point1), len(point2)
            )
        )

    distance_squared = sum((a - b) ** 2 for a, b in zip(point1, point2))
    return math.sqrt(distance_squared)


In [6]:
# Two sample points in three-dimensional space
point1, point2 = [1, 2, 3], [4, 5, 6]

# Expected result: sqrt(27) = 5.196152422706632
distance = euclidean_distance(point1, point2)
print(distance)


5.196152422706632


### Q8. Write a Python function to calculate the manhattan distance between two points.

In [8]:
def manhattan_distance(x1, y1, x2, y2):
    """
    Return the Manhattan (city-block) distance between the 2-D points
    (x1, y1) and (x2, y2): the sum of the absolute differences along each axis.
    """
    horizontal_gap = abs(x1 - x2)
    vertical_gap = abs(y1 - y2)
    return horizontal_gap + vertical_gap


# Distance between (1, 2) and (3, 4): |1 - 3| + |2 - 4| = 4
manhattan_distance(x1=1, y1=2, x2=3, y2=4)


4