Q1. Write a Python code to implement the KNN classifier algorithm on load_iris dataset in
sklearn.datasets.

In [9]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, classification_report

# Fetch the iris bunch and pull out the feature matrix and the class labels
iris = load_iris()
X = iris.data
y = iris.target

# Hold out 30% of the samples for testing; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# Build a 3-nearest-neighbours classifier and fit it in one step
# (fit returns the fitted estimator itself)
knn = KNeighborsClassifier(n_neighbors=3).fit(X_train, y_train)

# Predict the labels of the held-out samples
y_pred = knn.predict(X_test)

# Score the predictions: overall accuracy plus the per-class text report
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
report = classification_report(y_true=y_test, y_pred=y_pred)

# Bare tuple as the last expression so the notebook displays both values
accuracy, report


(1.0,
 '              precision    recall  f1-score   support\n\n           0       1.00      1.00      1.00        19\n           1       1.00      1.00      1.00        13\n           2       1.00      1.00      1.00        13\n\n    accuracy                           1.00        45\n   macro avg       1.00      1.00      1.00        45\nweighted avg       1.00      1.00      1.00        45\n')

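Q2. Write a Python code to implement the KNN regressor algorithm on load_boston dataset in
sklearn.datasets.

Note: load_boston was removed from scikit-learn in version 1.2, so the solution below uses the load_diabetes regression dataset instead.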

In [10]:
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsRegressor
from sklearn.metrics import mean_squared_error, r2_score

# Fetch the diabetes bunch and separate the features from the regression target
diabetes = load_diabetes()
X = diabetes.data
y = diabetes.target

# Hold out 30% of the samples for testing; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# Build a 5-nearest-neighbours regressor and fit it in one step
# (fit returns the fitted estimator itself)
knn_regressor = KNeighborsRegressor(n_neighbors=5).fit(X_train, y_train)

# Predict the target for the held-out samples
y_pred = knn_regressor.predict(X_test)

# Score the predictions with mean squared error and the coefficient of determination
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
r2 = r2_score(y_true=y_test, y_pred=y_pred)

print(f"Mean Squared Error: {mse}")
print(f"R-squared: {r2}")


Mean Squared Error: 3222.117894736842
R-squared: 0.4031244536507893


Q3. Write a Python code snippet to find the optimal value of K for the KNN classifier algorithm using
cross-validation on load_iris dataset in sklearn.datasets.

In [8]:
from sklearn.datasets import load_iris
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

# Load the iris dataset
iris = load_iris()
X, y = iris.data, iris.target

# Hold out 30% as a final test set; the search for K below only ever sees the training split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Base estimator; n_neighbors is filled in by the grid search
knn = KNeighborsClassifier()

# Candidate values for K: 1 through 30
param_grid = {'n_neighbors': list(range(1, 31))}

# 5-fold cross-validated search over K, ranked by accuracy
grid_search = GridSearchCV(knn, param_grid, cv=5, scoring='accuracy')
grid_search.fit(X_train, y_train)

# Report the K with the best mean cross-validated accuracy
optimal_k = grid_search.best_params_['n_neighbors']
print(f"The optimal number of neighbors (K): {optimal_k}")

# GridSearchCV (refit=True by default) has already refitted a classifier with the
# best K on the whole training split, so reuse it instead of training a second one.
knn_optimized = grid_search.best_estimator_

# Make predictions on the held-out test set
y_pred = knn_optimized.predict(X_test)

# Evaluate the tuned model on data that played no part in choosing K
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy with optimal K: {accuracy}")


The optimal number of neighbors (K): 1
Accuracy with optimal K: 1.0


Q4. Implement the KNN regressor algorithm with feature scaling on load_boston dataset in
sklearn.datasets.

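To implement the K-Nearest Neighbors (KNN) regressor algorithm with feature scaling, the code below uses the California housing dataset (fetch_california_housing) in place of the Boston housing dataset, which was removed from scikit-learn in version 1.2. The steps are: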

Load the dataset.

Split the dataset into features and target variable.

Split the data into training and testing sets.

Standardize the features using StandardScaler, fitting it on the training set only so that no test-set statistics leak into training.

Create a KNN regressor instance and fit it to the training data.

Make predictions on the test data and evaluate the model.

In [6]:
import numpy as np
import pandas as pd
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsRegressor
from sklearn.metrics import mean_squared_error, r2_score

# Fetch the California housing bunch, then wrap its arrays in pandas containers:
# a DataFrame with named feature columns and a Series holding the target values
california_housing = fetch_california_housing()
X = pd.DataFrame(
    data=california_housing.data,
    columns=california_housing.feature_names,
)
y = pd.Series(data=california_housing.target)

def knn_regressor_on_california(n_neighbors=5, test_size=0.2, random_state=42):
    """
    Train and evaluate a KNN regressor with feature scaling on the California housing dataset.

    The data is read from the module-level ``X`` (features) and ``y`` (target)
    defined before this function, so the loading code must have been run first.

    Parameters:
    - n_neighbors: Number of neighbors for KNN
    - test_size: Proportion of the dataset to include in the test split
    - random_state: Seed passed to train_test_split; the default of 42 reproduces
      the previously hard-coded split

    Returns:
    - mse: Mean Squared Error of the KNN regressor on the test set
    - r2: R^2 score of the KNN regressor on the test set
    """
    # Split the data into training and testing sets
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state
    )

    # Fit the scaler on the training data only, then apply the same transform
    # to the test data so no test-set statistics influence training
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    # Fit the regressor on the scaled training data
    knn_regressor = KNeighborsRegressor(n_neighbors=n_neighbors)
    knn_regressor.fit(X_train_scaled, y_train)

    # Make predictions on the scaled test data
    y_pred = knn_regressor.predict(X_test_scaled)

    # Calculate Mean Squared Error and R^2 score
    mse = mean_squared_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)

    return mse, r2

# Demo run: evaluate with 5 neighbours, leaving the test split size at its default
scores = knn_regressor_on_california(n_neighbors=5)
mse, r2 = scores
print(f"Mean Squared Error: {mse:.2f}")
print(f"R^2 Score: {r2:.2f}")


Mean Squared Error: 0.43
R^2 Score: 0.67


Q6. Implement a function to standardise the features before applying KNN classifier.

In [4]:
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

def standardize_and_knn(X, y, test_size=0.2, n_neighbors=5, random_state=42):
    """
    Standardizes the features and applies KNN classifier.

    Parameters:
    - X: Input features (numpy array or pandas DataFrame)
    - y: Target labels (numpy array or pandas Series)
    - test_size: Proportion of the dataset to include in the test split
    - n_neighbors: Number of neighbors to use for KNN
    - random_state: Seed passed to train_test_split; the default of 42 reproduces
      the previously hard-coded split

    Returns:
    - accuracy: Accuracy of the KNN classifier on the test set
    """
    # Split the data into training and testing sets
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state
    )

    # Fit the scaler on the training data only, then apply the same transform
    # to the test data so no test-set statistics influence training
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    # Fit the classifier on the standardized training data
    knn = KNeighborsClassifier(n_neighbors=n_neighbors)
    knn.fit(X_train_scaled, y_train)

    # Make predictions on the standardized test data
    y_pred = knn.predict(X_test_scaled)

    # Calculate accuracy
    accuracy = accuracy_score(y_test, y_pred)

    return accuracy



Explanation:

StandardScaler: This class standardizes features by removing the mean and scaling to unit variance.

train_test_split: This function splits the dataset into training and testing sets.

KNeighborsClassifier: This is the KNN classifier implementation.

accuracy_score: This function calculates the accuracy of the model.

The cell above only defines the function and does not call it; to test it, pass your own feature matrix X and label vector y, for example the iris data: standardize_and_knn(iris.data, iris.target).

Q7. Write a Python function to calculate the euclidean distance between two points.

In [7]:
import numpy as np

def euclidean_distance(point1, point2):
    """
    Calculate the Euclidean distance between two points.

    Parameters:
    - point1: First point (list, tuple, or numpy array)
    - point2: Second point (list, tuple, or numpy array)

    Returns:
    - distance: Euclidean distance between the two points

    Raises:
    - ValueError: If the two points do not have the same shape
    """
    # Convert to float arrays: subtracting in an integer dtype wraps around for
    # unsigned inputs and can overflow when large values are squared
    point1 = np.array(point1, dtype=float)
    point2 = np.array(point2, dtype=float)

    # Reject points of different dimensionality instead of letting broadcasting
    # silently produce a number that is not a distance
    if point1.shape != point2.shape:
        raise ValueError(
            f"points must have the same shape, got {point1.shape} and {point2.shape}"
        )

    # Square root of the sum of squared coordinate differences
    difference = point1 - point2
    distance = np.sqrt(np.sum(difference ** 2))

    return distance

# Demo: the classic 3-4-5 right triangle, measured from the origin
p1, p2 = (3, 4), (0, 0)
distance = euclidean_distance(p1, p2)
print(f"The Euclidean distance between {p1} and {p2} is: {distance:.2f}")


The Euclidean distance between (3, 4) and (0, 0) is: 5.00
