### Question1

In [None]:
# Here's a Python code example that implements the K-Nearest Neighbors (KNN) classifier algorithm using the load_iris dataset from scikit-learn:

# Import necessary libraries
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

# Pull the feature matrix and the class labels out of the bundled iris dataset
iris = load_iris()
X, y = iris.data, iris.target

# Hold out 30% of the samples for testing; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# Number of neighbours consulted for each prediction (tunable)
k = 3

# Build the classifier and fit it in one step (fit() hands back the estimator itself)
knn_classifier = KNeighborsClassifier(n_neighbors=k).fit(X_train, y_train)

# Predict the held-out labels, then score them against the true labels
y_pred = knn_classifier.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)

# Report the result
print(f'Accuracy of KNN Classifier with K={k}: {accuracy}')

# In this code:

#    We import the necessary libraries, including load_iris for loading the dataset, KNeighborsClassifier for creating the KNN classifier, and train_test_split for splitting the data into training and testing sets.

#    We load the iris dataset and separate the features (X) and the target variable (y).

#    We split the dataset into training and testing sets using train_test_split, with 70% of the data used for training and 30% for testing.

#    We initialize the KNN classifier with a chosen value of K (you can change the k variable to experiment with different values).

#    The model is trained on the training data using fit.

#    We make predictions on the test data using predict.

#    Finally, we calculate the accuracy of the KNN classifier on the test data and print the result.

# You can change the value of k to see how it affects the accuracy of the classifier.

### Question2

In [None]:
# Here's a Python code example that implements the K-Nearest Neighbors (KNN) regressor algorithm using the load_boston dataset from scikit-learn:

# Import necessary libraries
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsRegressor
from sklearn.metrics import mean_squared_error, r2_score

# Load the Boston Housing dataset.
# load_boston was deprecated in scikit-learn 1.0 and removed in 1.2 (the
# maintainers cite ethical problems with the dataset and suggest
# fetch_california_housing for new work), so a plain import raises
# ImportError on current releases. To keep this exercise runnable, fall back
# to rebuilding the same arrays from the original StatLib file, following the
# recipe in scikit-learn's deprecation notice. The fallback needs network access.
try:
    from sklearn.datasets import load_boston
    boston = load_boston()
except ImportError:
    import numpy as np
    import pandas as pd
    from sklearn.utils import Bunch

    raw_df = pd.read_csv(
        "http://lib.stat.cmu.edu/datasets/boston",
        sep=r"\s+", skiprows=22, header=None,
    )
    # Each record spans two physical lines in the source file: the first
    # holds 11 feature values, the second holds 2 more features followed
    # by the target value.
    boston = Bunch(
        data=np.hstack([raw_df.values[::2, :], raw_df.values[1::2, :2]]),
        target=raw_df.values[1::2, 2],
    )
X = boston.data  # Features
y = boston.target  # Target variable

# Split the dataset into training and testing sets (70% / 30%, fixed seed)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Initialize the KNN regressor with a chosen value of K
k = 3  # You can change this value
knn_regressor = KNeighborsRegressor(n_neighbors=k)

# Fit the model to the training data
knn_regressor.fit(X_train, y_train)

# Make predictions on the test data
y_pred = knn_regressor.predict(X_test)

# Calculate the Mean Squared Error (MSE) and R-squared (R2) score
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

# Print the performance metrics
print(f'Mean Squared Error (MSE): {mse:.2f}')
print(f'R-squared (R2) Score: {r2:.2f}')

# In this code:

#    We import the necessary libraries, including load_boston for loading the dataset, KNeighborsRegressor for creating the KNN regressor, and train_test_split for splitting the data into training and testing sets.

#    We load the Boston Housing dataset and separate the features (X) and the target variable (y).

#    We split the dataset into training and testing sets using train_test_split, with 70% of the data used for training and 30% for testing.

#    We initialize the KNN regressor with a chosen value of K (you can change the k variable to experiment with different values).

#    The model is trained on the training data using fit.

#    We make predictions on the test data using predict.

#    Finally, we calculate the Mean Squared Error (MSE) and R-squared (R2) score to evaluate the performance of the KNN regressor.

# You can change the value of k to see how it affects the regression performance.



### Question3

In [None]:
# To find the optimal value of K for the K-Nearest Neighbors (KNN) classifier using cross-validation on the load_iris dataset from scikit-learn, you can perform a grid search over different values of K and select the one with the best cross-validated performance. Here's a Python code snippet to do that:

from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.neighbors import KNeighborsClassifier
import matplotlib.pyplot as plt
import numpy as np

# Fetch the Iris features and class labels
iris = load_iris()
X, y = iris.data, iris.target

# Keep 30% aside as a test set (optional here: only the training part is cross-validated below)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# Candidate neighbour counts: 1 through 20 (adjust the range as needed)
k_values = list(range(1, 21))

# Mean 5-fold cross-validated accuracy on the training split, one entry per candidate K
cv_scores = [
    cross_val_score(
        KNeighborsClassifier(n_neighbors=k), X_train, y_train, cv=5, scoring='accuracy'
    ).mean()
    for k in k_values
]

# Visualise how the cross-validated accuracy changes with K
plt.figure(figsize=(10, 6))
plt.plot(k_values, cv_scores, marker='o', linestyle='-', color='b')
plt.xlabel('K Value')
plt.ylabel('Cross-Validated Accuracy')
plt.title('Cross-Validated Accuracy vs. K Value')
plt.grid(True)
plt.show()

# Pick the K whose mean accuracy is highest (argmax returns the first maximum,
# so ties resolve to the smallest K)
best_index = np.argmax(cv_scores)
optimal_k = k_values[best_index]
print(f'Optimal K value: {optimal_k}')

# In this code:

#    We load the Iris dataset and separate the features (X) and the target variable (y).

#    Optionally, you can split the dataset into training and testing sets using train_test_split. This step is not necessary if you intend to use cross-validation for model evaluation.

#    We define a range of K values to try (in this example, from 1 to 20). You can adjust this range based on your preferences.

#    We create an empty list, cv_scores, to store the cross-validated accuracy scores for different K values.

#    We perform k-fold cross-validation for each K value and compute the mean accuracy score using cross_val_score.

#    The cross-validated accuracy scores are plotted against the K values to help you visually identify the optimal K value.

#    Finally, we find and print the optimal K value with the highest cross-validated accuracy.

# This code will help you determine the best K value for your KNN classifier on the Iris dataset using cross-validation.

### Question4

In [None]:
# Here's a Python code snippet to implement the K-Nearest Neighbors (KNN) regressor algorithm with feature scaling on the load_boston dataset from scikit-learn:

from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, r2_score
import numpy as np

# Load the Boston Housing dataset.
# load_boston was deprecated in scikit-learn 1.0 and removed in 1.2 (the
# maintainers cite ethical problems with the dataset and suggest
# fetch_california_housing for new work), so a plain import raises
# ImportError on current releases. To keep this exercise runnable, fall back
# to rebuilding the same arrays from the original StatLib file, following the
# recipe in scikit-learn's deprecation notice. The fallback needs network access.
try:
    from sklearn.datasets import load_boston
    boston = load_boston()
except ImportError:
    import pandas as pd
    from sklearn.utils import Bunch

    raw_df = pd.read_csv(
        "http://lib.stat.cmu.edu/datasets/boston",
        sep=r"\s+", skiprows=22, header=None,
    )
    # Each record spans two physical lines in the source file: the first
    # holds 11 feature values, the second holds 2 more features followed
    # by the target value.
    boston = Bunch(
        data=np.hstack([raw_df.values[::2, :], raw_df.values[1::2, :2]]),
        target=raw_df.values[1::2, 2],
    )
X = boston.data  # Features
y = boston.target  # Target variable

# Split the dataset into training and testing sets (80% / 20%, fixed seed)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize features. The scaler is fitted on the training split only and
# then reused for the test split, so no test-set statistics leak into training.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Create a KNN regressor model
k = 5  # Number of neighbors (you can adjust this)
knn_regressor = KNeighborsRegressor(n_neighbors=k)

# Fit the model to the training data
knn_regressor.fit(X_train, y_train)

# Make predictions on the test data
y_pred = knn_regressor.predict(X_test)

# Evaluate the model
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(f"Mean Squared Error (MSE): {mse}")
print(f"R-squared (R2) Score: {r2}")

# In this code:
#
#    We load the Boston Housing dataset and separate the features (X) and the target variable (y).

#    We split the dataset into training and testing sets using train_test_split, with 80% of the data for training and 20% for testing.

#    We standardize the features using StandardScaler to ensure that each feature has a mean of 0 and a standard deviation of 1. This step is essential for KNN as it relies on distance metrics.

#    We create a KNN regressor model with the desired number of neighbors (in this case, k = 5), but you can adjust this value based on your needs.

#    The KNN regressor is fitted to the training data.

#    We make predictions on the test data using the trained model.

#    Finally, we evaluate the model's performance using mean squared error (MSE) and R-squared (R2) score, which are common metrics for regression tasks.

# This code demonstrates how to implement KNN regression with feature scaling on the Boston Housing dataset. You can adjust the value of k and other parameters as needed for your specific regression problem.

### Question5

In [None]:
# Here's a Python code snippet to implement the K-Nearest Neighbors (KNN) classifier algorithm with weighted voting on the load_iris dataset from scikit-learn:

from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score

# Fetch the Iris features and class labels
iris = load_iris()
X, y = iris.data, iris.target

# Reserve 20% of the samples for testing; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Learn the scaling statistics from the training split only,
# then apply the same transformation to both splits
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# Number of neighbours (tunable)
k = 5

# weights='distance' requests distance-weighted voting instead of uniform votes
knn_classifier = KNeighborsClassifier(n_neighbors=k, weights='distance')
knn_classifier.fit(X_train, y_train)

# Predict the held-out labels and measure how many match the true labels
y_pred = knn_classifier.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)

print(f"Accuracy: {accuracy}")

# In this code:

#    We load the Iris dataset and separate the features (X) and the target variable (y).

#    We split the dataset into training and testing sets using train_test_split, with 80% of the data for training and 20% for testing.

#    We standardize the features using StandardScaler to ensure that each feature has a mean of 0 and a standard deviation of 1. Standardization is essential for KNN as it relies on distance metrics.

#    We create a KNN classifier model with the desired number of neighbors (in this case, k = 5) and specify weights='distance' to enable weighted voting based on the inverse of the distances.

#    The KNN classifier is fitted to the training data.

#    We make predictions on the test data using the trained model.

#    Finally, we evaluate the model's performance using accuracy, a common metric for classification tasks.

# This code demonstrates how to implement KNN classification with weighted voting on the Iris dataset. You can adjust the value of k and other parameters as needed for your specific classification problem.

### Question6

In [None]:
# Here's a Python function that standardizes the features using StandardScaler from scikit-learn before applying a KNN classifier:

from sklearn.preprocessing import StandardScaler

def standardize_features(X_train, X_test):
    """
    Scale training and testing features with a single StandardScaler.

    The scaler is fitted on the training features only and then applied to
    both sets.

    Parameters:
    - X_train: Training data features
    - X_test: Testing data features

    Returns:
    - X_train_scaled: Standardized training data features
    - X_test_scaled: Testing data features transformed with the scaler fitted on X_train
    """
    # Learn the scaling statistics from the training split alone
    fitted_scaler = StandardScaler().fit(X_train)

    # Apply the identical transformation to both splits
    return fitted_scaler.transform(X_train), fitted_scaler.transform(X_test)

# You can use this function to standardize your training and testing data features before applying the KNN classifier. Here's how you can use it:

# Assuming you have loaded and split your data into X_train, X_test, y_train, and y_test.
# NOTE(review): when this notebook is run top to bottom, X_train / X_test come from the
# Question5 cell, which already overwrote them with standardized arrays, so this call
# re-scales scaled data. Confirm that is intended, or pass a fresh raw split instead.
X_train_scaled, X_test_scaled = standardize_features(X_train, X_test)

# Create and train the KNN classifier using X_train_scaled
# Make predictions, evaluate the model, etc.

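# This function scales every feature using the mean and standard deviation learned from the training data, so the training features end up with a mean of 0 and a standard deviation of 1 (the test features are transformed with the same statistics and are therefore only approximately standardized). This is important for distance-based algorithms like KNN, where features on larger scales would otherwise dominate the distance.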

### Question7

In [None]:
# Here's a Python function to calculate the Euclidean distance between two points represented as NumPy arrays:

import numpy as np

def euclidean_distance(point1, point2):
    """
    Calculate the Euclidean distance between two points.

    Parameters:
    - point1: NumPy array (or array-like, e.g. a list) representing the first point
    - point2: NumPy array (or array-like, e.g. a list) representing the second point

    Returns:
    - distance: Euclidean distance between the two points, as a NumPy float

    Raises:
    - ValueError: if the two points do not have exactly the same shape
    """
    # Convert to float arrays up front: this accepts plain lists/tuples and
    # avoids wrap-around when subtracting unsigned or narrow integer arrays
    # (e.g. uint8 0 - 20 would otherwise become 236).
    p1 = np.asarray(point1, dtype=float)
    p2 = np.asarray(point2, dtype=float)

    # Compare full shapes rather than len(): equal lengths do not rule out
    # shapes such as (3,) vs (3, 1), which would silently broadcast.
    if p1.shape != p2.shape:
        raise ValueError("Both points must have the same dimensionality")

    # Square root of the summed squared coordinate differences
    distance = np.sqrt(np.sum((p1 - p2) ** 2))

    return distance

# You can use this function to find the Euclidean distance between any two points by passing their coordinates as NumPy arrays. Here's an example of how to use it:

# Two sample 3-D points whose coordinates each differ by 3
point1, point2 = np.array([1.0, 2.0, 3.0]), np.array([4.0, 5.0, 6.0])

# sqrt(3**2 + 3**2 + 3**2) = sqrt(27), roughly 5.196
distance = euclidean_distance(point1, point2)
print("Euclidean Distance:", distance)

# This will calculate and print the Euclidean distance between point1 and point2.

### Question8

In [None]:
# Here's a Python function to calculate the Manhattan distance (also known as the L1 distance or taxicab distance) between two points represented as NumPy arrays:

import numpy as np

def manhattan_distance(point1, point2):
    """
    Calculate the Manhattan (L1 / taxicab) distance between two points.

    Parameters:
    - point1: NumPy array (or array-like, e.g. a list) representing the first point
    - point2: NumPy array (or array-like, e.g. a list) representing the second point

    Returns:
    - distance: Manhattan distance between the two points, as a NumPy float

    Raises:
    - ValueError: if the two points do not have exactly the same shape
    """
    # Convert to float arrays up front: this accepts plain lists/tuples and
    # avoids wrap-around when subtracting unsigned or narrow integer arrays
    # (e.g. uint8 0 - 20 would otherwise become 236 instead of -20).
    p1 = np.asarray(point1, dtype=float)
    p2 = np.asarray(point2, dtype=float)

    # Compare full shapes rather than len(): equal lengths do not rule out
    # shapes such as (3,) vs (3, 1), which would silently broadcast.
    if p1.shape != p2.shape:
        raise ValueError("Both points must have the same dimensionality")

    # Sum of the absolute coordinate differences
    distance = np.sum(np.abs(p1 - p2))

    return distance

# You can use this function to find the Manhattan distance between any two points by passing their coordinates as NumPy arrays. Here's an example of how to use it:

# Two sample 3-D points whose coordinates each differ by 3
point1, point2 = np.array([1.0, 2.0, 3.0]), np.array([4.0, 5.0, 6.0])

# |1-4| + |2-5| + |3-6| = 9
distance = manhattan_distance(point1, point2)
print("Manhattan Distance:", distance)

# This will calculate and print the Manhattan distance between point1 and point2.