# Answer1

In [1]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

In [2]:
# Load the Iris dataset; the returned object exposes the .data and .target
# attributes that the following cells read.
iris = load_iris()

In [3]:
# Feature matrix and class labels, unpacked from the loaded dataset
X, y = iris.data, iris.target

In [7]:
# Hold out 20% of the samples for testing; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

In [8]:
# Initialize the KNN classifier with k = 3 neighbors
knn_classifier = KNeighborsClassifier(n_neighbors=3)

In [9]:
# Fit on the training split, then label the held-out samples in one chained call
y_pred = knn_classifier.fit(X_train, y_train).predict(X_test)

# Score the predictions against the true test labels and report as a percentage
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy * 100:.2f}%')

Accuracy: 100.00%


# Answer2

In [20]:
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsRegressor
from sklearn.metrics import mean_squared_error,r2_score

import pandas as pd
import numpy as np
# Load the dataset:

data_url = "http://lib.stat.cmu.edu/datasets/boston"
# The separator is a regex, so it is written as a raw string: "\s" inside a
# plain string literal is an invalid escape sequence and triggers a warning
# on recent Python versions.
# skiprows=22 drops the leading lines of the file before parsing
# (presumably descriptive header text -- confirm against the source file).
df = pd.read_csv(data_url, sep=r"\s+", skiprows=22, header=None)

# Records are spread over alternating rows: every even row contributes all of
# its columns and the following odd row contributes its first two columns.
data = np.hstack([df.values[::2, :], df.values[1::2, :2]])
# The third column of each odd row is the regression target.
target = df.values[1::2, 2]

X = data
y = target

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize the KNN regressor
knn_regressor = KNeighborsRegressor(n_neighbors=3)

# Train the regressor on the training set
knn_regressor.fit(X_train, y_train)

# Make predictions on the test set
y_pred = knn_regressor.predict(X_test)

# Calculate and print the R^2 score.
# The result is stored under its own name so the imported r2_score function is
# not overwritten by a float (which would break any later call in this kernel).
r2 = r2_score(y_test, y_pred)
print(f'r2_score: {r2:.2f}')

# Calculate and print the mean squared error
mse = mean_squared_error(y_test, y_pred)
print(f'Mean Squared Error: {mse:.2f}')

r2_score: 0.70
Mean Squared Error: 21.66


# Answer3

In [21]:

from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.neighbors import KNeighborsClassifier

# Load Iris and pull out the feature matrix and class labels
iris = load_iris()
X, y = iris.data, iris.target

# Keep 20% of the samples aside; the search below only sees the training part
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Base estimator whose n_neighbors value is tuned by the search
knn_classifier = KNeighborsClassifier()

# Candidate K values: 1 through 20
param_grid = dict(n_neighbors=range(1, 21))

# 5-fold cross-validated grid search, scored by accuracy
grid_search = GridSearchCV(
    estimator=knn_classifier,
    param_grid=param_grid,
    cv=5,
    scoring='accuracy',
)
grid_search.fit(X_train, y_train)

# Report the selected K together with its cross-validated score
best_k = grid_search.best_params_['n_neighbors']
best_accuracy = grid_search.best_score_
print(f'Best K value: {best_k}')
print(f'Corresponding Cross-validated Accuracy: {best_accuracy:.2f}')

Best K value: 3
Corresponding Cross-validated Accuracy: 0.96


# Answer4

In [23]:
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsRegressor
from sklearn.metrics import mean_squared_error,r2_score
from sklearn.preprocessing import StandardScaler

import pandas as pd
import numpy as np
# Load the dataset:

data_url = "http://lib.stat.cmu.edu/datasets/boston"
# The separator is a regex, so it is written as a raw string: "\s" inside a
# plain string literal is an invalid escape sequence and triggers a warning
# on recent Python versions.
# skiprows=22 drops the leading lines of the file before parsing
# (presumably descriptive header text -- confirm against the source file).
df = pd.read_csv(data_url, sep=r"\s+", skiprows=22, header=None)

# Records are spread over alternating rows: every even row contributes all of
# its columns and the following odd row contributes its first two columns.
data = np.hstack([df.values[::2, :], df.values[1::2, :2]])
# The third column of each odd row is the regression target.
target = df.values[1::2, 2]

X = data
y = target


# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize the StandardScaler for feature scaling
scaler = StandardScaler()

# Fit the scaler on the training data only, then apply the same fitted
# transform to both splits so no test-set statistics leak into training
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Initialize the KNN regressor
knn_regressor = KNeighborsRegressor(n_neighbors=3)

# Train the regressor on the scaled training set
knn_regressor.fit(X_train_scaled, y_train)

# Make predictions on the scaled test set
y_pred = knn_regressor.predict(X_test_scaled)

# Calculate and print the mean squared error
mse = mean_squared_error(y_test, y_pred)
print(f'Mean Squared Error: {mse:.2f}')

Mean Squared Error: 19.40


# Answer5

In [24]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

# Load Iris and pull out the feature matrix and class labels
iris = load_iris()
X, y = iris.data, iris.target

# 80/20 train/test split with a fixed seed
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# KNN with k = 3 and weights='distance' (distance-weighted voting)
knn_classifier_weighted = KNeighborsClassifier(
    n_neighbors=3,
    weights='distance',
)

# Fit on the training split, then label the held-out samples
y_pred_weighted = knn_classifier_weighted.fit(X_train, y_train).predict(X_test)

# Score the predictions against the true test labels and report as a percentage
accuracy_weighted = accuracy_score(y_test, y_pred_weighted)
print(f'Accuracy with Weighted Voting: {accuracy_weighted * 100:.2f}%')

Accuracy with Weighted Voting: 100.00%


# Answer6

In [25]:
from sklearn.preprocessing import StandardScaler

def standardize_features(X_train, X_test):
    """
    Scale both feature sets with a StandardScaler fitted on the training set.

    Parameters:
    - X_train: Training features (used to fit the scaler)
    - X_test: Test features (only transformed, never used for fitting)

    Returns:
    - X_train_standardized: Standardized training features
    - X_test_standardized: Standardized test features
    """
    # Learn the scaling parameters from the training data only
    fitted_scaler = StandardScaler().fit(X_train)

    # Apply the same fitted transform to both splits
    return fitted_scaler.transform(X_train), fitted_scaler.transform(X_test)

# Example usage with load_iris dataset
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

# Load Iris and pull out the feature matrix and class labels
iris = load_iris()
X, y = iris.data, iris.target

# 80/20 train/test split with a fixed seed
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Scale both splits using statistics learned from the training split
X_train_std, X_test_std = standardize_features(X_train, X_test)

# KNN classifier with k = 3
knn_classifier = KNeighborsClassifier(n_neighbors=3)

# Fit on the standardized training data, then label the standardized test data
y_pred = knn_classifier.fit(X_train_std, y_train).predict(X_test_std)

# Score the predictions against the true test labels and report as a percentage
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy on Standardized Features: {accuracy * 100:.2f}%')

Accuracy on Standardized Features: 100.00%


# Answer7

In [26]:
import numpy as np

def euclidean_distance(point1, point2):
    """
    Calculate the Euclidean distance between two points.

    Parameters:
    - point1: A numpy array or list representing the coordinates of the first point.
    - point2: A numpy array or list representing the coordinates of the second point.
      Must have the same shape as point1.

    Returns:
    - distance: The Euclidean distance between the two points.

    Raises:
    - ValueError: If the two points do not have the same shape.
    """

    # asarray accepts lists and does not copy inputs that are already arrays
    point1 = np.asarray(point1)
    point2 = np.asarray(point2)

    # Reject mismatched shapes explicitly; otherwise NumPy broadcasting could
    # silently stretch one point (e.g. [1] against [1, 2, 3]) and return a
    # distance for points that do not live in the same space.
    if point1.shape != point2.shape:
        raise ValueError(
            f"Points must have the same shape, got {point1.shape} and {point2.shape}"
        )

    # Per-coordinate |point1 - point2|, computed as max - min so the
    # difference never goes negative: a plain subtraction wraps around for
    # unsigned integer arrays and would yield a wrong distance.
    abs_diff = np.maximum(point1, point2) - np.minimum(point1, point2)

    # Calculate Euclidean distance
    distance = np.linalg.norm(abs_diff)

    return distance

# Demo: distance between two 3-D points
point_a, point_b = [1, 2, 3], [4, 5, 6]

distance = euclidean_distance(point_a, point_b)
print(f'Euclidean Distance between {point_a} and {point_b}: {distance:.2f}')

Euclidean Distance between [1, 2, 3] and [4, 5, 6]: 5.20


# Answer8

In [27]:
import numpy as np

def manhattan_distance(point1, point2):
    """
    Calculate the Manhattan distance between two points.

    Parameters:
    - point1: A numpy array or list representing the coordinates of the first point.
    - point2: A numpy array or list representing the coordinates of the second point.
      Must have the same shape as point1.

    Returns:
    - distance: The Manhattan distance between the two points.

    Raises:
    - ValueError: If the two points do not have the same shape.
    """

    # asarray accepts lists and does not copy inputs that are already arrays
    point1 = np.asarray(point1)
    point2 = np.asarray(point2)

    # Reject mismatched shapes explicitly; otherwise NumPy broadcasting could
    # silently stretch one point (e.g. [1] against [1, 2, 3]) and return a
    # distance for points that do not live in the same space.
    if point1.shape != point2.shape:
        raise ValueError(
            f"Points must have the same shape, got {point1.shape} and {point2.shape}"
        )

    # Per-coordinate |point1 - point2|, computed as max - min so the
    # difference never goes negative: a plain subtraction wraps around for
    # unsigned integer arrays and would yield a wrong distance.
    abs_diff = np.maximum(point1, point2) - np.minimum(point1, point2)

    # Calculate Manhattan distance
    distance = np.sum(abs_diff)

    return distance

# Demo: distance between two 3-D points
point_a, point_b = [1, 2, 3], [4, 5, 6]

distance = manhattan_distance(point_a, point_b)
print(f'Manhattan Distance between {point_a} and {point_b}: {distance}')

Manhattan Distance between [1, 2, 3] and [4, 5, 6]: 9
