### [Q1.] Write Python code to implement the KNN classifier algorithm on the load_iris dataset in sklearn.datasets.
##### [Ans]

In [6]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

# Load the iris feature matrix and the class labels.
data = load_iris()
X, y = data.data, data.target

# Keep 30% of the samples aside for evaluation; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# fit() returns the estimator itself, so construction and training are chained.
clf = KNeighborsClassifier(n_neighbors=5).fit(X_train, y_train)

# Compare predictions on the held-out split with the true labels.
y_pred = clf.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"ACCURACY : {accuracy:.2f}")

ACCURACY : 1.00


### [Q2.] Write Python code to implement the KNN regressor algorithm on the load_boston dataset in sklearn.datasets.
##### [Ans]

In [12]:
## NOTE: load_boston was removed from scikit-learn (version 1.2), so the California housing dataset is used instead.
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsRegressor
from sklearn.metrics import mean_squared_error

# Load the California housing features and the target values.
data = fetch_california_housing()
X, y = data.data, data.target

# Keep 30% of the samples aside for evaluation; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# The features are used unscaled here. fit() returns the estimator, so the
# regressor is constructed and trained in one expression.
clf = KNeighborsRegressor(n_neighbors=5).fit(X_train, y_train)

# Mean squared error of the predictions on the held-out split.
y_pred = clf.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
print(f"MEAN SQUARED ERROR : {mse:.2f}")

MEAN SQUARED ERROR : 1.14


### [Q3.] Write a Python code snippet to find the optimal value of K for the KNN classifier algorithm using cross-validation on the load_iris dataset in sklearn.datasets.
##### [Ans]

In [14]:
from sklearn.datasets import load_iris
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import cross_val_score

# Load the iris feature matrix and the class labels.
data = load_iris()
X, y = data.data, data.target

# Mean 5-fold cross-validation score for every candidate K from 1 to 20.
mean_scores = {
    k: cross_val_score(KNeighborsClassifier(n_neighbors=k), X, y, cv=5).mean()
    for k in range(1, 21)
}

# Walk the candidates in ascending K; the strict comparison means that on a
# tie the smallest K seen first is kept.
best_k, best_score = 0, 0
for k, avg_score in mean_scores.items():
    if avg_score > best_score:
        best_k, best_score = k, avg_score

print(f"OPTIMAL K : {best_k}, CROSS VALIDATION SCORE : {best_score:.2f}")

OPTIMAL K : 6, CROSS VALIDATION SCORE : 0.98


### [Q4.] Implement the KNN regressor algorithm with feature scaling on the load_boston dataset in sklearn.datasets.
##### [Ans]

In [16]:
## NOTE: load_boston was removed from scikit-learn (version 1.2), so the California housing dataset is used instead.
from sklearn.datasets import fetch_california_housing
from sklearn.pipeline import Pipeline
from sklearn.neighbors import KNeighborsRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split

# Load the California housing features and the target values.
data = fetch_california_housing()
X, y = data.data, data.target

# Keep 30% of the samples aside for evaluation; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# Standardise the features, then regress with KNN. Wrapping both steps in a
# Pipeline means the scaler is fitted on the training split only.
# NOTE(review): this cell uses n_neighbors=3 while the unscaled regressor cell
# uses n_neighbors=5, so the two MSE values differ in K as well as in scaling.
steps = [
    ('scaler', StandardScaler()),
    ('knn', KNeighborsRegressor(n_neighbors=3)),
]
pipeline = Pipeline(steps).fit(X_train, y_train)

# Mean squared error of the predictions on the held-out split.
y_pred = pipeline.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
print(f"MEAN SQUARED ERROR : {mse:.2f}")

MEAN SQUARED ERROR : 0.46


### [Q5.] Write a Python code snippet to implement the KNN classifier algorithm with weighted voting on the load_iris dataset in sklearn.datasets.
##### [Ans]

In [17]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

# Load the iris feature matrix and the class labels.
data = load_iris()
X, y = data.data, data.target

# Keep 30% of the samples aside for evaluation; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# weights='distance' makes closer neighbours count for more in the vote
# (scikit-learn weights each neighbour by the inverse of its distance).
clf = KNeighborsClassifier(n_neighbors=3, weights='distance').fit(X_train, y_train)

# Compare predictions on the held-out split with the true labels.
y_pred = clf.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"ACCURACY : {accuracy:.2f}")

ACCURACY : 1.00


### [Q6.] Implement a function to standardise the features before applying the KNN classifier.
##### [Ans]

In [19]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

def standardize_and_apply_knn(X=None, y=None, n_neighbors=3, test_size=0.3, random_state=42):
    """Standardise the features, fit a KNN classifier and print its test accuracy.

    Called with no arguments, this runs on the iris dataset with a 70/30 split
    (seed 42) and K=3.

    Parameters
    ----------
    X : array-like of shape (n_samples, n_features), optional
        Feature matrix. When both ``X`` and ``y`` are omitted the iris
        dataset is loaded.
    y : array-like of shape (n_samples,), optional
        Class labels matching the rows of ``X``.
    n_neighbors : int, default=3
        Number of neighbours used by the classifier.
    test_size : float, default=0.3
        Fraction of the samples held out for evaluation.
    random_state : int, default=42
        Seed for the train/test split, so the result is repeatable.

    Returns
    -------
    None
        The accuracy is printed, not returned.

    Raises
    ------
    ValueError
        If only one of ``X`` and ``y`` is supplied.
    """
    # Accept either a complete (X, y) pair or nothing at all.
    if (X is None) != (y is None):
        raise ValueError("X and y must be provided together")
    if X is None:
        data = load_iris()
        X, y = data.data, data.target

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state
    )

    # Fit the scaler on the training split only, then apply the same
    # transformation to the test split so no test statistics leak into training.
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    knn = KNeighborsClassifier(n_neighbors=n_neighbors)
    knn.fit(X_train_scaled, y_train)
    y_pred = knn.predict(X_test_scaled)

    accuracy = accuracy_score(y_test, y_pred)
    print(f"Accuracy after standardization: {accuracy:.2f}")

# Run the demonstration; the function prints the resulting test accuracy.
standardize_and_apply_knn()


Accuracy after standardization: 1.00


### [Q7.] Write a Python function to calculate the Euclidean distance between two points.
##### [Ans]

In [26]:
import numpy as np
def euclidean_distance(point1, point2):
    """Return the Euclidean (L2) distance between two points.

    Parameters
    ----------
    point1, point2 : array-like
        Coordinates of the two points. Both must have the same shape.

    Returns
    -------
    NumPy floating-point scalar
        ``sqrt(sum((point1 - point2) ** 2))``.

    Raises
    ------
    ValueError
        If the two points do not have the same shape.
    """
    a = np.asarray(point1)
    b = np.asarray(point2)

    # Without this check NumPy would broadcast e.g. a 2-D point against a 1-D
    # point and return a number that is not a distance between two points.
    if a.shape != b.shape:
        raise ValueError(
            f"points must have the same shape, got {a.shape} and {b.shape}"
        )

    # Subtract in at least float64: in the inputs' own dtype, unsigned or
    # narrow integers wrap around (uint8 0 - 200 becomes 56).
    diff = np.subtract(a, b, dtype=np.result_type(a.dtype, b.dtype, np.float64))
    return np.sqrt(np.sum(diff ** 2))

# Offsets of 3 and 4 along the two axes form a 3-4-5 right triangle, so this prints 5.0.
print(euclidean_distance([1, 2], [4, 6]))

5.0


### [Q8.] Write a Python function to calculate the Manhattan distance between two points.
##### [Ans]

In [24]:
import numpy as np
def manhattan_distance(point1, point2):
    """Return the Manhattan (L1) distance between two points.

    Parameters
    ----------
    point1, point2 : array-like
        Coordinates of the two points. Both must have the same shape.

    Returns
    -------
    NumPy scalar
        ``sum(|point1 - point2|)``. Integer inputs give an integer result.

    Raises
    ------
    ValueError
        If the two points do not have the same shape.
    """
    a = np.asarray(point1)
    b = np.asarray(point2)

    # Without this check NumPy would broadcast e.g. a 2-D point against a 1-D
    # point and return a number that is not a distance between two points.
    if a.shape != b.shape:
        raise ValueError(
            f"points must have the same shape, got {a.shape} and {b.shape}"
        )

    # Subtract in at least int64: in the inputs' own dtype, unsigned or narrow
    # integers wrap around (uint8 0 - 200 becomes 56). Using int64 as the floor
    # keeps integer inputs integer, while float inputs stay floating point.
    diff = np.subtract(a, b, dtype=np.result_type(a.dtype, b.dtype, np.int64))
    return np.sum(np.abs(diff))

# |1 - 4| + |2 - 6| = 3 + 4, so this prints 7.
print(manhattan_distance([1,2], [4, 6]))

7
