In [2]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier,KNeighborsRegressor
from sklearn.metrics import accuracy_score

In [4]:
data = {
    "age": [22, 25, 30, 35, 40, 45, 50, 55, 60, 65],
    "salary": [30000, 35000, 50000, 60000, 65000, 70000, 90000, 120000, 130000, 150000],
    "experience": [1, 2, 5, 7, 10, 12, 18, 25, 30, 35],
    "target": [0, 0, 0, 0, 1, 1, 1, 1, 1, 1]
}

df = pd.DataFrame(data)

X = df[["age", "salary","experience"]]
y = df["target"]

X_train, X_test, y_train, y_test = train_test_split(
    X,y,
    test_size = .25,
    stratify= y,
    random_state=42
)

In [5]:
# Exercise 1 — Basic KNN Classification
    # Train KNN
    # Predict classes

knn = KNeighborsClassifier(n_neighbors=5)
knn.fit(X_train, y_train)

y_pred = knn.predict(X_test)
print("Accuracy without scaling: ", accuracy_score(y_test, y_pred))

Accuracy without scaling:  1.0


In [7]:
# Exercise 2 — Effect of Scaling
    # Compare scaled vs unscaled 

scaler = StandardScaler()

X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

knn = KNeighborsClassifier(n_neighbors=5)
knn.fit(X_train_scaled, y_train)

y_pred = knn.predict(X_test_scaled)
print("Accuracy with scaling: ", accuracy_score(y_test, y_pred))

Accuracy with scaling:  1.0


In [11]:
# Exercise 3 — Effect of K
    # Try multiple K values 

for k in [1,3,5,7]:
    knn = KNeighborsClassifier(n_neighbors=k)
    knn.fit(X_train_scaled, y_train)
    y_pred = knn.predict(X_test_scaled)
    acc = accuracy_score(y_test, y_pred)
    print(f"K={k} → Accuracy={acc:.2f}")

K=1 → Accuracy=1.00
K=3 → Accuracy=1.00
K=5 → Accuracy=1.00
K=7 → Accuracy=0.67


In [15]:
# Exercise 4 — KNN for Regression
    # Predict continuous values

X = df[["age", "experience"]]
y = df["salary"]

X_train, X_test, y_train, y_test = train_test_split(
    X, y,
    test_size = .25,
    random_state =42
)

X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

knn_reg = KNeighborsRegressor(n_neighbors=3)
knn_reg.fit(X_train_scaled, y_train)

y_pred = knn_reg.predict(X_train_scaled)

print("Actual salaries: ", y_test.values)
print("Predicted salaries: ", y_pred.astype(int))

Actual salaries:  [130000  35000  70000]
Predicted salaries:  [ 46666 120000  46666 120000  58333  58333  91666]
