In [105]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score
from sklearn.model_selection import cross_val_score,KFold
from sklearn.pipeline import make_pipeline

**CREATING DATASET**

In [106]:
# Toy vehicle table written one car per row: three numeric measurements
# followed by the Yes/No label. The rows are transposed into the
# column-oriented dict that the DataFrame is built from.
_COLUMNS = ('Engine Size', 'Horsepower', 'Fuel Efficiency', 'Fuel Efficient')
_ROWS = [
    (2.0, 180, 25, 'Yes'),
    (3.5, 250, 20, 'No'),
    (2.5, 200, 30, 'Yes'),
    (4.0, 300, 18, 'No'),
    (1.8, 150, 35, 'Yes'),
    (3.0, 220, 22, 'No'),
    (2.2, 170, 28, 'Yes'),
    (3.8, 280, 17, 'No'),
]
data = {name: list(values) for name, values in zip(_COLUMNS, zip(*_ROWS))}
df = pd.DataFrame(data)

In [107]:
# Predictors are the three numeric columns; the target is the Yes/No label.
X = df.loc[:, ['Engine Size', 'Horsepower', 'Fuel Efficiency']]
y = df.loc[:, 'Fuel Efficient']

Split the dataset into an 80% training set and a 20% test set

In [108]:
# Hold out 20% of the rows for testing. The split is stratified on the label
# and seeded so that re-running the cell reproduces the same partition.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)


*STANDARDIZE THE FEATURES (zero mean, unit variance)*

In [109]:
# Whole-dataset scaling gets its own throwaway scaler. Note that its
# statistics are computed from every row of X, test rows included.
X_scaled = StandardScaler().fit_transform(X)

# `scaler` is fit on the training split only; the test split is transformed
# with those training statistics rather than being refit.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

**IMPLEMENTING KNN ALGORITHM**

In [110]:
def knn_classifier(k):
    """Fit a k-nearest-neighbours classifier and predict the test split.

    Reads the notebook-level ``X_train_scaled``, ``y_train`` and
    ``X_test_scaled`` variables, so those cells must have been run first.

    Parameters
    ----------
    k : int
        Number of neighbours passed to ``KNeighborsClassifier``.

    Returns
    -------
    Predicted labels for ``X_test_scaled``.
    """
    # ``fit`` returns the estimator itself, so training and prediction chain.
    return (
        KNeighborsClassifier(n_neighbors=k)
        .fit(X_train_scaled, y_train)
        .predict(X_test_scaled)
    )

*Function to evaluate Performance*

In [111]:
def evaluation(y_true, y_pred):
    """Compute accuracy, precision and recall for a set of predictions.

    Precision and recall treat the label ``'Yes'`` as the positive class.

    Parameters
    ----------
    y_true : array-like
        Ground-truth labels.
    y_pred : array-like
        Predicted labels, aligned with ``y_true``.

    Returns
    -------
    tuple
        ``(accuracy, precision, recall)``.
    """
    accuracy = accuracy_score(y_true, y_pred)
    # zero_division=0 keeps the score at 0.0 when 'Yes' is never predicted
    # (precision) or never present (recall), without raising the
    # undefined-metric warning that the default setting emits.
    precision = precision_score(y_true, y_pred, pos_label='Yes', zero_division=0)
    recall = recall_score(y_true, y_pred, pos_label='Yes', zero_division=0)
    return accuracy, precision, recall

In [112]:
# Candidate neighbour counts: every integer from 1 up to the number of
# training rows (k cannot exceed the samples available at fit time).
k_values = np.arange(1, X_train.shape[0] + 1)


*Evaluating for each value of K*


In [113]:
# Sweep every candidate k: train on the scaled training split, predict the
# held-out test split, and report the three metrics for that k.
for k in k_values:
    y_pred = knn_classifier(k)
    accuracy, precision, recall = evaluation(y_test, y_pred)
    # Accuracy is shown as a percentage; precision and recall as raw fractions.
    print(f'For k={k}: Accuracy={accuracy*100}%, Precision={precision}, Recall={recall}')


For k=1: Accuracy=100.0%, Precision=1.0, Recall=1.0
For k=2: Accuracy=100.0%, Precision=1.0, Recall=1.0
For k=3: Accuracy=50.0%, Precision=0.5, Recall=1.0
For k=4: Accuracy=100.0%, Precision=1.0, Recall=1.0
For k=5: Accuracy=50.0%, Precision=0.5, Recall=1.0
For k=6: Accuracy=50.0%, Precision=0.0, Recall=0.0


  _warn_prf(average, modifier, msg_start, len(result))


*Performing Cross Validation (OPTIONAL)*

In [114]:
# The fold layout does not depend on k, so the (seeded) splitter is built once.
cv = KFold(n_splits=5, shuffle=True, random_state=42)

for k in k_values:
    # Scaling happens inside the pipeline so that, within each fold, the
    # scaler is fit on that fold's training rows only. Cross-validating on the
    # pre-scaled X_scaled would leak validation rows into the scaling
    # statistics, because that array was standardized using every row.
    knn_model = make_pipeline(StandardScaler(), KNeighborsClassifier(n_neighbors=k))
    scores = cross_val_score(knn_model, X, y, cv=cv, scoring='accuracy')
    # Report mean accuracy with a +/- two-standard-deviation spread.
    print(f'For k={k}: Cross-validated Accuracy: {scores.mean():.2%} (+/- {scores.std() * 2:.2%})')

For k=1: Cross-validated Accuracy: 90.00% (+/- 40.00%)
For k=2: Cross-validated Accuracy: 90.00% (+/- 40.00%)
For k=3: Cross-validated Accuracy: 90.00% (+/- 40.00%)
For k=4: Cross-validated Accuracy: 70.00% (+/- 80.00%)
For k=5: Cross-validated Accuracy: 60.00% (+/- 97.98%)
For k=6: Cross-validated Accuracy: 30.00% (+/- 80.00%)
