In [None]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.preprocessing import MinMaxScaler
import numpy as np
import pandas as pd
import joblib as jb
from sklearn.model_selection import GridSearchCV

In [None]:
# Load the raw dataset from the working directory into a DataFrame.
data = pd.read_csv(filepath_or_buffer='acceptance_data.csv')

In [None]:
# Positional split: the first two columns are used as features and the
# third column as the target.
# NOTE(review): this relies on the CSV column order — confirm against the file.
X = data.iloc[:, :2]
y = data.iloc[:, 2]

# Uncomment to inspect the selections:
# display(X)
# display(y)

In [None]:
# Hold out 20% of the rows for testing; the fixed seed makes the split
# reproducible across kernel restarts.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=40,
)

In [None]:
# Normalize the data.
# MinMaxScaler rescales each feature using the min/max it learns in `fit`
# (default target range is [0, 1]).
scaler = MinMaxScaler()

# Fit on the training split only, so no information from the test split
# leaks into the scaling parameters. The scaler returns a bare ndarray, so the
# result is wrapped back into a DataFrame. `index=` keeps the original row
# labels: without it the frame would get a fresh RangeIndex and no longer line
# up label-wise with y_train.
X_train = pd.DataFrame(
    scaler.fit_transform(X_train),
    columns=X.columns,
    index=X_train.index,
)

# Apply the already-fitted scaling to the test split (values may fall outside
# [0, 1] if the test data exceeds the training min/max).
X_test = pd.DataFrame(
    scaler.transform(X_test),
    columns=X.columns,
    index=X_test.index,
)

# display(X_train)
# display(X_test)

In [None]:
# Baseline: a KNN classifier with scikit-learn's default hyper-parameters,
# trained on the scaled training split and scored on the held-out split.
knn = KNeighborsClassifier()

# `fit` returns the estimator itself, so training and prediction can be chained.
y_pred = knn.fit(X_train, y_train).predict(X_test)

print(
    "Accuracy: ",
    accuracy_score(y_test, y_pred),
)
print(
    "Confusion Matrix: \n",
    confusion_matrix(y_test, y_pred),
)
print(
    "Classification Report: \n",
    classification_report(y_test, y_pred),
)


In [None]:
# Hyper-parameter grid for the KNN classifier.
#
# Only parameters that can change the model's predictions are searched:
#   * n_neighbors - odd values 3, 5, ..., 29
#   * weights     - uniform vs. distance-weighted voting
#   * metric      - 'minkowski' is omitted because, with the default p=2, it is
#                   the same distance as 'euclidean'
#
# 'algorithm', 'leaf_size' and 'n_jobs' are deliberately NOT searched: they
# only influence the speed/memory of the neighbour lookup, not which
# neighbours are found. Including them multiplied the search from
# 56 to 6720 candidates (x5 folds) without being able to improve the score,
# and a per-estimator n_jobs=-1 nested inside the grid search's own
# n_jobs=-1 oversubscribes the CPU.
knn_parameters = {
    'n_neighbors': np.arange(3, 30, 2),
    'weights': ['uniform', 'distance'],
    'metric': ['euclidean', 'manhattan'],
}

# 5-fold cross-validated search scored by accuracy; the search itself is
# parallelised across all cores.
knn_grid = GridSearchCV(knn, knn_parameters, cv=5, scoring='accuracy', verbose=1, n_jobs=-1)

# NOTE: `fit` returns the fitted GridSearchCV object itself, so despite its
# name `knn_best_params` is the search object (same as `knn_grid`), not a dict
# of parameters. The name is kept for compatibility; use
# `knn_grid.best_params_` for the actual winning parameters.
knn_best_params = knn_grid.fit(X_train, y_train)

# print("Best Score: ", knn_grid.best_score_)
# print("Best Params: ", knn_grid.best_params_)
# print("Best Estimator: ", knn_grid.best_estimator_)

# Estimator with the best mean CV score, refit on the full training split
# (GridSearchCV's default refit=True).
knn_best_model = knn_grid.best_estimator_

In [None]:
# Train the tuned estimator on the training split and predict the held-out
# split. (`fit` returns the estimator, so the calls are chained.)
y_pred_final = knn_best_model.fit(X_train, y_train).predict(X_test)

# Same three metrics as the baseline, for direct comparison.
print(
    "Accuracy: ",
    accuracy_score(y_test, y_pred_final),
)
print(
    "Confusion Matrix: \n",
    confusion_matrix(y_test, y_pred_final),
)
print(
    "Classification Report: \n",
    classification_report(y_test, y_pred_final),
)


In [None]:
# Persist the tuned classifier for later inference.
jb.dump(knn_best_model, 'knn_model.pkl')

# The classifier was trained on MinMax-scaled features, so new inputs must be
# transformed with the *same fitted* scaler before calling `predict`.
# Save it next to the model; without it the pickled model cannot be used
# correctly on raw data.
jb.dump(scaler, 'knn_scaler.pkl')