KNN implementation

In [39]:
# Imports
import data_preprocessing as dp
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import confusion_matrix, accuracy_score
from sklearn.model_selection import GridSearchCV

In [40]:
# Run the project's preprocessing routine on the training CSV.
# NOTE(review): the result is indexed with `.iloc` below, so it is presumably
# a pandas DataFrame - confirm against data_preprocessing.
data = dp.data_preprocessing('project_train.csv')

# Every column except the last becomes the feature matrix; the last column
# becomes the label vector.
X, y = data.iloc[:, :-1].values, data.iloc[:, -1].values

# Hold out 20% of the rows as a test set; the fixed random_state keeps the
# split identical across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)

In [41]:
# KNN implementation that tunes the number of neighbors
# with a cross-validated grid search (GridSearchCV)

def knn(X_train, y_train, X_test, y_test, k_values=None, cv=10):
    """Tune a K-nearest-neighbours classifier and evaluate it on a test set.

    The number of neighbours is selected by a cross-validated grid search
    (scored on accuracy) over the training data. The best model found is
    then used to predict ``X_test`` and is scored against ``y_test``.

    Parameters
    ----------
    X_train, y_train
        Training features and labels; passed to ``GridSearchCV.fit``.
    X_test, y_test
        Held-out features and labels used only for the final evaluation.
    k_values : iterable of int, optional
        Candidate values for ``n_neighbors``. Defaults to the odd numbers
        1, 3, ..., 29.
    cv : optional
        Forwarded to ``GridSearchCV`` as its ``cv`` argument. Defaults to 10.

    Returns
    -------
    tuple
        ``(cm, accuracy)`` where ``cm`` is the result of
        ``confusion_matrix(y_test, y_pred)`` and ``accuracy`` is the result
        of ``accuracy_score(y_test, y_pred)``.
    """
    if k_values is None:
        k_values = range(1, 31, 2)
    param_grid = {'n_neighbors': list(k_values)}

    # Search for the best number of neighbours on the training data only.
    grid_search = GridSearchCV(KNeighborsClassifier(), param_grid, cv=cv, scoring='accuracy')
    grid_search.fit(X_train, y_train)

    # With the default refit=True, GridSearchCV has already re-trained a
    # classifier with the best parameters on all of X_train, so it is reused
    # here instead of fitting an identical model a second time.
    best_model = grid_search.best_estimator_
    y_pred = best_model.predict(X_test)

    # Evaluate the predictions on the held-out data.
    cm = confusion_matrix(y_test, y_pred)
    accuracy = accuracy_score(y_test, y_pred)

    return cm, accuracy

In [42]:
# Tune and evaluate the KNN classifier on the train/test split, then report
# the confusion matrix followed by the test-set accuracy.
cm, accuracy = knn(
    X_train=X_train,
    y_train=y_train,
    X_test=X_test,
    y_test=y_test,
)
print(cm)
print('Accuracy:', accuracy)

[[50  3]
 [14 34]]
Accuracy: 0.8316831683168316
