In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, mean_squared_error
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier, KNeighborsRegressor
from k_nearest_neighbors import KNearestNeighborsClassifier, KNearestNeighborsRegressor

### Classifier

In [2]:
# Load the churn data; the first CSV column becomes the row index.
df = pd.read_csv("../data/churn_data.csv", index_col=[0])

# "Exited" is the target and every remaining column is a feature.
# y is selected with double brackets, so it stays a one-column DataFrame.
y = df[["Exited"]]
X = df.drop(columns=["Exited"])

# Hold out 33% of the rows; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=42, test_size=0.33
)

# Fit the scaler on the training features only, then apply that same
# transformation to both splits. Each result is wrapped back into a
# DataFrame carrying the original column labels and row index.
scaler = StandardScaler().fit(X_train)
X_train, X_test = (
    pd.DataFrame(scaler.transform(part), columns=part.columns, index=part.index)
    for part in (X_train, X_test)
)

# Keep only the first 200 held-out rows for evaluation.
# NOTE(review): presumably to keep the custom KNN's prediction time short — confirm.
reduced_X_test, reduced_y_test = X_test.iloc[:200].copy(), y_test.iloc[:200].copy()

In [3]:
# Fit the project's own k-NN classifier with k=5 and predict labels for the
# reduced test set.
knn = KNearestNeighborsClassifier(k=5)
knn.fit(X_train, y_train)
y_pred = knn.predict(reduced_X_test)

# Print each metric, rounded to three decimals, in a fixed order.
for label, scorer in (
    ("Accuracy", accuracy_score),
    ("Precision", precision_score),
    ("Recall", recall_score),
):
    print(f"{label}: {round(scorer(reduced_y_test, y_pred), 3)}")

Accuracy: 0.775
Precision: 0.765
Recall: 0.788


In [4]:
# scikit-learn's k-NN classifier with k=5, Euclidean distance (Minkowski
# with p=2) and brute-force neighbor search, evaluated on the same reduced
# test set as the custom classifier.
sk = KNeighborsClassifier(n_neighbors=5, metric="minkowski", p=2, algorithm="brute")

# y_train is a one-column DataFrame. scikit-learn classifiers expect a 1-D
# target of shape (n_samples,) and emit a DataConversionWarning when handed
# a column vector, so flatten it before fitting. The fitted model and its
# predictions are the same; only the warning goes away.
sk.fit(X_train, np.ravel(y_train))
y_sk = sk.predict(reduced_X_test)

# Report each metric rounded to three decimals.
print(f"Accuracy: {round(accuracy_score(reduced_y_test, y_sk), 3)}")
print(f"Precision: {round(precision_score(reduced_y_test, y_sk), 3)}")
print(f"Recall: {round(recall_score(reduced_y_test, y_sk), 3)}")

  return self._fit(X, y)


Accuracy: 0.79
Precision: 0.782
Recall: 0.798


### Regressor

In [5]:
# Load the admissions data; the first CSV column becomes the row index.
df = pd.read_csv("../data/adm_data.csv", index_col=[0])

# The target column name ends with a space in the source file, so the
# trailing blank inside the string is intentional.
# y is selected with double brackets, so it stays a one-column DataFrame.
y = df[["Chance of Admit "]]
X = df.drop(columns=["Chance of Admit "])

# Hold out 33% of the rows; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=42, test_size=0.33
)

# Fit the scaler on the training features only, then apply that same
# transformation to both splits. Each result is wrapped back into a
# DataFrame carrying the original column labels and row index.
scaler = StandardScaler().fit(X_train)
X_train, X_test = (
    pd.DataFrame(scaler.transform(part), columns=part.columns, index=part.index)
    for part in (X_train, X_test)
)

In [6]:
# Fit the project's own k-NN regressor with k=5 and predict on the full
# test split.
knn = KNearestNeighborsRegressor(k=5)
knn.fit(X_train, y_train)
y_pred = knn.predict(X_test)

# Mean squared error between the held-out targets and the predictions,
# printed unrounded.
knn_mse = mean_squared_error(y_test, y_pred)
print(f"Mean Squared Error: {knn_mse}")

Mean Squared Error: 0.005638030303030301


In [7]:
# scikit-learn's k-NN regressor with k=5, Euclidean distance (Minkowski
# with p=2) and brute-force neighbor search. fit() returns the estimator
# itself, so construction and fitting can be chained.
sk = KNeighborsRegressor(
    n_neighbors=5,
    metric="minkowski",
    p=2,
    algorithm="brute",
).fit(X_train, y_train)
y_sk = sk.predict(X_test)

# Mean squared error between the held-out targets and the predictions,
# printed unrounded.
sk_mse = mean_squared_error(y_test, y_sk)
print(f"Mean Squared Error: {sk_mse}")

Mean Squared Error: 0.005638030303030301
