In [41]:
# Required imports
import numpy as np
import pandas as pd
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle
from sklearn.metrics import classification_report, accuracy_score

In [42]:
def load_data(
    paths=(
        '../../data/dataset_annotated_v1.csv',
        '../../data/dataset_annotated_v2.csv',
        '../../data/dataset_annotated_v3.csv',
    ),
    feature_columns=("feature_1", "feature_2", "feature_3"),
    label_column="presence",
    test_size=0.33,
    random_state=321,
):
    """Load the annotated CSV files and return a reproducible train/test split.

    Every file in ``paths`` is read, its rows are shuffled, the feature columns
    are converted to a ``float32`` array and the label column to a 1-D array.
    The per-file arrays are stacked in the order given by ``paths`` and the
    result is split with ``train_test_split``.

    Parameters
    ----------
    paths : str or sequence of str, optional
        CSV file(s) to load. Defaults to the three annotated dataset versions.
    feature_columns : sequence of str, optional
        Names of the columns used as model inputs.
    label_column : str, optional
        Name of the column used as the target.
    test_size : float or int, optional
        Forwarded to ``train_test_split``.
    random_state : int or None, optional
        Seed used for BOTH the per-file shuffle and the train/test split, so
        that a fresh "Restart & Run All" yields the same split. Pass ``None``
        to get a different split on each call.

    Returns
    -------
    X_train, X_test, y_train, y_test : tuple of numpy.ndarray

    Raises
    ------
    ValueError
        If ``paths`` is empty.
    """
    # Accept a single path as a convenience; iterating a bare string would
    # otherwise try to open one file per character.
    paths = (paths,) if isinstance(paths, str) else tuple(paths)
    if not paths:
        raise ValueError("load_data() needs at least one CSV path")

    feature_columns = list(feature_columns)  # pandas needs a list for multi-column selection
    feature_parts = []
    label_parts = []
    for path in paths:
        # Seeded shuffle: without a seed the row order (and therefore the
        # content of the fixed-seed split below) changed on every run.
        frame = shuffle(pd.read_csv(path), random_state=random_state)
        feature_parts.append(frame[feature_columns].to_numpy().astype(np.float32))
        label_parts.append(frame[label_column].to_numpy())

    X = np.concatenate(feature_parts, axis=0)
    y = np.concatenate(label_parts, axis=None)  # axis=None flattens the labels to 1-D
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state
    )
    return X_train, X_test, y_train, y_test

In [43]:
def get_metrics(y_true, y_pred):
    """Print a classification report and the accuracy for a set of predictions.

    Parameters
    ----------
    y_true : array-like
        Ground-truth labels.
    y_pred : array-like
        Labels predicted by the model, in the same order as ``y_true``.

    Returns
    -------
    None
        The metrics are written to stdout only.
    """
    # Each metric is computed right before it is printed, so the report is
    # already on screen if the accuracy computation fails.
    print('Classification Report: ', classification_report(y_true, y_pred))
    print('Accuracy: ', accuracy_score(y_true, y_pred), "\n\n")

In [44]:
from imblearn.over_sampling import SMOTE, SMOTEN

# Oversample data 
def balance_data(xx, yy, ratio, random_state=321):
    """Oversample ``(xx, yy)`` with SMOTE and return the resampled pair.

    Parameters
    ----------
    xx : array-like
        Feature matrix to resample.
    yy : array-like
        Labels matching the rows of ``xx``.
    ratio : float, str, dict or callable
        Forwarded to ``SMOTE`` as ``sampling_strategy``.
    random_state : int or None, optional
        Seed forwarded to ``SMOTE``. SMOTE generates synthetic samples
        randomly, so an unseeded call yields a different training set (and
        different downstream metrics) on every run. Pass ``None`` to restore
        the unseeded behaviour.

    Returns
    -------
    tuple
        ``(xx_resampled, yy_resampled)`` as returned by ``SMOTE.fit_resample``.
    """
    smote = SMOTE(sampling_strategy=ratio, random_state=random_state)
    return smote.fit_resample(xx, yy)

In [45]:
# Model training
# Build the train/test split first; the four names below are notebook-level
# globals that the evaluation helpers in later cells read.
X_train, X_test, y_train, y_test = load_data()
# Oversample ONLY the training portion: X_test / y_test are not passed to
# balance_data, so the held-out rows stay as loaded. The 0.33 is balance_data's
# `ratio` argument, which it forwards to SMOTE as `sampling_strategy`.
X_train, y_train = balance_data(X_train, y_train, 0.33)
# k-nearest-neighbours classifier with k = 3.
model = KNeighborsClassifier(n_neighbors = 3)
# Kept as the cell's last expression so the notebook displays the fitted estimator.
model.fit(X_train, y_train)

KNeighborsClassifier(n_neighbors=3)

In [46]:
def knn_evaluate_test_data():
    """Score the fitted classifier on the held-out test split.

    Reads the notebook-level ``model``, ``X_test`` and ``y_test``, predicts
    labels for ``X_test`` and prints the metrics through ``get_metrics``.
    Returns ``None``.
    """
    # The notebook-level names are only read, never rebound, so no ``global``
    # declaration is required.
    predictions = model.predict(X_test)
    get_metrics(y_test, predictions)

In [47]:
def knn_evaluate_sample(x):
    """Predict the label of a single sample with the notebook-level ``model``.

    Parameters
    ----------
    x : array-like
        One sample; it is wrapped in an outer array so the classifier
        receives a batch containing exactly that one sample.

    Returns
    -------
    The first (and only) element of ``model.predict``'s output.
    """
    single_sample_batch = np.array([x])
    predictions = model.predict(single_sample_batch)
    return predictions[0]

In [48]:
knn_evaluate_test_data()

Classification Report:                precision    recall  f1-score   support

           0       0.79      0.67      0.73        67
           1       0.93      0.96      0.95       320

    accuracy                           0.91       387
   macro avg       0.86      0.82      0.84       387
weighted avg       0.91      0.91      0.91       387

Accuracy:  0.9121447028423773 




In [49]:
# knn_evaluate_sample(np.array([4, 4, 1]))