In [1]:
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

import numpy as np
import json
import pandas as pd

In [2]:
# Load the feature rows (x_vector) and their labels (y_vector) from JSON.
with open('x_vector.json', 'r') as file:
    x_vector = json.load(file)

with open('y_vector.json', 'r') as file:
    y_vector = json.load(file)

# Rows and labels are paired by position, so a length mismatch would silently
# misalign them (or raise an IndexError further down). Fail loudly instead.
if len(x_vector) != len(y_vector):
    raise ValueError(
        f"x_vector has {len(x_vector)} rows but y_vector has {len(y_vector)} labels"
    )

# Largest magnitude representable as float32; computed once instead of per value.
_FLOAT32_MAX = float(np.finfo(np.float32).max)


def _is_usable_feature(value):
    """Return True if `value` converts to a finite float within float32 range.

    The previous filter only inspected values that were already int/float, so
    its `value == 'nan'` test could never match and entries such as the strings
    'nan'/'inf' or None slipped through to the later float conversion.
    Coercing with float() first handles numbers and numeric strings uniformly.
    """
    try:
        number = float(value)
    except (TypeError, ValueError, OverflowError):
        # None, non-numeric strings, nested containers, or ints too large for a float.
        return False
    return bool(np.isfinite(number)) and abs(number) <= _FLOAT32_MAX


# Keep only the rows in which every feature value is usable.
filtered_indices = [
    i for i, sublist in enumerate(x_vector)
    if all(_is_usable_feature(value) for value in sublist)
]

x_vector_filtered = [x_vector[i] for i in filtered_indices]
y_vector_filtered = [y_vector[i] for i in filtered_indices]


In [3]:
# Hold out 20% of the filtered rows for evaluation; the fixed seed keeps the
# split reproducible across kernel restarts.
x_train, x_test, y_train, y_test = train_test_split(
    x_vector_filtered,
    y_vector_filtered,
    test_size=0.20,
    random_state=42,
)


In [4]:
# Convert each split from a Python list to a float NumPy array.
# The right-hand tuple is built from the current values before any name is
# rebound, so the assignment order does not matter.
x_train, y_train, x_test, y_test = (
    np.array(split).astype(float)
    for split in (x_train, y_train, x_test, y_test)
)

In [5]:
# Baseline k-NN: 5 neighbours, uniform vote, Minkowski distance with p=2.
# fit() returns the estimator itself, so chaining keeps knn_clf bound to the
# fitted classifier.
knn_clf = KNeighborsClassifier(n_neighbors=5, weights='uniform', p=2).fit(x_train, y_train)

# Score the held-out split.
y_pred = knn_clf.predict(x_test)
accuracy = accuracy_score(y_test, y_pred)

print("Test Accuracy:", accuracy)
# sep="\n" puts each header on its own line directly above its table.
print("\nClassification Report:", classification_report(y_test, y_pred), sep="\n")
print("Confusion Matrix:", confusion_matrix(y_test, y_pred), sep="\n")


Test Accuracy: 0.7357723577235772

Classification Report:
              precision    recall  f1-score   support

         0.0       0.81      0.79      0.80       164
         1.0       0.60      0.62      0.61        82

    accuracy                           0.74       246
   macro avg       0.70      0.71      0.71       246
weighted avg       0.74      0.74      0.74       246

Confusion Matrix:
[[130  34]
 [ 31  51]]
