In [None]:
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.neighbors import KNeighborsRegressor
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
import numpy as np
import json
import pandas as pd

In [None]:
# Load the feature vectors, the target values and the matching identifiers
# from their JSON files. The encoding is stated explicitly so decoding does
# not depend on the platform's default text encoding.
with open('./x_vector_density.json', 'r', encoding='utf-8') as fh:
    x_vector = json.load(fh)

with open('./y_vector_density.json', 'r', encoding='utf-8') as fh:
    y_vector = json.load(fh)

with open('./refcode_list_density.json', 'r', encoding='utf-8') as fh:
    refcode_list = json.load(fh)

# The three containers are indexed with the same positions further down, so a
# length mismatch would misalign features, targets and identifiers.
if not (len(x_vector) == len(y_vector) == len(refcode_list)):
    raise ValueError(
        "Input length mismatch: "
        f"x_vector={len(x_vector)}, y_vector={len(y_vector)}, "
        f"refcode_list={len(refcode_list)}"
    )

In [None]:
# Largest magnitude a feature may have and still be kept.
# NOTE(review): the float32 limit is carried over from the original filter;
# presumably it mirrors a downstream float32 validation - confirm.
_FLOAT32_MAX = np.finfo(np.float32).max


def _is_usable_feature(value):
    """Return True if ``value`` converts to a finite float within float32 range.

    Anything that cannot be converted with ``float()`` (e.g. ``None``,
    non-numeric strings, nested lists, integers too large for a float) is
    rejected, as are NaN and +/-inf, including their string spellings such
    as ``'nan'``. The previous check only inspected ``int``/``float`` values,
    so its ``value == 'nan'`` comparison could never match and such entries
    slipped through to the float conversion later in the notebook.
    """
    try:
        number = float(value)
    except (TypeError, ValueError, OverflowError):
        return False
    return bool(np.isfinite(number)) and abs(number) <= _FLOAT32_MAX


# Positions of the rows whose every feature value is usable.
filtered_indices = [
    i for i, sublist in enumerate(x_vector)
    if all(_is_usable_feature(value) for value in sublist)
]

# Apply the same row selection to features, targets and identifiers so the
# three lists stay aligned.
x_vector_filtered = [x_vector[i] for i in filtered_indices]
y_vector_filtered = [y_vector[i] for i in filtered_indices]
refcode_list_filtered = [refcode_list[i] for i in filtered_indices]

In [None]:
# Hold out 25% of the filtered rows for testing; the fixed random_state keeps
# the shuffle, and therefore the split, the same on every run.
x_train, x_test, y_train, y_test = train_test_split(
    x_vector_filtered,
    y_vector_filtered,
    test_size=0.25,
    random_state=42,
)


In [None]:
# Turn each split into a NumPy array of floats. The order of the names on the
# left matches the order of the tuple being iterated on the right.
x_train, y_train, x_test, y_test = (
    np.array(part).astype(float)
    for part in (x_train, y_train, x_test, y_test)
)

In [None]:
def _report_metrics(split_name, y_true, y_pred):
    """Print and return MSE, R2 and MAE for one data split.

    Parameters
    ----------
    split_name : str
        Label placed at the start of every printed line (e.g. "Train").
    y_true : array-like
        Reference target values.
    y_pred : array-like
        Model predictions for the same samples.

    Returns
    -------
    tuple
        ``(mse, r2, mae)`` as returned by the scikit-learn metric functions.
    """
    mse = mean_squared_error(y_true, y_pred)
    r2 = r2_score(y_true, y_pred)
    mae = mean_absolute_error(y_true, y_pred)
    print(f"{split_name} Mean Squared Error:", mse)
    print(f"{split_name} R-squared (R2):", r2)
    print(f"{split_name} Mean Absolute Error (MAE):", mae)
    return mse, r2, mae


# k-nearest-neighbours regressor: 9 neighbours, p=1 (Manhattan distance with
# the default Minkowski metric), neighbours weighted by inverse distance.
knn_model_exp = KNeighborsRegressor(n_neighbors=9, p=1, weights='distance')

knn_model_exp.fit(x_train, y_train)

# NOTE(review): with weights='distance', a training sample queried against the
# fitted model is its own zero-distance neighbour, so training predictions
# reproduce the training targets (unless identical feature rows carry different
# targets). The train metrics are therefore expected to look near-perfect and
# say little about fit quality; rely on the test metrics below.
y_train_pred_exp = knn_model_exp.predict(x_train)
train_mse, train_r2, train_mae = _report_metrics("Train", y_train, y_train_pred_exp)

# Held-out performance.
y_test_pred_exp = knn_model_exp.predict(x_test)
test_mse, test_r2, test_mae = _report_metrics("Test", y_test, y_test_pred_exp)
