# KNN

KNeighborsRegressor

In [5]:
import numpy as np

# ml
from sklearn.model_selection import (
    KFold,
    cross_validate,
    GridSearchCV,
    RepeatedKFold,
    train_test_split,
)
from sklearn.neighbors import KNeighborsRegressor
from sklearn.preprocessing import StandardScaler
from utils.metrics import calculate_metrics, get_ccp_scoring, print_results_table
from utils.datasets import load_and_split_data

In [6]:
data_path = "../../data/processed/rdc_data_cleaned.csv"
X_raw, Y_raw = load_and_split_data(data_path, test_size=0.1, is_split=False)


scaler_x = StandardScaler()
scaler_y = StandardScaler()

X_scaled = scaler_x.fit_transform(X_raw)
y_scaled = scaler_y.fit_transform(Y_raw)

# 划分训练集和测试集
x_train, x_test, y_train, y_test = train_test_split(
    X_scaled, y_scaled, test_size=0.1, random_state=42
)

# 打印划分后的数据集大小
print("训练集大小:", len(x_train))
print("测试集大小:", len(x_test))

训练集大小: 350
测试集大小: 39


In [18]:
model_name = "KNN"

x_train, x_test, y_train, y_test = train_test_split(
    X_scaled, y_scaled, test_size=0.1, random_state=42
)

grid_search = GridSearchCV(
    KNeighborsRegressor(algorithm="auto", n_jobs=-1),
    param_grid=[
        {
            "algorithm": ["auto", "ball_tree", "kd_tree", "brute"],
            "n_neighbors": range(1, 10),
            "weights": ["uniform", "distance"],
            "p": [1, 2, 3, 4, 5],
            "leaf_size": range(1, 10),
        },
    ],
    scoring="neg_mean_absolute_error",
    cv=KFold(n_splits=5, shuffle=True, random_state=42),
    verbose=1,
    n_jobs=-1,
)
grid_search.fit(x_train, y_train)
calculate_metrics(
    scaler_y.inverse_transform(grid_search.best_estimator_.predict(x_test)),
    scaler_y.inverse_transform(y_test),
    print_metrics=True,
    title=model_name,
)
print("Best Parameters for", model_name, ":", grid_search.best_params_)
print("Best Score for", model_name, ":", grid_search.best_score_)
print("=" * 47 + "\n" * 2)

Fitting 5 folds for each of 3240 candidates, totalling 16200 fits


Best Parameters for KNN : {'algorithm': 'auto', 'leaf_size': 1, 'n_neighbors': 6, 'p': 1, 'weights': 'distance'}
Best Score for KNN : -0.3330865211289627


