# Setup

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.neighbors import KNeighborsRegressor
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import mean_absolute_error
from joblib import dump

In [None]:
# Read the three training feature arrays and the target array from ../data.
# The files are loaded in the order listed and bound to the names on the left.
(
    X_train,
    X_train_all_indices,
    X_train_selected_indices,
    y_train,
) = (
    np.load(npy_path)
    for npy_path in (
        '../data/X_train.npy',
        '../data/X_train_all_indices.npy',
        '../data/X_train_selected_indices.npy',
        '../data/y_train.npy',
    )
)

In [None]:
# Standardise every feature array with its own StandardScaler. Each scaler is
# fitted on the array it later transforms, and the scaled result replaces the
# raw array under the same name.
# NOTE(review): the scalers see the complete training arrays before the
# cross-validated grid search below, so validation folds influence the scaling
# statistics - consider a Pipeline if that matters for model selection.
# NOTE(review): nothing visible in this notebook persists the scalers, while
# the fitted models are dumped to disk - confirm how inference reproduces the
# same scaling.
scaler_train = StandardScaler().fit(X_train)
scaler_train_all_indices = StandardScaler().fit(X_train_all_indices)
scaler_train_selected_indices = StandardScaler().fit(X_train_selected_indices)

X_train = scaler_train.transform(X_train)
X_train_all_indices = scaler_train_all_indices.transform(X_train_all_indices)
X_train_selected_indices = scaler_train_selected_indices.transform(X_train_selected_indices)

# Model Training

In [None]:
# Base estimator whose hyper-parameters are tuned by the grid searches.
model = KNeighborsRegressor()

# First search: small to medium neighbourhood sizes, both weighting schemes
# and both Minkowski powers (p=1 Manhattan, p=2 Euclidean).
# leaf_size is deliberately not searched: it is only forwarded to the
# BallTree/KDTree index and affects build/query speed and memory, not the
# predictions, so every leaf_size value would score the same while tripling
# the number of fits (16 candidates x 5 folds instead of 48 x 5).
param_grid = {
    'n_neighbors': [3, 7, 15, 30],
    'weights': ['uniform', 'distance'],
    'p': [1, 2],
}

# 5-fold cross-validation on all cores; candidates are ranked by negated mean
# absolute error, so a higher score means a lower MAE.
grid_search = GridSearchCV(
    model,
    param_grid,
    cv=5,
    n_jobs=-1,
    verbose=1,
    scoring='neg_mean_absolute_error',
)
grid_search.fit(X_train, y_train)
print(f"Best parameters: {grid_search.best_params_}")

In [None]:
# Second search: larger neighbourhood sizes (30-60) with the same weighting
# schemes and Minkowski powers as before.
# leaf_size is deliberately not searched: it is only forwarded to the
# BallTree/KDTree index and affects build/query speed and memory, not the
# predictions, so every leaf_size value would score the same while tripling
# the number of fits (16 candidates x 5 folds instead of 48 x 5).
param_grid = {
    'n_neighbors': [30, 40, 50, 60],
    'weights': ['uniform', 'distance'],
    'p': [1, 2],
}

# Reuses the `model` estimator created in an earlier cell; 5-fold CV on all
# cores, ranked by negated mean absolute error.
grid_search = GridSearchCV(
    model,
    param_grid,
    cv=5,
    n_jobs=-1,
    verbose=1,
    scoring='neg_mean_absolute_error',
)
grid_search.fit(X_train, y_train)
print(f"Best parameters: {grid_search.best_params_}")

In [None]:
# Train the KNN regressor on X_train with fixed hyper-parameters and store the
# fitted estimator under ../models. fit() returns the estimator itself, so the
# chained call leaves the fitted model bound to `model`.
model = KNeighborsRegressor(
    n_neighbors=30,
    weights='distance',
    p=1,
    leaf_size=3,
).fit(X_train, y_train)
dump(model, '../models/knn_10_bands.joblib')

In [None]:
# Train a KNN regressor with the same fixed hyper-parameters on
# X_train_all_indices and store the fitted estimator under ../models.
# fit() returns the estimator itself, so the chained call leaves the fitted
# model bound to `model_all_indices`.
model_all_indices = KNeighborsRegressor(
    n_neighbors=30,
    weights='distance',
    p=1,
    leaf_size=3,
).fit(X_train_all_indices, y_train)
dump(model_all_indices, '../models/knn_all_bands.joblib')

In [None]:
# Train a KNN regressor with the same fixed hyper-parameters on
# X_train_selected_indices and store the fitted estimator under ../models.
# fit() returns the estimator itself, so the chained call leaves the fitted
# model bound to `model_selected_indices`.
model_selected_indices = KNeighborsRegressor(
    n_neighbors=30,
    weights='distance',
    p=1,
    leaf_size=3,
).fit(X_train_selected_indices, y_train)
dump(model_selected_indices, '../models/knn_selected_bands.joblib')