# KNN Regression — Diabetes Notebook

This notebook mirrors the scripted KNN regression pipeline for exploratory analysis, diagnostics, and documentation. Run the cells in order to reproduce the persisted artefacts and validation metrics.

In [None]:
"""Bootstrap the notebook environment to align with src/."""
import sys
from pathlib import Path

# Resolve the directory one level above the current working directory and make
# sure it is on the import path, so the `src` package imported by this notebook
# can be found. The entry is appended only if it is not already present.
PROJECT_ROOT = Path.cwd().resolve().parent
_root_entry = str(PROJECT_ROOT)
if _root_entry not in sys.path:
    sys.path.append(_root_entry)

import pandas as pd
from IPython.display import display
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

from src.config import CONFIG as KNN_CONFIG, KNNRegressionConfig
from src.data import load_dataset, build_features, train_validation_split
from src.pipeline import DiabetesKNNPipeline


In [None]:
# Bind the shared project configuration and load the dataset through the
# project helper. `config` and `df` are reused by the cells that follow.
config: KNNRegressionConfig = KNN_CONFIG
df = load_dataset(config)
# Quick sanity check: preview the first rows and report the row count.
display(df.head())
print(f'Total rows: {len(df)}')

In [None]:
# Build the modelling inputs from the loaded frame (presumably the feature
# matrix and target vector, going by the names).
# NOTE(review): X and y are never used in this notebook. train_validation_split()
# below receives only `config`, so it presumably reloads/rebuilds the data
# itself. Confirm against src.data whether it should take X and y instead.
X, y = build_features(df, config)
X_train, X_val, y_train, y_val = train_validation_split(config)
# Construct the project pipeline from the same config and fit it.
# NOTE(review): train() is called without the split produced above; verify that
# its internal split matches X_val/y_val, otherwise the scores computed on
# X_val later may include rows the model was trained on.
pipeline = DiabetesKNNPipeline(config)
metrics = pipeline.train()
# Bare expression: the notebook renders the value returned by train() as the
# cell output.
metrics

In [None]:
# Predict on X_val with the object held in `pipeline.pipeline`, then report
# R2, RMSE (square root of the mean squared error) and MAE against y_val,
# each on its own line and formatted to three decimal places.
y_pred = pipeline.pipeline.predict(X_val)  # type: ignore[arg-type]
print(
    f'R2: {r2_score(y_val, y_pred):.3f}',
    f'RMSE: {(mean_squared_error(y_val, y_pred) ** 0.5):.3f}',
    f'MAE: {mean_absolute_error(y_val, y_pred):.3f}',
    sep='\n',
)