### IMPORTS

In [None]:
# IMPORT ALL LIBRARIES AND PRINT THEIR VERSIONS SO RESULTS CAN BE REPRODUCED EXACTLY LATER

import sys                                           # STANDARD LIB TO ACCESS PYTHON RUNTIME DETAILS
import platform                                      # STANDARD LIB TO GET OS/PLATFORM INFORMATION
import warnings                                      # STANDARD LIB TO CONTROL WARNING MESSAGES
from pathlib import Path                             # STANDARD LIB FOR SAFE, CROSS-PLATFORM PATH HANDLING
from typing import Dict, Any, Tuple                  # TYPE HINTS FOR CLARITY

import numpy as np                                   # NUMERICAL COMPUTING
import pandas as pd                                  # DATAFRAMES AND DATA MANIPULATION

import matplotlib                                    # BASE PLOTTING BACKEND
import matplotlib.pyplot as plt                      # STATEFUL PLOTTING INTERFACE
from matplotlib import rcParams                      # IMPORTS RCPARAMS TO SET GLOBAL STYLES
import seaborn as sns                                # STATISTICAL PLOTTING BUILT ON TOP OF MATPLOTLIB
import plotly                                        # INTERACTIVE PLOTTING (NOT USED HERE BUT KEPT FOR CONSISTENCY)

import sklearn                                       # SCIKIT-LEARN: CLASSIC MACHINE LEARNING
from sklearn.model_selection import train_test_split # TRAIN/TEST SPLITTING
from sklearn.model_selection import KFold, cross_val_score  # K-FOLD SPLITTING AND CV
from sklearn.pipeline import Pipeline                # TO BUILD CLEAN, REUSABLE PREPROCESSING PIPELINES
from sklearn.impute import SimpleImputer             # SIMPLE STRATEGIES TO IMPUTE MISSING VALUES
from sklearn.preprocessing import StandardScaler     # STANDARDIZATION (MEAN=0, STD=1)
from sklearn.compose import ColumnTransformer        # APPLY TRANSFORMS TO COLUMNS (WE USE ALL NUMERIC)
from sklearn.metrics import (                        # METRICS FOR REGRESSION
    r2_score,
    mean_absolute_error,
    mean_squared_error,
    explained_variance_score,
)
from sklearn.multioutput import MultiOutputRegressor    # WRAPS SINGLE-OUTPUT MODELS FOR MULTI-OUTPUT TARGETS
from sklearn.linear_model import LinearRegression       # LINEAR REGRESSION BASELINE
from sklearn.neighbors import KNeighborsRegressor       # KNN REGRESSOR
from sklearn.svm import SVR                             # SUPPORT VECTOR REGRESSION
from sklearn.compose import TransformedTargetRegressor  # FOR SCALING TARGETS

import optuna                                        # HYPERPARAMETER OPTIMIZATION
import shap                                          # MODEL INTERPRETABILITY 
import joblib                                        # SERIALIZATION (SAVING/LOADING MODELS)

import tensorflow as tf                              # TENSORFLOW FOR DEEP LEARNING
from tensorflow import keras                         # HIGH-LEVEL KERAS API
from tensorflow.keras import layers                  # COMMON LAYERS FOR NN

warnings.filterwarnings("ignore")                    # SILENCES EVERY PYTHON WARNING FOR THE REST OF THE SESSION

# REPORT INTERPRETER, OS, AND LIBRARY VERSIONS IN A FIXED ORDER (ONE LINE EACH)
for _label, _version in (
    ("PYTHON:", sys.version),                        # FULL INTERPRETER VERSION STRING
    ("OS:", platform.platform()),                    # OPERATING SYSTEM DESCRIPTION
    ("NUMPY:", np.__version__),
    ("PANDAS:", pd.__version__),
    ("SCIKIT-LEARN:", sklearn.__version__),
    ("MATPLOTLIB:", matplotlib.__version__),
    ("SEABORN:", sns.__version__),
    ("PLOTLY:", plotly.__version__),
    ("OPTUNA:", optuna.__version__),
    ("SHAP:", shap.__version__),
    ("TENSORFLOW:", tf.__version__),
):
    print(_label, _version)                          # SAME "LABEL: VALUE" LAYOUT FOR EVERY ENTRY

PYTHON: 3.12.10 (tags/v3.12.10:0cc8128, Apr  8 2025, 12:21:36) [MSC v.1943 64 bit (AMD64)]
OS: Windows-11-10.0.26100-SP0
NUMPY: 2.1.3
PANDAS: 2.3.1
SCIKIT-LEARN: 1.7.1
MATPLOTLIB: 3.10.3
SEABORN: 0.13.2
PLOTLY: 6.2.0
OPTUNA: 4.4.0
SHAP: 0.48.0
TENSORFLOW: 2.19.0


### PLOTTING SETUP

In [None]:
# GLOBAL PLOTTING STYLE TO KEEP ALL FIGURES CRISP, BOLD, UPPERCASE, AND HIGH-IMPACT      
# --- FIGURE GEOMETRY AND RESOLUTION ---
FIGSIZE = (8, 6)                                      # VALUE WRITTEN TO "figure.figsize"
DPI = 500                                             # USED FOR BOTH ON-SCREEN AND SAVED FIGURES

# --- STROKE WIDTHS ---
LINEWIDTH = 2.0                                       # DEFAULT WIDTH FOR PLOTTED LINES
GRID_LINEWIDTH = 1.5                                  # WIDTH FOR GRID LINES

# --- FONT SIZES ---
FONTSIZE_TITLE = 20                                   # AXES TITLES AND FIGURE SUPTITLES
FONTSIZE_LABEL = 16                                   # X/Y AXIS LABELS
FONTSIZE_TICK = 14                                    # TICK LABELS ON BOTH AXES
FONTSIZE_LEGEND = 14                                  # LEGEND ENTRIES AND LEGEND TITLE

def init_plot_style() -> None:
    """PUSH THE MODULE-LEVEL STYLE CONSTANTS INTO MATPLOTLIB'S GLOBAL RCPARAMS.

    TAKES NO ARGUMENTS AND RETURNS NOTHING; THE ONLY EFFECT IS MUTATING
    ``rcParams`` IN PLACE.
    """
    rcParams.update({
        # FIGURE SIZE AND RESOLUTION (SCREEN AND SAVED OUTPUT SHARE ONE DPI)
        "figure.figsize": FIGSIZE,
        "figure.dpi": DPI,
        "savefig.dpi": DPI,
        # BOLD TEXT EVERYWHERE
        "font.weight": "bold",
        "axes.titleweight": "bold",
        "axes.labelweight": "bold",
        # FONT SIZES FOR TITLES, LABELS, TICKS, AND LEGENDS
        "axes.titlesize": FONTSIZE_TITLE,
        "axes.labelsize": FONTSIZE_LABEL,
        "xtick.labelsize": FONTSIZE_TICK,
        "ytick.labelsize": FONTSIZE_TICK,
        "legend.fontsize": FONTSIZE_LEGEND,
        "legend.title_fontsize": FONTSIZE_LEGEND,
        # LINE AND GRID APPEARANCE
        "lines.linewidth": LINEWIDTH,
        "grid.linewidth": GRID_LINEWIDTH,
        "axes.grid": True,
        "grid.alpha": 0.3,
        # KEEP THE TOP AND RIGHT SPINES VISIBLE
        "axes.spines.top": True,
        "axes.spines.right": True,
    })

def boldify_axes(ax: plt.Axes,
                 title: str = "",
                 xlabel: str = "",
                 ylabel: str = "",
                 legend: bool = True) -> None:
    """MAKE THE TEXT ON ONE AXES BOLD AND UPPERCASE, AND THICKEN ITS SPINES.

    ARGS:
        ax: THE AXES TO RESTYLE (MODIFIED IN PLACE).
        title / xlabel / ylabel: OPTIONAL TEXTS; EACH IS APPLIED ONLY WHEN NON-EMPTY.
        legend: WHEN TRUE AND THE AXES ALREADY HAS A LEGEND, RESTYLE ITS TITLE AND ENTRIES.

    NOTE(REVIEW): TICK-LABEL TEXT IS SET DIRECTLY ON THE LABEL OBJECTS; MATPLOTLIB
    MAY REGENERATE THAT TEXT WHEN THE FIGURE IS DRAWN -- CONFIRM THE UPPERCASE SURVIVES.
    """
    # TITLE AND AXIS LABELS: (TEXT, SETTER, FONT SIZE); EMPTY TEXTS ARE SKIPPED
    for text, setter, size in (
        (title, ax.set_title, FONTSIZE_TITLE),
        (xlabel, ax.set_xlabel, FONTSIZE_LABEL),
        (ylabel, ax.set_ylabel, FONTSIZE_LABEL),
    ):
        if text:
            setter(text.upper(), weight="bold", size=size)

    # TICK LABELS: X AXIS FIRST, THEN Y AXIS
    for fetch_labels in (ax.get_xticklabels, ax.get_yticklabels):
        for label in fetch_labels():
            label.set_fontweight("bold")
            label.set_fontsize(FONTSIZE_TICK)
            label.set_text(str(label.get_text()).upper())

    # THICK FRAME AROUND THE PLOT
    for spine in ax.spines.values():
        spine.set_linewidth(2.0)

    # LEGEND HANDLING IS OPTIONAL AND ONLY APPLIES TO AN EXISTING LEGEND
    if not legend:
        return
    leg = ax.get_legend()
    if leg is None:
        return

    heading = leg.get_title()
    if heading is not None:
        heading.set_text(heading.get_text().upper())
        heading.set_fontweight("bold")
    for entry in leg.get_texts():
        entry.set_text(entry.get_text().upper())
        entry.set_fontweight("bold")
        entry.set_fontsize(FONTSIZE_LEGEND)

def finalize_figure(fig: plt.Figure, suptitle: str = "") -> None:
    """ADD AN OPTIONAL BOLD UPPERCASE SUPTITLE, THEN APPLY A TIGHT LAYOUT.

    ARGS:
        fig: THE FIGURE TO FINALIZE (MODIFIED IN PLACE).
        suptitle: FIGURE-LEVEL TITLE; SKIPPED WHEN EMPTY.
    """
    if suptitle:
        heading_style = {"fontsize": FONTSIZE_TITLE, "fontweight": "bold"}
        fig.suptitle(suptitle.upper(), **heading_style)
    # LAYOUT IS TIGHTENED WHETHER OR NOT A SUPTITLE WAS SET
    fig.tight_layout()

init_plot_style()                                                  # WRITES THE STYLE CONSTANTS INTO RCPARAMS WHEN THIS CELL RUNS

### CONFIG & REPRODUCIBILITY

In [None]:
# CONFIGURATION FOR REPRODUCIBILITY AND PROJECT-SPECIFIC CONSTANTS                       

# --- REPRODUCIBILITY: ONE SEED SHARED BY NUMPY AND TENSORFLOW ---
GLOBAL_SEED = 42
for _seed_fn in (np.random.seed, tf.random.set_seed):
    _seed_fn(GLOBAL_SEED)                            # NUMPY FIRST, THEN TENSORFLOW

# --- PATHS (ADAPT LOCALLY); THE CSV LIVES ONE DIRECTORY UP, UNDER DATA ---
DATA_CSV_PATH = Path("../DATA/DATA[P].csv")          # INPUT DATASET
RESULTS_DIR = Path("../DATA/")                       # OUTPUT DIRECTORY (SAME FOLDER AS THE DATA)
RESULTS_DIR.mkdir(parents=True, exist_ok=True)       # NO-OP WHEN THE DIRECTORY ALREADY EXISTS

# --- COLUMN ROLES ---
FEATURE_COLS = ["il", "iw", "pw", "ro"]              # MODEL INPUTS
TARGET_COLS = ["frequency", "return loss", "gain"]   # MULTI-OUTPUT TARGETS

# --- EXPERIMENT SETTINGS ---
TEST_SIZE = 0.2                                      # FRACTION HELD OUT FOR TESTING
N_SPLITS = 5                                         # NUMBER OF CROSS-VALIDATION FOLDS
OPTUNA_TRIALS = 100                                  # TRIALS PER OPTUNA STUDY
USE_TARGET_SCALING = True                            # STANDARDIZE TARGETS INSIDE THE MODEL WRAPPERS
USE_POLYNOMIALS = True                               # ADD POLYNOMIAL FEATURES WHERE A BUILDER SUPPORTS THEM
POLY_DEGREE = 2                                      # POLYNOMIAL DEGREE (CAN BE CHANGED LATER)

# --- ECHO THE KEY SETTINGS ---
for _label, _values in (
    ("GLOBAL SEED:", (GLOBAL_SEED,)),
    ("TRAIN-TEST SPLIT:", (1 - TEST_SIZE, TEST_SIZE)),
    ("K-FOLDS:", (N_SPLITS,)),
    ("OPTUNA TRIALS:", (OPTUNA_TRIALS,)),
):
    print(_label, *_values)

GLOBAL SEED: 42
TRAIN-TEST SPLIT: 0.8 0.2
K-FOLDS: 5
OPTUNA TRIALS: 100


### DATA READING

In [None]:
# SAFE CSV READER TO LOAD THE CLEANED DATA                                               

def safe_read_csv(path: Path) -> pd.DataFrame:
    """READ A CSV FILE INTO A DATAFRAME, FAILING WITH CLEAR ERRORS.

    ARGS:
        path: LOCATION OF THE CSV; A ``Path`` OR A PLAIN STRING IS ACCEPTED.

    RETURNS:
        THE PARSED, NON-EMPTY DATAFRAME.

    RAISES:
        FileNotFoundError: WHEN NOTHING EXISTS AT ``path``.
        ValueError: WHEN THE FILE HAS NO CONTENT AT ALL OR PARSES TO ZERO ROWS.
    """
    path = Path(path)                                # ALLOW STRING PATHS AS WELL AS PATH OBJECTS
    if not path.exists():
        raise FileNotFoundError(f"FILE NOT FOUND: {path}")
    try:
        df_local = pd.read_csv(path)
    except pd.errors.EmptyDataError as err:
        # A ZERO-BYTE FILE MAKES PANDAS RAISE BEFORE THE .empty CHECK BELOW CAN RUN;
        # REPORT IT WITH THE SAME MESSAGE AS THE HEADER-ONLY CASE
        raise ValueError("THE CSV FILE IS EMPTY.") from err
    if df_local.empty:
        raise ValueError("THE CSV FILE IS EMPTY.")
    return df_local

df = safe_read_csv(DATA_CSV_PATH)                    # LOADS THE CSV AT DATA_CSV_PATH; RAISES IF IT IS MISSING OR EMPTY
print("DATA LOADED:", df.shape)                      # PRINTS THE DATAFRAME SHAPE AS A QUICK SANITY CHECK

DATA LOADED: (1296, 7)


### TRAIN-TEST SPLIT & K-FOLD

In [13]:
# WE SPLIT ONCE INTO TRAIN/TEST (80/20), THEN USE K-FOLD ON THE TRAIN SET FOR CV        

# FEATURES AND TARGETS STAY AS DATAFRAMES (NOT NUMPY ARRAYS), SO COLUMN NAMES ARE KEPT
X, Y = df[FEATURE_COLS], df[TARGET_COLS]

# ONE SHUFFLED HOLD-OUT SPLIT, SEEDED SO THE SAME ROWS LAND IN EACH PART ON EVERY RUN
X_train, X_test, y_train, y_test = train_test_split(
    X,
    Y,
    shuffle=True,
    test_size=TEST_SIZE,
    random_state=GLOBAL_SEED,
)

print("TRAIN SHAPE (X, Y):", X_train.shape, y_train.shape)  # ROWS/COLUMNS OF THE TRAINING PART
print("TEST SHAPE  (X, Y):", X_test.shape, y_test.shape)    # ROWS/COLUMNS OF THE HELD-OUT PART

# SEEDED, SHUFFLED FOLD SPLITTER REUSED BY EVERY CROSS-VALIDATION CALL
kf = KFold(N_SPLITS, shuffle=True, random_state=GLOBAL_SEED)
print("K-FOLD READY.")

TRAIN SHAPE (X, Y): (1036, 4) (1036, 3)
TEST SHAPE  (X, Y): (260, 4) (260, 3)
K-FOLD READY.


### METRICS HELPERS

In [14]:
# UTILITY TO CALCULATE ALL REQUESTED METRICS IN ONE PLACE                                  

def regression_metrics(y_true: np.ndarray, y_pred: np.ndarray) -> Dict[str, float]:
    """COMPUTE R2, MAE, MSE, RMSE, AND EXPLAINED VARIANCE FOR MULTI-OUTPUT PREDICTIONS.

    EVERY SKLEARN METRIC IS AVERAGED UNIFORMLY ACROSS THE OUTPUT COLUMNS; RMSE IS
    THE SQUARE ROOT OF THAT AVERAGED MSE.

    ARGS:
        y_true: GROUND-TRUTH TARGETS.
        y_pred: PREDICTED TARGETS.

    RETURNS:
        DICT WITH KEYS "r2", "mae", "mse", "rmse", "explained_variance" (IN THAT ORDER).
    """
    averaging = {"multioutput": "uniform_average"}    # SHARED BY ALL FOUR SKLEARN CALLS
    scores = {
        "r2": r2_score(y_true, y_pred, **averaging),
        "mae": mean_absolute_error(y_true, y_pred, **averaging),
        "mse": mean_squared_error(y_true, y_pred, **averaging),
    }
    scores["rmse"] = np.sqrt(scores["mse"])           # DERIVED FROM THE AVERAGED MSE
    scores["explained_variance"] = explained_variance_score(y_true, y_pred, **averaging)
    return scores

def print_metrics(name: str, metrics: Dict[str, float]) -> None:
    """PRINT A HEADER LINE FOLLOWED BY ONE "KEY: VALUE" LINE PER METRIC.

    ARGS:
        name: MODEL LABEL, SHOWN UPPERCASED IN THE HEADER.
        metrics: METRIC NAME -> NUMERIC VALUE; VALUES ARE SHOWN WITH SIX DECIMALS.
    """
    header = f"=== {name.upper()} METRICS ==="
    print(header)
    for key in metrics:                               # DICT ORDER DECIDES PRINT ORDER
        print(f"{key.upper()}: {metrics[key]:.6f}")

### PREPROCESSING PIPELINES

In [None]:
# NUMERIC PIPELINE FOR FEATURES: IMPUTE MEDIAN + STANDARD SCALER                         

# FEATURE-SIDE CHAIN: FILL MISSING VALUES WITH THE COLUMN MEDIAN, THEN STANDARDIZE
numeric_transformer = Pipeline([
    ("imputer", SimpleImputer(strategy="median")),
    ("scaler", StandardScaler()),
])

# APPLY THAT CHAIN TO THE FEATURE COLUMNS; ANY OTHER COLUMN IS DROPPED
preprocessor = ColumnTransformer(
    [("num", numeric_transformer, FEATURE_COLS)],
    remainder="drop",
)

# STANDALONE TARGET SCALER, CREATED ONLY WHEN TARGET SCALING IS SWITCHED ON
# NOTE(REVIEW): THE MODEL BUILDERS CREATE THEIR OWN TARGET SCALERS -- CONFIRM WHERE THIS ONE IS USED
if USE_TARGET_SCALING:
    y_scaler = StandardScaler()
else:
    y_scaler = None

print("PREPROCESSORS READY. TARGET SCALING:", USE_TARGET_SCALING)

PREPROCESSORS READY. TARGET SCALING: True


### FEATURE ENGINEERING (POLYNOMIALS)

In [16]:
# WE WILL ADD POLYNOMIAL/INTERACTION TERMS **ONLY** FOR LINEAR AND DNN MODELS            

from sklearn.preprocessing import PolynomialFeatures  # POLYNOMIAL/INTERACTION TERM GENERATOR

# ONE SHARED TRANSFORMER WHEN THE FEATURE IS SWITCHED ON, OTHERWISE NONE
poly = (
    PolynomialFeatures(degree=POLY_DEGREE, include_bias=False)
    if USE_POLYNOMIALS
    else None
)

if poly is None:
    print("POLYNOMIAL FEATURES DISABLED.")
else:
    print("POLYNOMIAL FEATURES ENABLED. DEGREE:", POLY_DEGREE)

POLYNOMIAL FEATURES ENABLED. DEGREE: 2


### BASELINE MODELS

In [None]:
# WE DEFINE BUILDERS FOR: LINEAR REGRESSION, KNN, SVR (WITH TARGET SCALING)             

def build_linear_pipeline() -> Pipeline:
    """BUILD THE LINEAR-REGRESSION PIPELINE: PREPROCESSOR (+ OPTIONAL POLY) + REGRESSOR.

    THE REGRESSOR IS WRAPPED IN A TARGET-STANDARDIZING ``TransformedTargetRegressor``
    WHEN ``USE_TARGET_SCALING`` IS TRUE.

    RETURNS:
        A NEW, UNFITTED PIPELINE THAT OWNS ITS OWN COPIES OF THE SHARED TRANSFORMERS.
    """
    from sklearn.base import clone                    # LOCAL IMPORT KEEPS THIS CELL SELF-CONTAINED

    reg = LinearRegression()
    if USE_TARGET_SCALING:
        reg = TransformedTargetRegressor(
            regressor=reg, transformer=StandardScaler(with_mean=True, with_std=True)
        )
    # CLONE THE MODULE-LEVEL TRANSFORMERS: WITHOUT THIS EVERY BUILT PIPELINE HOLDS THE
    # SAME INSTANCES, SO FITTING ONE PIPELINE WOULD REFIT THE STEPS INSIDE ALL OTHERS
    steps = [("pre", clone(preprocessor))]
    if USE_POLYNOMIALS and poly is not None:
        steps.append(("poly", clone(poly)))
    steps.append(("reg", reg))
    return Pipeline(steps=steps)

def build_knn_pipeline(n_neighbors: int = 5, weights: str = "uniform", p: int = 2) -> Pipeline:
    """BUILD THE KNN REGRESSION PIPELINE: PREPROCESSOR + KNN REGRESSOR.

    ARGS:
        n_neighbors: NUMBER OF NEIGHBORS PASSED TO ``KNeighborsRegressor``.
        weights: NEIGHBOR WEIGHTING SCHEME PASSED TO ``KNeighborsRegressor``.
        p: MINKOWSKI POWER PARAMETER PASSED TO ``KNeighborsRegressor``.

    RETURNS:
        A NEW, UNFITTED PIPELINE; THE REGRESSOR IS WRAPPED FOR TARGET STANDARDIZATION
        WHEN ``USE_TARGET_SCALING`` IS TRUE.
    """
    from sklearn.base import clone                    # LOCAL IMPORT KEEPS THIS CELL SELF-CONTAINED

    base = KNeighborsRegressor(n_neighbors=n_neighbors, weights=weights, p=p)
    if USE_TARGET_SCALING:
        base = TransformedTargetRegressor(
            regressor=base, transformer=StandardScaler(with_mean=True, with_std=True)
        )
    # CLONE THE MODULE-LEVEL PREPROCESSOR SO THIS PIPELINE DOES NOT SHARE (AND REFIT)
    # THE SAME TRANSFORMER INSTANCE HELD BY OTHER PIPELINES
    steps = [("pre", clone(preprocessor)), ("reg", base)]
    return Pipeline(steps=steps)

def build_svr_pipeline(C: float = 1.0, epsilon: float = 0.1, gamma: str = "scale", kernel: str = "rbf") -> Pipeline:
    """BUILD THE SVR PIPELINE: PREPROCESSOR + ONE SVR PER TARGET COLUMN.

    ``SVR`` IS WRAPPED IN ``MultiOutputRegressor`` TO HANDLE THE MULTI-COLUMN TARGET.

    ARGS:
        C, epsilon, gamma, kernel: FORWARDED UNCHANGED TO ``SVR``.

    RETURNS:
        A NEW, UNFITTED PIPELINE; THE REGRESSOR IS WRAPPED FOR TARGET STANDARDIZATION
        WHEN ``USE_TARGET_SCALING`` IS TRUE.
    """
    from sklearn.base import clone                    # LOCAL IMPORT KEEPS THIS CELL SELF-CONTAINED

    svr = SVR(C=C, epsilon=epsilon, gamma=gamma, kernel=kernel)
    mo = MultiOutputRegressor(svr)
    if USE_TARGET_SCALING:
        mo = TransformedTargetRegressor(
            regressor=mo, transformer=StandardScaler(with_mean=True, with_std=True)
        )
    # CLONE THE MODULE-LEVEL PREPROCESSOR SO THIS PIPELINE DOES NOT SHARE (AND REFIT)
    # THE SAME TRANSFORMER INSTANCE HELD BY OTHER PIPELINES
    steps = [("pre", clone(preprocessor)), ("reg", mo)]
    return Pipeline(steps=steps)

### CROSS-VALIDATION EVALUATION

In [19]:
# HELPER TO RUN K-FOLD CV AND RETURN MEAN/STD R2, PLUS FIT ON FULL TRAIN AND EVAL TEST     

def evaluate_sklearn_pipeline(name: str, pipe: Pipeline) -> Dict[str, Any]:
    """CROSS-VALIDATE A PIPELINE ON THE TRAIN SET, THEN FIT IT AND SCORE IT ON THE TEST SET.

    ARGS:
        name: LABEL STORED UNDER "model" AND SHOWN IN THE PRINTED HEADER.
        pipe: PIPELINE TO EVALUATE; IT IS FITTED IN PLACE ON THE FULL TRAIN SET.

    RETURNS:
        DICT WITH "model", "cv_r2_mean", "cv_r2_std", FOLLOWED BY THE TEST-SET METRICS.
    """
    # K-FOLD R2 ON THE TRAINING DATA ONLY
    fold_r2 = cross_val_score(pipe, X_train, y_train, cv=kf, scoring="r2", n_jobs=-1)

    # REFIT ON ALL TRAINING ROWS AND SCORE THE HELD-OUT TEST ROWS
    pipe.fit(X_train, y_train)
    test_predictions = pipe.predict(X_test)
    test_metrics = regression_metrics(y_test, test_predictions)

    cv_mean = np.mean(fold_r2)
    cv_std = np.std(fold_r2)
    summary = {"model": name, "cv_r2_mean": cv_mean, "cv_r2_std": cv_std}
    summary.update(test_metrics)                      # TEST METRICS FOLLOW THE CV ENTRIES

    print_metrics(name, test_metrics)
    print(f"CV R2 MEAN: {cv_mean:.6f} | CV R2 STD: {cv_std:.6f}")
    return summary

### RUN BASELINES

In [20]:
# EVALUATE LINEAR, KNN, SVR BEFORE HYPERPARAMETER TUNING               

# BUILD THE THREE UNTUNED PIPELINES FIRST (DEFAULT HYPERPARAMETERS)
linear_pipe = build_linear_pipeline()
knn_pipe_default = build_knn_pipeline()
svr_pipe_default = build_svr_pipeline()

# EVALUATE THEM IN A FIXED ORDER, COLLECTING ONE RESULT DICT PER MODEL
results = []
for _name, _pipe in (
    ("linear_regression", linear_pipe),
    ("knn", knn_pipe_default),
    ("svr", svr_pipe_default),
):
    results.append(evaluate_sklearn_pipeline(_name, _pipe))

=== LINEAR_REGRESSION METRICS ===
R2: 0.449399
MAE: 2.406996
MSE: 18.508140
RMSE: 4.302109
EXPLAINED_VARIANCE: 0.449923
CV R2 MEAN: 0.519301 | CV R2 STD: 0.016814
=== KNN METRICS ===
R2: 0.693464
MAE: 1.704627
MSE: 9.739654
RMSE: 3.120842
EXPLAINED_VARIANCE: 0.694920
CV R2 MEAN: 0.671529 | CV R2 STD: 0.037887
=== SVR METRICS ===
R2: 0.516149
MAE: 1.910214
MSE: 17.967715
RMSE: 4.238834
EXPLAINED_VARIANCE: 0.530218
CV R2 MEAN: 0.605805 | CV R2 STD: 0.037346


### HYPERPARAMETER TUNING (OPTUNA)

In [21]:
# WE WILL OPTIMIZE KNN AND SVR WITH OPTUNA (100 TRIALS EACH)                              

def objective_knn(trial: optuna.trial.Trial) -> float:
    """OPTUNA OBJECTIVE FOR KNN: RETURN THE MEAN K-FOLD R2 OF ONE SAMPLED CONFIGURATION.

    ARGS:
        trial: OPTUNA TRIAL USED TO SAMPLE "n_neighbors", "weights", AND "p".

    RETURNS:
        MEAN CROSS-VALIDATED R2 ON THE TRAIN SET (HIGHER IS BETTER).
    """
    sampled = {
        "n_neighbors": trial.suggest_int("n_neighbors", 1, 50),
        "weights": trial.suggest_categorical("weights", ["uniform", "distance"]),
        "p": trial.suggest_int("p", 1, 2),            # 1 = MANHATTAN, 2 = EUCLIDEAN
    }
    candidate = build_knn_pipeline(**sampled)
    fold_scores = cross_val_score(candidate, X_train, y_train, cv=kf, scoring="r2", n_jobs=-1)
    return fold_scores.mean()

def objective_svr(trial: optuna.trial.Trial) -> float:
    """OPTUNA OBJECTIVE FOR SVR: RETURN THE MEAN K-FOLD R2 OF ONE SAMPLED CONFIGURATION.

    ARGS:
        trial: OPTUNA TRIAL USED TO SAMPLE "C", "epsilon", AND "gamma".

    RETURNS:
        MEAN CROSS-VALIDATED R2 ON THE TRAIN SET (HIGHER IS BETTER).
    """
    sampled = {
        "C": trial.suggest_float("C", 1e-3, 1e3, log=True),              # LOG-SCALE SEARCH
        "epsilon": trial.suggest_float("epsilon", 1e-4, 1.0, log=True),  # LOG-SCALE SEARCH
        "gamma": trial.suggest_categorical("gamma", ["scale", "auto"]),
    }
    candidate = build_svr_pipeline(kernel="rbf", **sampled)              # KERNEL IS FIXED TO RBF
    fold_scores = cross_val_score(candidate, X_train, y_train, cv=kf, scoring="r2", n_jobs=-1)
    return fold_scores.mean()

# BOTH STUDIES USE A SEEDED TPE SAMPLER: WITHOUT A SEED THE SAMPLER DRAWS DIFFERENT
# TRIALS ON EVERY RUN, SO THE "BEST" PARAMETERS WOULD NOT BE REPRODUCIBLE

print("STARTING OPTUNA STUDY: KNN")                                     # PRINTS STATUS
study_knn = optuna.create_study(                                        # CREATES STUDY FOR KNN
    direction="maximize",
    sampler=optuna.samplers.TPESampler(seed=GLOBAL_SEED),
)
study_knn.optimize(objective_knn, n_trials=OPTUNA_TRIALS, show_progress_bar=False)  # RUNS OPTIMIZATION
print("BEST KNN PARAMS:", study_knn.best_params)                        # PRINTS BEST PARAMS
best_knn_pipe = build_knn_pipeline(**study_knn.best_params)             # BUILDS PIPELINE WITH BEST PARAMS
results.append(evaluate_sklearn_pipeline("knn_optuna", best_knn_pipe))  # EVALUATES BEST KNN

print("STARTING OPTUNA STUDY: SVR")                                     # PRINTS STATUS
study_svr = optuna.create_study(                                        # CREATES STUDY FOR SVR
    direction="maximize",
    sampler=optuna.samplers.TPESampler(seed=GLOBAL_SEED),
)
study_svr.optimize(objective_svr, n_trials=OPTUNA_TRIALS, show_progress_bar=False)  # RUNS OPTIMIZATION
print("BEST SVR PARAMS:", study_svr.best_params)                        # PRINTS BEST PARAMS
best_svr_pipe = build_svr_pipeline(**study_svr.best_params)             # BUILDS PIPELINE WITH BEST PARAMS
results.append(evaluate_sklearn_pipeline("svr_optuna", best_svr_pipe))  # EVALUATES BEST SVR

[I 2025-07-26 23:52:01,981] A new study created in memory with name: no-name-e4253849-4f8c-4c0c-b382-636cd03f4e06
[I 2025-07-26 23:52:02,038] Trial 0 finished with value: 0.5825907986976862 and parameters: {'n_neighbors': 13, 'weights': 'uniform', 'p': 2}. Best is trial 0 with value: 0.5825907986976862.
[I 2025-07-26 23:52:02,079] Trial 1 finished with value: 0.5680052808013749 and parameters: {'n_neighbors': 19, 'weights': 'uniform', 'p': 2}. Best is trial 0 with value: 0.5825907986976862.
[I 2025-07-26 23:52:02,118] Trial 2 finished with value: 0.5409931046004661 and parameters: {'n_neighbors': 44, 'weights': 'distance', 'p': 2}. Best is trial 0 with value: 0.5825907986976862.
[I 2025-07-26 23:52:02,157] Trial 3 finished with value: 0.5416491940444963 and parameters: {'n_neighbors': 24, 'weights': 'uniform', 'p': 2}. Best is trial 0 with value: 0.5825907986976862.


STARTING OPTUNA STUDY: KNN


[I 2025-07-26 23:52:02,197] Trial 4 finished with value: 0.5408686395853896 and parameters: {'n_neighbors': 43, 'weights': 'uniform', 'p': 1}. Best is trial 0 with value: 0.5825907986976862.
[I 2025-07-26 23:52:02,237] Trial 5 finished with value: 0.593712881564845 and parameters: {'n_neighbors': 21, 'weights': 'distance', 'p': 2}. Best is trial 5 with value: 0.593712881564845.
[I 2025-07-26 23:52:02,297] Trial 6 finished with value: 0.6297984230346662 and parameters: {'n_neighbors': 19, 'weights': 'uniform', 'p': 1}. Best is trial 6 with value: 0.6297984230346662.
[I 2025-07-26 23:52:02,366] Trial 7 finished with value: 0.593712881564845 and parameters: {'n_neighbors': 21, 'weights': 'distance', 'p': 2}. Best is trial 6 with value: 0.6297984230346662.
[I 2025-07-26 23:52:02,414] Trial 8 finished with value: 0.4959509460496525 and parameters: {'n_neighbors': 50, 'weights': 'uniform', 'p': 2}. Best is trial 6 with value: 0.6297984230346662.
[I 2025-07-26 23:52:02,455] Trial 9 finished w

BEST KNN PARAMS: {'n_neighbors': 4, 'weights': 'distance', 'p': 1}
=== KNN_OPTUNA METRICS ===
R2: 0.684948
MAE: 1.720068
MSE: 9.639338
RMSE: 3.104728
EXPLAINED_VARIANCE: 0.685374
CV R2 MEAN: 0.718549 | CV R2 STD: 0.032647
STARTING OPTUNA STUDY: SVR


[I 2025-07-26 23:52:07,119] Trial 0 finished with value: 0.556381179549897 and parameters: {'C': 0.2122437330378445, 'epsilon': 0.00015728574453492995, 'gamma': 'auto'}. Best is trial 0 with value: 0.556381179549897.
[I 2025-07-26 23:52:07,327] Trial 1 finished with value: 0.10629657591042736 and parameters: {'C': 0.003099916534216559, 'epsilon': 0.0002958494046746417, 'gamma': 'auto'}. Best is trial 0 with value: 0.556381179549897.
[I 2025-07-26 23:52:08,383] Trial 2 finished with value: 0.6767173309960556 and parameters: {'C': 18.336992565505327, 'epsilon': 0.004970170788465608, 'gamma': 'auto'}. Best is trial 2 with value: 0.6767173309960556.
[I 2025-07-26 23:52:08,917] Trial 3 finished with value: 0.6321018821178799 and parameters: {'C': 4.562930309646663, 'epsilon': 0.00019049910083025553, 'gamma': 'auto'}. Best is trial 2 with value: 0.6767173309960556.
[I 2025-07-26 23:52:09,104] Trial 4 finished with value: 0.4756408054870446 and parameters: {'C': 0.036512886618333656, 'epsilon

BEST SVR PARAMS: {'C': 999.38636448995, 'epsilon': 0.009473880959163336, 'gamma': 'scale'}
=== SVR_OPTUNA METRICS ===
R2: 0.808053
MAE: 1.063798
MSE: 5.750019
RMSE: 2.397920
EXPLAINED_VARIANCE: 0.809794
CV R2 MEAN: 0.799152 | CV R2 STD: 0.017849
