In [1]:
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression, BayesianRidge
from sklearn.metrics import r2_score, mean_squared_error
from xgboost import XGBRegressor
from GA_UQ1 import GA_UQ

def evaluate_methods(X_train, y_train, X_test, y_test):
    # ===== FUQ (GA + XGBoost) =====
    model_fuq = XGBRegressor()
    model_fuq.fit(X_train, y_train)
    y_pred_fuq = model_fuq.predict(X_test)
    r2_fuq = r2_score(y_test, y_pred_fuq)
    mse_fuq = mean_squared_error(y_test, y_pred_fuq)
    rmse_fuq = mse_fuq ** 0.5

    # FUQ bounds (via GA)
    solution, _, _ = GA_UQ(list(y_pred_fuq), list(y_test))
    a, b = solution
    lower_fuq = y_pred_fuq - b
    upper_fuq = y_pred_fuq + a
    picp_fuq = np.mean((y_test >= lower_fuq) & (y_test <= upper_fuq))
    mpiw_fuq = np.mean(upper_fuq - lower_fuq)

    # ===== Z-Score RMSE-Based Interval =====
    z_score = 1.96  # 95% confidence interval
    lower_z = y_pred_fuq - z_score * rmse_fuq
    upper_z = y_pred_fuq + z_score * rmse_fuq
    picp_z = np.mean((y_test >= lower_z) & (y_test <= upper_z))
    mpiw_z = np.mean(upper_z - lower_z)

    # ===== XGBoost Quantile Regression =====
    model_lower = XGBRegressor(
        objective='reg:quantileerror',
        quantile_alpha=0.05,
        n_estimators=100,
        learning_rate=0.1
    )

    model_upper = XGBRegressor(
        objective='reg:quantileerror',
        quantile_alpha=0.95,
        n_estimators=100,
        learning_rate=0.1
    )

    model_lower.fit(X_train, y_train)
    model_upper.fit(X_train, y_train)

    lower_xgb = model_lower.predict(X_test)
    upper_xgb = model_upper.predict(X_test)
    picp_xgb = np.mean((y_test >= lower_xgb) & (y_test <= upper_xgb))
    mpiw_xgb = np.mean(upper_xgb - lower_xgb)

    return {
        "R2_XGBRegressor": r2_fuq,
        "RMSE_XGBRegressor": rmse_fuq,
        "PICP_FUQ": picp_fuq,
        "MPIW_FUQ": mpiw_fuq,
        "PICP_XGB": picp_xgb,
        "MPIW_XGB": mpiw_xgb,
        "PICP_Z": picp_z,
        "MPIW_Z": mpiw_z
    }


## Boston Housing

In [2]:
import numpy as np
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt

# Load the Boston Housing dataset
data = fetch_openml(data_id=531)

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
import numpy as np

X_train, X_test, y_train_full, y_test = train_test_split(data.data, data.target, test_size=0.2, random_state=42)

scaler_X = MinMaxScaler()

X_train_full = scaler_X.fit_transform(X_train)
X_test = scaler_X.transform(X_test)

# Ensure they are NumPy arrays (in case loaded as pandas objects)
X_train_full = np.array(X_train_full)
y_train_full = np.array(y_train_full)
X_test = np.array(X_test)
y_test = np.array(y_test)


In [None]:
### 1. Noise Addition
n_runs=5
noise_levels = [0.0, 0.4, 0.8, 1.0]
noise_results = []
for noise_level in noise_levels:
    metrics = []
    for run in range(n_runs):
        np.random.seed(42 + run)
        noise = np.random.normal(0, noise_level * np.std(y_train_full), size=len(y_train_full))
        y_train_noisy = y_train_full + noise
        metrics.append(evaluate_methods(X_train_full, y_train_noisy, X_test, y_test))
    avg = pd.DataFrame(metrics).mean().to_dict()
    avg['Noise_Level'] = noise_level
    noise_results.append(avg)
df_noise_boston = pd.DataFrame(noise_results)
df_noise_boston

If you do not want to mutate any gene, please set mutation_type=None.
If you do not want to mutate any gene, please set mutation_type=None.
If you do not want to mutate any gene, please set mutation_type=None.
If you do not want to mutate any gene, please set mutation_type=None.
If you do not want to mutate any gene, please set mutation_type=None.
If you do not want to mutate any gene, please set mutation_type=None.
If you do not want to mutate any gene, please set mutation_type=None.
If you do not want to mutate any gene, please set mutation_type=None.
If you do not want to mutate any gene, please set mutation_type=None.
If you do not want to mutate any gene, please set mutation_type=None.
If you do not want to mutate any gene, please set mutation_type=None.
If you do not want to mutate any gene, please set mutation_type=None.
If you do not want to mutate any gene, please set mutation_type=None.
If you do not want to mutate any gene, please set mutation_type=None.
If you do not want t

Unnamed: 0,R2_XGBRegressor,RMSE_XGBRegressor,PICP_FUQ,MPIW_FUQ,PICP_XGB,MPIW_XGB,PICP_Z,MPIW_Z,Noise_Level
0,0.905784,2.628542,0.941176,9.354553,0.784314,7.977828,0.941176,10.303882,0.0
1,0.836785,3.452885,0.958824,13.751529,0.892157,12.040995,0.958824,13.535311,0.4
2,0.644344,5.103493,0.972549,22.420949,0.919608,19.617367,0.94902,20.005693,0.8
3,0.476583,6.192246,0.956863,26.150464,0.939216,23.831825,0.941176,24.273603,1.0


In [4]:
### 1. Noise Addition
noise_levels = [0.0, 0.4, 0.8, 1.0]
noise_results = []
for noise_level in noise_levels:
    metrics = []
    for run in range(n_runs):
        np.random.seed(42 + run)
        noise = np.random.normal(0, noise_level * np.std(y_train_full), size=len(y_train_full))
        y_train_noisy = y_train_full + noise
        noise2 = np.random.normal(0, noise_level * np.std(y_test), size=len(y_test))
        y_test_noisy = y_test + noise2
        metrics.append(evaluate_methods(X_train_full, y_train_noisy, X_test, y_test_noisy))
    avg = pd.DataFrame(metrics).mean().to_dict()
    avg['Noise_Level'] = noise_level
    noise_results.append(avg)
df_noise = pd.DataFrame(noise_results)
df_noise

If you do not want to mutate any gene, please set mutation_type=None.
If you do not want to mutate any gene, please set mutation_type=None.
If you do not want to mutate any gene, please set mutation_type=None.
If you do not want to mutate any gene, please set mutation_type=None.
If you do not want to mutate any gene, please set mutation_type=None.
If you do not want to mutate any gene, please set mutation_type=None.
If you do not want to mutate any gene, please set mutation_type=None.
If you do not want to mutate any gene, please set mutation_type=None.
If you do not want to mutate any gene, please set mutation_type=None.
If you do not want to mutate any gene, please set mutation_type=None.
If you do not want to mutate any gene, please set mutation_type=None.
If you do not want to mutate any gene, please set mutation_type=None.
If you do not want to mutate any gene, please set mutation_type=None.
If you do not want to mutate any gene, please set mutation_type=None.
If you do not want t

Unnamed: 0,R2_XGBRegressor,RMSE_XGBRegressor,PICP_FUQ,MPIW_FUQ,PICP_XGB,MPIW_XGB,PICP_Z,MPIW_Z,Noise_Level
0,0.905784,2.628542,0.941176,9.345658,0.784314,7.977828,0.941176,10.303882,0.0
1,0.692629,5.08197,0.980392,23.160454,0.733333,12.040995,0.968627,19.921326,0.4
2,0.328879,8.964745,0.980392,39.625868,0.741176,19.617367,0.954902,35.141804,0.8
3,0.170792,11.039832,0.929412,39.294763,0.733333,23.831825,0.960784,43.276142,1.0


In [5]:
### 2. Data Reduction
fractions = [1.0, 0.7, 0.5, 0.2]
reduction_results = []
for frac in fractions:
    metrics = []
    for run in range(n_runs):
        np.random.seed(42 + run)
        size = int(frac * len(X_train_full))
        idx = np.random.choice(len(X_train_full), size=size, replace=False)
        metrics.append(evaluate_methods(X_train_full[idx], y_train_full[idx], X_test, y_test))
    avg = pd.DataFrame(metrics).mean().to_dict()
    avg['Train_Fraction'] = frac
    reduction_results.append(avg)
df_reduction = pd.DataFrame(reduction_results)
df_reduction

If you do not want to mutate any gene, please set mutation_type=None.
If you do not want to mutate any gene, please set mutation_type=None.
If you do not want to mutate any gene, please set mutation_type=None.
If you do not want to mutate any gene, please set mutation_type=None.
If you do not want to mutate any gene, please set mutation_type=None.
If you do not want to mutate any gene, please set mutation_type=None.
If you do not want to mutate any gene, please set mutation_type=None.
If you do not want to mutate any gene, please set mutation_type=None.
If you do not want to mutate any gene, please set mutation_type=None.
If you do not want to mutate any gene, please set mutation_type=None.
If you do not want to mutate any gene, please set mutation_type=None.
If you do not want to mutate any gene, please set mutation_type=None.
If you do not want to mutate any gene, please set mutation_type=None.
If you do not want to mutate any gene, please set mutation_type=None.
If you do not want t

Unnamed: 0,R2_XGBRegressor,RMSE_XGBRegressor,PICP_FUQ,MPIW_FUQ,PICP_XGB,MPIW_XGB,PICP_Z,MPIW_Z,Train_Fraction
0,0.905784,2.628542,0.941176,9.353838,0.784314,7.977828,0.941176,10.303882,1.0
1,0.84983,3.28306,0.933333,10.064398,0.694118,7.978528,0.966667,12.869596,0.7
2,0.793935,3.880161,0.911765,10.652688,0.645098,8.21427,0.964706,15.21023,0.5
3,0.683317,4.811538,0.909804,12.927684,0.670588,12.158981,0.956863,18.861227,0.2


In [6]:
### 3. Output Coarsening
from sklearn.preprocessing import KBinsDiscretizer

# --- Coarsening setup ---
coarsening_levels = [1.0, 0.5, .25]  # 100%, 50%, 25% precision
n_runs = 5
coarsening_results = []

# --- Quantile coarsening ---
def quantile_coarsening_with_midpoints(y, n_bins):
    kbin = KBinsDiscretizer(n_bins=n_bins, encode='ordinal', strategy='quantile')
    y_binned = kbin.fit_transform(y.reshape(-1, 1)).ravel()
    bin_edges = kbin.bin_edges_[0]
    # Replace each bin label with the midpoint of that bin
    y_midpoints = np.array([ (bin_edges[int(b)] + bin_edges[int(b)+1]) / 2 for b in y_binned ])
    return y_midpoints

# --- Run experiments ---
for c in coarsening_levels:
    metrics = []
    bins = max(2, int(15 * c))  # Keep bins reasonable
    for run in range(n_runs):
        np.random.seed(42 + run)
        y_train_coarse = quantile_coarsening_with_midpoints(y_train_full, n_bins=bins)
        y_test_coarse = quantile_coarsening_with_midpoints(y_test, n_bins=bins)
        metrics.append(evaluate_methods(X_train_full, y_train_coarse, X_test, y_test_coarse))
    avg = pd.DataFrame(metrics).mean().to_dict()
    avg['Coarsening_Factor'] = c
    coarsening_results.append(avg)

# --- Results ---
df_coarse = pd.DataFrame(coarsening_results)
df_coarse

If you do not want to mutate any gene, please set mutation_type=None.
If you do not want to mutate any gene, please set mutation_type=None.
If you do not want to mutate any gene, please set mutation_type=None.
If you do not want to mutate any gene, please set mutation_type=None.
If you do not want to mutate any gene, please set mutation_type=None.
If you do not want to mutate any gene, please set mutation_type=None.
If you do not want to mutate any gene, please set mutation_type=None.
If you do not want to mutate any gene, please set mutation_type=None.
If you do not want to mutate any gene, please set mutation_type=None.
If you do not want to mutate any gene, please set mutation_type=None.
If you do not want to mutate any gene, please set mutation_type=None.
If you do not want to mutate any gene, please set mutation_type=None.
If you do not want to mutate any gene, please set mutation_type=None.
If you do not want to mutate any gene, please set mutation_type=None.
If you do not want t

Unnamed: 0,R2_XGBRegressor,RMSE_XGBRegressor,PICP_FUQ,MPIW_FUQ,PICP_XGB,MPIW_XGB,PICP_Z,MPIW_Z,Coarsening_Factor
0,0.87653,2.84955,0.882353,9.843289,0.823529,27.076618,0.941176,11.170239,1.0
1,0.796097,3.94856,0.813725,9.434556,0.852941,26.423529,0.931373,15.47836,0.5
2,0.708213,5.532492,0.647059,7.003547,0.5,20.384434,0.911765,21.687361,0.25


In [7]:
print("\n=== Noise Addition ===")
print(df_noise)

print("\n=== Data Reduction ===")
print(df_reduction)

print("\n=== Output Coarsening ===")
print(df_coarse)



=== Noise Addition ===
   R2_XGBRegressor  RMSE_XGBRegressor  PICP_FUQ   MPIW_FUQ  PICP_XGB  \
0         0.905784           2.628542  0.941176   9.345658  0.784314   
1         0.692629           5.081970  0.980392  23.160454  0.733333   
2         0.328879           8.964745  0.980392  39.625868  0.741176   
3         0.170792          11.039832  0.929412  39.294763  0.733333   

    MPIW_XGB    PICP_Z     MPIW_Z  Noise_Level  
0   7.977828  0.941176  10.303882          0.0  
1  12.040995  0.968627  19.921326          0.4  
2  19.617367  0.954902  35.141804          0.8  
3  23.831825  0.960784  43.276142          1.0  

=== Data Reduction ===
   R2_XGBRegressor  RMSE_XGBRegressor  PICP_FUQ   MPIW_FUQ  PICP_XGB  \
0         0.905784           2.628542  0.941176   9.353838  0.784314   
1         0.849830           3.283060  0.933333  10.064398  0.694118   
2         0.793935           3.880161  0.911765  10.652688  0.645098   
3         0.683317           4.811538  0.909804  12.927684

## California Housing

In [8]:
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
import numpy as np

# Fetch the California housing dataset
data = fetch_california_housing()

X_train, X_test, y_train_full, y_test = train_test_split(data.data, data.target, test_size=0.2, random_state=42)
scaler_X = MinMaxScaler()

X_train_full = scaler_X.fit_transform(X_train)
X_test = scaler_X.transform(X_test)

# Ensure they are NumPy arrays (in case loaded as pandas objects)
X_train_full = np.array(X_train_full)
y_train_full = np.array(y_train_full)
X_test = np.array(X_test)
y_test = np.array(y_test)

In [9]:
### 1. Noise Addition
noise_levels = [0.0, 0.4, .8,1]
noise_results = []
for noise_level in noise_levels:
    metrics = []
    for run in range(n_runs):
        np.random.seed(42 + run)
        noise = np.random.normal(0, noise_level * np.std(y_train_full), size=len(y_train_full))
        y_train_noisy = y_train_full + noise
        metrics.append(evaluate_methods(X_train_full, y_train_noisy, X_test, y_test))
    avg = pd.DataFrame(metrics).mean().to_dict()
    avg['Noise_Level'] = noise_level
    noise_results.append(avg)
df_noise = pd.DataFrame(noise_results)
df_noise

If you do not want to mutate any gene, please set mutation_type=None.
If you do not want to mutate any gene, please set mutation_type=None.
If you do not want to mutate any gene, please set mutation_type=None.
If you do not want to mutate any gene, please set mutation_type=None.
If you do not want to mutate any gene, please set mutation_type=None.
If you do not want to mutate any gene, please set mutation_type=None.
If you do not want to mutate any gene, please set mutation_type=None.
If you do not want to mutate any gene, please set mutation_type=None.
If you do not want to mutate any gene, please set mutation_type=None.
If you do not want to mutate any gene, please set mutation_type=None.
If you do not want to mutate any gene, please set mutation_type=None.
If you do not want to mutate any gene, please set mutation_type=None.
If you do not want to mutate any gene, please set mutation_type=None.
If you do not want to mutate any gene, please set mutation_type=None.
If you do not want t

Unnamed: 0,R2_XGBRegressor,RMSE_XGBRegressor,PICP_FUQ,MPIW_FUQ,PICP_XGB,MPIW_XGB,PICP_Z,MPIW_Z,Noise_Level
0,0.830137,0.471794,0.94501,1.940283,0.843266,1.407597,0.937984,1.849434,0.0
1,0.809032,0.500217,0.953343,2.136818,0.959787,2.032482,0.942878,1.960849,0.4
2,0.748783,0.573739,0.959012,2.526671,0.985804,3.247443,0.943459,2.249056,0.8
3,0.709645,0.61672,0.960804,2.733572,0.991376,3.909585,0.944719,2.417541,1.0


In [10]:
### 2. Data Reduction
fractions = [1.0, 0.5, 0.1, 0.05]
reduction_results = []
for frac in fractions:
    metrics = []
    for run in range(n_runs):
        np.random.seed(42 + run)
        size = int(frac * len(X_train_full))
        idx = np.random.choice(len(X_train_full), size=size, replace=False)
        metrics.append(evaluate_methods(X_train_full[idx], y_train_full[idx], X_test, y_test))
    avg = pd.DataFrame(metrics).mean().to_dict()
    avg['Train_Fraction'] = frac
    reduction_results.append(avg)
df_reduction = pd.DataFrame(reduction_results)
df_reduction

If you do not want to mutate any gene, please set mutation_type=None.
If you do not want to mutate any gene, please set mutation_type=None.
If you do not want to mutate any gene, please set mutation_type=None.
If you do not want to mutate any gene, please set mutation_type=None.
If you do not want to mutate any gene, please set mutation_type=None.
If you do not want to mutate any gene, please set mutation_type=None.
If you do not want to mutate any gene, please set mutation_type=None.
If you do not want to mutate any gene, please set mutation_type=None.
If you do not want to mutate any gene, please set mutation_type=None.
If you do not want to mutate any gene, please set mutation_type=None.
If you do not want to mutate any gene, please set mutation_type=None.
If you do not want to mutate any gene, please set mutation_type=None.
If you do not want to mutate any gene, please set mutation_type=None.
If you do not want to mutate any gene, please set mutation_type=None.
If you do not want t

Unnamed: 0,R2_XGBRegressor,RMSE_XGBRegressor,PICP_FUQ,MPIW_FUQ,PICP_XGB,MPIW_XGB,PICP_Z,MPIW_Z,Train_Fraction
0,0.830756,0.470931,0.949516,1.954091,0.844041,1.398899,0.941812,1.846049,1.0
1,0.81438,0.493186,0.948498,2.04748,0.831977,1.456414,0.940795,1.933291,0.5
2,0.742221,0.581159,0.941715,2.336394,0.821415,2.417529,0.938033,2.278144,0.1
3,0.690911,0.636262,0.937984,2.52323,0.764971,2.042935,0.936773,2.494145,0.05


In [11]:
### 3. Output Coarsening
from sklearn.preprocessing import KBinsDiscretizer

# --- Coarsening setup ---
coarsening_levels = [1.0, 0.5, 0.25]  # 100%, 50%, 25% precision
n_runs = 5
coarsening_results = []

# --- Quantile coarsening ---
def quantile_coarsening_with_midpoints(y, n_bins):
    kbin = KBinsDiscretizer(n_bins=n_bins, encode='ordinal', strategy='quantile')
    y_binned = kbin.fit_transform(y.reshape(-1, 1)).ravel()
    bin_edges = kbin.bin_edges_[0]
    # Replace each bin label with the midpoint of that bin
    y_midpoints = np.array([ (bin_edges[int(b)] + bin_edges[int(b)+1]) / 2 for b in y_binned ])
    return y_midpoints

# --- Run experiments ---
for c in coarsening_levels:
    metrics = []
    bins = max(2, int(10 * c))  # Keep bins reasonable
    for run in range(n_runs):
        np.random.seed(42 + run)
        y_train_coarse = quantile_coarsening_with_midpoints(y_train_full, n_bins=bins)
        y_test_coarse = quantile_coarsening_with_midpoints(y_test, n_bins=bins)
        metrics.append(evaluate_methods(X_train_full, y_train_coarse, X_test, y_test_coarse))
    avg = pd.DataFrame(metrics).mean().to_dict()
    avg['Coarsening_Factor'] = c
    coarsening_results.append(avg)

# --- Results ---
df_coarse = pd.DataFrame(coarsening_results)
df_coarse

If you do not want to mutate any gene, please set mutation_type=None.
If you do not want to mutate any gene, please set mutation_type=None.
If you do not want to mutate any gene, please set mutation_type=None.
If you do not want to mutate any gene, please set mutation_type=None.
If you do not want to mutate any gene, please set mutation_type=None.
If you do not want to mutate any gene, please set mutation_type=None.
If you do not want to mutate any gene, please set mutation_type=None.
If you do not want to mutate any gene, please set mutation_type=None.
If you do not want to mutate any gene, please set mutation_type=None.
If you do not want to mutate any gene, please set mutation_type=None.
If you do not want to mutate any gene, please set mutation_type=None.
If you do not want to mutate any gene, please set mutation_type=None.
If you do not want to mutate any gene, please set mutation_type=None.
If you do not want to mutate any gene, please set mutation_type=None.
If you do not want t

Unnamed: 0,R2_XGBRegressor,RMSE_XGBRegressor,PICP_FUQ,MPIW_FUQ,PICP_XGB,MPIW_XGB,PICP_Z,MPIW_Z,Coarsening_Factor
0,0.825338,0.463276,0.958576,2.059211,0.862161,3.021591,0.940649,1.816042,1.0
1,0.802561,0.502476,0.952859,2.244329,0.799903,2.987284,0.93532,1.969706,0.5
2,0.694387,0.670301,0.856444,1.893559,0.292393,1.897491,0.915698,2.627579,0.25


In [12]:
print("\n=== Noise Addition ===")
print(df_noise)

print("\n=== Data Reduction ===")
print(df_reduction)

print("\n=== Output Coarsening ===")
print(df_coarse)


=== Noise Addition ===
   R2_XGBRegressor  RMSE_XGBRegressor  PICP_FUQ  MPIW_FUQ  PICP_XGB  MPIW_XGB  \
0         0.830137           0.471794  0.945010  1.940283  0.843266  1.407597   
1         0.809032           0.500217  0.953343  2.136818  0.959787  2.032482   
2         0.748783           0.573739  0.959012  2.526671  0.985804  3.247443   
3         0.709645           0.616720  0.960804  2.733572  0.991376  3.909585   

     PICP_Z    MPIW_Z  Noise_Level  
0  0.937984  1.849434          0.0  
1  0.942878  1.960849          0.4  
2  0.943459  2.249056          0.8  
3  0.944719  2.417541          1.0  

=== Data Reduction ===
   R2_XGBRegressor  RMSE_XGBRegressor  PICP_FUQ  MPIW_FUQ  PICP_XGB  MPIW_XGB  \
0         0.830756           0.470931  0.949516  1.954091  0.844041  1.398899   
1         0.814380           0.493186  0.948498  2.047480  0.831977  1.456414   
2         0.742221           0.581159  0.941715  2.336394  0.821415  2.417529   
3         0.690911           0.636262

# Auto

In [13]:
import numpy as np
import pandas as pd
from sklearn.datasets import fetch_openml

# Load the Automobile MPG dataset from OpenML
#mpg = fetch_openml(name='autompg',version=3)
mpg = fetch_openml(data_id=42372)
# Convert to DataFrame
X = pd.DataFrame(mpg.data, columns=mpg.feature_names)
y = pd.DataFrame(mpg.target)

from sklearn.preprocessing import MinMaxScaler
scaler_X = MinMaxScaler()
X_train_scaled = scaler_X.fit_transform(X)
X=X_train_scaled
y=np.array(y)

In [14]:
from sklearn.model_selection import KFold
from sklearn.metrics import r2_score, mean_squared_error
from xgboost import XGBRegressor
import numpy as np

def train_and_report( X_train, y_train):
    kfold = KFold(n_splits=10, shuffle=True, random_state=42)

    model_fuq=LinearRegression()
    
    predictions_fuq = []
    lower_xgb = []
    upper_xgb = []
    actual_values=[]
    model_fuq=XGBRegressor()
    model_lower = XGBRegressor(
        objective='reg:quantileerror',
        quantile_alpha=0.025,
        n_estimators=100,
        learning_rate=0.1
        )

        # Upper quantile model (95th percentile)
    model_upper = XGBRegressor(
            objective='reg:quantileerror',
            quantile_alpha=0.975,
            n_estimators=100,
            learning_rate=0.1
        )
    for train_index, test_index in kfold.split(X_train):
        X_train_fold, X_test_fold = X_train[train_index], X_train[test_index]
        y_train_fold, y_test_fold = y_train[train_index], y_train[test_index]

        # Train FUQ model (e.g., Linear Regression)
        model_fuq.fit(X_train_fold, y_train_fold)
        y_pred_fuq = model_fuq.predict(X_test_fold)
        predictions_fuq.extend(y_pred_fuq)

        # 
        # ===== XGBoost Quantile Regression =====
        model_lower.fit(X_train_fold, y_train_fold)
        model_upper.fit(X_train_fold, y_train_fold)
        lower_xgb.extend(model_lower.predict(X_test_fold))
        upper_xgb.extend(model_upper.predict(X_test_fold))
        actual_values.extend(y_test_fold)

    predictions_fuq = np.array(predictions_fuq)
    actual_values = np.array(actual_values)
    lower_xgb=np.array(lower_xgb)
    upper_xgb=np.array(upper_xgb)
    
    # FUQ Evaluation
    r2_fuq = r2_score(actual_values, predictions_fuq)
    mse_fuq = mean_squared_error(actual_values, predictions_fuq)
    rmse_fuq=mse_fuq**.5
    z_score = 1.96  # 95% confidence interval
    lower_z = y_pred_fuq - z_score * rmse_fuq
    upper_z = y_pred_fuq + z_score * rmse_fuq
    picp_z = np.mean((y_test >= lower_z) & (y_test <= upper_z))
    mpiw_z = np.mean(upper_z - lower_z)
    
    solution, _, _ = GA_UQ(predictions_fuq, actual_values)
    a_ga, b_ga = solution
    lower_ga = predictions_fuq - b_ga
    upper_ga = predictions_fuq + a_ga
    picp_ga = np.mean((actual_values >= lower_ga) & (actual_values <= upper_ga))
    mpiw_ga = np.mean(upper_ga - lower_ga)

    picp_xgb = np.mean((y_test >= lower_xgb) & (y_test <= upper_xgb))
    mpiw_xgb = np.mean(upper_xgb - lower_xgb)

    result = {
        "R2_RandomForest": r2_fuq,
        "RMSE_RandomForest": mse_fuq**.5,
        "PICP_FUQ": picp_ga,
        "MPIW_FUQ": mpiw_ga,
        "a": a_ga,
        "b": b_ga,
        "PICP_XGB": picp_xgb,
        "MPIW_XGB": mpiw_xgb,
        "PICP_Z": picp_z,
        "MPIW_Z": mpiw_z
    }

    return result


In [15]:

# Using Linear Regression model with reduced data fractions

results=[]
n_runs=5

results = []
for frac in [1.0, 0.7,0.5, 0.2]:
    for run in range(n_runs):
        np.random.seed(41 + run)
        size = int(frac * len(X))
        idx = np.random.choice(len(X), size=size, replace=False)
        X_reduced = X[idx]
        y_reduced = y[idx]

    result = train_and_report( X_reduced, y_reduced)
    result['Train_Fraction']=frac
    results.append(result)

# Convert results to DataFrame for analysis
df_reduction = pd.DataFrame(results)
df_reduction


ValueError: operands could not be broadcast together with shapes (4128,) (39,) 

In [None]:
### 1. Noise Addition
noise_levels = [0.0, 0.4, .8,1]
y=np.ravel(y)
noise_results = []
for noise_level in noise_levels:
    metrics = []
    for run in range(n_runs):
        np.random.seed(42 + run)
        noise = np.random.normal(0, noise_level * np.std(y), size=len(y))
        y_train_noisy = y + noise
        metrics.append(train_and_report(X, y_train_noisy))
    avg = pd.DataFrame(metrics).mean().to_dict()
    avg['Noise_Level'] = noise_level
    noise_results.append(avg)
df_noise = pd.DataFrame(noise_results)
df_noise

If you do not want to mutate any gene, please set mutation_type=None.
If you do not want to mutate any gene, please set mutation_type=None.
If you do not want to mutate any gene, please set mutation_type=None.
If you do not want to mutate any gene, please set mutation_type=None.
If you do not want to mutate any gene, please set mutation_type=None.
If you do not want to mutate any gene, please set mutation_type=None.
If you do not want to mutate any gene, please set mutation_type=None.
If you do not want to mutate any gene, please set mutation_type=None.
If you do not want to mutate any gene, please set mutation_type=None.
If you do not want to mutate any gene, please set mutation_type=None.
If you do not want to mutate any gene, please set mutation_type=None.
If you do not want to mutate any gene, please set mutation_type=None.
If you do not want to mutate any gene, please set mutation_type=None.
If you do not want to mutate any gene, please set mutation_type=None.
If you do not want t

Unnamed: 0,R2_FUQ,RMSE_FUQ,PICP_FUQ,MPIW_FUQ,a_GA,b_GA,R2_Bayes,RMSE_Bayes,PICP_Bayes,MPIW_Bayes,Noise_Level
0,0.802567,3.46361,0.951531,13.930699,7.254232,6.676466,0.80252,3.46402,0.931122,11.99074,0.0
1,0.693607,4.672574,0.964286,19.938791,10.036314,9.902477,0.693549,4.673019,0.914796,16.173716,0.4
2,0.495753,7.120705,0.967347,31.267711,15.432604,15.835107,0.495834,7.120182,0.910714,24.541178,0.8
3,0.40888,8.501449,0.966327,37.086426,18.385117,18.701309,0.40918,8.499386,0.904592,29.21445,1.0


In [None]:
coarsening_levels = [1.0, 0.5, 0.25,0.1]  # 100%, 50%, 25% precision
n_runs = 5
coarsening_results = []

# --- Quantile coarsening ---
def quantile_coarsening_with_midpoints(y, n_bins):
    kbin = KBinsDiscretizer(n_bins=n_bins, encode='ordinal', strategy='quantile')
    y_binned = kbin.fit_transform(y.reshape(-1, 1)).ravel()
    bin_edges = kbin.bin_edges_[0]
    # Replace each bin label with the midpoint of that bin
    y_midpoints = np.array([ (bin_edges[int(b)] + bin_edges[int(b)+1]) / 2 for b in y_binned ])
    return y_midpoints

# --- Run experiments ---
for c in coarsening_levels:
    metrics = []
    bins = max(2, int(10 * c))  # Keep bins reasonable
    for run in range(n_runs):
        np.random.seed(42 + run)
        y_train_coarse = quantile_coarsening_with_midpoints(y, n_bins=bins)
        metrics.append(train_and_report(X, y_train_coarse))
    avg = pd.DataFrame(metrics).mean().to_dict()
    avg['Coarsening_Factor'] = c
    coarsening_results.append(avg)

# --- Results ---
df_coarse = pd.DataFrame(coarsening_results)
df_coarse

If you do not want to mutate any gene, please set mutation_type=None.
If you do not want to mutate any gene, please set mutation_type=None.
If you do not want to mutate any gene, please set mutation_type=None.
If you do not want to mutate any gene, please set mutation_type=None.
If you do not want to mutate any gene, please set mutation_type=None.
If you do not want to mutate any gene, please set mutation_type=None.
If you do not want to mutate any gene, please set mutation_type=None.
If you do not want to mutate any gene, please set mutation_type=None.
If you do not want to mutate any gene, please set mutation_type=None.
If you do not want to mutate any gene, please set mutation_type=None.
If you do not want to mutate any gene, please set mutation_type=None.
If you do not want to mutate any gene, please set mutation_type=None.
If you do not want to mutate any gene, please set mutation_type=None.
If you do not want to mutate any gene, please set mutation_type=None.
If you do not want t

Unnamed: 0,R2_FUQ,RMSE_FUQ,PICP_FUQ,MPIW_FUQ,a_GA,b_GA,R2_Bayes,RMSE_Bayes,PICP_Bayes,MPIW_Bayes,Coarsening_Factor
0,0.782288,3.831191,0.969388,16.748269,8.152397,8.595872,0.782107,3.832783,0.908163,13.267847,1.0
1,0.775583,4.237978,0.989796,20.754838,10.45275,10.302088,0.775576,4.238047,0.928571,14.703536,0.5
2,0.624167,5.76269,0.951531,27.013961,15.21744,11.796521,0.623854,5.765089,0.905612,19.9955,0.25
3,0.624167,5.76269,0.951531,27.021471,15.221164,11.800307,0.623854,5.765089,0.905612,19.9955,0.1
