# Imports

In [None]:
# %% Imports y configuración
import os
import sys
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import pickle
from datetime import datetime
from pathlib import Path
import json
from pathlib import Path

# Agregar el directorio raíz al path
sys.path.append(os.path.join(os.path.dirname(os.getcwd()), '..'))

# Imports del proyecto
from model_ddp.utils.sistem_fun import (
    load_config,
    get_data_path,
    get_artifact_path,
    get_report_path,
    create_experiment_id,
    ensure_directories,
    save_experiment_metadata
)

from model_ddp.simulations.gaussian_simulator import (
    SimulationConfig,
    RBFKernel,
    MaternKernel,
    PeriodicKernel,
    LinearKernel,
    GaussianProcess,
    RegressionSimulator,
    TransformationFunctions
)

# Modelos
from model_ddp.models.LSBP_laplace_v1 import LSBPLaplace
from model_ddp.models.LSBP_normal_v3 import LSBPNormal

# Modelos Random forest y Xgboost 
from sklearn.ensemble import RandomForestRegressor
from xgboost import XGBRegressor

# Metricas y graficas 
from model_ddp.fit.metrics import regression_metrics
from model_ddp.graphics.plots_regression import plot_regression_analysis
from model_ddp.graphics.plots_traces import plot_hyperparameter_traces
from model_ddp.graphics.plots_aplication import plot_credible_intervals

# Modulo pipeline
from model_ddp.pipelines.data_separacion import split_data

config=load_config()

# Omitir warnings
import warnings
warnings.filterwarnings("ignore", category=UserWarning, module="sklearn")

# Experimento Real 

In [2]:
#Parametros Iniciales
NOMBRE_EJECUCION = "concrete_Data"
SIM_REAL = "real"

# Parámetros de ejecución de experimentos 
CARACTERISTICAS = "Aplicacion real estimacion de la compresion del concreto"
EXPERIMENT_ID = create_experiment_id("concrete_Data")

##################################################
# Registrar Experimento
##################################################

# Preparar información del experimento
experiment_data = {
    'experiment_id': EXPERIMENT_ID,
    'nombre': NOMBRE_EJECUCION,
    'tipo': SIM_REAL,
    'descripcion': f"""Experimento: {CARACTERISTICAS}"""
}
registry_file = save_experiment_metadata(config, experiment_data)
print(f"✓ Experimento registrado en: {registry_file}")


✓ Experimento registrado en: C:\Users\JuanFran\Desktop\git_tesis\model_ddp\versioning\experiment_registry.md


## Datos 

In [5]:
# Encontrar la raíz del proyecto
current_dir = Path.cwd()
print(f"Directorio actual: {current_dir}")

# Buscar la carpeta "data" ascendiendo
project_root = current_dir
while not (project_root / "data").exists() and project_root.parent != project_root:
    project_root = project_root.parent

# Ruta al archivo Excel
excel_path = project_root / "data" / "reales" / "Concrete_Data.xls"  # MODIFICAR SEGÚN TU ESTRUCTURA

if excel_path.exists():
    # Usar read_excel para archivos .xls
    datos = pd.read_excel(excel_path)
    print(f"\n✅ Datos cargados: {len(datos)} filas")
    print(datos.head())
else:
    print("\n❌ Archivo no encontrado")
    print(f"Archivos disponibles en {excel_path.parent}:")
    if excel_path.parent.exists():
        for f in excel_path.parent.glob("*.*"):
            print(f"  • {f.name}")

Directorio actual: c:\Users\JuanFran\Desktop\git_tesis\model_ddp\notebooks\reales

✅ Datos cargados: 1030 filas
   Cement (component 1)(kg in a m^3 mixture)  \
0                                      540.0   
1                                      540.0   
2                                      332.5   
3                                      332.5   
4                                      198.6   

   Blast Furnace Slag (component 2)(kg in a m^3 mixture)  \
0                                                0.0       
1                                                0.0       
2                                              142.5       
3                                              142.5       
4                                              132.4       

   Fly Ash (component 3)(kg in a m^3 mixture)  \
0                                         0.0   
1                                         0.0   
2                                         0.0   
3                                         

In [7]:
##################################################
# Crear carpeta de guardado 
##################################################
data_path = get_data_path(config, SIM_REAL, "output")
carpeta_datos = data_path / f"{EXPERIMENT_ID}"
carpeta_datos.mkdir(parents=True, exist_ok=True)

##################################################
# Crear carpeta de guardado 
##################################################

# Nombre de la columna target (limpiando espacios)
target_col = "Concrete compressive strength(MPa, megapascals) "

# Covariables: todas las columnas excepto el target
feature_cols = [c for c in datos.columns if c.strip() != target_col]

X = datos[feature_cols].values
y = datos[target_col].values

# Separar data usando split_data (asumo que es tu función personalizada)
splits = split_data(
    X=X,
    y=y,
    test_size=0.2,
    val_size=None,      # None si no quieres validación
    random_state=123
)

# Nombres de columnas para DataFrame
feature_cols_df = [f'X{i+1}' for i in range(X.shape[1])]

# Train
data_train = pd.DataFrame(splits["X_train"], columns=feature_cols_df)
data_train["Y"] = splits["y_train"]

# Test
data_test = pd.DataFrame(splits["X_test"], columns=feature_cols_df)
data_test["Y"] = splits["y_test"]

##################################################
# Guardar data frame  
##################################################
csv_filename = f"{carpeta_datos}/data_train.csv"
data_train.to_csv(csv_filename, index=False)

print(f"✓ Datos guardados en CSV: {csv_filename}")

csv_filename = f"{carpeta_datos}/data_test.csv"
data_test.to_csv(csv_filename, index=False)

print(f"✓ Datos guardados en CSV: {csv_filename}")


✓ Datos guardados en CSV: C:\Users\JuanFran\Desktop\git_tesis\model_ddp\data\reales\concrete_Data_20251227_213314/data_train.csv
✓ Datos guardados en CSV: C:\Users\JuanFran\Desktop\git_tesis\model_ddp\data\reales\concrete_Data_20251227_213314/data_test.csv


## Modelos 

### Random Forest

In [9]:
# Separar features y target
X_train = data_train[feature_cols_df]  # DataFrame con nombres
y_train = data_train["Y"]

X_test = data_test[feature_cols_df]
y_test = data_test["Y"]

rf_model = RandomForestRegressor(
    n_estimators=500,
    max_depth=10,
    min_samples_split=5,
    min_samples_leaf=3,
    random_state=123,
    n_jobs=-1,
    verbose=1   
)

# Entrenar el modelo
rf_model.fit(X_train, y_train)

[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done  42 tasks      | elapsed:    0.0s
[Parallel(n_jobs=-1)]: Done 192 tasks      | elapsed:    0.6s
[Parallel(n_jobs=-1)]: Done 442 tasks      | elapsed:    1.3s
[Parallel(n_jobs=-1)]: Done 500 out of 500 | elapsed:    1.5s finished


0,1,2
,"n_estimators  n_estimators: int, default=100 The number of trees in the forest. .. versionchanged:: 0.22  The default value of ``n_estimators`` changed from 10 to 100  in 0.22.",500
,"criterion  criterion: {""squared_error"", ""absolute_error"", ""friedman_mse"", ""poisson""}, default=""squared_error"" The function to measure the quality of a split. Supported criteria are ""squared_error"" for the mean squared error, which is equal to variance reduction as feature selection criterion and minimizes the L2 loss using the mean of each terminal node, ""friedman_mse"", which uses mean squared error with Friedman's improvement score for potential splits, ""absolute_error"" for the mean absolute error, which minimizes the L1 loss using the median of each terminal node, and ""poisson"" which uses reduction in Poisson deviance to find splits. Training using ""absolute_error"" is significantly slower than when using ""squared_error"". .. versionadded:: 0.18  Mean Absolute Error (MAE) criterion. .. versionadded:: 1.0  Poisson criterion.",'squared_error'
,"max_depth  max_depth: int, default=None The maximum depth of the tree. If None, then nodes are expanded until all leaves are pure or until all leaves contain less than min_samples_split samples.",10
,"min_samples_split  min_samples_split: int or float, default=2 The minimum number of samples required to split an internal node: - If int, then consider `min_samples_split` as the minimum number. - If float, then `min_samples_split` is a fraction and  `ceil(min_samples_split * n_samples)` are the minimum  number of samples for each split. .. versionchanged:: 0.18  Added float values for fractions.",5
,"min_samples_leaf  min_samples_leaf: int or float, default=1 The minimum number of samples required to be at a leaf node. A split point at any depth will only be considered if it leaves at least ``min_samples_leaf`` training samples in each of the left and right branches. This may have the effect of smoothing the model, especially in regression. - If int, then consider `min_samples_leaf` as the minimum number. - If float, then `min_samples_leaf` is a fraction and  `ceil(min_samples_leaf * n_samples)` are the minimum  number of samples for each node. .. versionchanged:: 0.18  Added float values for fractions.",3
,"min_weight_fraction_leaf  min_weight_fraction_leaf: float, default=0.0 The minimum weighted fraction of the sum total of weights (of all the input samples) required to be at a leaf node. Samples have equal weight when sample_weight is not provided.",0.0
,"max_features  max_features: {""sqrt"", ""log2"", None}, int or float, default=1.0 The number of features to consider when looking for the best split: - If int, then consider `max_features` features at each split. - If float, then `max_features` is a fraction and  `max(1, int(max_features * n_features_in_))` features are considered at each  split. - If ""sqrt"", then `max_features=sqrt(n_features)`. - If ""log2"", then `max_features=log2(n_features)`. - If None or 1.0, then `max_features=n_features`. .. note::  The default of 1.0 is equivalent to bagged trees and more  randomness can be achieved by setting smaller values, e.g. 0.3. .. versionchanged:: 1.1  The default of `max_features` changed from `""auto""` to 1.0. Note: the search for a split does not stop until at least one valid partition of the node samples is found, even if it requires to effectively inspect more than ``max_features`` features.",1.0
,"max_leaf_nodes  max_leaf_nodes: int, default=None Grow trees with ``max_leaf_nodes`` in best-first fashion. Best nodes are defined as relative reduction in impurity. If None then unlimited number of leaf nodes.",
,"min_impurity_decrease  min_impurity_decrease: float, default=0.0 A node will be split if this split induces a decrease of the impurity greater than or equal to this value. The weighted impurity decrease equation is the following::  N_t / N * (impurity - N_t_R / N_t * right_impurity  - N_t_L / N_t * left_impurity) where ``N`` is the total number of samples, ``N_t`` is the number of samples at the current node, ``N_t_L`` is the number of samples in the left child, and ``N_t_R`` is the number of samples in the right child. ``N``, ``N_t``, ``N_t_R`` and ``N_t_L`` all refer to the weighted sum, if ``sample_weight`` is passed. .. versionadded:: 0.19",0.0
,"bootstrap  bootstrap: bool, default=True Whether bootstrap samples are used when building trees. If False, the whole dataset is used to build each tree.",True


### XGBoost

In [10]:
# Separar features y target
X_train = data_train[feature_cols_df]
y_train = data_train["Y"]

X_test = data_test[feature_cols_df]
y_test = data_test["Y"]

# Crear el modelo
xgb_model = XGBRegressor(
    n_estimators=500,        # Número de árboles
    max_depth=6,             # Profundidad máxima de cada árbol
    learning_rate=0.1,       # Tasa de aprendizaje (shrinkage)
    subsample=0.8,           # Fracción de muestras para cada árbol
    colsample_bytree=0.8,    # Fracción de features para cada árbol
    min_child_weight=3,      # Similar a min_samples_leaf
    gamma=0,                 # Regularización por reducción mínima de pérdida
    reg_alpha=0,             # L1 regularization
    reg_lambda=1,            # L2 regularization
    random_state=123,
    n_jobs=-1,
    verbosity=1              # Equivalente a verbose
)

# Entrenar el modelo
xgb_model.fit(X_train, y_train)

0,1,2
,"objective  objective: typing.Union[str, xgboost.sklearn._SklObjWProto, typing.Callable[[typing.Any, typing.Any], typing.Tuple[numpy.ndarray, numpy.ndarray]], NoneType] Specify the learning task and the corresponding learning objective or a custom objective function to be used. For custom objective, see :doc:`/tutorials/custom_metric_obj` and :ref:`custom-obj-metric` for more information, along with the end note for function signatures.",'reg:squarederror'
,"base_score  base_score: typing.Union[float, typing.List[float], NoneType] The initial prediction score of all instances, global bias.",
,booster,
,"callbacks  callbacks: typing.Optional[typing.List[xgboost.callback.TrainingCallback]] List of callback functions that are applied at end of each iteration. It is possible to use predefined callbacks by using :ref:`Callback API `. .. note::  States in callback are not preserved during training, which means callback  objects can not be reused for multiple training sessions without  reinitialization or deepcopy. .. code-block:: python  for params in parameters_grid:  # be sure to (re)initialize the callbacks before each run  callbacks = [xgb.callback.LearningRateScheduler(custom_rates)]  reg = xgboost.XGBRegressor(**params, callbacks=callbacks)  reg.fit(X, y)",
,colsample_bylevel  colsample_bylevel: typing.Optional[float] Subsample ratio of columns for each level.,
,colsample_bynode  colsample_bynode: typing.Optional[float] Subsample ratio of columns for each split.,
,colsample_bytree  colsample_bytree: typing.Optional[float] Subsample ratio of columns when constructing each tree.,0.8
,"device  device: typing.Optional[str] .. versionadded:: 2.0.0 Device ordinal, available options are `cpu`, `cuda`, and `gpu`.",
,"early_stopping_rounds  early_stopping_rounds: typing.Optional[int] .. versionadded:: 1.6.0 - Activates early stopping. Validation metric needs to improve at least once in  every **early_stopping_rounds** round(s) to continue training. Requires at  least one item in **eval_set** in :py:meth:`fit`. - If early stopping occurs, the model will have two additional attributes:  :py:attr:`best_score` and :py:attr:`best_iteration`. These are used by the  :py:meth:`predict` and :py:meth:`apply` methods to determine the optimal  number of trees during inference. If users want to access the full model  (including trees built after early stopping), they can specify the  `iteration_range` in these inference methods. In addition, other utilities  like model plotting can also use the entire model. - If you prefer to discard the trees after `best_iteration`, consider using the  callback function :py:class:`xgboost.callback.EarlyStopping`. - If there's more than one item in **eval_set**, the last entry will be used for  early stopping. If there's more than one metric in **eval_metric**, the last  metric will be used for early stopping.",
,enable_categorical  enable_categorical: bool See the same parameter of :py:class:`DMatrix` for details.,False


### LSBP Normal

In [14]:
##################################################
# Modelo   
##################################################
print("\n" + "="*60)
print("EJECUTANDO LSBPNormal...")
print("="*60)

# Crear instancia del modelo
lsbp_model_normal = LSBPNormal(
    y=data_train["Y"].values,
    X=data_train.drop(columns=["Y"]).values,
    H=20,                     # Número inicial de clusters truncados
    verbose=True              # Mostrar progreso
)

# Ejecutar MCMC
trace = lsbp_model_normal.run(
    iterations=2000,          # Iteraciones totales
    burnin=500               # Burn-in
)

print("\n" + "="*60)
print("LSBP COMPLETADO")
print("="*60)


EJECUTANDO LSBPNormal...
Using C++ acceleration
Iter 100/2000: K_eff=13, H=100, μ=1.64, μ₀=0.42, κ₀=0.04, a₀=20.00, b₀=0.53
  Acceptance: α=0.89, ψ=0.85, κ=0.68, a=0.55
Iter 200/2000: K_eff=10, H=100, μ=2.45, μ₀=0.52, κ₀=0.06, a₀=20.00, b₀=0.47
  Acceptance: α=0.76, ψ=0.83, κ=0.76, a=0.49
Iter 300/2000: K_eff=13, H=100, μ=3.11, μ₀=0.35, κ₀=0.04, a₀=20.00, b₀=0.50
  Acceptance: α=0.72, ψ=0.75, κ=0.75, a=0.54
Iter 400/2000: K_eff=10, H=100, μ=3.22, μ₀=0.38, κ₀=0.04, a₀=20.00, b₀=0.46
  Acceptance: α=0.71, ψ=0.63, κ=0.69, a=0.52
Iter 500/2000: K_eff=15, H=100, μ=3.54, μ₀=0.13, κ₀=0.03, a₀=20.00, b₀=0.44
  Acceptance: α=0.67, ψ=0.64, κ=0.47, a=0.42
Iter 600/2000: K_eff=16, H=100, μ=4.02, μ₀=0.20, κ₀=0.03, a₀=20.00, b₀=0.46
  Acceptance: α=0.61, ψ=0.58, κ=0.42, a=0.57
Iter 700/2000: K_eff=13, H=100, μ=3.78, μ₀=0.30, κ₀=0.03, a₀=20.00, b₀=0.49
  Acceptance: α=0.63, ψ=0.66, κ=0.50, a=0.53
Iter 800/2000: K_eff=19, H=100, μ=3.70, μ₀=-0.10, κ₀=0.03, a₀=20.00, b₀=0.42
  Acceptance: α=0.67, ψ=0.5

### LSBP Laplace

In [17]:
##################################################
# Modelo   
##################################################
print("\n" + "="*60)
print("EJECUTANDO LSBPLaplace...")
print("="*60)

# Crear instancia del modelo
lsbp_model_laplace = LSBPLaplace(
    y=data_train["Y"].values,
    X=data_train.drop(columns=["Y"]).values,
    H=20,                     # Número inicial de clusters truncados
    verbose=True              # Mostrar progreso
)

# Ejecutar MCMC
trace = lsbp_model_laplace.run(
    iterations=2000,          # Iteraciones totales
    burnin=500               # Burn-in
)

print("\n" + "="*60)
print("LSBP COMPLETADO")
print("="*60)


EJECUTANDO LSBPLaplace...
Using C++ acceleration for 8 functions (compute_eta, compute_weights, update_lambda_latent, update_assignments, update_atoms, update_alpha, update_psi, update_ell)
Iter 100/2000: K_eff=69, H=100, μ=1.90, μ₀=0.01, τ₀=0.81, a₀=0.83, β₀=24.64
  Acceptance: α=0.75, ψ=0.84, τ=0.73, a=0.58
Iter 200/2000: K_eff=89, H=100, μ=3.53, μ₀=-0.13, τ₀=0.62, a₀=0.77, β₀=30.76
  Acceptance: α=0.64, ψ=0.68, τ=0.60, a=0.60
Iter 300/2000: K_eff=85, H=100, μ=4.64, μ₀=0.06, τ₀=0.56, a₀=0.63, β₀=21.04
  Acceptance: α=0.63, ψ=0.60, τ=0.45, a=0.29
Iter 400/2000: K_eff=89, H=100, μ=5.86, μ₀=-0.05, τ₀=0.57, a₀=0.67, β₀=24.27
  Acceptance: α=0.61, ψ=0.64, τ=0.40, a=0.29
Iter 500/2000: K_eff=81, H=100, μ=5.27, μ₀=0.15, τ₀=0.71, a₀=0.72, β₀=30.02
  Acceptance: α=0.58, ψ=0.50, τ=0.27, a=0.33
Iter 600/2000: K_eff=87, H=100, μ=6.90, μ₀=0.29, τ₀=0.63, a₀=0.85, β₀=29.90
  Acceptance: α=0.60, ψ=0.49, τ=0.45, a=0.26
Iter 700/2000: K_eff=89, H=100, μ=6.29, μ₀=-0.02, τ₀=0.83, a₀=0.77, β₀=39.55
  Ac

### PSBP Normal

In [25]:
##################################################
# Modelo   
##################################################
print("\n" + "="*60)
print("EJECUTANDO PSBPNormal...")
print("="*60)

# Crear instancia del modelo
psbp_model = PSBPNormal(
    y=data_train["Y"].values,
    X=data_train.drop(columns=["Y"]).values,
    H=15,                     # Número inicial de clusters truncados
    verbose=True              # Mostrar progreso
)

# Ejecutar MCMC
trace = psbp_model.run(
    iterations=2000,          # Iteraciones totales
    burnin=500               # Burn-in
)

print("\n" + "="*60)
print("PSBP COMPLETADO")
print("="*60)


EJECUTANDO PSBPNormal...
Iter 100/2000: K_eff=2, H=102, μ=-0.07, μ₀=67.18, κ₀=0.69, a₀=15.15, b₀=2653.14
Iter 200/2000: K_eff=2, H=102, μ=0.10, μ₀=55.13, κ₀=3.34, a₀=20.00, b₀=2384.84
Iter 300/2000: K_eff=2, H=102, μ=-0.06, μ₀=59.05, κ₀=0.48, a₀=20.00, b₀=3043.61
Iter 400/2000: K_eff=2, H=102, μ=0.05, μ₀=66.29, κ₀=0.36, a₀=20.00, b₀=1642.59
Iter 500/2000: K_eff=2, H=102, μ=0.10, μ₀=75.69, κ₀=0.16, a₀=20.00, b₀=2053.35
Iter 600/2000: K_eff=3, H=102, μ=-0.00, μ₀=53.95, κ₀=0.31, a₀=20.00, b₀=1841.95
Iter 700/2000: K_eff=2, H=102, μ=-0.17, μ₀=32.32, κ₀=0.26, a₀=19.57, b₀=1527.71
Iter 800/2000: K_eff=2, H=102, μ=-0.12, μ₀=52.23, κ₀=0.10, a₀=20.00, b₀=1883.18
Iter 900/2000: K_eff=2, H=102, μ=-0.01, μ₀=62.22, κ₀=0.24, a₀=20.00, b₀=1806.48
Iter 1000/2000: K_eff=2, H=102, μ=-0.17, μ₀=30.71, κ₀=0.47, a₀=20.00, b₀=1986.72
Iter 1100/2000: K_eff=2, H=102, μ=0.03, μ₀=42.32, κ₀=0.21, a₀=20.00, b₀=2022.75
Iter 1200/2000: K_eff=2, H=102, μ=-0.08, μ₀=45.84, κ₀=0.98, a₀=20.00, b₀=2302.95
Iter 1300/2000:

## Evaluar 

### Random Forest

In [20]:
########################################################
# Hacer predicciones con Random Forest
########################################################

# Predicciones
y_pred = rf_model.predict(X_test)
y_true = y_test.values               # Convertimos a numpy array para consistencia

# Calcular métricas
metrics = regression_metrics(y_true, y_pred)

print("\n📊 MÉTRICAS DE AJUSTE RANDOM FOREST:")
print("-" * 60)
for metric_name, metric_value in metrics.items():
    print(f"  {metric_name.upper():8s}: {metric_value:10.6f}")
print("-" * 60)

# Carpeta para guardar reportes
report_path = get_report_path(config, SIM_REAL, "tables")
carpeta_reportes = report_path / f"{EXPERIMENT_ID}"
carpeta_reportes.mkdir(parents=True, exist_ok=True)

# Guardar métricas en JSON
metrics_file = carpeta_reportes / "metrics_rf_model.json"
with open(metrics_file, 'w') as f:
    json.dump(metrics, f, indent=2)
print(f"\n✓ Métricas guardadas: {metrics_file}")

########################################################
# Predicciones completas para gráficas
########################################################
# Desviación estándar de los árboles individuales
all_tree_preds = pd.DataFrame([tree.predict(X_test) for tree in rf_model.estimators_])
y_pred_std = all_tree_preds.std(axis=0).values

predictions_df = pd.DataFrame({
    'y_true': y_true,
    'y_pred_mean': y_pred,
    'y_pred_std': y_pred_std,
    'residual': y_true - y_pred,
    'residual_std': (y_true - y_pred) / (y_pred_std + 1e-8)  # Evitar división por 0
})

##################################################
# Gráficas de Fit 
##################################################
graphics_path = get_report_path(config, SIM_REAL, "graphics")
carpeta_graficas = graphics_path / f"{EXPERIMENT_ID}_RandomForest"
carpeta_graficas.mkdir(parents=True, exist_ok=True)

# Generar gráficas usando el módulo
splits = [
    (y_true, y_pred, "Test Set")
]
plot_regression_analysis(
    splits=splits,
    output_path=str(carpeta_graficas),
    model_name="RandomForest"
)
print(f"✓ Gráficas guardadas en: {carpeta_graficas}")


[Parallel(n_jobs=4)]: Using backend ThreadingBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    0.0s
[Parallel(n_jobs=4)]: Done 192 tasks      | elapsed:    0.1s
[Parallel(n_jobs=4)]: Done 442 tasks      | elapsed:    0.4s
[Parallel(n_jobs=4)]: Done 500 out of 500 | elapsed:    0.5s finished



📊 MÉTRICAS DE AJUSTE RANDOM FOREST:
------------------------------------------------------------
  MSE     :   0.022696
  RMSE    :   0.150653
  MAE     :   0.069044
  R2      :   0.999917
  MAPE    :   0.204373
------------------------------------------------------------

✓ Métricas guardadas: C:\Users\JuanFran\Desktop\git_tesis\model_ddp\reports\reales\concrete_Data_20251227_213314\metrics_rf_model.json
✓ Gráficas guardadas en: C:\Users\JuanFran\Desktop\git_tesis\model_ddp\reports\reales\concrete_Data_20251227_213314_RandomForest


### XGBoost

In [13]:
########################################################
# Hacer predicciones con XGBoost
########################################################
y_pred = xgb_model.predict(data_test[feature_cols_df].values)
y_true = data_test["Y"].values

# Calcular métricas
metrics = regression_metrics(y_true, y_pred)

print("\n📊 MÉTRICAS DE AJUSTE XGBOOST:")
print("-" * 60)
for metric_name, metric_value in metrics.items():
    print(f"  {metric_name.upper():8s}: {metric_value:10.6f}")
print("-" * 60)

# Carpeta para guardar reportes
report_path = get_report_path(config, SIM_REAL, "tables")
carpeta_reportes = report_path / f"{EXPERIMENT_ID}"
carpeta_reportes.mkdir(parents=True, exist_ok=True)

# Guardar métricas en JSON
metrics_file = carpeta_reportes / "metrics_xgb_model.json"
with open(metrics_file, 'w') as f:
    json.dump(metrics, f, indent=2)
print(f"\n✓ Métricas guardadas: {metrics_file}")

########################################################
# Predicciones completas para gráficas
########################################################
# Para XGBoost no tenemos std directamente, se puede poner NaN o 0
predictions_df = pd.DataFrame({
    'y_true': y_true,
    'y_pred_mean': y_pred,
    'y_pred_std': 0,                # XGBoost no entrega std directamente
    'residual': y_true - y_pred,
    'residual_std': 0                # O calcular Z-score si quieres
})

##################################################
# Gráficas de Fit 
##################################################
graphics_path = get_report_path(config, SIM_REAL, "graphics")
carpeta_graficas = graphics_path / f"{EXPERIMENT_ID}_XGBoost"
carpeta_graficas.mkdir(parents=True, exist_ok=True)

# Generar gráficas usando el módulo
splits = [
    (y_true, y_pred, "Test Set")
]
plot_regression_analysis(
    splits=splits,
    output_path=str(carpeta_graficas),
    model_name="XGBoost"
)
print(f"✓ Gráficas guardadas en: {carpeta_graficas}")



📊 MÉTRICAS DE AJUSTE XGBOOST:
------------------------------------------------------------
  MSE     :   0.646500
  RMSE    :   0.804052
  MAE     :   0.453555
  R2      :   0.997644
  MAPE    :   1.480675
------------------------------------------------------------

✓ Métricas guardadas: C:\Users\JuanFran\Desktop\git_tesis\model_ddp\reports\reales\concrete_Data_20251227_213314\metrics_xgb_model.json
✓ Gráficas guardadas en: C:\Users\JuanFran\Desktop\git_tesis\model_ddp\reports\reales\concrete_Data_20251227_213314_XGBoost


### LSBP Normal

In [30]:
########################################################
# Hacer predicciones con el modelo entrenado
########################################################
y_pred_mean, y_pred_std = lsbp_model_normal.predict_mean(
    X_new=data_test.drop(columns=["Y"]).values,
    n_samples=1000
)
y_true = data_test["Y"].values

# Calcular métricas
metrics = regression_metrics(y_true, y_pred_mean)

print("\n📊 MÉTRICAS DE AJUSTE:")
print("-" * 60)
for metric_name, metric_value in metrics.items():
    print(f"  {metric_name.upper():8s}: {metric_value:10.6f}")
print("-" * 60)

# Carpeta
report_path = get_report_path(config, SIM_REAL, "tables")
carpeta_reportes = report_path / f"{EXPERIMENT_ID}"
carpeta_reportes.mkdir(parents=True, exist_ok=True)

# Guardar métricas en JSON
metrics_file = carpeta_reportes / "metrics_lsbp_model_normal.json"
with open(metrics_file, 'w') as f:
    json.dump(metrics, f, indent=2)
print(f"\n✓ Métricas guardadas: {metrics_file}")

########################################################
# Predicciones completas para generar graficas
########################################################
# Guardar predicciones completas
predictions_df = pd.DataFrame({
    'y_true': y_true,
    'y_pred_mean': y_pred_mean,
    'y_pred_std': y_pred_std,
    'residual': y_true - y_pred_mean,
    'residual_std': (y_true - y_pred_mean) / y_pred_std  # Residuos estandarizados
})

##################################################
# Gráficas de Fit 
##################################################

# Crear carpeta para gráficas
graphics_path = get_report_path(config, SIM_REAL, "graphics")
carpeta_graficas = graphics_path / f"{EXPERIMENT_ID}_LSBP_Normal"
carpeta_graficas.mkdir(parents=True, exist_ok=True)

# Generar gráficas usando el módulo
splits = [
    (y_true, y_pred_mean, "Test Set")
]
plot_regression_analysis(
    splits=splits,
    output_path=str(carpeta_graficas),
    model_name="LSBP_Normal"
)
print(f"✓ Gráficas guardadas en: {carpeta_graficas}")

##################################################
# Trazas 
##################################################
# Parametros a ver las trazas
hyperparams = [
    ('mu', 'μ (Intercepto stick-breaking)'),
    ('mu0', 'μ₀ (Media base)'),
    ('kappa0', 'κ₀ (Precisión relativa)'),
    ('a0', 'a₀ (Shape σ²)'),
    ('b0', 'b₀ (Scale σ²)'),    
    ('n_clusters', 'Numero de Clusters')   
]

plot_hyperparameter_traces(
    trace=trace,
    param_config=hyperparams,
    output_path=carpeta_graficas / f"{EXPERIMENT_ID}.png",
    title="Trazas LSBP KN"
)

##################################################
# Grafica de intervalo prediccion media 
##################################################

# Versión simple 
plot_credible_intervals(
    y_true=y_true,
    y_pred_mean=y_pred_mean,
    y_pred_std=y_pred_std,
    output_path=carpeta_graficas / "intervalos_credibilidad.png",
    sort_by='y_true'
)

print(f"✓ Todas las gráficas guardadas en: {carpeta_graficas}") 


📊 MÉTRICAS DE AJUSTE:
------------------------------------------------------------
  MSE     :   6.752447
  RMSE    :   2.598547
  MAE     :   1.515807
  R2      :   0.975396
  MAPE    :   5.319070
------------------------------------------------------------

✓ Métricas guardadas: C:\Users\JuanFran\Desktop\git_tesis\model_ddp\reports\reales\concrete_Data_20251227_213314\metrics_lsbp_model_normal.json
✓ Gráficas guardadas en: C:\Users\JuanFran\Desktop\git_tesis\model_ddp\reports\reales\concrete_Data_20251227_213314_LSBP_Normal


  plt.tight_layout()
  plt.savefig(output_path, dpi=300, bbox_inches='tight')


✓ Todas las gráficas guardadas en: C:\Users\JuanFran\Desktop\git_tesis\model_ddp\reports\reales\concrete_Data_20251227_213314_LSBP_Normal


### LSBP Laplace

In [31]:
########################################################
# Hacer predicciones con el modelo entrenado
########################################################
y_pred_mean, y_pred_std = lsbp_model_laplace.predict_mean(
    X_new=data_test.drop(columns=["Y"]).values,
    n_samples=1000
)
y_true = data_test["Y"].values

# Calcular métricas
metrics = regression_metrics(y_true, y_pred_mean)

print("\n📊 MÉTRICAS DE AJUSTE:")
print("-" * 60)
for metric_name, metric_value in metrics.items():
    print(f"  {metric_name.upper():8s}: {metric_value:10.6f}")
print("-" * 60)

# Carpeta
report_path = get_report_path(config, SIM_REAL, "tables")
carpeta_reportes = report_path / f"{EXPERIMENT_ID}"
carpeta_reportes.mkdir(parents=True, exist_ok=True)

# Guardar métricas en JSON
metrics_file = carpeta_reportes / "metrics_lsbp_model_laplace.json"
with open(metrics_file, 'w') as f:
    json.dump(metrics, f, indent=2)
print(f"\n✓ Métricas guardadas: {metrics_file}")

########################################################
# Predicciones completas para generar graficas
########################################################
# Guardar predicciones completas
predictions_df = pd.DataFrame({
    'y_true': y_true,
    'y_pred_mean': y_pred_mean,
    'y_pred_std': y_pred_std,
    'residual': y_true - y_pred_mean,
    'residual_std': (y_true - y_pred_mean) / y_pred_std  # Residuos estandarizados
})

##################################################
# Gráficas de Fit 
##################################################
# Crear carpeta para gráficas
graphics_path = get_report_path(config, SIM_REAL, "graphics")
carpeta_graficas = graphics_path / f"{EXPERIMENT_ID}_LSBP_Laplace"
carpeta_graficas.mkdir(parents=True, exist_ok=True)

# Generar gráficas usando el módulo
splits = [
    (y_true, y_pred_mean, "Test Set")
]
plot_regression_analysis(
    splits=splits,
    output_path=str(carpeta_graficas),
    model_name="LSBP_Laplace"
)
print(f"✓ Gráficas guardadas en: {carpeta_graficas}")

##################################################
# Trazas 
##################################################
# Parametros a ver las trazas
hyperparams = [
    ('mu', 'μ (Intercepto stick-breaking)'),
    ('mu0', 'μ₀ (Media base)'),
    ('tau0', 'τ₀ (Precisión)'),  # CORREGIDO
    ('a0', 'a₀ (Shape b)'),
    ('beta0', 'β₀ (Scale b)'),    # CORREGIDO
    ('n_clusters', 'Numero de Clusters')   
]

plot_hyperparameter_traces(
    trace=trace,
    param_config=hyperparams,
    output_path=carpeta_graficas / f"{EXPERIMENT_ID}.png",
    title="Trazas LSBP Laplace"
)

# Versión simple
plot_credible_intervals(
    y_true=y_true,
    y_pred_mean=y_pred_mean,
    y_pred_std=y_pred_std,
    output_path=carpeta_graficas / "intervalos_credibilidad.png",
    sort_by='y_true'
)

print(f"✓ Todas las gráficas guardadas en: {carpeta_graficas}") 



📊 MÉTRICAS DE AJUSTE:
------------------------------------------------------------
  MSE     :  48.704407
  RMSE    :   6.978854
  MAE     :   5.409517
  R2      :   0.822534
  MAPE    :  20.265105
------------------------------------------------------------

✓ Métricas guardadas: C:\Users\JuanFran\Desktop\git_tesis\model_ddp\reports\reales\concrete_Data_20251227_213314\metrics_lsbp_model_laplace.json
✓ Gráficas guardadas en: C:\Users\JuanFran\Desktop\git_tesis\model_ddp\reports\reales\concrete_Data_20251227_213314_LSBP_Laplace


  plt.tight_layout()
  plt.savefig(output_path, dpi=300, bbox_inches='tight')


✓ Todas las gráficas guardadas en: C:\Users\JuanFran\Desktop\git_tesis\model_ddp\reports\reales\concrete_Data_20251227_213314_LSBP_Laplace
