# Imports

In [1]:
# %% Imports and configuration
import os
import sys
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import pickle
from datetime import datetime
from pathlib import Path
import json

# Extend sys.path so the project package imported below can be resolved.
# NOTE(review): the appended entry is "<parent of cwd>/.." (never normalised),
# i.e. two levels above the working directory -- confirm this is the repo root.
sys.path.append(os.path.join(os.path.dirname(os.getcwd()), '..'))

# Project imports: configuration, path and experiment-registry helpers
from model_ddp.utils.sistem_fun import (
    load_config,
    get_data_path,
    get_artifact_path,
    get_report_path,
    create_experiment_id,
    ensure_directories,
    save_experiment_metadata
)

# Synthetic-data simulator, kernels and transformation functions
from model_ddp.simulations.gaussian_simulator import (
    RegressionSimulator,
    SimulationConfig,
    RBFKernel,
    MaternKernel,
    PeriodicKernel,
    TransformationFunctions
)

# Models
from model_ddp.models.DDPL2_v1 import DDPLinearSpline2

# Metrics and plots
from model_ddp.fit.metrics import regression_metrics
from model_ddp.graphics.plots_regression import plot_regression_analysis
from model_ddp.graphics.plots_traces import plot_hyperparameter_traces
from model_ddp.graphics.plots_aplication import plot_credible_intervals

# Project configuration object; later cells pass it to the path and registry helpers.
config=load_config()

M√≥dulo C++ cargado exitosamente


Los experimentos realizados:
- 1 Feature Linear (n=200) -> 1200 (200 burn)
- 2 Feature Linear (n=200) -> 1200 (200 burn)
- 2 Feature Cuadraticos (n=400) -> 1200 (200 burn)
- 3 Feature Linear, variar Kernel Matérn (n=800) -> 2400 (400 burn)

Busco evaluar el tiempo de desempeño de la clase LSBP para el caso normal y las mejoras que realicé en C++; aquí podrá ver cómo aplicar la mayor parte de las funciones.

In [None]:
# Planned experiments (data set-up -> MCMC iterations, burn-in in parentheses):
# 1 linear feature (n=200) -> 1200 (200 burn-in)
# 2 linear features (n=200) -> 1200 (200 burn-in)
# 2 quadratic features (n=400) -> 1200 (200 burn-in)
# 3 linear features, varying the Matern kernel (n=800) -> 2400 (400 burn-in)

# Experimento I 

In [2]:
# Run-level settings for Experiment I.
# SIM_REAL is handed to the path helpers and stored as the registry 'tipo';
# NOMBRE_EJECUCION is stored as the registry 'nombre'.
SIM_REAL = "simulation"
NOMBRE_EJECUCION = "model_ddp2_001"

In [3]:
# Experiment-specific settings.
# NOTE(review): the description advertises 1200 iterations / 200 burn-in, but the
# model cell of this experiment calls fit(n_iter=100, burn_in=20) -- confirm which
# one is intended before trusting the registry entry.
CARACTERISTICAS = "1 Feature Linear (n=200) -> 1200 (200 burn)"
# Derive the id prefix from the run name so the two cannot drift apart
# (evaluates to "model_ddp2_001_exp_01", the literal used previously).
EXPERIMENT_ID = create_experiment_id(f"{NOMBRE_EJECUCION}_exp_01")

In [4]:
# --------------------------------------------------
# Register the experiment
# --------------------------------------------------

# Record describing this run; it is handed to the registry helper, which
# returns the location of the registry file that was written.
experiment_data = dict(
    experiment_id=EXPERIMENT_ID,
    nombre=NOMBRE_EJECUCION,
    tipo=SIM_REAL,
    descripcion="Experimento: {}".format(CARACTERISTICAS),
)
registry_file = save_experiment_metadata(config, experiment_data)
print("‚úì Experimento registrado en: {}".format(registry_file))


‚úì Experimento registrado en: C:\Users\JuanFran\Desktop\git_tesis\model_ddp\versioning\experiment_registry.md


In [5]:
##################################################
# Create the output folder
##################################################
data_path = get_data_path(config, SIM_REAL, "output")
carpeta_datos = data_path / f"{EXPERIMENT_ID}"
carpeta_datos.mkdir(parents=True, exist_ok=True)

##################################################
# Simulated (or real) data
##################################################
# Simulation settings: 200 samples, one feature, fixed seed for reproducibility.
# NOTE(review): the recorded run reports X with mean ~0.36 and std ~0.83, which
# does not look like values spread over x_range=(0, 100) -- confirm how the
# simulator uses x_range.
sim_config = SimulationConfig(
    n_samples=200,
    n_features=1,
    x_range=(0.0, 100.0),
    noise_std=0.2,
    random_state=234
)

# Kernel definition (RBF)
kernel = RBFKernel(
    length_scale=5.0,
    variance=1.0
)

# Linear transformation: coefficient 2 for the single feature, intercept 10
transformation = TransformationFunctions.linear(
    coefficients= [2],
    intercept=[10]
)

# Build the simulator
simulator = RegressionSimulator(
    config=sim_config,
    kernel=kernel,
    transformation=transformation
)

# Generate the data
print("Generando datos...")
X, Y = simulator.simulate()

print("‚úì Datos generados exitosamente")
print(f"\nEstad√≠sticas de X:")
print(f"  Shape: {X.shape}")
print(f"  Media por feature: {X.mean(axis=0)}")
print(f"  Std por feature: {X.std(axis=0)}")
print(f"\nEstad√≠sticas de Y:")
print(f"  Shape: {Y.shape}")
print(f"  Media: {Y.mean():.4f}")
print(f"  Std: {Y.std():.4f}")
print(f"  Min: {Y.min():.4f}")
print(f"  Max: {Y.max():.4f}")

##################################################
# Convert to a DataFrame
##################################################
# One column per feature (X1, X2, ...) plus the response Y.
datos = pd.DataFrame(X, columns=[f'X{i+1}' for i in range(sim_config.n_features)])
datos['Y'] = Y

##################################################
# Save the DataFrame
##################################################
# Join with the path operator instead of a hard-coded "/" so the separator is
# consistent on every platform (the recorded Windows run mixed "\" and "/").
csv_filename = carpeta_datos / "_data.csv"
datos.to_csv(csv_filename, index=False)

print(f"‚úì Datos guardados en CSV: {csv_filename}")

Generando datos...
‚úì Datos generados exitosamente

Estad√≠sticas de X:
  Shape: (200, 1)
  Media por feature: [0.35866669]
  Std por feature: [0.82864792]

Estad√≠sticas de Y:
  Shape: (200,)
  Media: 10.7327
  Std: 1.6713
  Min: 6.4454
  Max: 13.9424
‚úì Datos guardados en CSV: C:\Users\JuanFran\Desktop\git_tesis\model_ddp\data\simulaciones\model_ddp2_001_exp_01_20251227_205530/_data.csv


### Modelo

In [6]:
##################################################
# Model
##################################################
# The banners name the class that is actually fitted here (DDPLinearSpline2);
# they previously announced "LSBPLaplace"/"LSBP", which this cell never runs.
print("\n" + "="*60)
print("EJECUTANDO DDPLinearSpline2...")
print("="*60)

# NOTE(review): Omega_Psi is hard-coded as 6x6. The recorded run logs an
# expanded dimension K=6 for p=1, degree=2, n_knots=2, so the size presumably
# has to equal K and must be updated whenever those settings change.
# NOTE(review): the recorded execution of this cell ended with
# "LinAlgError: 6-th leading minor of the array is not positive definite";
# downstream cells should not be trusted until that is resolved.
model_ddp2 = DDPLinearSpline2(
    y=datos["Y"].values,
    X=datos.drop(columns=["Y"]).values,
    H=10,
    n_knots=2,
    degree=2,
    nu_Psi=10,
    Omega_Psi=0.1 * np.eye(6),
    standardize_splines=True,  # enable spline standardization
    use_cpp=True,
    seed=42,
    verbose=True
)

# NOTE(review): CARACTERISTICAS describes 1200 iterations / 200 burn-in, while
# this call uses 100 / 20 -- confirm which is intended.
model_ddp2.fit(n_iter=100, burn_in=20, thin=1)

print("\n" + "="*60)
print("DDPLinearSpline2 COMPLETADO")
print("="*60)


EJECUTANDO LSBPLaplace...
Usando aceleraci√≥n C++ (seed=42)
  Splines estandarizadas: mean=0.200, std=0.222
Inicializando DDPLinearSpline2 con estandarizaci√≥n robusta
  n=200, p=1, H=10
  Grado spline=2, knots internos=2
  Dimensi√≥n expandida K=6
  Estandarizaci√≥n de splines: True
Inicializando par√°metros...
Inicializaci√≥n completa. Clusters activos: 8

Iniciando MCMC: 100 iteraciones
  Burn-in: 20, Thin: 1


LinAlgError: 6-th leading minor of the array is not positive definite

### Guardado Modelo

In [None]:
##################################################
#  Save model (DDPLinearSpline2)
##################################################
def _json_default(value):
    """Fallback for ``json.dump``: turn NumPy scalars/arrays into native Python objects.

    Only invoked by the encoder for objects it cannot serialize on its own, so
    values that were already serializable are written exactly as before.

    Raises:
        TypeError: if ``value`` is not a NumPy scalar or array.
    """
    if isinstance(value, np.ndarray):
        return value.tolist()
    if isinstance(value, np.generic):
        return value.item()
    raise TypeError(f"Object of type {type(value).__name__} is not JSON serializable")


# Create the destination folder
artifact_path = get_artifact_path(config, SIM_REAL)
carpeta_modelo = artifact_path / f"{EXPERIMENT_ID}"
carpeta_modelo.mkdir(parents=True, exist_ok=True)

print("\n" + "="*60)
print("GUARDANDO MODELO Y RESULTADOS...")
print("="*60)

# 1. Save the full model (without cpp_core, which is not serializable)
model_file = carpeta_modelo / "ddp_model.pkl"

# Temporarily detach cpp_core for serialization
cpp_core_backup = model_ddp2.cpp_core if hasattr(model_ddp2, 'cpp_core') else None
use_cpp_backup = model_ddp2.use_cpp

# Drop the reference to the C++ backend
if hasattr(model_ddp2, 'cpp_core'):
    model_ddp2.cpp_core = None
model_ddp2.use_cpp = False

try:
    with open(model_file, 'wb') as f:
        pickle.dump(model_ddp2, f)
    print(f"‚úì Modelo guardado: {model_file}")
finally:
    # Always restore the C++ backend, even if pickling failed
    if cpp_core_backup is not None:
        model_ddp2.cpp_core = cpp_core_backup
    model_ddp2.use_cpp = use_cpp_backup

# 2. Save only the traces (lighter than the full model)
trace_file = carpeta_modelo / "trace.pkl"
with open(trace_file, 'wb') as f:
    pickle.dump(model_ddp2.trace, f)
print(f"‚úì Trazas guardadas: {trace_file}")

# 3. Save the posterior summary.
# The payload is built BEFORE the file is opened so that a missing key or a
# failed conversion cannot leave an empty/truncated JSON file behind.
summary = model_ddp2.get_cluster_summary()
summary_json = {
    'n_clusters_mean': float(summary['n_clusters_mean']),
    'n_clusters_std': float(summary['n_clusters_std']),
    'n_clusters_mode': int(summary['n_clusters_mode']),
    'cluster_sizes': {int(k): int(v) for k, v in summary['cluster_sizes'].items()},
    'concentration_mean': float(summary['concentration_mean']),
    'concentration_std': float(summary['concentration_std'])
}
summary_file = carpeta_modelo / "posterior_summary.json"
with open(summary_file, 'w') as f:
    json.dump(summary_json, f, indent=2, default=_json_default)
print(f"‚úì Resumen posterior guardado: {summary_file}")

# 4. Save the experiment metadata
n_posterior_samples = len(model_ddp2.trace['z'])
metadata = {
    'experiment_id': EXPERIMENT_ID,
    'timestamp': datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
    'model_type': 'DDPLinearSpline2',
    'data_shape': {
        'n': model_ddp2.n,
        'p': model_ddp2.p
    },
    # NOTE(review): H_initial / iterations / burnin / thin are literals that
    # mirror the values used in the model cell; keep them in sync by hand.
    'hyperparameters': {
        'H_initial': 10,
        'H_final': model_ddp2.H,
        'iterations': 100,
        'burnin': 20,
        'thin': 1,
        'degree': model_ddp2.degree,
        'n_knots': model_ddp2.n_knots,
        'K_expanded': model_ddp2.K,
        'standardize_splines': model_ddp2.standardize_splines
    },
    'priors': {
        'mu_m': model_ddp2.mu_m.tolist(),
        'Sigma_m_shape': model_ddp2.Sigma_m.shape,
        'alpha_kappa': model_ddp2.alpha_kappa,
        'beta_kappa': model_ddp2.beta_kappa,
        'alpha_nu': model_ddp2.alpha_nu,
        'beta_nu': model_ddp2.beta_nu,
        'nu_Psi': model_ddp2.nu_Psi,
        'Omega_Psi_shape': model_ddp2.Omega_Psi.shape,
        'alpha_aM': model_ddp2.alpha_aM,
        'beta_aM': model_ddp2.beta_aM,
        'alpha_bM': model_ddp2.alpha_bM,
        'beta_bM': model_ddp2.beta_bM
    },
    'final_stats': {
        'H_final': model_ddp2.H,
        'n_clusters_mean': float(summary['n_clusters_mean']),
        'n_clusters_std': float(summary['n_clusters_std']),
        'n_clusters_mode': int(summary['n_clusters_mode']),
        'M_mean': float(summary['concentration_mean']),
        'M_std': float(summary['concentration_std'])
    },
    'n_posterior_samples': n_posterior_samples
}

metadata_file = carpeta_modelo / "metadata.json"
with open(metadata_file, 'w') as f:
    # The prior values are stored as found on the model; the fallback converts
    # any NumPy scalar/array among them instead of aborting the dump.
    json.dump(metadata, f, indent=2, default=_json_default)
print(f"‚úì Metadatos guardados: {metadata_file}")

# 5. Save the normalization / standardization information
normalization_file = carpeta_modelo / "normalization.pkl"
normalization_data = {
    'y_mean': model_ddp2.y_mean,
    'y_std': model_ddp2.y_std,
    'X_mean': model_ddp2.X_mean,
    'X_std': model_ddp2.X_std,
    'spline_mean': model_ddp2.spline_bases['spline_mean'],
    'spline_std': model_ddp2.spline_bases['spline_std'],
    'knots': model_ddp2.spline_bases['knots'],
    'K_j': model_ddp2.spline_bases['K_j']
}
with open(normalization_file, 'wb') as f:
    pickle.dump(normalization_data, f)
print(f"‚úì Datos de normalizaci√≥n guardados: {normalization_file}")

# 6. Save summary statistics of the MCMC traces
trace_M = model_ddp2.trace['M']
trace_n_clusters = model_ddp2.trace['n_clusters']
trace_stats = {
    'n_samples': n_posterior_samples,
    'parameters_tracked': list(model_ddp2.trace.keys()),
    'M_trace': {
        'mean': float(np.mean(trace_M)),
        'std': float(np.std(trace_M)),
        'min': float(np.min(trace_M)),
        'max': float(np.max(trace_M))
    },
    'n_clusters_trace': {
        'mean': float(np.mean(trace_n_clusters)),
        'std': float(np.std(trace_n_clusters)),
        'min': int(np.min(trace_n_clusters)),
        'max': int(np.max(trace_n_clusters))
    }
}

trace_stats_file = carpeta_modelo / "trace_stats.json"
with open(trace_stats_file, 'w') as f:
    json.dump(trace_stats, f, indent=2, default=_json_default)
print(f"‚úì Estad√≠sticas de trazas guardadas: {trace_stats_file}")

print("\n" + "="*60)
print(f"MODELO GUARDADO EN: {carpeta_modelo}")
print("="*60)


GUARDANDO MODELO Y RESULTADOS...
‚úì Modelo guardado: C:\Users\JuanFran\Desktop\git_tesis\model_ddp\artefact\simulaciones\models\model_ddp2_001_exp_01_20251227_203753\ddp_model.pkl
‚úì Trazas guardadas: C:\Users\JuanFran\Desktop\git_tesis\model_ddp\artefact\simulaciones\models\model_ddp2_001_exp_01_20251227_203753\trace.pkl
‚úì Resumen posterior guardado: C:\Users\JuanFran\Desktop\git_tesis\model_ddp\artefact\simulaciones\models\model_ddp2_001_exp_01_20251227_203753\posterior_summary.json
‚úì Metadatos guardados: C:\Users\JuanFran\Desktop\git_tesis\model_ddp\artefact\simulaciones\models\model_ddp2_001_exp_01_20251227_203753\metadata.json
‚úì Datos de normalizaci√≥n guardados: C:\Users\JuanFran\Desktop\git_tesis\model_ddp\artefact\simulaciones\models\model_ddp2_001_exp_01_20251227_203753\normalization.pkl
‚úì Estad√≠sticas de trazas guardadas: C:\Users\JuanFran\Desktop\git_tesis\model_ddp\artefact\simulaciones\models\model_ddp2_001_exp_01_20251227_203753\trace_stats.json

MODELO GUARDA

### Predicciones, grafica de Predicciones y guardado

In [18]:
##################################################
#  FIT  (per experiment)
##################################################
print("\n" + "="*60)
print("GENERANDO PREDICCIONES...")
print("="*60)

# Predict on the training inputs with the fitted model
y_pred_mean, y_pred_std = model_ddp2.predict_mean(
    X_new=datos.drop(columns=["Y"]).values,
    n_samples=None
)

y_true = datos["Y"].values

# Compute the fit metrics.
# NOTE(review): in the recorded run these metrics are of order 1e102 and the
# sampler state has exploded, i.e. the chain diverged; treat the saved numbers
# as invalid until the model cell runs cleanly.
metrics = regression_metrics(y_true, y_pred_mean)

print("\nüìä M√âTRICAS DE AJUSTE:")
print("-" * 60)
for metric_name, metric_value in metrics.items():
    print(f"  {metric_name.upper():8s}: {metric_value:10.6f}")
print("-" * 60)

# Report folder
report_path = get_report_path(config, SIM_REAL, "tables")
carpeta_reportes = report_path / f"{EXPERIMENT_ID}"
carpeta_reportes.mkdir(parents=True, exist_ok=True)

# Save the metrics as JSON
metrics_file = carpeta_reportes / "metrics.json"
with open(metrics_file, 'w') as f:
    json.dump(metrics, f, indent=2)
print(f"\n‚úì M√©tricas guardadas: {metrics_file}")

# Save the full predictions.
# The residual is computed once; the standardized residual is left as NaN
# wherever the predictive std is not strictly positive, instead of dividing by
# zero (which produced inf/NaN plus a RuntimeWarning).
residual = np.asarray(y_true - y_pred_mean, dtype=float)
pred_std = np.asarray(y_pred_std, dtype=float)
residual_std = np.divide(
    residual,
    pred_std,
    out=np.full_like(residual, np.nan, dtype=float),
    where=pred_std > 0
)

predictions_df = pd.DataFrame({
    'y_true': y_true,
    'y_pred_mean': y_pred_mean,
    'y_pred_std': y_pred_std,
    'residual': residual,
    'residual_std': residual_std  # standardized residuals
})

predictions_file = carpeta_reportes / "predictions.csv"
predictions_df.to_csv(predictions_file, index=False)
print(f"‚úì Predicciones guardadas: {predictions_file}")

##################################################
# Fit plots (per experiment)
##################################################
print("\n" + "="*60)
print("GENERANDO GR√ÅFICAS...")
print("="*60)

# Create the plots folder
graphics_path = get_report_path(config, SIM_REAL, "graphics")
carpeta_graficas = graphics_path / f"{EXPERIMENT_ID}"
carpeta_graficas.mkdir(parents=True, exist_ok=True)

# Produce the plots with the project plotting module; each split is a
# (y_true, y_pred, label) triple.
splits = [
    (y_true, y_pred_mean, "Training Set")
]

plot_regression_analysis(
    splits=splits,
    output_path=str(carpeta_graficas),
    model_name="DDP Linear Spline"
)

print(f"‚úì Gr√°ficas guardadas en: {carpeta_graficas}")


GENERANDO PREDICCIONES...


  sigma2_components[h] = np.exp(log_sigma2_h)



üìä M√âTRICAS DE AJUSTE:
------------------------------------------------------------
  MSE     : 1935787489804005966875057769925876964046515271701481309812783219499097013382289809995646965327286958333065092056363749908178225777830602603504274818268702508568214113760651588576810547055410261022505697280.000000
  RMSE    : 1391325802895930652926942623776430609001608484955876072078673479383052785559339215107700395821888438272.000000
  MAE     : 1250994784574468811130408089074711463878724016390705087343158392080339540128343391193746329956127866880.000000
  R2      : -693016473534011689477852144695165331322118837323427463800551793022132874761590645480671925666278661288464641886519468725513774558848560817165788247528213089719520028909167624029125690021300330163731431424.000000
  MAPE    : 11499207886114669023007766548807646301599730177218543250616201602136173956200824995249487563817440247808.000000
------------------------------------------------------------

‚úì M√©tricas guardadas: C:\Us

LinAlgError: Singular matrix

Error in callback <function _draw_all_if_interactive at 0x000002A3B8600D60> (for post_execute), with arguments args (),kwargs {}:


LinAlgError: Singular matrix

LinAlgError: Singular matrix

<Figure size 600x1000 with 2 Axes>

In [19]:
# Diagnostic dump of the fitted model's current state; the original author
# intended it to be run BEFORE the prediction cell.
print("\nüîç DIAGN√ìSTICO DEL MODELO:")

# Response normalization constants
for stat_name in ("y_mean", "y_std"):
    print(f"  {stat_name}: {getattr(model_ddp2, stat_name):.4f}")

# Range and mean of the latest lambda_h / xi_h values
for vec_name in ("lambda_h", "xi_h"):
    print(f"  √öltimos {vec_name}:")
    for reducer in ("min", "max", "mean"):
        values = getattr(model_ddp2, vec_name)
        print(f"    {reducer}: {getattr(values, reducer)():.4f}")

# Trace and largest eigenvalue of the two Sigma matrices
for cov_name in ("Sigma_lambda", "Sigma_xi"):
    print(f"  {cov_name}:")
    print(f"    trace: {np.trace(getattr(model_ddp2, cov_name)):.4f}")
    print(f"    max eigenvalue: {np.linalg.eigvals(getattr(model_ddp2, cov_name)).max():.4f}")


üîç DIAGN√ìSTICO DEL MODELO:
  y_mean: 10.7327
  y_std: 1.6713
  √öltimos lambda_h:
    min: -36412013780735131439544914381811255931016218669843718972265026323347220101514248204854791331797345773813760.0000
    max: 37593332751607756692096598820802516001582537920104616575460848503216230528543456199600182680556406061400064.0000
    mean: -962244344919123460394602669531347034893381821603154364287420544850748498475749284318140831808359657111552.0000
  √öltimos xi_h:
    min: -17151626264943184460667333448517829153893121844034741308799503887032875995874270887889754378638651130118144.0000
    max: 55566145100385356617047573457723057972920395025220800553723718805719035991191619930540083260181471721160704.0000
    mean: 12133779176932344625796746806735912587591073059861303441045682632679066889552988279767498914505810688081920.0000
  Sigma_lambda:
    trace: 1000000.0000
    max eigenvalue: 999909.0973
  Sigma_xi:
    trace: 1000000.0000
    max eigenvalue: 999908.6770


### Otros analisis

In [None]:
##################################################
# Traces
##################################################
# Experiment I fits DDPLinearSpline2, whose posterior samples live in
# `model_ddp2.trace`. No standalone `trace` variable is created in this
# experiment, so the plot previously raised NameError on a fresh kernel (or
# silently used a stale trace left over from another model).
trace = model_ddp2.trace

# Candidate parameters to plot as (trace key, label). Only keys that are
# actually present in the trace are kept: the LSBP-specific entries are skipped
# for this model unless it happens to track them.
candidate_params = [
    ('M', 'M (Parametro de concentracion)'),
    ('mu', 'Œº (Intercepto stick-breaking)'),
    ('mu0', 'Œº‚ÇÄ (Media base)'),
    ('tau0', 'œÑ‚ÇÄ (Precisi√≥n)'),
    ('a0', 'a‚ÇÄ (Shape b)'),
    ('beta0', 'Œ≤‚ÇÄ (Scale b)'),
    ('n_clusters', 'Numero de Clusters')
]
hyperparams = [(key, label) for key, label in candidate_params if key in trace]

plot_hyperparameter_traces(
    trace=trace,
    param_config=hyperparams,
    output_path=carpeta_graficas / f"{EXPERIMENT_ID}.png",
    title="Trazas DDP Linear Spline"
)

# Simple version: credible intervals sorted by the true response
plot_credible_intervals(
    y_true=y_true,
    y_pred_mean=y_pred_mean,
    y_pred_std=y_pred_std,
    output_path=carpeta_graficas / "intervalos_credibilidad.png",
    sort_by='y_true'
)

print(f"‚úì Todas las gr√°ficas guardadas en: {carpeta_graficas}")

# Experimento II 

In [None]:
# Run-level settings for Experiment II.
# SIM_REAL is handed to the path helpers and stored as the registry 'tipo';
# NOMBRE_EJECUCION is stored as the registry 'nombre'.
SIM_REAL = "simulation"
NOMBRE_EJECUCION = "model_lsbp_002"

In [None]:
# Experiment-specific settings: unique id for this run, then the free-text
# description that is written to the experiment registry.
EXPERIMENT_ID = create_experiment_id("lsbp_002_exp_02")
CARACTERISTICAS = "2 Feature Linear (n=200) -> 1200 (200 burn)"

In [None]:
# --------------------------------------------------
# Register the experiment
# --------------------------------------------------

# Record describing this run; it is handed to the registry helper, which
# returns the location of the registry file that was written.
experiment_data = dict(
    experiment_id=EXPERIMENT_ID,
    nombre=NOMBRE_EJECUCION,
    tipo=SIM_REAL,
    descripcion="Experimento: {}".format(CARACTERISTICAS),
)
registry_file = save_experiment_metadata(config, experiment_data)
print("‚úì Experimento registrado en: {}".format(registry_file))


In [None]:
##################################################
# Create the output folder
##################################################
data_path = get_data_path(config, SIM_REAL, "output")
carpeta_datos = data_path / f"{EXPERIMENT_ID}"
carpeta_datos.mkdir(parents=True, exist_ok=True)

##################################################
# Simulated (or real) data
##################################################
# Simulation settings: 200 samples, two features, fixed seed for reproducibility.
sim_config = SimulationConfig(
    n_samples=200,
    n_features=2,
    x_range=(0.0, 100.0),
    noise_std=0.2,
    random_state=234
)

# Kernel definition (RBF)
kernel = RBFKernel(
    length_scale=5.0,
    variance=1.0
)

# Linear transformation: coefficients 2 and 15 for the two features, intercept 10
transformation = TransformationFunctions.linear(
    coefficients= [2,15],
    intercept=[10]
)

# Build the simulator
simulator = RegressionSimulator(
    config=sim_config,
    kernel=kernel,
    transformation=transformation
)

# Generate the data
print("Generando datos...")
X, Y = simulator.simulate()

print("‚úì Datos generados exitosamente")
print(f"\nEstad√≠sticas de X:")
print(f"  Shape: {X.shape}")
print(f"  Media por feature: {X.mean(axis=0)}")
print(f"  Std por feature: {X.std(axis=0)}")
print(f"\nEstad√≠sticas de Y:")
print(f"  Shape: {Y.shape}")
print(f"  Media: {Y.mean():.4f}")
print(f"  Std: {Y.std():.4f}")
print(f"  Min: {Y.min():.4f}")
print(f"  Max: {Y.max():.4f}")

##################################################
# Convert to a DataFrame
##################################################
# One column per feature (X1, X2, ...) plus the response Y.
datos = pd.DataFrame(X, columns=[f'X{i+1}' for i in range(sim_config.n_features)])
datos['Y'] = Y

##################################################
# Save the DataFrame
##################################################
# Join with the path operator instead of a hard-coded "/" so the separator is
# consistent on every platform.
csv_filename = carpeta_datos / "_data.csv"
datos.to_csv(csv_filename, index=False)

print(f"‚úì Datos guardados en CSV: {csv_filename}")

## Modelo 

In [None]:
##################################################
# Model
##################################################
print("\n" + "="*60)
print("EJECUTANDO LSBPLaplace...")
print("="*60)

# Build the model instance.
# NOTE(review): `LSBPLaplace` is not imported in the notebook's import cell
# (only DDPLinearSpline2 is), so on a fresh kernel this line raises NameError.
# Add the missing import, or switch this experiment to DDPLinearSpline2.
lsbp_model = LSBPLaplace(
    y=datos["Y"].values,
    X=datos.drop(columns=["Y"]).values,
    H=20,                     # Initial number of truncated clusters
    verbose=True              # Show progress
)

# Run the MCMC sampler; the returned trace is reused by the saving and
# plotting cells of this experiment.
trace = lsbp_model.run(
    iterations=1200,          # Total iterations
    burnin=200               # Burn-in
)

print("\n" + "="*60)
print("LSBP COMPLETADO")
print("="*60)

## Guardado Modelo

In [None]:
##################################################
#  Save model (per experiment)
##################################################

# Create the destination folder
artifact_path = get_artifact_path(config, SIM_REAL)
carpeta_modelo = artifact_path / f"{EXPERIMENT_ID}"
carpeta_modelo.mkdir(parents=True, exist_ok=True)

print("\n" + "="*60)
print("GUARDANDO MODELO Y RESULTADOS...")
print("="*60)

# 1. Save the full model (LSBPLaplace object)
model_file = carpeta_modelo / "lsbp_model.pkl"
with open(model_file, 'wb') as f:
    pickle.dump(lsbp_model, f)
print(f"‚úì Modelo guardado: {model_file}")

# 2. Save only the traces (lighter than the full model)
trace_file = carpeta_modelo / "trace.pkl"
with open(trace_file, 'wb') as f:
    pickle.dump(trace, f)
print(f"‚úì Trazas guardadas: {trace_file}")

# 3. Save the posterior summary.
# Each summary entry is indexed as (mean, std). The payload is built BEFORE
# the file is opened so a failure cannot leave an empty/truncated JSON file.
summary = lsbp_model.get_posterior_summary()
summary_json = {k: {'mean': v[0], 'std': v[1]} for k, v in summary.items()}
summary_file = carpeta_modelo / "posterior_summary.json"
with open(summary_file, 'w') as f:
    json.dump(summary_json, f, indent=2)
print(f"‚úì Resumen posterior guardado: {summary_file}")

# 4. Save the experiment metadata
metadata = {
    'experiment_id': EXPERIMENT_ID,
    'timestamp': datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
    'model_type': 'LSBPLaplace',
    'data_shape': {
        'n': lsbp_model.n,
        'p': lsbp_model.p
    },
    'hyperparameters': {
        # Must mirror the H passed to LSBPLaplace in the model cell (H=20);
        # the value 15 recorded previously did not match the fitted model.
        'H_initial': 20,
        'iterations': 1200,
        'burnin': 200,
        'n_grid': lsbp_model.n_grid
    },
    'priors': {
        'mu_prior': (lsbp_model.mu_mu, lsbp_model.tau_mu_inv),
        'mu0_prior': (lsbp_model.m0, lsbp_model.s02),
        'tau0_prior': (lsbp_model.alpha_tau, lsbp_model.beta_tau),
        'a0_prior': (lsbp_model.alpha_a, lsbp_model.beta_a),
        'beta0_prior': (lsbp_model.alpha_beta, lsbp_model.beta_beta),
        'psi_prior': (lsbp_model.mu_psi, lsbp_model.tau_psi_inv)
    },
    'final_stats': {
        'H_final': lsbp_model.H,
        'n_clusters_mean': summary['n_clusters'][0],
        'n_clusters_std': summary['n_clusters'][1]
    },
    # Mean acceptance over the last 100 recorded Metropolis-Hastings steps
    # (0 when nothing was recorded for that parameter)
    'acceptance_rates': {
        'alpha': np.mean(lsbp_model.mh_acceptance['alpha'][-100:]) if lsbp_model.mh_acceptance['alpha'] else 0,
        'psi': np.mean(lsbp_model.mh_acceptance['psi'][-100:]) if lsbp_model.mh_acceptance['psi'] else 0,
        'tau0': np.mean(lsbp_model.mh_acceptance['tau0'][-100:]) if lsbp_model.mh_acceptance['tau0'] else 0,
        'a0': np.mean(lsbp_model.mh_acceptance['a0'][-100:]) if lsbp_model.mh_acceptance['a0'] else 0
    }
}

metadata_file = carpeta_modelo / "metadata.json"
with open(metadata_file, 'w') as f:
    json.dump(metadata, f, indent=2)
print(f"‚úì Metadatos guardados: {metadata_file}")

# 5. Save the normalization information
normalization_file = carpeta_modelo / "normalization.pkl"
normalization_data = {
    'y_mean': lsbp_model.y_mean,
    'y_std': lsbp_model.y_std,
    'X_mean': lsbp_model.X_mean,
    'X_std': lsbp_model.X_std
}
with open(normalization_file, 'wb') as f:
    pickle.dump(normalization_data, f)
print(f"‚úì Datos de normalizaci√≥n guardados: {normalization_file}")

print("\n" + "="*60)
print(f"MODELO GUARDADO EN: {carpeta_modelo}")
print("="*60)

## Predicciones, gráficas de predicciones y guardado

In [None]:
##################################################
#  FIT  (per experiment)
##################################################
print("\n" + "="*60)
print("GENERANDO PREDICCIONES...")
print("="*60)

# Predict on the training inputs with the fitted model
y_pred_mean, y_pred_std = lsbp_model.predict_mean(
    X_new=datos.drop(columns=["Y"]).values,
    n_samples=1000
)

y_true = datos["Y"].values

# Compute the fit metrics
metrics = regression_metrics(y_true, y_pred_mean)

print("\nüìä M√âTRICAS DE AJUSTE:")
print("-" * 60)
for metric_name, metric_value in metrics.items():
    print(f"  {metric_name.upper():8s}: {metric_value:10.6f}")
print("-" * 60)

# Report folder
report_path = get_report_path(config, SIM_REAL, "tables")
carpeta_reportes = report_path / f"{EXPERIMENT_ID}"
carpeta_reportes.mkdir(parents=True, exist_ok=True)

# Save the metrics as JSON
metrics_file = carpeta_reportes / "metrics.json"
with open(metrics_file, 'w') as f:
    json.dump(metrics, f, indent=2)
print(f"\n‚úì M√©tricas guardadas: {metrics_file}")

# Save the full predictions.
# The residual is computed once; the standardized residual is left as NaN
# wherever the predictive std is not strictly positive, instead of dividing by
# zero (which produced inf/NaN plus a RuntimeWarning).
residual = np.asarray(y_true - y_pred_mean, dtype=float)
pred_std = np.asarray(y_pred_std, dtype=float)
residual_std = np.divide(
    residual,
    pred_std,
    out=np.full_like(residual, np.nan, dtype=float),
    where=pred_std > 0
)

predictions_df = pd.DataFrame({
    'y_true': y_true,
    'y_pred_mean': y_pred_mean,
    'y_pred_std': y_pred_std,
    'residual': residual,
    'residual_std': residual_std  # standardized residuals
})
predictions_file = carpeta_reportes / "predictions.csv"
predictions_df.to_csv(predictions_file, index=False)
print(f"‚úì Predicciones guardadas: {predictions_file}")

##################################################
# Fit plots (per experiment)
##################################################
print("\n" + "="*60)
print("GENERANDO GR√ÅFICAS...")
print("="*60)
# Create the plots folder
graphics_path = get_report_path(config, SIM_REAL, "graphics")
carpeta_graficas = graphics_path / f"{EXPERIMENT_ID}"
carpeta_graficas.mkdir(parents=True, exist_ok=True)

# Produce the plots with the project plotting module; each split is a
# (y_true, y_pred, label) triple.
splits = [
    (y_true, y_pred_mean, "Training Set")
]

plot_regression_analysis(
    splits=splits,
    output_path=str(carpeta_graficas),
    model_name="LSBP Laplace"
)

print(f"‚úì Gr√°ficas guardadas en: {carpeta_graficas}")

## Otros analisis

In [None]:
# --------------------------------------------------
# Traces
# --------------------------------------------------
# Trace key -> label for every hyperparameter whose trace is plotted;
# insertion order is the plotting order.
trace_labels = {
    'mu': 'Œº (Intercepto stick-breaking)',
    'mu0': 'Œº‚ÇÄ (Media base)',
    'tau0': 'œÑ‚ÇÄ (Precisi√≥n)',
    'a0': 'a‚ÇÄ (Shape b)',
    'beta0': 'Œ≤‚ÇÄ (Scale b)',
    'n_clusters': 'Numero de Clusters',
}
hyperparams = list(trace_labels.items())

traces_png = carpeta_graficas / f"{EXPERIMENT_ID}.png"
plot_hyperparameter_traces(
    trace=trace,
    param_config=hyperparams,
    output_path=traces_png,
    title="Trazas LSBP Laplace",
)

# Simple version: credible intervals sorted by the true response
intervals_png = carpeta_graficas / "intervalos_credibilidad.png"
plot_credible_intervals(
    y_true=y_true,
    y_pred_mean=y_pred_mean,
    y_pred_std=y_pred_std,
    output_path=intervals_png,
    sort_by='y_true',
)

print("‚úì Todas las gr√°ficas guardadas en: {}".format(carpeta_graficas))

# Experimento III

In [None]:
# Initial parameters for Experiment III
# SIM_REAL selects the data/artifact/report sub-tree used by the path helpers below;
# NOMBRE_EJECUCION is the run name stored in the experiment registry.
SIM_REAL = "simulation"
NOMBRE_EJECUCION = "model_lsbp_002"

In [None]:
# Experiment run parameters: a unique id for the output folders and a
# human-readable description that is written to the experiment registry
EXPERIMENT_ID = create_experiment_id("lsbp_002_exp_03")
CARACTERISTICAS = "2 Feature Cuadraticos (n=400) -> 1200 (200 burn)"

In [None]:
##################################################
# Register the experiment
##################################################

# Registry entry: id, run name, data type and a free-text description
experiment_data = dict(
    experiment_id=EXPERIMENT_ID,
    nombre=NOMBRE_EJECUCION,
    tipo=SIM_REAL,
    descripcion=f"Experimento: {CARACTERISTICAS}",
)

# The helper returns the registry location, which is echoed for traceability
registry_file = save_experiment_metadata(config, experiment_data)
print(f"‚úì Experimento registrado en: {registry_file}")


In [None]:
##################################################
# Output folder for this experiment's data
##################################################
data_path = get_data_path(config, SIM_REAL, "output")
carpeta_datos = data_path / f"{EXPERIMENT_ID}"
carpeta_datos.mkdir(parents=True, exist_ok=True)

##################################################
# Simulated data
##################################################
# Simulation settings: 400 observations, 2 features, fixed seed
sim_config = SimulationConfig(
    n_samples=400,
    n_features=2,
    x_range=(0.0, 100.0),
    noise_std=0.2,
    random_state=234,
)

# RBF kernel and a degree-2 polynomial transformation
kernel = RBFKernel(length_scale=5.0, variance=1.0)
transformation = TransformationFunctions.polynomial(degree=2)

# Simulator wiring the three pieces together
simulator = RegressionSimulator(
    config=sim_config,
    kernel=kernel,
    transformation=transformation,
)

# Draw the data set
print("Generando datos...")
X, Y = simulator.simulate()

# Summary statistics of the generated features and response
print("‚úì Datos generados exitosamente")
print(
    "\nEstad√≠sticas de X:",
    f"  Shape: {X.shape}",
    f"  Media por feature: {X.mean(axis=0)}",
    f"  Std por feature: {X.std(axis=0)}",
    "\nEstad√≠sticas de Y:",
    f"  Shape: {Y.shape}",
    f"  Media: {Y.mean():.4f}",
    f"  Std: {Y.std():.4f}",
    f"  Min: {Y.min():.4f}",
    f"  Max: {Y.max():.4f}",
    sep="\n",
)

##################################################
# Build the data frame: features X1..Xp plus response Y
##################################################
feature_names = [f"X{idx}" for idx in range(1, sim_config.n_features + 1)]
datos = pd.DataFrame(X, columns=feature_names)
datos['Y'] = Y

##################################################
# Save the data frame as CSV inside the experiment folder
##################################################
csv_filename = f"{carpeta_datos}/_data.csv"
datos.to_csv(csv_filename, index=False)

print(f"‚úì Datos guardados en CSV: {csv_filename}")

## Modelo

In [None]:
##################################################
# Model
##################################################
print("\n" + "=" * 60, "EJECUTANDO LSBPLaplace...", "=" * 60, sep="\n")

# Model instance: response column "Y", every other column as a feature.
# NOTE(review): LSBPLaplace is not among the imports in the notebook's first cell —
# confirm it is imported before this cell runs.
lsbp_model = LSBPLaplace(
    y=datos["Y"].values,
    X=datos.drop(columns=["Y"]).values,
    H=20,            # initial number of truncated clusters
    verbose=True,    # show progress
)

# Run MCMC: 1200 total iterations, 200 of them burn-in
trace = lsbp_model.run(iterations=1200, burnin=200)

print("\n" + "=" * 60, "LSBP COMPLETADO", "=" * 60, sep="\n")

## Guardado Modelo

In [None]:
##################################################
#  Save model artifacts (experiment-specific)
##################################################

# Artifact folder for this experiment
artifact_path = get_artifact_path(config, SIM_REAL)
carpeta_modelo = artifact_path / f"{EXPERIMENT_ID}"
carpeta_modelo.mkdir(parents=True, exist_ok=True)

print("\n" + "="*60)
print("GUARDANDO MODELO Y RESULTADOS...")
print("="*60)

# 1. Pickle the complete model object
model_file = carpeta_modelo / "lsbp_model.pkl"
with open(model_file, 'wb') as f:
    pickle.dump(lsbp_model, f)
print(f"‚úì Modelo guardado: {model_file}")

# 2. Pickle only the traces (lighter than the full model)
trace_file = carpeta_modelo / "trace.pkl"
with open(trace_file, 'wb') as f:
    pickle.dump(trace, f)
print(f"‚úì Trazas guardadas: {trace_file}")

# 3. Posterior summary as JSON.
# Each summary value is indexed as (mean, std) — assumed from the v[0]/v[1] access;
# the payload is built before opening the file so a failure here leaves no empty file behind.
summary = lsbp_model.get_posterior_summary()
summary_file = carpeta_modelo / "posterior_summary.json"
summary_json = {k: {'mean': v[0], 'std': v[1]} for k, v in summary.items()}
with open(summary_file, 'w') as f:
    json.dump(summary_json, f, indent=2)
print(f"‚úì Resumen posterior guardado: {summary_file}")

# 4. Experiment metadata
metadata = {
    'experiment_id': EXPERIMENT_ID,
    'timestamp': datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
    'model_type': 'LSBPLaplace',
    'data_shape': {
        'n': lsbp_model.n,
        'p': lsbp_model.p
    },
    # Must mirror the settings of this experiment's model cell:
    # LSBPLaplace(H=20) and run(iterations=1200, burnin=200).
    'hyperparameters': {
        'H_initial': 20,
        'iterations': 1200,
        'burnin': 200,
        'n_grid': lsbp_model.n_grid
    },
    'priors': {
        'mu_prior': (lsbp_model.mu_mu, lsbp_model.tau_mu_inv),
        'mu0_prior': (lsbp_model.m0, lsbp_model.s02),
        'tau0_prior': (lsbp_model.alpha_tau, lsbp_model.beta_tau),
        'a0_prior': (lsbp_model.alpha_a, lsbp_model.beta_a),
        'beta0_prior': (lsbp_model.alpha_beta, lsbp_model.beta_beta),
        'psi_prior': (lsbp_model.mu_psi, lsbp_model.tau_psi_inv)
    },
    'final_stats': {
        'H_final': lsbp_model.H,
        'n_clusters_mean': summary['n_clusters'][0],
        'n_clusters_std': summary['n_clusters'][1]
    },
    # Mean of the last (up to) 100 entries stored in lsbp_model.mh_acceptance for
    # each parameter; 0 when nothing was recorded for it
    'acceptance_rates': {
        param: np.mean(lsbp_model.mh_acceptance[param][-100:]) if lsbp_model.mh_acceptance[param] else 0
        for param in ('alpha', 'psi', 'tau0', 'a0')
    }
}

metadata_file = carpeta_modelo / "metadata.json"
with open(metadata_file, 'w') as f:
    json.dump(metadata, f, indent=2)
print(f"‚úì Metadatos guardados: {metadata_file}")

# 5. Normalization constants kept on the model (needed to reuse it on new data — presumably; verify)
normalization_file = carpeta_modelo / "normalization.pkl"
normalization_data = {
    'y_mean': lsbp_model.y_mean,
    'y_std': lsbp_model.y_std,
    'X_mean': lsbp_model.X_mean,
    'X_std': lsbp_model.X_std
}
with open(normalization_file, 'wb') as f:
    pickle.dump(normalization_data, f)
print(f"‚úì Datos de normalizaci√≥n guardados: {normalization_file}")

print("\n" + "="*60)
print(f"MODELO GUARDADO EN: {carpeta_modelo}")
print("="*60)

## Predicciones y gráficas

In [None]:
##################################################
#  FIT (experiment-specific)
##################################################
print("\n" + "=" * 60, "GENERANDO PREDICCIONES...", "=" * 60, sep="\n")

# In-sample evaluation: observed response vs. the fitted model's prediction
# (returned as a mean/std pair) on the same feature matrix used for training
y_true = datos["Y"].values
y_pred_mean, y_pred_std = lsbp_model.predict_mean(
    X_new=datos.drop(columns=["Y"]).values,
    n_samples=1000,
)

# Fit metrics, printed as an aligned table between two rulers
metrics = regression_metrics(y_true, y_pred_mean)
metric_lines = [f"  {key.upper():8s}: {val:10.6f}" for key, val in metrics.items()]
print("\nüìä M√âTRICAS DE AJUSTE:", "-" * 60, *metric_lines, "-" * 60, sep="\n")

# Tables folder for this experiment
report_path = get_report_path(config, SIM_REAL, "tables")
carpeta_reportes = report_path / f"{EXPERIMENT_ID}"
carpeta_reportes.mkdir(parents=True, exist_ok=True)

# Metrics as JSON
metrics_file = carpeta_reportes / "metrics.json"
with open(metrics_file, 'w') as f:
    json.dump(metrics, f, indent=2)
print(f"\n‚úì M√©tricas guardadas: {metrics_file}")

# Per-observation predictions together with raw and standardized residuals
residuals_raw = y_true - y_pred_mean
predictions_df = pd.DataFrame(
    dict(
        y_true=y_true,
        y_pred_mean=y_pred_mean,
        y_pred_std=y_pred_std,
        residual=residuals_raw,
        residual_std=residuals_raw / y_pred_std,  # residual scaled by the predictive std
    )
)
predictions_file = carpeta_reportes / "predictions.csv"
predictions_df.to_csv(predictions_file, index=False)
print(f"‚úì Predicciones guardadas: {predictions_file}")

##################################################
# Fit plots (experiment-specific)
##################################################
print("\n" + "=" * 60, "GENERANDO GR√ÅFICAS...", "=" * 60, sep="\n")

# One graphics folder per experiment id
graphics_path = get_report_path(config, SIM_REAL, "graphics")
carpeta_graficas = graphics_path / f"{EXPERIMENT_ID}"
carpeta_graficas.mkdir(parents=True, exist_ok=True)

# Only the training data is evaluated here, so there is a single split
splits = [(y_true, y_pred_mean, "Training Set")]
plot_regression_analysis(
    splits=splits,
    output_path=str(carpeta_graficas),
    model_name="LSBP Laplace",
)

print(f"‚úì Gr√°ficas guardadas en: {carpeta_graficas}")

## Otros análisis

In [None]:
##################################################
# Traces and credible intervals
##################################################
# Trace key -> plot label for every hyperparameter whose chain is drawn;
# converted to the list of (key, label) tuples the plotting helper receives
hyperparams = list({
    'mu': 'Œº (Intercepto stick-breaking)',
    'mu0': 'Œº‚ÇÄ (Media base)',
    'tau0': 'œÑ‚ÇÄ (Precisi√≥n)',
    'a0': 'a‚ÇÄ (Shape b)',
    'beta0': 'Œ≤‚ÇÄ (Scale b)',
    'n_clusters': 'Numero de Clusters',
}.items())

# The trace figure is named after the experiment id
plot_hyperparameter_traces(
    trace=trace,
    param_config=hyperparams,
    output_path=carpeta_graficas / f"{EXPERIMENT_ID}.png",
    title="Trazas LSBP Laplace",
)

# Credible-interval plot, observations ordered by the true response
plot_credible_intervals(
    y_true=y_true,
    y_pred_mean=y_pred_mean,
    y_pred_std=y_pred_std,
    output_path=carpeta_graficas / "intervalos_credibilidad.png",
    sort_by='y_true',
)

print(f"‚úì Todas las gr√°ficas guardadas en: {carpeta_graficas}")

# Experimento IV

In [None]:
# Initial parameters for Experiment IV
# SIM_REAL selects the data/artifact/report sub-tree used by the path helpers below;
# NOMBRE_EJECUCION is the run name stored in the experiment registry.
SIM_REAL = "simulation"
NOMBRE_EJECUCION = "model_lsbp_002"

In [None]:
# Experiment run parameters: a unique id for the output folders and a
# human-readable description that is written to the experiment registry
EXPERIMENT_ID = create_experiment_id("lsbp_002_exp_04")
CARACTERISTICAS = "3 Feature Linear, variar Kernel Mater (n=800) ->2400 (400 burn)"

In [None]:
##################################################
# Register the experiment
##################################################

# Registry entry: id, run name, data type and a free-text description
experiment_data = dict(
    experiment_id=EXPERIMENT_ID,
    nombre=NOMBRE_EJECUCION,
    tipo=SIM_REAL,
    descripcion=f"Experimento: {CARACTERISTICAS}",
)

# The helper returns the registry location, which is echoed for traceability
registry_file = save_experiment_metadata(config, experiment_data)
print(f"‚úì Experimento registrado en: {registry_file}")


In [None]:
##################################################
# Output folder for this experiment's data
##################################################
data_path = get_data_path(config, SIM_REAL, "output")
carpeta_datos = data_path / f"{EXPERIMENT_ID}"
carpeta_datos.mkdir(parents=True, exist_ok=True)

##################################################
# Simulated data
##################################################
# Simulation settings: 800 observations, 3 features, fixed seed
sim_config = SimulationConfig(
    n_samples=800,
    n_features=3,
    x_range=(0.0, 100.0),
    noise_std=0.2,
    random_state=234,
)

# Matern kernel (this experiment swaps out the RBF kernel) and a linear
# transformation with one coefficient per feature
kernel = MaternKernel(length_scale=5.0, variance=2.0)
transformation = TransformationFunctions.linear(
    coefficients=[15,-5,3],
    intercept=[100],
)

# Simulator wiring the three pieces together
simulator = RegressionSimulator(
    config=sim_config,
    kernel=kernel,
    transformation=transformation,
)

# Draw the data set
print("Generando datos...")
X, Y = simulator.simulate()

# Summary statistics of the generated features and response
print("‚úì Datos generados exitosamente")
print(
    "\nEstad√≠sticas de X:",
    f"  Shape: {X.shape}",
    f"  Media por feature: {X.mean(axis=0)}",
    f"  Std por feature: {X.std(axis=0)}",
    "\nEstad√≠sticas de Y:",
    f"  Shape: {Y.shape}",
    f"  Media: {Y.mean():.4f}",
    f"  Std: {Y.std():.4f}",
    f"  Min: {Y.min():.4f}",
    f"  Max: {Y.max():.4f}",
    sep="\n",
)

##################################################
# Build the data frame: features X1..Xp plus response Y
##################################################
feature_names = [f"X{idx}" for idx in range(1, sim_config.n_features + 1)]
datos = pd.DataFrame(X, columns=feature_names)
datos['Y'] = Y

##################################################
# Save the data frame as CSV inside the experiment folder
##################################################
csv_filename = f"{carpeta_datos}/_data.csv"
datos.to_csv(csv_filename, index=False)

print(f"‚úì Datos guardados en CSV: {csv_filename}")

## Modelo

In [None]:
##################################################
# Model
##################################################
print("\n" + "=" * 60, "EJECUTANDO LSBPLaplace...", "=" * 60, sep="\n")

# Model instance: response column "Y", every other column as a feature.
# NOTE(review): LSBPLaplace is not among the imports in the notebook's first cell —
# confirm it is imported before this cell runs.
lsbp_model = LSBPLaplace(
    y=datos["Y"].values,
    X=datos.drop(columns=["Y"]).values,
    H=30,            # initial number of truncated clusters
    verbose=True,    # show progress
)

# Run MCMC: 2400 total iterations, 400 of them burn-in
trace = lsbp_model.run(iterations=2400, burnin=400)

print("\n" + "=" * 60, "LSBP COMPLETADO", "=" * 60, sep="\n")

## Guardado Modelo

In [None]:
##################################################
#  Save model artifacts (experiment-specific)
##################################################

# Artifact folder for this experiment
artifact_path = get_artifact_path(config, SIM_REAL)
carpeta_modelo = artifact_path / f"{EXPERIMENT_ID}"
carpeta_modelo.mkdir(parents=True, exist_ok=True)

print("\n" + "="*60)
print("GUARDANDO MODELO Y RESULTADOS...")
print("="*60)

# 1. Pickle the complete model object
model_file = carpeta_modelo / "lsbp_model.pkl"
with open(model_file, 'wb') as f:
    pickle.dump(lsbp_model, f)
print(f"‚úì Modelo guardado: {model_file}")

# 2. Pickle only the traces (lighter than the full model)
trace_file = carpeta_modelo / "trace.pkl"
with open(trace_file, 'wb') as f:
    pickle.dump(trace, f)
print(f"‚úì Trazas guardadas: {trace_file}")

# 3. Posterior summary as JSON.
# Each summary value is indexed as (mean, std) — assumed from the v[0]/v[1] access;
# the payload is built before opening the file so a failure here leaves no empty file behind.
summary = lsbp_model.get_posterior_summary()
summary_file = carpeta_modelo / "posterior_summary.json"
summary_json = {k: {'mean': v[0], 'std': v[1]} for k, v in summary.items()}
with open(summary_file, 'w') as f:
    json.dump(summary_json, f, indent=2)
print(f"‚úì Resumen posterior guardado: {summary_file}")

# 4. Experiment metadata
metadata = {
    'experiment_id': EXPERIMENT_ID,
    'timestamp': datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
    'model_type': 'LSBPLaplace',
    'data_shape': {
        'n': lsbp_model.n,
        'p': lsbp_model.p
    },
    # Must mirror the settings of this experiment's model cell:
    # LSBPLaplace(H=30) and run(iterations=2400, burnin=400).
    'hyperparameters': {
        'H_initial': 30,
        'iterations': 2400,
        'burnin': 400,
        'n_grid': lsbp_model.n_grid
    },
    'priors': {
        'mu_prior': (lsbp_model.mu_mu, lsbp_model.tau_mu_inv),
        'mu0_prior': (lsbp_model.m0, lsbp_model.s02),
        'tau0_prior': (lsbp_model.alpha_tau, lsbp_model.beta_tau),
        'a0_prior': (lsbp_model.alpha_a, lsbp_model.beta_a),
        'beta0_prior': (lsbp_model.alpha_beta, lsbp_model.beta_beta),
        'psi_prior': (lsbp_model.mu_psi, lsbp_model.tau_psi_inv)
    },
    'final_stats': {
        'H_final': lsbp_model.H,
        'n_clusters_mean': summary['n_clusters'][0],
        'n_clusters_std': summary['n_clusters'][1]
    },
    # Mean of the last (up to) 100 entries stored in lsbp_model.mh_acceptance for
    # each parameter; 0 when nothing was recorded for it
    'acceptance_rates': {
        param: np.mean(lsbp_model.mh_acceptance[param][-100:]) if lsbp_model.mh_acceptance[param] else 0
        for param in ('alpha', 'psi', 'tau0', 'a0')
    }
}

metadata_file = carpeta_modelo / "metadata.json"
with open(metadata_file, 'w') as f:
    json.dump(metadata, f, indent=2)
print(f"‚úì Metadatos guardados: {metadata_file}")

# 5. Normalization constants kept on the model (needed to reuse it on new data — presumably; verify)
normalization_file = carpeta_modelo / "normalization.pkl"
normalization_data = {
    'y_mean': lsbp_model.y_mean,
    'y_std': lsbp_model.y_std,
    'X_mean': lsbp_model.X_mean,
    'X_std': lsbp_model.X_std
}
with open(normalization_file, 'wb') as f:
    pickle.dump(normalization_data, f)
print(f"‚úì Datos de normalizaci√≥n guardados: {normalization_file}")

print("\n" + "="*60)
print(f"MODELO GUARDADO EN: {carpeta_modelo}")
print("="*60)

## Predicciones y gráficas

In [None]:
##################################################
#  FIT (experiment-specific)
##################################################
print("\n" + "=" * 60, "GENERANDO PREDICCIONES...", "=" * 60, sep="\n")

# In-sample evaluation: observed response vs. the fitted model's prediction
# (returned as a mean/std pair) on the same feature matrix used for training
y_true = datos["Y"].values
y_pred_mean, y_pred_std = lsbp_model.predict_mean(
    X_new=datos.drop(columns=["Y"]).values,
    n_samples=1000,
)

# Fit metrics, printed as an aligned table between two rulers
metrics = regression_metrics(y_true, y_pred_mean)
metric_lines = [f"  {key.upper():8s}: {val:10.6f}" for key, val in metrics.items()]
print("\nüìä M√âTRICAS DE AJUSTE:", "-" * 60, *metric_lines, "-" * 60, sep="\n")

# Tables folder for this experiment
report_path = get_report_path(config, SIM_REAL, "tables")
carpeta_reportes = report_path / f"{EXPERIMENT_ID}"
carpeta_reportes.mkdir(parents=True, exist_ok=True)

# Metrics as JSON
metrics_file = carpeta_reportes / "metrics.json"
with open(metrics_file, 'w') as f:
    json.dump(metrics, f, indent=2)
print(f"\n‚úì M√©tricas guardadas: {metrics_file}")

# Per-observation predictions together with raw and standardized residuals
residuals_raw = y_true - y_pred_mean
predictions_df = pd.DataFrame(
    dict(
        y_true=y_true,
        y_pred_mean=y_pred_mean,
        y_pred_std=y_pred_std,
        residual=residuals_raw,
        residual_std=residuals_raw / y_pred_std,  # residual scaled by the predictive std
    )
)
predictions_file = carpeta_reportes / "predictions.csv"
predictions_df.to_csv(predictions_file, index=False)
print(f"‚úì Predicciones guardadas: {predictions_file}")

##################################################
# Fit plots (experiment-specific)
##################################################
print("\n" + "=" * 60, "GENERANDO GR√ÅFICAS...", "=" * 60, sep="\n")

# One graphics folder per experiment id
graphics_path = get_report_path(config, SIM_REAL, "graphics")
carpeta_graficas = graphics_path / f"{EXPERIMENT_ID}"
carpeta_graficas.mkdir(parents=True, exist_ok=True)

# Only the training data is evaluated here, so there is a single split
splits = [(y_true, y_pred_mean, "Training Set")]
plot_regression_analysis(
    splits=splits,
    output_path=str(carpeta_graficas),
    model_name="LSBP Laplace",
)

print(f"‚úì Gr√°ficas guardadas en: {carpeta_graficas}")

## Otros análisis

In [None]:
##################################################
# Traces and credible intervals
##################################################
# Trace key -> plot label for every hyperparameter whose chain is drawn;
# converted to the list of (key, label) tuples the plotting helper receives
hyperparams = list({
    'mu': 'Œº (Intercepto stick-breaking)',
    'mu0': 'Œº‚ÇÄ (Media base)',
    'tau0': 'œÑ‚ÇÄ (Precisi√≥n)',
    'a0': 'a‚ÇÄ (Shape b)',
    'beta0': 'Œ≤‚ÇÄ (Scale b)',
    'n_clusters': 'Numero de Clusters',
}.items())

# The trace figure is named after the experiment id
plot_hyperparameter_traces(
    trace=trace,
    param_config=hyperparams,
    output_path=carpeta_graficas / f"{EXPERIMENT_ID}.png",
    title="Trazas LSBP Laplace",
)

# Credible-interval plot, observations ordered by the true response
plot_credible_intervals(
    y_true=y_true,
    y_pred_mean=y_pred_mean,
    y_pred_std=y_pred_std,
    output_path=carpeta_graficas / "intervalos_credibilidad.png",
    sort_by='y_true',
)

print(f"‚úì Todas las gr√°ficas guardadas en: {carpeta_graficas}")