# Imports

In [26]:
# %% Imports y configuraci√≥n
import os
import sys
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import pickle
from datetime import datetime
from pathlib import Path
import json

# Make the project package importable by extending the module search path.
# The appended entry is "<parent of cwd>/..", i.e. the grandparent of the
# directory the kernel was started in.
# NOTE(review): presumably that is the repository root that contains
# `model_ddp`; this depends on where the notebook is launched -- confirm.
sys.path.append(
    os.path.join(
        os.path.dirname(os.getcwd()),  # parent of the current working directory
        '..',                          # one more level up
    )
)

# Imports del proyecto
from model_ddp.utils.sistem_fun import (
    load_config,
    get_data_path,
    get_artifact_path,
    get_report_path,
    create_experiment_id,
    ensure_directories,
    save_experiment_metadata
)

from model_ddp.simulations.gaussian_simulator import (
    RegressionSimulator,
    SimulationConfig,
    RBFKernel,
    MaternKernel,
    PeriodicKernel,
    TransformationFunctions
)

# Modelos
from model_ddp.models.LSBP_laplace_v1 import LSBPLaplace

# Metricas y graficas 
from model_ddp.fit.metrics import regression_metrics
from model_ddp.graphics.plots_regression import plot_regression_analysis
from model_ddp.graphics.plots_traces import plot_hyperparameter_traces
from model_ddp.graphics.plots_aplication import plot_credible_intervals

# Load the project configuration once. Later cells pass `config` to the path
# helpers (get_data_path, get_artifact_path, get_report_path) and to
# save_experiment_metadata.
config = load_config()

Los experimentos realizados:
- 1 Feature Linear (n=200) -> 1200 (200 burn)
- 2 Feature Linear (n=200) -> 1200 (200 burn)
- 2 Feature Cuadraticos (n=400) -> 1200 (200 burn)
- 3 Feature Linear, variar Kernel Matérn (n=800) -> 2400 (400 burn)

Busco evaluar el tiempo de desempeño de la clase LSBP para el caso normal y las mejoras que realicé en C++. Aquí podrá ver cómo aplicar la mayor parte de las funciones.

In [27]:
# 1 Feature Linear (n=200) -> 1200 (200 burn)
# 2 Feature Linear (n=200) -> 1200 (200 burn)
# 2 Feature Cuadraticos (n=400) -> 1200 (200 burn)
# 3 Feature Linear, variar Kernel Matérn (n=800) -> 2400 (400 burn)

# Experimento I 

In [28]:
# Run-level settings
SIM_REAL = "simulation"              # passed to the path helpers and stored as 'tipo' in the registry entry
NOMBRE_EJECUCION = "model_lsbp_002"  # stored as 'nombre' in the registry entry

In [29]:
# Experiment-specific settings
# EXPERIMENT_ID names every output folder of this experiment. The recorded
# outputs show the id as the given prefix followed by a timestamp
# (e.g. lsbp_002_exp_01_20251225_182807).
EXPERIMENT_ID = create_experiment_id("lsbp_002_exp_01")
# Free-text description that goes into the registry entry
CARACTERISTICAS = "1 Feature Linear (n=200) -> 1200 (200 burn)"

In [30]:
##################################################
# Register the experiment
##################################################

# Registry entry: experiment id, run name, data type and a short description
experiment_data = dict(
    experiment_id=EXPERIMENT_ID,
    nombre=NOMBRE_EJECUCION,
    tipo=SIM_REAL,
    descripcion=f"""Experimento: {CARACTERISTICAS}""",
)

# The returned value is printed as the registry location (the recorded output
# shows a path ending in versioning/experiment_registry.md).
registry_file = save_experiment_metadata(config, experiment_data)
print(f"‚úì Experimento registrado en: {registry_file}")


‚úì Experimento registrado en: C:\Users\JuanFran\Desktop\git_tesis\model_ddp\versioning\experiment_registry.md


In [31]:
##################################################
# Create the output folder
##################################################
# Simulated data for this experiment lives in <data output path>/<EXPERIMENT_ID>/
data_path = get_data_path(config, SIM_REAL, "output")
carpeta_datos = data_path / f"{EXPERIMENT_ID}"
carpeta_datos.mkdir(parents=True, exist_ok=True)

##################################################
# Simulated or real data
##################################################
# Simulation settings.
# NOTE(review): random_state is presumably the seed of the simulator -- confirm.
# NOTE(review): the recorded output reports X with mean ~0.36 and std ~0.83,
# which does not look like values spread over x_range=(0, 100); confirm how
# the simulator uses x_range.
sim_config = SimulationConfig(
    n_samples=200,
    n_features=1,
    x_range=(0.0, 100.0),
    noise_std=0.2,
    random_state=234
)

# Kernel definition (RBF)
kernel = RBFKernel(
    length_scale=5.0,
    variance=1.0
)

# Linear transformation: one coefficient (a single feature) plus an intercept
transformation = TransformationFunctions.linear(
    coefficients= [2],
    intercept=[10]
)

# Build the simulator from the three pieces above
simulator = RegressionSimulator(
    config=sim_config,
    kernel=kernel,
    transformation=transformation
)

# Generate the data
print("Generando datos...")
X, Y = simulator.simulate()

# Quick sanity report of what was generated
print("‚úì Datos generados exitosamente")
print(f"\nEstad√≠sticas de X:")
print(f"  Shape: {X.shape}")
print(f"  Media por feature: {X.mean(axis=0)}")
print(f"  Std por feature: {X.std(axis=0)}")
print(f"\nEstad√≠sticas de Y:")
print(f"  Shape: {Y.shape}")
print(f"  Media: {Y.mean():.4f}")
print(f"  Std: {Y.std():.4f}")
print(f"  Min: {Y.min():.4f}")
print(f"  Max: {Y.max():.4f}")

##################################################
# Convert to a data frame
##################################################
# Feature columns are named X1..Xp, the response column is Y
datos = pd.DataFrame(X, columns=[f'X{i+1}' for i in range(sim_config.n_features)])
datos['Y'] = Y

##################################################
# Save the data frame
##################################################
# Join with pathlib instead of an f-string with a hard-coded "/", so the
# separator always matches the platform. Kept as a str, as before.
csv_filename = str(carpeta_datos / "_data.csv")
datos.to_csv(csv_filename, index=False)

print(f"‚úì Datos guardados en CSV: {csv_filename}")

Generando datos...
‚úì Datos generados exitosamente

Estad√≠sticas de X:
  Shape: (200, 1)
  Media por feature: [0.35866669]
  Std por feature: [0.82864792]

Estad√≠sticas de Y:
  Shape: (200,)
  Media: 10.7327
  Std: 1.6713
  Min: 6.4454
  Max: 13.9424
‚úì Datos guardados en CSV: C:\Users\JuanFran\Desktop\git_tesis\model_ddp\data\simulaciones\lsbp_002_exp_01_20251225_182807/_data.csv


### Modelo

In [32]:
##################################################
# Model
##################################################
print("\n" + "="*60, "EJECUTANDO LSBPLaplace...", "="*60, sep="\n")

# Build the model: the response is column "Y", the predictors are every
# other column of `datos`.
lsbp_model = LSBPLaplace(
    X=datos.drop(columns=["Y"]).values,
    y=datos["Y"].values,
    H=15,           # initial number of truncated clusters
    verbose=True    # print progress
)

# Run MCMC: 1200 iterations in total, with a burn-in of 200.
# NOTE(review): no seed is passed to the model or to run(); confirm whether
# the sampler exposes one, otherwise the results change between runs.
trace = lsbp_model.run(
    burnin=200,
    iterations=1200
)

print("\n" + "="*60, "LSBP COMPLETADO", "="*60, sep="\n")


EJECUTANDO LSBPLaplace...
Using C++ acceleration for 8 functions (compute_eta, compute_weights, update_lambda_latent, update_assignments, update_atoms, update_alpha, update_psi, update_ell)
Iter 100/1200: K_eff=28, H=100, Œº=0.99, Œº‚ÇÄ=-0.17, œÑ‚ÇÄ=0.93, a‚ÇÄ=0.62, Œ≤‚ÇÄ=10.28
  Acceptance: Œ±=0.86, œà=0.90, œÑ=0.81, a=0.73
Iter 200/1200: K_eff=28, H=100, Œº=0.34, Œº‚ÇÄ=0.12, œÑ‚ÇÄ=1.25, a‚ÇÄ=0.50, Œ≤‚ÇÄ=12.42
  Acceptance: Œ±=0.77, œà=0.76, œÑ=0.66, a=0.63
Iter 300/1200: K_eff=28, H=100, Œº=-0.12, Œº‚ÇÄ=0.13, œÑ‚ÇÄ=0.92, a‚ÇÄ=0.51, Œ≤‚ÇÄ=17.57
  Acceptance: Œ±=0.85, œà=0.78, œÑ=0.58, a=0.70
Iter 400/1200: K_eff=31, H=100, Œº=-0.52, Œº‚ÇÄ=0.11, œÑ‚ÇÄ=0.81, a‚ÇÄ=0.55, Œ≤‚ÇÄ=13.88
  Acceptance: Œ±=0.83, œà=0.77, œÑ=0.62, a=0.80
Iter 500/1200: K_eff=39, H=100, Œº=-1.29, Œº‚ÇÄ=-0.36, œÑ‚ÇÄ=0.77, a‚ÇÄ=0.50, Œ≤‚ÇÄ=15.55
  Acceptance: Œ±=0.80, œà=0.81, œÑ=0.67, a=0.75
Iter 600/1200: K_eff=39, H=100, Œº=-1.18, Œº‚ÇÄ=0.05, œÑ‚ÇÄ=0.60, a‚ÇÄ=0.50, Œ≤‚ÇÄ=9.11
  Acceptance: Œ±=0.77, œà=0.82, œÑ=0

### Guardado Modelo

In [33]:
##################################################
#  Save the model (per experiment)
##################################################

# Everything from this experiment goes into <artifact path>/<EXPERIMENT_ID>/
artifact_path = get_artifact_path(config, SIM_REAL)
carpeta_modelo = artifact_path / f"{EXPERIMENT_ID}"
carpeta_modelo.mkdir(parents=True, exist_ok=True)

print("\n" + "="*60, "GUARDANDO MODELO Y RESULTADOS...", "="*60, sep="\n")

# 1. Full fitted model object (LSBPLaplace), pickled
model_file = carpeta_modelo / "lsbp_model.pkl"
with model_file.open('wb') as f:
    pickle.dump(lsbp_model, f)
print(f"‚úì Modelo guardado: {model_file}")

# 2. Traces only (lighter than the full model)
trace_file = carpeta_modelo / "trace.pkl"
with trace_file.open('wb') as f:
    pickle.dump(trace, f)
print(f"‚úì Trazas guardadas: {trace_file}")

# 3. Posterior summary as JSON
summary = lsbp_model.get_posterior_summary()
summary_file = carpeta_modelo / "posterior_summary.json"
with summary_file.open('w') as f:
    # Each summary entry is read as [0] -> mean, [1] -> std and stored as a
    # {'mean', 'std'} dict.
    summary_json = {
        name: {'mean': stats[0], 'std': stats[1]}
        for name, stats in summary.items()
    }
    json.dump(summary_json, f, indent=2)
print(f"‚úì Resumen posterior guardado: {summary_file}")

# 4. Experiment metadata
metadata = {
    'experiment_id': EXPERIMENT_ID,
    'timestamp': datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
    'model_type': 'LSBPLaplace',
    'data_shape': dict(n=lsbp_model.n, p=lsbp_model.p),
    # These literals repeat the values used when the model was built and run
    # (H=15, iterations=1200, burnin=200); keep them in sync with that cell.
    'hyperparameters': dict(
        H_initial=15,
        iterations=1200,
        burnin=200,
        n_grid=lsbp_model.n_grid,
    ),
    # Prior settings read from the model; the tuples are written as JSON arrays
    'priors': dict(
        mu_prior=(lsbp_model.mu_mu, lsbp_model.tau_mu_inv),
        mu0_prior=(lsbp_model.m0, lsbp_model.s02),
        tau0_prior=(lsbp_model.alpha_tau, lsbp_model.beta_tau),
        a0_prior=(lsbp_model.alpha_a, lsbp_model.beta_a),
        beta0_prior=(lsbp_model.alpha_beta, lsbp_model.beta_beta),
        psi_prior=(lsbp_model.mu_psi, lsbp_model.tau_psi_inv),
    ),
    'final_stats': dict(
        H_final=lsbp_model.H,
        n_clusters_mean=summary['n_clusters'][0],
        n_clusters_std=summary['n_clusters'][1],
    ),
    # Mean of the last 100 recorded acceptance entries per parameter,
    # or 0 when nothing was recorded for it.
    'acceptance_rates': {
        name: np.mean(lsbp_model.mh_acceptance[name][-100:]) if lsbp_model.mh_acceptance[name] else 0
        for name in ('alpha', 'psi', 'tau0', 'a0')
    }
}

metadata_file = carpeta_modelo / "metadata.json"
with metadata_file.open('w') as f:
    json.dump(metadata, f, indent=2)
print(f"‚úì Metadatos guardados: {metadata_file}")

# 5. Normalization statistics stored on the model (y/X mean and std)
normalization_file = carpeta_modelo / "normalization.pkl"
normalization_data = {
    attr: getattr(lsbp_model, attr)
    for attr in ('y_mean', 'y_std', 'X_mean', 'X_std')
}
with normalization_file.open('wb') as f:
    pickle.dump(normalization_data, f)
print(f"‚úì Datos de normalizaci√≥n guardados: {normalization_file}")

print("\n" + "="*60, f"MODELO GUARDADO EN: {carpeta_modelo}", "="*60, sep="\n")


GUARDANDO MODELO Y RESULTADOS...
‚úì Modelo guardado: C:\Users\JuanFran\Desktop\git_tesis\model_ddp\artefact\simulaciones\models\lsbp_002_exp_01_20251225_182807\lsbp_model.pkl
‚úì Trazas guardadas: C:\Users\JuanFran\Desktop\git_tesis\model_ddp\artefact\simulaciones\models\lsbp_002_exp_01_20251225_182807\trace.pkl
‚úì Resumen posterior guardado: C:\Users\JuanFran\Desktop\git_tesis\model_ddp\artefact\simulaciones\models\lsbp_002_exp_01_20251225_182807\posterior_summary.json
‚úì Metadatos guardados: C:\Users\JuanFran\Desktop\git_tesis\model_ddp\artefact\simulaciones\models\lsbp_002_exp_01_20251225_182807\metadata.json
‚úì Datos de normalizaci√≥n guardados: C:\Users\JuanFran\Desktop\git_tesis\model_ddp\artefact\simulaciones\models\lsbp_002_exp_01_20251225_182807\normalization.pkl

MODELO GUARDADO EN: C:\Users\JuanFran\Desktop\git_tesis\model_ddp\artefact\simulaciones\models\lsbp_002_exp_01_20251225_182807


### Predicciones, grafica de Predicciones y guardado

In [34]:
##################################################
#  FIT (per experiment)
##################################################
print("\n" + "="*60, "GENERANDO PREDICCIONES...", "="*60, sep="\n")

# In-sample predictions: the fitted model is evaluated on the same predictors
# it was built from, so the metrics below are training-set metrics.
y_true = datos["Y"].values
y_pred_mean, y_pred_std = lsbp_model.predict_mean(
    n_samples=1000,
    X_new=datos.drop(columns=["Y"]).values
)

# Compute the fit metrics
metrics = regression_metrics(y_true, y_pred_mean)

print("\nüìä M√âTRICAS DE AJUSTE:")
print("-" * 60)
for metric_name, metric_value in metrics.items():
    print(f"  {metric_name.upper():8s}: {metric_value:10.6f}")
print("-" * 60)

# Report folder for tables
report_path = get_report_path(config, SIM_REAL, "tables")
carpeta_reportes = report_path / f"{EXPERIMENT_ID}"
carpeta_reportes.mkdir(parents=True, exist_ok=True)

# Metrics as JSON
metrics_file = carpeta_reportes / "metrics.json"
with metrics_file.open('w') as f:
    json.dump(metrics, f, indent=2)
print(f"\n‚úì M√©tricas guardadas: {metrics_file}")

# Full prediction table: truth, predictive mean/std, raw residual and the
# residual divided by the predictive std (standardized residual).
predictions_df = pd.DataFrame({
    'y_true': y_true,
    'y_pred_mean': y_pred_mean,
    'y_pred_std': y_pred_std,
    'residual': y_true - y_pred_mean,
    'residual_std': (y_true - y_pred_mean) / y_pred_std
})
predictions_file = carpeta_reportes / "predictions.csv"
predictions_df.to_csv(predictions_file, index=False)
print(f"‚úì Predicciones guardadas: {predictions_file}")

##################################################
# Fit plots (per experiment)
##################################################
print("\n" + "="*60, "GENERANDO GR√ÅFICAS...", "="*60, sep="\n")

# Folder for figures
graphics_path = get_report_path(config, SIM_REAL, "graphics")
carpeta_graficas = graphics_path / f"{EXPERIMENT_ID}"
carpeta_graficas.mkdir(parents=True, exist_ok=True)

# Only one split is plotted: the training set
splits = [(y_true, y_pred_mean, "Training Set")]

plot_regression_analysis(
    model_name="LSBP Laplace",
    output_path=str(carpeta_graficas),
    splits=splits
)

print(f"‚úì Gr√°ficas guardadas en: {carpeta_graficas}")


GENERANDO PREDICCIONES...

üìä M√âTRICAS DE AJUSTE:
------------------------------------------------------------
  MSE     :   0.376713
  RMSE    :   0.613770
  MAE     :   0.461317
  R2      :   0.865136
  MAPE    :   4.650671
------------------------------------------------------------

‚úì M√©tricas guardadas: C:\Users\JuanFran\Desktop\git_tesis\model_ddp\reports\simulaciones\lsbp_002_exp_01_20251225_182807\metrics.json
‚úì Predicciones guardadas: C:\Users\JuanFran\Desktop\git_tesis\model_ddp\reports\simulaciones\lsbp_002_exp_01_20251225_182807\predictions.csv

GENERANDO GR√ÅFICAS...
‚úì Gr√°ficas guardadas en: C:\Users\JuanFran\Desktop\git_tesis\model_ddp\reports\simulaciones\lsbp_002_exp_01_20251225_182807


### Otros analisis

In [35]:
##################################################
# Traces
##################################################
# Parameters whose traces are plotted, as (name, label) pairs.
# NOTE(review): presumably the first element is the key looked up in `trace`
# and the second the label shown on the plot -- confirm in plots_traces.
hyperparams = [
    ('mu', 'Œº (Intercepto stick-breaking)'),
    ('mu0', 'Œº‚ÇÄ (Media base)'),
    ('tau0', 'œÑ‚ÇÄ (Precisi√≥n)'),
    ('a0', 'a‚ÇÄ (Shape b)'),
    ('beta0', 'Œ≤‚ÇÄ (Scale b)'),
    ('n_clusters', 'Numero de Clusters'),
]

# Trace figure, saved as <EXPERIMENT_ID>.png in the graphics folder
plot_hyperparameter_traces(
    title="Trazas LSBP Laplace",
    output_path=carpeta_graficas / f"{EXPERIMENT_ID}.png",
    param_config=hyperparams,
    trace=trace
)

# Credible-interval figure (simple version), ordered by the true response
plot_credible_intervals(
    sort_by='y_true',
    output_path=carpeta_graficas / "intervalos_credibilidad.png",
    y_pred_std=y_pred_std,
    y_pred_mean=y_pred_mean,
    y_true=y_true
)

print(f"‚úì Todas las gr√°ficas guardadas en: {carpeta_graficas}")

  plt.tight_layout()
  plt.savefig(output_path, dpi=300, bbox_inches='tight')


‚úì Todas las gr√°ficas guardadas en: C:\Users\JuanFran\Desktop\git_tesis\model_ddp\reports\simulaciones\lsbp_002_exp_01_20251225_182807


# Experimento II 

In [36]:
# Run-level settings, re-declared with the same values used in the
# Experiment I section.
SIM_REAL = "simulation"              # passed to the path helpers and stored as 'tipo' in the registry entry
NOMBRE_EJECUCION = "model_lsbp_002"  # stored as 'nombre' in the registry entry

In [37]:
# Experiment-specific settings
# EXPERIMENT_ID names every output folder of this experiment. The recorded
# outputs show the id as the given prefix followed by a timestamp
# (e.g. lsbp_002_exp_02_20251225_183108).
EXPERIMENT_ID = create_experiment_id("lsbp_002_exp_02")
# Free-text description that goes into the registry entry
CARACTERISTICAS = "2 Feature Linear (n=200) -> 1200 (200 burn)"

In [38]:
##################################################
# Register the experiment
##################################################

# Registry entry: experiment id, run name, data type and a short description
experiment_data = dict(
    experiment_id=EXPERIMENT_ID,
    nombre=NOMBRE_EJECUCION,
    tipo=SIM_REAL,
    descripcion=f"""Experimento: {CARACTERISTICAS}""",
)

# The returned value is printed as the registry location (the recorded output
# shows a path ending in versioning/experiment_registry.md).
registry_file = save_experiment_metadata(config, experiment_data)
print(f"‚úì Experimento registrado en: {registry_file}")


‚úì Experimento registrado en: C:\Users\JuanFran\Desktop\git_tesis\model_ddp\versioning\experiment_registry.md


In [39]:
##################################################
# Create the output folder
##################################################
# Simulated data for this experiment lives in <data output path>/<EXPERIMENT_ID>/
data_path = get_data_path(config, SIM_REAL, "output")
carpeta_datos = data_path / f"{EXPERIMENT_ID}"
carpeta_datos.mkdir(parents=True, exist_ok=True)

##################################################
# Simulated or real data
##################################################
# Simulation settings.
# NOTE(review): random_state is presumably the seed of the simulator -- confirm.
# NOTE(review): the recorded output reports X columns with mean ~0.36 / -0.30
# and std ~0.83 / 0.75, which does not look like values spread over
# x_range=(0, 100); confirm how the simulator uses x_range.
sim_config = SimulationConfig(
    n_samples=200,
    n_features=2,
    x_range=(0.0, 100.0),
    noise_std=0.2,
    random_state=234
)

# Kernel definition (RBF)
kernel = RBFKernel(
    length_scale=5.0,
    variance=1.0
)

# Linear transformation: one coefficient per feature (two here) plus an intercept
transformation = TransformationFunctions.linear(
    coefficients= [2,15],
    intercept=[10]
)

# Build the simulator from the three pieces above
simulator = RegressionSimulator(
    config=sim_config,
    kernel=kernel,
    transformation=transformation
)

# Generate the data
print("Generando datos...")
X, Y = simulator.simulate()

# Quick sanity report of what was generated
print("‚úì Datos generados exitosamente")
print(f"\nEstad√≠sticas de X:")
print(f"  Shape: {X.shape}")
print(f"  Media por feature: {X.mean(axis=0)}")
print(f"  Std por feature: {X.std(axis=0)}")
print(f"\nEstad√≠sticas de Y:")
print(f"  Shape: {Y.shape}")
print(f"  Media: {Y.mean():.4f}")
print(f"  Std: {Y.std():.4f}")
print(f"  Min: {Y.min():.4f}")
print(f"  Max: {Y.max():.4f}")

##################################################
# Convert to a data frame
##################################################
# Feature columns are named X1..Xp, the response column is Y
datos = pd.DataFrame(X, columns=[f'X{i+1}' for i in range(sim_config.n_features)])
datos['Y'] = Y

##################################################
# Save the data frame
##################################################
# Join with pathlib instead of an f-string with a hard-coded "/", so the
# separator always matches the platform. Kept as a str, as before.
csv_filename = str(carpeta_datos / "_data.csv")
datos.to_csv(csv_filename, index=False)

print(f"‚úì Datos guardados en CSV: {csv_filename}")

Generando datos...
‚úì Datos generados exitosamente

Estad√≠sticas de X:
  Shape: (200, 2)
  Media por feature: [ 0.35866669 -0.30311795]
  Std por feature: [0.82864792 0.75455656]

Estad√≠sticas de Y:
  Shape: (200,)
  Media: 6.1859
  Std: 11.7161
  Min: -17.2658
  Max: 33.8075
‚úì Datos guardados en CSV: C:\Users\JuanFran\Desktop\git_tesis\model_ddp\data\simulaciones\lsbp_002_exp_02_20251225_183108/_data.csv


## Modelo 

In [40]:
##################################################
# Model
##################################################
print("\n" + "="*60, "EJECUTANDO LSBPLaplace...", "="*60, sep="\n")

# Build the model: the response is column "Y", the predictors are every
# other column of `datos`.
lsbp_model = LSBPLaplace(
    X=datos.drop(columns=["Y"]).values,
    y=datos["Y"].values,
    H=20,           # initial number of truncated clusters
    verbose=True    # print progress
)

# Run MCMC: 1200 iterations in total, with a burn-in of 200.
# NOTE(review): no seed is passed to the model or to run(); confirm whether
# the sampler exposes one, otherwise the results change between runs.
trace = lsbp_model.run(
    burnin=200,
    iterations=1200
)

print("\n" + "="*60, "LSBP COMPLETADO", "="*60, sep="\n")


EJECUTANDO LSBPLaplace...
Using C++ acceleration for 8 functions (compute_eta, compute_weights, update_lambda_latent, update_assignments, update_atoms, update_alpha, update_psi, update_ell)
Iter 100/1200: K_eff=45, H=100, Œº=-0.70, Œº‚ÇÄ=0.33, œÑ‚ÇÄ=0.87, a‚ÇÄ=0.52, Œ≤‚ÇÄ=12.80
  Acceptance: Œ±=0.89, œà=0.82, œÑ=0.69, a=0.68
Iter 200/1200: K_eff=41, H=100, Œº=-0.61, Œº‚ÇÄ=0.10, œÑ‚ÇÄ=1.02, a‚ÇÄ=0.52, Œ≤‚ÇÄ=8.00
  Acceptance: Œ±=0.81, œà=0.83, œÑ=0.64, a=0.67
Iter 300/1200: K_eff=41, H=100, Œº=-0.50, Œº‚ÇÄ=0.20, œÑ‚ÇÄ=1.11, a‚ÇÄ=0.69, Œ≤‚ÇÄ=20.93
  Acceptance: Œ±=0.81, œà=0.95, œÑ=0.62, a=0.63
Iter 400/1200: K_eff=44, H=100, Œº=-0.54, Œº‚ÇÄ=0.22, œÑ‚ÇÄ=0.89, a‚ÇÄ=0.70, Œ≤‚ÇÄ=19.86
  Acceptance: Œ±=0.77, œà=0.88, œÑ=0.62, a=0.73
Iter 500/1200: K_eff=40, H=100, Œº=-0.38, Œº‚ÇÄ=-0.04, œÑ‚ÇÄ=0.72, a‚ÇÄ=0.77, Œ≤‚ÇÄ=21.97
  Acceptance: Œ±=0.81, œà=0.86, œÑ=0.63, a=0.67
Iter 600/1200: K_eff=45, H=100, Œº=-0.42, Œº‚ÇÄ=0.10, œÑ‚ÇÄ=0.74, a‚ÇÄ=0.53, Œ≤‚ÇÄ=10.63
  Acceptance: Œ±=0.80, œà=0.83, œÑ=

## Guardado Modelo

In [41]:
##################################################
#  Save the model (per experiment)
##################################################

# Everything from this experiment goes into <artifact path>/<EXPERIMENT_ID>/
artifact_path = get_artifact_path(config, SIM_REAL)
carpeta_modelo = artifact_path / f"{EXPERIMENT_ID}"
carpeta_modelo.mkdir(parents=True, exist_ok=True)

print("\n" + "="*60, "GUARDANDO MODELO Y RESULTADOS...", "="*60, sep="\n")

# 1. Full fitted model object (LSBPLaplace), pickled
model_file = carpeta_modelo / "lsbp_model.pkl"
with model_file.open('wb') as f:
    pickle.dump(lsbp_model, f)
print(f"‚úì Modelo guardado: {model_file}")

# 2. Traces only (lighter than the full model)
trace_file = carpeta_modelo / "trace.pkl"
with trace_file.open('wb') as f:
    pickle.dump(trace, f)
print(f"‚úì Trazas guardadas: {trace_file}")

# 3. Posterior summary as JSON
summary = lsbp_model.get_posterior_summary()
summary_file = carpeta_modelo / "posterior_summary.json"
with summary_file.open('w') as f:
    # Each summary entry is read as [0] -> mean, [1] -> std and stored as a
    # {'mean', 'std'} dict.
    summary_json = {
        name: {'mean': stats[0], 'std': stats[1]}
        for name, stats in summary.items()
    }
    json.dump(summary_json, f, indent=2)
print(f"‚úì Resumen posterior guardado: {summary_file}")

# 4. Experiment metadata
metadata = {
    'experiment_id': EXPERIMENT_ID,
    'timestamp': datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
    'model_type': 'LSBPLaplace',
    'data_shape': dict(n=lsbp_model.n, p=lsbp_model.p),
    # These literals repeat the values used when the model was built and run.
    # H_initial is 20 because this experiment builds the model with H=20.
    'hyperparameters': dict(
        H_initial=20,
        iterations=1200,
        burnin=200,
        n_grid=lsbp_model.n_grid,
    ),
    # Prior settings read from the model; the tuples are written as JSON arrays
    'priors': dict(
        mu_prior=(lsbp_model.mu_mu, lsbp_model.tau_mu_inv),
        mu0_prior=(lsbp_model.m0, lsbp_model.s02),
        tau0_prior=(lsbp_model.alpha_tau, lsbp_model.beta_tau),
        a0_prior=(lsbp_model.alpha_a, lsbp_model.beta_a),
        beta0_prior=(lsbp_model.alpha_beta, lsbp_model.beta_beta),
        psi_prior=(lsbp_model.mu_psi, lsbp_model.tau_psi_inv),
    ),
    'final_stats': dict(
        H_final=lsbp_model.H,
        n_clusters_mean=summary['n_clusters'][0],
        n_clusters_std=summary['n_clusters'][1],
    ),
    # Mean of the last 100 recorded acceptance entries per parameter,
    # or 0 when nothing was recorded for it.
    'acceptance_rates': {
        name: np.mean(lsbp_model.mh_acceptance[name][-100:]) if lsbp_model.mh_acceptance[name] else 0
        for name in ('alpha', 'psi', 'tau0', 'a0')
    }
}

metadata_file = carpeta_modelo / "metadata.json"
with metadata_file.open('w') as f:
    json.dump(metadata, f, indent=2)
print(f"‚úì Metadatos guardados: {metadata_file}")

# 5. Normalization statistics stored on the model (y/X mean and std)
normalization_file = carpeta_modelo / "normalization.pkl"
normalization_data = {
    attr: getattr(lsbp_model, attr)
    for attr in ('y_mean', 'y_std', 'X_mean', 'X_std')
}
with normalization_file.open('wb') as f:
    pickle.dump(normalization_data, f)
print(f"‚úì Datos de normalizaci√≥n guardados: {normalization_file}")

print("\n" + "="*60, f"MODELO GUARDADO EN: {carpeta_modelo}", "="*60, sep="\n")


GUARDANDO MODELO Y RESULTADOS...
‚úì Modelo guardado: C:\Users\JuanFran\Desktop\git_tesis\model_ddp\artefact\simulaciones\models\lsbp_002_exp_02_20251225_183108\lsbp_model.pkl
‚úì Trazas guardadas: C:\Users\JuanFran\Desktop\git_tesis\model_ddp\artefact\simulaciones\models\lsbp_002_exp_02_20251225_183108\trace.pkl
‚úì Resumen posterior guardado: C:\Users\JuanFran\Desktop\git_tesis\model_ddp\artefact\simulaciones\models\lsbp_002_exp_02_20251225_183108\posterior_summary.json
‚úì Metadatos guardados: C:\Users\JuanFran\Desktop\git_tesis\model_ddp\artefact\simulaciones\models\lsbp_002_exp_02_20251225_183108\metadata.json
‚úì Datos de normalizaci√≥n guardados: C:\Users\JuanFran\Desktop\git_tesis\model_ddp\artefact\simulaciones\models\lsbp_002_exp_02_20251225_183108\normalization.pkl

MODELO GUARDADO EN: C:\Users\JuanFran\Desktop\git_tesis\model_ddp\artefact\simulaciones\models\lsbp_002_exp_02_20251225_183108


## Pred, graf

In [42]:
##################################################
#  FIT (per experiment)
##################################################
print("\n" + "="*60, "GENERANDO PREDICCIONES...", "="*60, sep="\n")

# In-sample predictions: the fitted model is evaluated on the same predictors
# it was built from, so the metrics below are training-set metrics.
y_true = datos["Y"].values
y_pred_mean, y_pred_std = lsbp_model.predict_mean(
    n_samples=1000,
    X_new=datos.drop(columns=["Y"]).values
)

# Compute the fit metrics
metrics = regression_metrics(y_true, y_pred_mean)

print("\nüìä M√âTRICAS DE AJUSTE:")
print("-" * 60)
for metric_name, metric_value in metrics.items():
    print(f"  {metric_name.upper():8s}: {metric_value:10.6f}")
print("-" * 60)

# Report folder for tables
report_path = get_report_path(config, SIM_REAL, "tables")
carpeta_reportes = report_path / f"{EXPERIMENT_ID}"
carpeta_reportes.mkdir(parents=True, exist_ok=True)

# Metrics as JSON
metrics_file = carpeta_reportes / "metrics.json"
with metrics_file.open('w') as f:
    json.dump(metrics, f, indent=2)
print(f"\n‚úì M√©tricas guardadas: {metrics_file}")

# Full prediction table: truth, predictive mean/std, raw residual and the
# residual divided by the predictive std (standardized residual).
predictions_df = pd.DataFrame({
    'y_true': y_true,
    'y_pred_mean': y_pred_mean,
    'y_pred_std': y_pred_std,
    'residual': y_true - y_pred_mean,
    'residual_std': (y_true - y_pred_mean) / y_pred_std
})
predictions_file = carpeta_reportes / "predictions.csv"
predictions_df.to_csv(predictions_file, index=False)
print(f"‚úì Predicciones guardadas: {predictions_file}")

##################################################
# Fit plots (per experiment)
##################################################
print("\n" + "="*60, "GENERANDO GR√ÅFICAS...", "="*60, sep="\n")

# Folder for figures
graphics_path = get_report_path(config, SIM_REAL, "graphics")
carpeta_graficas = graphics_path / f"{EXPERIMENT_ID}"
carpeta_graficas.mkdir(parents=True, exist_ok=True)

# Only one split is plotted: the training set
splits = [(y_true, y_pred_mean, "Training Set")]

plot_regression_analysis(
    model_name="LSBP Laplace",
    output_path=str(carpeta_graficas),
    splits=splits
)

print(f"‚úì Gr√°ficas guardadas en: {carpeta_graficas}")


GENERANDO PREDICCIONES...

üìä M√âTRICAS DE AJUSTE:
------------------------------------------------------------
  MSE     :  20.683025
  RMSE    :   4.547859
  MAE     :   3.561491
  R2      :   0.849322
  MAPE    :  84.088045
------------------------------------------------------------

‚úì M√©tricas guardadas: C:\Users\JuanFran\Desktop\git_tesis\model_ddp\reports\simulaciones\lsbp_002_exp_02_20251225_183108\metrics.json
‚úì Predicciones guardadas: C:\Users\JuanFran\Desktop\git_tesis\model_ddp\reports\simulaciones\lsbp_002_exp_02_20251225_183108\predictions.csv

GENERANDO GR√ÅFICAS...
‚úì Gr√°ficas guardadas en: C:\Users\JuanFran\Desktop\git_tesis\model_ddp\reports\simulaciones\lsbp_002_exp_02_20251225_183108


## Otros analisis

In [43]:
##################################################
# Traces
##################################################
# Parameters whose traces are plotted, as (name, label) pairs.
# NOTE(review): presumably the first element is the key looked up in `trace`
# and the second the label shown on the plot -- confirm in plots_traces.
hyperparams = [
    ('mu', 'Œº (Intercepto stick-breaking)'),
    ('mu0', 'Œº‚ÇÄ (Media base)'),
    ('tau0', 'œÑ‚ÇÄ (Precisi√≥n)'),
    ('a0', 'a‚ÇÄ (Shape b)'),
    ('beta0', 'Œ≤‚ÇÄ (Scale b)'),
    ('n_clusters', 'Numero de Clusters'),
]

# Trace figure, saved as <EXPERIMENT_ID>.png in the graphics folder
plot_hyperparameter_traces(
    title="Trazas LSBP Laplace",
    output_path=carpeta_graficas / f"{EXPERIMENT_ID}.png",
    param_config=hyperparams,
    trace=trace
)

# Credible-interval figure (simple version), ordered by the true response
plot_credible_intervals(
    sort_by='y_true',
    output_path=carpeta_graficas / "intervalos_credibilidad.png",
    y_pred_std=y_pred_std,
    y_pred_mean=y_pred_mean,
    y_true=y_true
)

print(f"‚úì Todas las gr√°ficas guardadas en: {carpeta_graficas}")

  plt.tight_layout()
  plt.savefig(output_path, dpi=300, bbox_inches='tight')


‚úì Todas las gr√°ficas guardadas en: C:\Users\JuanFran\Desktop\git_tesis\model_ddp\reports\simulaciones\lsbp_002_exp_02_20251225_183108


# Experimento III

In [44]:
# Run-level constants: the data-source kind and the execution name written to the registry.
SIM_REAL = "simulation"
NOMBRE_EJECUCION = "model_lsbp_002"

In [45]:
# Experiment parameters: run identifier and the free-text description of the setup.
EXPERIMENT_ID = create_experiment_id("lsbp_002_exp_03")
CARACTERISTICAS = "2 Feature Cuadraticos (n=400) -> 1200 (200 burn)"

In [46]:
##################################################
# Register the experiment
##################################################

# Record passed to save_experiment_metadata for this run.
experiment_data = dict(
    experiment_id=EXPERIMENT_ID,
    nombre=NOMBRE_EJECUCION,
    tipo=SIM_REAL,
    descripcion=f"Experimento: {CARACTERISTICAS}",
)
registry_file = save_experiment_metadata(config, experiment_data)
print(f"‚úì Experimento registrado en: {registry_file}")


‚úì Experimento registrado en: C:\Users\JuanFran\Desktop\git_tesis\model_ddp\versioning\experiment_registry.md


In [47]:
##################################################
# Output folder for this experiment's data
##################################################
data_path = get_data_path(config, SIM_REAL, "output")
carpeta_datos = data_path / f"{EXPERIMENT_ID}"
carpeta_datos.mkdir(parents=True, exist_ok=True)

##################################################
# Simulated data
##################################################
# Simulation settings (fixed random_state so the draw is repeatable).
sim_config = SimulationConfig(
    n_samples=400,
    n_features=2,
    x_range=(0.0, 100.0),
    noise_std=0.2,
    random_state=234
)

# Kernel: RBF
kernel = RBFKernel(
    length_scale=5.0,
    variance=1.0
)

# Transformation applied by the simulator: degree-2 polynomial.
transformation = TransformationFunctions.polynomial(
    degree=2
)

# Build the simulator
simulator = RegressionSimulator(
    config=sim_config,
    kernel=kernel,
    transformation=transformation
)

# Generate the data
print("Generando datos...")
X, Y = simulator.simulate()

# Quick sanity summary of the simulated arrays.
print("‚úì Datos generados exitosamente")
print("\nEstad√≠sticas de X:")
print(f"  Shape: {X.shape}")
print(f"  Media por feature: {X.mean(axis=0)}")
print(f"  Std por feature: {X.std(axis=0)}")
print("\nEstad√≠sticas de Y:")
print(f"  Shape: {Y.shape}")
print(f"  Media: {Y.mean():.4f}")
print(f"  Std: {Y.std():.4f}")
print(f"  Min: {Y.min():.4f}")
print(f"  Max: {Y.max():.4f}")

##################################################
# Convert to a DataFrame (features X1..Xp plus response Y)
##################################################
datos = pd.DataFrame(X, columns=[f'X{i+1}' for i in range(sim_config.n_features)])
datos['Y'] = Y

##################################################
# Save the DataFrame
##################################################
# Join with pathlib instead of concatenating "/" by hand, so the path uses the
# platform's separator consistently. Kept as str, as in the original cell.
csv_filename = str(carpeta_datos / "_data.csv")
datos.to_csv(csv_filename, index=False)

print(f"‚úì Datos guardados en CSV: {csv_filename}")

Generando datos...
‚úì Datos generados exitosamente

Estad√≠sticas de X:
  Shape: (400, 2)
  Media por feature: [ 0.14187066 -0.50272967]
  Std por feature: [0.78558034 0.87377361]

Estad√≠sticas de Y:
  Shape: (400,)
  Media: 1.6552
  Std: 1.9636
  Min: -0.4204
  Max: 9.8443
‚úì Datos guardados en CSV: C:\Users\JuanFran\Desktop\git_tesis\model_ddp\data\simulaciones\lsbp_002_exp_03_20251225_183445/_data.csv


## Modelo

In [48]:
##################################################
# Model fit
##################################################
banner = "=" * 60
print("\n" + banner)
print("EJECUTANDO LSBPLaplace...")
print(banner)

# Split the frame into the response vector and the feature matrix.
y_obs = datos["Y"].values
X_obs = datos.drop(columns=["Y"]).values

# Model instance: H is the initial number of truncated clusters; verbose shows progress.
lsbp_model = LSBPLaplace(y=y_obs, X=X_obs, H=20, verbose=True)

# Run MCMC: 1200 total iterations, 200 of burn-in.
trace = lsbp_model.run(iterations=1200, burnin=200)

print("\n" + banner)
print("LSBP COMPLETADO")
print(banner)


EJECUTANDO LSBPLaplace...
Using C++ acceleration for 8 functions (compute_eta, compute_weights, update_lambda_latent, update_assignments, update_atoms, update_alpha, update_psi, update_ell)
Iter 100/1200: K_eff=55, H=100, Œº=-0.97, Œº‚ÇÄ=0.94, œÑ‚ÇÄ=0.77, a‚ÇÄ=0.77, Œ≤‚ÇÄ=19.81
  Acceptance: Œ±=0.86, œà=0.94, œÑ=0.67, a=0.73
Iter 200/1200: K_eff=41, H=100, Œº=-0.80, Œº‚ÇÄ=1.07, œÑ‚ÇÄ=0.43, a‚ÇÄ=0.50, Œ≤‚ÇÄ=15.18
  Acceptance: Œ±=0.81, œà=0.79, œÑ=0.61, a=0.64
Iter 300/1200: K_eff=54, H=100, Œº=-0.71, Œº‚ÇÄ=1.17, œÑ‚ÇÄ=0.58, a‚ÇÄ=0.65, Œ≤‚ÇÄ=21.71
  Acceptance: Œ±=0.82, œà=0.83, œÑ=0.58, a=0.58
Iter 400/1200: K_eff=56, H=100, Œº=-0.59, Œº‚ÇÄ=1.09, œÑ‚ÇÄ=0.48, a‚ÇÄ=0.51, Œ≤‚ÇÄ=11.79
  Acceptance: Œ±=0.80, œà=0.82, œÑ=0.56, a=0.54
Iter 500/1200: K_eff=55, H=100, Œº=-0.50, Œº‚ÇÄ=0.84, œÑ‚ÇÄ=0.70, a‚ÇÄ=0.64, Œ≤‚ÇÄ=17.22
  Acceptance: Œ±=0.80, œà=0.85, œÑ=0.65, a=0.65
Iter 600/1200: K_eff=45, H=100, Œº=-0.68, Œº‚ÇÄ=0.92, œÑ‚ÇÄ=0.62, a‚ÇÄ=0.87, Œ≤‚ÇÄ=25.86
  Acceptance: Œ±=0.76, œà=0.81, œÑ=

## Guardado Modelo

In [49]:
##################################################
#  Save model and results (per experiment)
##################################################

# Per-experiment artifact folder
artifact_path = get_artifact_path(config, SIM_REAL)
carpeta_modelo = artifact_path / f"{EXPERIMENT_ID}"
carpeta_modelo.mkdir(parents=True, exist_ok=True)

print("\n" + "="*60)
print("GUARDANDO MODELO Y RESULTADOS...")
print("="*60)

# 1. Full model object (LSBPLaplace)
model_file = carpeta_modelo / "lsbp_model.pkl"
with open(model_file, 'wb') as f:
    pickle.dump(lsbp_model, f)
print(f"‚úì Modelo guardado: {model_file}")

# 2. Traces only (lighter than the full model)
trace_file = carpeta_modelo / "trace.pkl"
with open(trace_file, 'wb') as f:
    pickle.dump(trace, f)
print(f"‚úì Trazas guardadas: {trace_file}")

# 3. Posterior summary
summary = lsbp_model.get_posterior_summary()
summary_file = carpeta_modelo / "posterior_summary.json"
# Each summary entry is indexed as (mean, std); store it as a labelled dict.
summary_json = {k: {'mean': v[0], 'std': v[1]} for k, v in summary.items()}
with open(summary_file, 'w') as f:
    json.dump(summary_json, f, indent=2)
print(f"‚úì Resumen posterior guardado: {summary_file}")

# 4. Experiment metadata
metadata = {
    'experiment_id': EXPERIMENT_ID,
    'timestamp': datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
    'model_type': 'LSBPLaplace',
    'data_shape': {
        'n': lsbp_model.n,
        'p': lsbp_model.p
    },
    # These must mirror the values passed to LSBPLaplace(...) and .run(...)
    # in this experiment's model cell (H=20, iterations=1200, burnin=200).
    'hyperparameters': {
        'H_initial': 20,
        'iterations': 1200,
        'burnin': 200,
        'n_grid': lsbp_model.n_grid
    },
    'priors': {
        'mu_prior': (lsbp_model.mu_mu, lsbp_model.tau_mu_inv),
        'mu0_prior': (lsbp_model.m0, lsbp_model.s02),
        'tau0_prior': (lsbp_model.alpha_tau, lsbp_model.beta_tau),
        'a0_prior': (lsbp_model.alpha_a, lsbp_model.beta_a),
        'beta0_prior': (lsbp_model.alpha_beta, lsbp_model.beta_beta),
        'psi_prior': (lsbp_model.mu_psi, lsbp_model.tau_psi_inv)
    },
    'final_stats': {
        'H_final': lsbp_model.H,
        'n_clusters_mean': summary['n_clusters'][0],
        'n_clusters_std': summary['n_clusters'][1]
    },
    # Mean of the last 100 recorded acceptance indicators per MH-updated
    # parameter; 0 when nothing was recorded.
    'acceptance_rates': {
        name: (
            np.mean(lsbp_model.mh_acceptance[name][-100:])
            if lsbp_model.mh_acceptance[name] else 0
        )
        for name in ('alpha', 'psi', 'tau0', 'a0')
    }
}

metadata_file = carpeta_modelo / "metadata.json"
with open(metadata_file, 'w') as f:
    json.dump(metadata, f, indent=2)
print(f"‚úì Metadatos guardados: {metadata_file}")

# 5. Normalization constants stored on the model
normalization_file = carpeta_modelo / "normalization.pkl"
normalization_data = {
    'y_mean': lsbp_model.y_mean,
    'y_std': lsbp_model.y_std,
    'X_mean': lsbp_model.X_mean,
    'X_std': lsbp_model.X_std
}
with open(normalization_file, 'wb') as f:
    pickle.dump(normalization_data, f)
print(f"‚úì Datos de normalizaci√≥n guardados: {normalization_file}")

print("\n" + "="*60)
print(f"MODELO GUARDADO EN: {carpeta_modelo}")
print("="*60)


GUARDANDO MODELO Y RESULTADOS...
‚úì Modelo guardado: C:\Users\JuanFran\Desktop\git_tesis\model_ddp\artefact\simulaciones\models\lsbp_002_exp_03_20251225_183445\lsbp_model.pkl
‚úì Trazas guardadas: C:\Users\JuanFran\Desktop\git_tesis\model_ddp\artefact\simulaciones\models\lsbp_002_exp_03_20251225_183445\trace.pkl
‚úì Resumen posterior guardado: C:\Users\JuanFran\Desktop\git_tesis\model_ddp\artefact\simulaciones\models\lsbp_002_exp_03_20251225_183445\posterior_summary.json
‚úì Metadatos guardados: C:\Users\JuanFran\Desktop\git_tesis\model_ddp\artefact\simulaciones\models\lsbp_002_exp_03_20251225_183445\metadata.json
‚úì Datos de normalizaci√≥n guardados: C:\Users\JuanFran\Desktop\git_tesis\model_ddp\artefact\simulaciones\models\lsbp_002_exp_03_20251225_183445\normalization.pkl

MODELO GUARDADO EN: C:\Users\JuanFran\Desktop\git_tesis\model_ddp\artefact\simulaciones\models\lsbp_002_exp_03_20251225_183445


## Predicciones y gráficas

In [50]:
##################################################
#  In-sample fit (per experiment)
##################################################
rule = "=" * 60
dashes = "-" * 60

print("\n" + rule)
print("GENERANDO PREDICCIONES...")
print(rule)

# Predict with the trained model on the features stored in `datos`.
X_fit = datos.drop(columns=["Y"]).values
y_pred_mean, y_pred_std = lsbp_model.predict_mean(X_new=X_fit, n_samples=1000)

y_true = datos["Y"].values

# Metrics of the predictive mean against the observed response.
metrics = regression_metrics(y_true, y_pred_mean)

print("\nüìä M√âTRICAS DE AJUSTE:")
print(dashes)
for metric_name, metric_value in metrics.items():
    print(f"  {metric_name.upper():8s}: {metric_value:10.6f}")
print(dashes)

# Per-experiment folder for tables
report_path = get_report_path(config, SIM_REAL, "tables")
carpeta_reportes = report_path / f"{EXPERIMENT_ID}"
carpeta_reportes.mkdir(parents=True, exist_ok=True)

# Metrics as JSON
metrics_file = carpeta_reportes / "metrics.json"
with open(metrics_file, 'w') as f:
    json.dump(metrics, f, indent=2)
print(f"\n‚úì M√©tricas guardadas: {metrics_file}")

# Full prediction table; residual_std is the residual scaled by the predictive std.
residual = y_true - y_pred_mean
predictions_df = pd.DataFrame({
    'y_true': y_true,
    'y_pred_mean': y_pred_mean,
    'y_pred_std': y_pred_std,
    'residual': residual,
    'residual_std': residual / y_pred_std,
})
predictions_file = carpeta_reportes / "predictions.csv"
predictions_df.to_csv(predictions_file, index=False)
print(f"‚úì Predicciones guardadas: {predictions_file}")

##################################################
# Fit plots (per experiment)
##################################################
print("\n" + rule)
print("GENERANDO GR√ÅFICAS...")
print(rule)

# Per-experiment folder for figures
graphics_path = get_report_path(config, SIM_REAL, "graphics")
carpeta_graficas = graphics_path / f"{EXPERIMENT_ID}"
carpeta_graficas.mkdir(parents=True, exist_ok=True)

# One (observed, predicted, label) triple per split; only the training set here.
splits = [(y_true, y_pred_mean, "Training Set")]

plot_regression_analysis(
    splits=splits,
    output_path=str(carpeta_graficas),
    model_name="LSBP Laplace",
)

print(f"‚úì Gr√°ficas guardadas en: {carpeta_graficas}")


GENERANDO PREDICCIONES...

üìä M√âTRICAS DE AJUSTE:
------------------------------------------------------------
  MSE     :   0.566669
  RMSE    :   0.752774
  MAE     :   0.481694
  R2      :   0.853039
  MAPE    : 110.574650
------------------------------------------------------------

‚úì M√©tricas guardadas: C:\Users\JuanFran\Desktop\git_tesis\model_ddp\reports\simulaciones\lsbp_002_exp_03_20251225_183445\metrics.json
‚úì Predicciones guardadas: C:\Users\JuanFran\Desktop\git_tesis\model_ddp\reports\simulaciones\lsbp_002_exp_03_20251225_183445\predictions.csv

GENERANDO GR√ÅFICAS...
‚úì Gr√°ficas guardadas en: C:\Users\JuanFran\Desktop\git_tesis\model_ddp\reports\simulaciones\lsbp_002_exp_03_20251225_183445


## Otros análisis: trazas e intervalos de credibilidad

In [51]:
##################################################
# Trace plots and credible intervals
##################################################
# Trace key -> panel label; insertion order is kept when the dict is turned
# into the list of (key, label) pairs expected by the plotting helper.
trace_labels = {
    'mu': 'Œº (Intercepto stick-breaking)',
    'mu0': 'Œº‚ÇÄ (Media base)',
    'tau0': 'œÑ‚ÇÄ (Precisi√≥n)',
    'a0': 'a‚ÇÄ (Shape b)',
    'beta0': 'Œ≤‚ÇÄ (Scale b)',
    'n_clusters': 'Numero de Clusters',
}
hyperparams = list(trace_labels.items())

# Both figures go into this experiment's graphics folder.
traces_png = carpeta_graficas / f"{EXPERIMENT_ID}.png"
intervals_png = carpeta_graficas / "intervalos_credibilidad.png"

plot_hyperparameter_traces(
    trace=trace,
    param_config=hyperparams,
    output_path=traces_png,
    title="Trazas LSBP Laplace",
)

# Simple version: predictive mean/std against the observed values.
plot_credible_intervals(
    y_true=y_true,
    y_pred_mean=y_pred_mean,
    y_pred_std=y_pred_std,
    output_path=intervals_png,
    sort_by='y_true',
)

print(f"‚úì Todas las gr√°ficas guardadas en: {carpeta_graficas}")

  plt.tight_layout()
  plt.savefig(output_path, dpi=300, bbox_inches='tight')


‚úì Todas las gr√°ficas guardadas en: C:\Users\JuanFran\Desktop\git_tesis\model_ddp\reports\simulaciones\lsbp_002_exp_03_20251225_183445


# Experimento IV

In [52]:
# Run-level constants: the data-source kind and the execution name written to the registry.
SIM_REAL = "simulation"
NOMBRE_EJECUCION = "model_lsbp_002"

In [53]:
# Experiment parameters: run identifier and the free-text description of the setup.
EXPERIMENT_ID = create_experiment_id("lsbp_002_exp_04")
CARACTERISTICAS = "3 Feature Linear, variar Kernel Mater (n=800) ->2400 (400 burn)"

In [54]:
##################################################
# Register the experiment
##################################################

# Record passed to save_experiment_metadata for this run.
experiment_data = dict(
    experiment_id=EXPERIMENT_ID,
    nombre=NOMBRE_EJECUCION,
    tipo=SIM_REAL,
    descripcion=f"Experimento: {CARACTERISTICAS}",
)
registry_file = save_experiment_metadata(config, experiment_data)
print(f"‚úì Experimento registrado en: {registry_file}")


‚úì Experimento registrado en: C:\Users\JuanFran\Desktop\git_tesis\model_ddp\versioning\experiment_registry.md


In [55]:
##################################################
# Output folder for this experiment's data
##################################################
data_path = get_data_path(config, SIM_REAL, "output")
carpeta_datos = data_path / f"{EXPERIMENT_ID}"
carpeta_datos.mkdir(parents=True, exist_ok=True)

##################################################
# Simulated data
##################################################
# Simulation settings (fixed random_state so the draw is repeatable).
sim_config = SimulationConfig(
    n_samples=800,
    n_features=3,
    x_range=(0.0, 100.0),
    noise_std=0.2,
    random_state=234
)

# Kernel: Matern (this experiment swaps the RBF kernel used in the earlier ones).
kernel = MaternKernel(
    length_scale=5.0,
    variance=2.0
)

# Transformation applied by the simulator: linear, one coefficient per feature.
transformation = TransformationFunctions.linear(
    coefficients=[15,-5,3],
    intercept=[100]
)

# Build the simulator
simulator = RegressionSimulator(
    config=sim_config,
    kernel=kernel,
    transformation=transformation
)

# Generate the data
print("Generando datos...")
X, Y = simulator.simulate()

# Quick sanity summary of the simulated arrays.
print("‚úì Datos generados exitosamente")
print("\nEstad√≠sticas de X:")
print(f"  Shape: {X.shape}")
print(f"  Media por feature: {X.mean(axis=0)}")
print(f"  Std por feature: {X.std(axis=0)}")
print("\nEstad√≠sticas de Y:")
print(f"  Shape: {Y.shape}")
print(f"  Media: {Y.mean():.4f}")
print(f"  Std: {Y.std():.4f}")
print(f"  Min: {Y.min():.4f}")
print(f"  Max: {Y.max():.4f}")

##################################################
# Convert to a DataFrame (features X1..Xp plus response Y)
##################################################
datos = pd.DataFrame(X, columns=[f'X{i+1}' for i in range(sim_config.n_features)])
datos['Y'] = Y

##################################################
# Save the DataFrame
##################################################
# Join with pathlib instead of concatenating "/" by hand, so the path uses the
# platform's separator consistently. Kept as str, as in the original cell.
csv_filename = str(carpeta_datos / "_data.csv")
datos.to_csv(csv_filename, index=False)

print(f"‚úì Datos guardados en CSV: {csv_filename}")

Generando datos...
‚úì Datos generados exitosamente

Estad√≠sticas de X:
  Shape: (800, 3)
  Media por feature: [-0.23289032  0.35779777  0.13681772]
  Std por feature: [1.11561901 0.93767766 1.07967819]

Estad√≠sticas de Y:
  Shape: (800,)
  Media: 95.1227
  Std: 16.0322
  Min: 65.3149
  Max: 136.1139
‚úì Datos guardados en CSV: C:\Users\JuanFran\Desktop\git_tesis\model_ddp\data\simulaciones\lsbp_002_exp_04_20251225_184017/_data.csv


## Modelo

In [56]:
##################################################
# Model fit
##################################################
banner = "=" * 60
print("\n" + banner)
print("EJECUTANDO LSBPLaplace...")
print(banner)

# Split the frame into the response vector and the feature matrix.
y_obs = datos["Y"].values
X_obs = datos.drop(columns=["Y"]).values

# Model instance: H is the initial number of truncated clusters; verbose shows progress.
lsbp_model = LSBPLaplace(y=y_obs, X=X_obs, H=30, verbose=True)

# Run MCMC: 2400 total iterations, 400 of burn-in.
trace = lsbp_model.run(iterations=2400, burnin=400)

print("\n" + banner)
print("LSBP COMPLETADO")
print(banner)


EJECUTANDO LSBPLaplace...
Using C++ acceleration for 8 functions (compute_eta, compute_weights, update_lambda_latent, update_assignments, update_atoms, update_alpha, update_psi, update_ell)
Iter 100/2400: K_eff=74, H=100, Œº=0.29, Œº‚ÇÄ=0.44, œÑ‚ÇÄ=1.22, a‚ÇÄ=0.64, Œ≤‚ÇÄ=18.69
  Acceptance: Œ±=0.75, œà=0.86, œÑ=0.63, a=0.58
Iter 200/2400: K_eff=76, H=100, Œº=0.06, Œº‚ÇÄ=0.34, œÑ‚ÇÄ=1.04, a‚ÇÄ=0.76, Œ≤‚ÇÄ=30.34
  Acceptance: Œ±=0.75, œà=0.81, œÑ=0.49, a=0.46
Iter 300/2400: K_eff=73, H=100, Œº=0.49, Œº‚ÇÄ=0.16, œÑ‚ÇÄ=0.99, a‚ÇÄ=0.79, Œ≤‚ÇÄ=24.64
  Acceptance: Œ±=0.57, œà=0.75, œÑ=0.55, a=0.49
Iter 400/2400: K_eff=75, H=100, Œº=0.23, Œº‚ÇÄ=0.33, œÑ‚ÇÄ=1.08, a‚ÇÄ=0.86, Œ≤‚ÇÄ=23.40
  Acceptance: Œ±=0.54, œà=0.62, œÑ=0.41, a=0.25
Iter 500/2400: K_eff=71, H=100, Œº=0.17, Œº‚ÇÄ=0.23, œÑ‚ÇÄ=0.79, a‚ÇÄ=0.58, Œ≤‚ÇÄ=23.81
  Acceptance: Œ±=0.53, œà=0.65, œÑ=0.39, a=0.38
Iter 600/2400: K_eff=62, H=100, Œº=0.39, Œº‚ÇÄ=0.12, œÑ‚ÇÄ=0.83, a‚ÇÄ=0.58, Œ≤‚ÇÄ=24.36
  Acceptance: Œ±=0.49, œà=0.57, œÑ=0.33, 

## Guardado Modelo

In [57]:
##################################################
#  Save model and results (per experiment)
##################################################

# Per-experiment artifact folder
artifact_path = get_artifact_path(config, SIM_REAL)
carpeta_modelo = artifact_path / f"{EXPERIMENT_ID}"
carpeta_modelo.mkdir(parents=True, exist_ok=True)

print("\n" + "="*60)
print("GUARDANDO MODELO Y RESULTADOS...")
print("="*60)

# 1. Full model object (LSBPLaplace)
model_file = carpeta_modelo / "lsbp_model.pkl"
with open(model_file, 'wb') as f:
    pickle.dump(lsbp_model, f)
print(f"‚úì Modelo guardado: {model_file}")

# 2. Traces only (lighter than the full model)
trace_file = carpeta_modelo / "trace.pkl"
with open(trace_file, 'wb') as f:
    pickle.dump(trace, f)
print(f"‚úì Trazas guardadas: {trace_file}")

# 3. Posterior summary
summary = lsbp_model.get_posterior_summary()
summary_file = carpeta_modelo / "posterior_summary.json"
# Each summary entry is indexed as (mean, std); store it as a labelled dict.
summary_json = {k: {'mean': v[0], 'std': v[1]} for k, v in summary.items()}
with open(summary_file, 'w') as f:
    json.dump(summary_json, f, indent=2)
print(f"‚úì Resumen posterior guardado: {summary_file}")

# 4. Experiment metadata
metadata = {
    'experiment_id': EXPERIMENT_ID,
    'timestamp': datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
    'model_type': 'LSBPLaplace',
    'data_shape': {
        'n': lsbp_model.n,
        'p': lsbp_model.p
    },
    # These must mirror the values passed to LSBPLaplace(...) and .run(...)
    # in this experiment's model cell (H=30, iterations=2400, burnin=400).
    # The previous values (15 / 1200 / 200) were copied from another experiment.
    'hyperparameters': {
        'H_initial': 30,
        'iterations': 2400,
        'burnin': 400,
        'n_grid': lsbp_model.n_grid
    },
    'priors': {
        'mu_prior': (lsbp_model.mu_mu, lsbp_model.tau_mu_inv),
        'mu0_prior': (lsbp_model.m0, lsbp_model.s02),
        'tau0_prior': (lsbp_model.alpha_tau, lsbp_model.beta_tau),
        'a0_prior': (lsbp_model.alpha_a, lsbp_model.beta_a),
        'beta0_prior': (lsbp_model.alpha_beta, lsbp_model.beta_beta),
        'psi_prior': (lsbp_model.mu_psi, lsbp_model.tau_psi_inv)
    },
    'final_stats': {
        'H_final': lsbp_model.H,
        'n_clusters_mean': summary['n_clusters'][0],
        'n_clusters_std': summary['n_clusters'][1]
    },
    # Mean of the last 100 recorded acceptance indicators per MH-updated
    # parameter; 0 when nothing was recorded.
    'acceptance_rates': {
        name: (
            np.mean(lsbp_model.mh_acceptance[name][-100:])
            if lsbp_model.mh_acceptance[name] else 0
        )
        for name in ('alpha', 'psi', 'tau0', 'a0')
    }
}

metadata_file = carpeta_modelo / "metadata.json"
with open(metadata_file, 'w') as f:
    json.dump(metadata, f, indent=2)
print(f"‚úì Metadatos guardados: {metadata_file}")

# 5. Normalization constants stored on the model
normalization_file = carpeta_modelo / "normalization.pkl"
normalization_data = {
    'y_mean': lsbp_model.y_mean,
    'y_std': lsbp_model.y_std,
    'X_mean': lsbp_model.X_mean,
    'X_std': lsbp_model.X_std
}
with open(normalization_file, 'wb') as f:
    pickle.dump(normalization_data, f)
print(f"‚úì Datos de normalizaci√≥n guardados: {normalization_file}")

print("\n" + "="*60)
print(f"MODELO GUARDADO EN: {carpeta_modelo}")
print("="*60)


GUARDANDO MODELO Y RESULTADOS...
‚úì Modelo guardado: C:\Users\JuanFran\Desktop\git_tesis\model_ddp\artefact\simulaciones\models\lsbp_002_exp_04_20251225_184017\lsbp_model.pkl
‚úì Trazas guardadas: C:\Users\JuanFran\Desktop\git_tesis\model_ddp\artefact\simulaciones\models\lsbp_002_exp_04_20251225_184017\trace.pkl
‚úì Resumen posterior guardado: C:\Users\JuanFran\Desktop\git_tesis\model_ddp\artefact\simulaciones\models\lsbp_002_exp_04_20251225_184017\posterior_summary.json
‚úì Metadatos guardados: C:\Users\JuanFran\Desktop\git_tesis\model_ddp\artefact\simulaciones\models\lsbp_002_exp_04_20251225_184017\metadata.json
‚úì Datos de normalizaci√≥n guardados: C:\Users\JuanFran\Desktop\git_tesis\model_ddp\artefact\simulaciones\models\lsbp_002_exp_04_20251225_184017\normalization.pkl

MODELO GUARDADO EN: C:\Users\JuanFran\Desktop\git_tesis\model_ddp\artefact\simulaciones\models\lsbp_002_exp_04_20251225_184017


## Predicciones y gráficas

In [58]:
##################################################
#  In-sample fit (per experiment)
##################################################
rule = "=" * 60
dashes = "-" * 60

print("\n" + rule)
print("GENERANDO PREDICCIONES...")
print(rule)

# Predict with the trained model on the features stored in `datos`.
X_fit = datos.drop(columns=["Y"]).values
y_pred_mean, y_pred_std = lsbp_model.predict_mean(X_new=X_fit, n_samples=1000)

y_true = datos["Y"].values

# Metrics of the predictive mean against the observed response.
metrics = regression_metrics(y_true, y_pred_mean)

print("\nüìä M√âTRICAS DE AJUSTE:")
print(dashes)
for metric_name, metric_value in metrics.items():
    print(f"  {metric_name.upper():8s}: {metric_value:10.6f}")
print(dashes)

# Per-experiment folder for tables
report_path = get_report_path(config, SIM_REAL, "tables")
carpeta_reportes = report_path / f"{EXPERIMENT_ID}"
carpeta_reportes.mkdir(parents=True, exist_ok=True)

# Metrics as JSON
metrics_file = carpeta_reportes / "metrics.json"
with open(metrics_file, 'w') as f:
    json.dump(metrics, f, indent=2)
print(f"\n‚úì M√©tricas guardadas: {metrics_file}")

# Full prediction table; residual_std is the residual scaled by the predictive std.
residual = y_true - y_pred_mean
predictions_df = pd.DataFrame({
    'y_true': y_true,
    'y_pred_mean': y_pred_mean,
    'y_pred_std': y_pred_std,
    'residual': residual,
    'residual_std': residual / y_pred_std,
})
predictions_file = carpeta_reportes / "predictions.csv"
predictions_df.to_csv(predictions_file, index=False)
print(f"‚úì Predicciones guardadas: {predictions_file}")

##################################################
# Fit plots (per experiment)
##################################################
print("\n" + rule)
print("GENERANDO GR√ÅFICAS...")
print(rule)

# Per-experiment folder for figures
graphics_path = get_report_path(config, SIM_REAL, "graphics")
carpeta_graficas = graphics_path / f"{EXPERIMENT_ID}"
carpeta_graficas.mkdir(parents=True, exist_ok=True)

# One (observed, predicted, label) triple per split; only the training set here.
splits = [(y_true, y_pred_mean, "Training Set")]

plot_regression_analysis(
    splits=splits,
    output_path=str(carpeta_graficas),
    model_name="LSBP Laplace",
)

print(f"‚úì Gr√°ficas guardadas en: {carpeta_graficas}")


GENERANDO PREDICCIONES...

üìä M√âTRICAS DE AJUSTE:
------------------------------------------------------------
  MSE     :  10.000899
  RMSE    :   3.162420
  MAE     :   2.430945
  R2      :   0.961091
  MAPE    :   2.623253
------------------------------------------------------------

‚úì M√©tricas guardadas: C:\Users\JuanFran\Desktop\git_tesis\model_ddp\reports\simulaciones\lsbp_002_exp_04_20251225_184017\metrics.json
‚úì Predicciones guardadas: C:\Users\JuanFran\Desktop\git_tesis\model_ddp\reports\simulaciones\lsbp_002_exp_04_20251225_184017\predictions.csv

GENERANDO GR√ÅFICAS...
‚úì Gr√°ficas guardadas en: C:\Users\JuanFran\Desktop\git_tesis\model_ddp\reports\simulaciones\lsbp_002_exp_04_20251225_184017


## Otros análisis: trazas e intervalos de credibilidad

In [59]:
##################################################
# Trace plots and credible intervals
##################################################
# Trace key -> panel label; insertion order is kept when the dict is turned
# into the list of (key, label) pairs expected by the plotting helper.
trace_labels = {
    'mu': 'Œº (Intercepto stick-breaking)',
    'mu0': 'Œº‚ÇÄ (Media base)',
    'tau0': 'œÑ‚ÇÄ (Precisi√≥n)',
    'a0': 'a‚ÇÄ (Shape b)',
    'beta0': 'Œ≤‚ÇÄ (Scale b)',
    'n_clusters': 'Numero de Clusters',
}
hyperparams = list(trace_labels.items())

# Both figures go into this experiment's graphics folder.
traces_png = carpeta_graficas / f"{EXPERIMENT_ID}.png"
intervals_png = carpeta_graficas / "intervalos_credibilidad.png"

plot_hyperparameter_traces(
    trace=trace,
    param_config=hyperparams,
    output_path=traces_png,
    title="Trazas LSBP Laplace",
)

# Simple version: predictive mean/std against the observed values.
plot_credible_intervals(
    y_true=y_true,
    y_pred_mean=y_pred_mean,
    y_pred_std=y_pred_std,
    output_path=intervals_png,
    sort_by='y_true',
)

print(f"‚úì Todas las gr√°ficas guardadas en: {carpeta_graficas}")

  plt.tight_layout()
  plt.savefig(output_path, dpi=300, bbox_inches='tight')


‚úì Todas las gr√°ficas guardadas en: C:\Users\JuanFran\Desktop\git_tesis\model_ddp\reports\simulaciones\lsbp_002_exp_04_20251225_184017
