In [None]:
%pip install nbformat

In [None]:

"""
SENTINEL - NeuralProphet Baseline Implementation
===============================================
Plataforma inteligente para monitoreo industrial y detección predictiva de fallas

Autor: Equipo SENTINEL
Fecha: Mayo 2025
Objetivo: Establecer baseline de forecasting con NeuralProphet para series temporales industriales
"""

# =============================================================================
# 1. ENVIRONMENT SETUP AND DEPENDENCIES
# =============================================================================

import warnings
# NOTE(review): this silences every warning category from here on, including
# deprecation warnings raised by the libraries imported below.
warnings.filterwarnings('ignore')

# Dependency installation (run only if needed). The magic command is kept
# inside a bare string literal so it is not executed.
"""
%pip install neuralprophet pandas numpy matplotlib plotly seaborn scikit-learn nbformat
"""

# Main library imports
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime, timedelta
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import plotly.express as px

# NeuralProphet and evaluation metrics
from neuralprophet import NeuralProphet
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Plot styling and pandas display configuration
plt.style.use('seaborn-v0_8')
sns.set_palette("husl")
pd.set_option('display.max_columns', None)
pd.set_option('display.width', 1000)

print("‚úÖ Librer√≠as importadas correctamente")
print(f"üìä Versi√≥n de pandas: {pd.__version__}")

# =============================================================================
# 2. GENERACI√ìN DE DATOS SIMULADOS REPRESENTATIVOS
# =============================================================================

def generate_industrial_sensor_data(start_date='2023-01-01', periods=8760, freq='H'):
    """
    Generate simulated industrial sensor data for SENTINEL.

    Parameters:
    - start_date: first timestamp of the series
    - periods: number of rows to generate (8760 = one year of hourly data)
    - freq: pandas frequency alias for the timestamps ('H' = hourly)

    Returns:
    - DataFrame with columns 'ds', 'y' (pressure), the exogenous signals
      'temperature', 'flow', 'vibration', 'humidity', the calendar features
      'hour', 'day_of_week', 'month', 'is_weekend', and 'is_maintenance_day'
      (1 on every row that belongs to a simulated maintenance window).

    The global NumPy random state is re-seeded with 42 on every call, so the
    output is reproducible.
    """
    maintenance_hours = 8   # length of each simulated maintenance window
    maintenance_drop = 15   # pressure drop applied during maintenance

    # Time index
    dates = pd.date_range(start=start_date, periods=periods, freq=freq)

    # Fixed seed: the same pseudo-random numbers are produced on every run.
    np.random.seed(42)

    steps = np.arange(periods)

    # === PRESSURE (main target variable) ===
    # Slightly rising trend plus daily and weekly cycles.
    trend = np.linspace(100, 110, periods)
    daily_season = 5 * np.sin(2 * np.pi * steps / 24)
    weekly_season = 3 * np.sin(2 * np.pi * steps / (24*7))

    noise = np.random.normal(0, 2, periods)

    # Scheduled maintenance events. The sample size is capped so that very
    # short series do not make np.random.choice raise.
    maintenance_events = np.random.choice(periods, size=min(12, periods), replace=False)
    maintenance_flag = np.zeros(periods, dtype=int)
    for event in maintenance_events:
        # Windows that would run past the end of the series are skipped.
        if event + maintenance_hours < periods:
            window = slice(event, event + maintenance_hours)
            trend[window] -= maintenance_drop
            # Flag exactly the rows whose pressure was lowered, so the flag
            # and the signal always agree.
            maintenance_flag[window] = 1

    # Total pressure, clipped to a plausible range.
    pressure = trend + daily_season + weekly_season + noise
    pressure = np.clip(pressure, 85, 125)

    # === EXOGENOUS VARIABLES ===
    # Ambient temperature: yearly cycle + daily cycle + noise.
    temp_base = 25 + 10 * np.sin(2 * np.pi * steps / (24*365))
    temp_daily = 8 * np.sin(2 * np.pi * steps / 24)
    temperature = temp_base + temp_daily + np.random.normal(0, 2, periods)

    # Flow: linearly tied to pressure, plus noise.
    flow_base = 50 + 0.3 * (pressure - 100)
    flow = flow_base + np.random.normal(0, 3, periods)
    flow = np.clip(flow, 30, 80)

    # Vibration: gradual increase (wear) plus exponential noise.
    vibration_trend = np.linspace(2, 4, periods)
    vibration = vibration_trend + np.random.exponential(0.5, periods)

    # Humidity: yearly cycle plus noise, clipped.
    humidity = 60 + 20 * np.sin(2 * np.pi * steps / (24*365)) + np.random.normal(0, 5, periods)
    humidity = np.clip(humidity, 30, 90)

    # === BUILD DATAFRAME ===
    df = pd.DataFrame({
        'ds': dates,  # timestamp column name used by the NeuralProphet calls in this file
        'y': pressure,  # target variable
        # Exogenous variables
        'temperature': temperature,
        'flow': flow,
        'vibration': vibration,
        'humidity': humidity,
        # Calendar features
        'hour': dates.hour,
        'day_of_week': dates.dayofweek,
        'month': dates.month,
        'is_weekend': (dates.dayofweek >= 5).astype(int),
        'is_maintenance_day': maintenance_flag
    })

    return df

# Generate one year of hourly simulated data and print a quick overview
print("üîÑ Generando datos simulados de sensores industriales...")
df_raw = generate_industrial_sensor_data(start_date='2023-01-01', periods=8760, freq='H')

print(f"‚úÖ Datos generados: {len(df_raw)} registros")
print(f"üìÖ Per√≠odo: {df_raw['ds'].min()} a {df_raw['ds'].max()}")
print("\nüìä Primeras 5 filas:")
print(df_raw.head())

# Descriptive statistics of every numeric column
print("\nüìà Estad√≠sticas descriptivas:")
print(df_raw.describe())

# =============================================================================
# 3. AN√ÅLISIS EXPLORATORIO DE DATOS (EDA)
# =============================================================================

def plot_eda_analysis(df):
    """
    Build the exploratory-analysis dashboard as a 3x2 Plotly figure.

    Parameters:
    - df: DataFrame with at least the columns 'ds', 'y', 'temperature',
      'flow' and 'hour'

    Returns:
    - plotly Figure with: target time series, target histogram,
      pressure-vs-temperature scatter, mean pressure per hour of day,
      temperature/flow over time (dual y-axis) and a correlation heatmap.
    """

    fig = make_subplots(
        rows=3, cols=2,
        subplot_titles=[
            'Serie Temporal - Presi√≥n (Variable Objetivo)',
            'Distribuci√≥n de Presi√≥n',
            'Correlaci√≥n: Presi√≥n vs Temperatura',
            'Estacionalidad Diaria',
            'Variables Ex√≥genas vs Tiempo',
            'Matriz de Correlaci√≥n'
        ],
        specs=[[{"secondary_y": False}, {"secondary_y": False}],
               [{"secondary_y": False}, {"secondary_y": False}],
               [{"secondary_y": True}, {"secondary_y": False}]]
    )

    # 1. Main time series
    fig.add_trace(
        go.Scatter(x=df['ds'], y=df['y'], name='Presi√≥n', line=dict(color='blue')),
        row=1, col=1
    )

    # 2. Distribution histogram
    fig.add_trace(
        go.Histogram(x=df['y'], name='Distribuci√≥n', nbinsx=50, marker_color='lightblue'),
        row=1, col=2
    )

    # 3. Scatter plot: pressure vs temperature
    fig.add_trace(
        go.Scatter(x=df['temperature'], y=df['y'], mode='markers',
                  name='Presi√≥n vs Temp', marker=dict(color='red', size=3, opacity=0.6)),
        row=2, col=1
    )

    # 4. Daily pattern (mean per hour of day)
    hourly_avg = df.groupby('hour')['y'].mean()
    fig.add_trace(
        go.Scatter(x=hourly_avg.index, y=hourly_avg.values,
                  name='Patr√≥n Diario', line=dict(color='green')),
        row=2, col=2
    )

    # 5. Exogenous variables (every 100th row to keep the plot light)
    sample_data = df.iloc[::100]
    fig.add_trace(
        go.Scatter(x=sample_data['ds'], y=sample_data['temperature'],
                  name='Temperatura', line=dict(color='orange')),
        row=3, col=1
    )
    # The secondary axis is selected through `secondary_y=True`; an explicit
    # `yaxis=` on the trace would be overridden by the subplot placement.
    fig.add_trace(
        go.Scatter(x=sample_data['ds'], y=sample_data['flow'],
                  name='Flujo', line=dict(color='purple')),
        row=3, col=1, secondary_y=True
    )

    # 6. Correlation matrix. This panel has a title in `subplot_titles`, so
    # it is filled here instead of being left empty. Only columns that are
    # actually present in `df` are used.
    corr_candidates = ['y', 'temperature', 'flow', 'vibration', 'humidity']
    corr_cols = [col for col in corr_candidates if col in df.columns]
    corr_matrix = df[corr_cols].corr()
    fig.add_trace(
        go.Heatmap(z=corr_matrix.values, x=corr_cols, y=corr_cols,
                   colorscale='RdBu', zmin=-1, zmax=1, showscale=False),
        row=3, col=2
    )

    # Layout and axis titles
    fig.update_layout(height=1200, title_text="üìä SENTINEL - An√°lisis Exploratorio de Datos")
    fig.update_xaxes(title_text="Fecha", row=3, col=1)
    fig.update_yaxes(title_text="Presi√≥n (PSI)", row=1, col=1)
    fig.update_yaxes(title_text="Frecuencia", row=1, col=2)
    fig.update_yaxes(title_text="Presi√≥n", row=2, col=1)
    fig.update_yaxes(title_text="Presi√≥n Promedio", row=2, col=2)
    fig.update_yaxes(title_text="Temperatura (¬∞C)", row=3, col=1)
    fig.update_yaxes(title_text="Flujo (L/min)", secondary_y=True, row=3, col=1)

    return fig

# Build and display the exploratory-analysis dashboard
print("üîç Realizando an√°lisis exploratorio de datos...")
eda_fig = plot_eda_analysis(df_raw)
eda_fig.show()

# Correlation matrix between the target and the exogenous signals
corr_cols = ['y', 'temperature', 'flow', 'vibration', 'humidity']
correlation_matrix = df_raw[corr_cols].corr()

plt.figure(figsize=(10, 8))
sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm', center=0, 
            square=True, linewidths=0.5)
plt.title('üîó Matriz de Correlaci√≥n - Variables SENTINEL')
plt.tight_layout()
plt.show()

# Correlations with the target, sorted by absolute value (strongest first)
print("üìã Correlaciones con la variable objetivo (Presi√≥n):")
target_corr = correlation_matrix['y'].drop('y').sort_values(key=abs, ascending=False)
for var, corr in target_corr.items():
    print(f"  ‚Ä¢ {var}: {corr:.3f}")

# =============================================================================
# 4. PREPARACI√ìN DE DATOS PARA NEURALPROPHET
# =============================================================================

def prepare_data_for_neuralprophet(df, test_size=0.2):
    """
    Split a time-ordered DataFrame into training and test partitions.

    Parameters:
    - df: DataFrame containing a 'ds' timestamp column
    - test_size: fraction of rows (taken from the end) reserved for testing

    Returns:
    - (df_train, df_test): independent copies of the chronological head and
      tail of the data; the input frame is not modified
    """
    # Chronological order with a fresh 0..n-1 index
    ordered = df.sort_values('ds').reset_index(drop=True)

    # Number of leading rows that go to the training partition
    n_train = int(len(ordered) * (1 - test_size))

    df_train = ordered.iloc[:n_train].copy()
    df_test = ordered.iloc[n_train:].copy()

    print(f"üìä Divisi√≥n de datos:")
    print(f"  ‚Ä¢ Entrenamiento: {len(df_train)} registros ({df_train['ds'].min()} a {df_train['ds'].max()})")
    print(f"  ‚Ä¢ Prueba: {len(df_test)} registros ({df_test['ds'].min()} a {df_test['ds'].max()})")

    return df_train, df_test

# Chronological 80/20 train/test split
df_train, df_test = prepare_data_for_neuralprophet(df_raw, test_size=0.2)

# Exogenous variables passed to the model (hard-coded list; 'humidity' is
# left out)
key_regressors = ['temperature', 'flow', 'vibration']

print(f"\nüéØ Variables seleccionadas para el modelo:")
print(f"  ‚Ä¢ Variable objetivo: y (presi√≥n)")
print(f"  ‚Ä¢ Variables ex√≥genas: {key_regressors}")

# =============================================================================
# 5. CONFIGURACI√ìN Y ENTRENAMIENTO DE NEURALPROPHET
# =============================================================================

def create_and_train_neuralprophet_model(df_train, regressors=None):
    """
    Build and fit a NeuralProphet model on the training data.

    Parameters:
    - df_train: training DataFrame with 'ds', 'y' and the regressor columns
    - regressors: optional list of exogenous column names; each one is added
      as a lagged regressor with 24 lags

    Returns:
    - (model, metrics): the fitted NeuralProphet instance and the object
      returned by `model.fit`
    """
    # Local imports: only this function needs them.
    import inspect
    from neuralprophet import set_log_level

    print("üß† Configurando modelo NeuralProphet...")

    # Verbosity is controlled through the library-level helper; it is not a
    # constructor argument of NeuralProphet.
    set_log_level('ERROR')

    model_kwargs = dict(
        # Trend
        growth='linear',

        # Seasonalities
        yearly_seasonality=True,
        weekly_seasonality=True,
        daily_seasonality=True,

        # Autoregression: 48 hours of history (2 days)
        n_lags=48,

        # Training
        epochs=100,
        batch_size=64,
        learning_rate=0.01,

        # Normalization
        normalize='standardize',

        # Give more weight to recent samples
        newer_samples_weight=1.2,
    )

    # Network size: two hidden layers of 64 units. Newer NeuralProphet
    # releases take this as `ar_layers`; older ones use `num_hidden_layers`
    # and `d_hidden`. Pick whichever the installed version accepts.
    accepted = inspect.signature(NeuralProphet.__init__).parameters
    if 'ar_layers' in accepted:
        model_kwargs['ar_layers'] = [64, 64]
    else:
        model_kwargs.update(num_hidden_layers=2, d_hidden=64)

    model = NeuralProphet(**model_kwargs)

    # Register exogenous variables, if any
    if regressors:
        for regressor in regressors:
            print(f"  ‚ûï Agregando variable ex√≥gena: {regressor}")
            model.add_lagged_regressor(regressor, n_lags=24)  # 24 hours of lags

    print("üîÑ Iniciando entrenamiento del modelo...")

    # Keep only the columns the model is configured to use
    df_model_train = df_train[['ds', 'y'] + (regressors or [])].copy()

    # Fit without a validation set
    metrics = model.fit(df_model_train, freq='H', validation_df=None)

    print("‚úÖ Entrenamiento completado!")

    return model, metrics

# Train the model on the training split; the call returns (model, metrics)
model, training_metrics = create_and_train_neuralprophet_model(
    df_train, 
    regressors=key_regressors
)

# =============================================================================
# 6. GENERACI√ìN DE PREDICCIONES Y EVALUACI√ìN
# =============================================================================

def generate_predictions_and_evaluate(model, df_train, df_test, regressors=None):
    """
    Predict over train + test and compute evaluation metrics for each split.

    Parameters:
    - model: fitted model exposing `predict(df)` and returning a 'yhat1' column
    - df_train: training data
    - df_test: test data
    - regressors: optional list of exogenous column names

    Returns:
    - (forecast, forecast_train, forecast_test, train_metrics, test_metrics)
      where each metrics dict has the keys 'MAE', 'RMSE', 'MAPE' and 'R¬≤'

    Raises:
    - ValueError: if a split has no finite (target, prediction) pair
    """

    print("üîÆ Generando predicciones...")

    # Predict on the concatenated frame so the test rows have lag history
    df_full = pd.concat([df_train, df_test], ignore_index=True)
    df_model = df_full[['ds', 'y'] + (regressors or [])].copy()

    forecast = model.predict(df_model)

    # Split the forecast back by position
    train_size = len(df_train)
    forecast_train = forecast.iloc[:train_size]
    forecast_test = forecast.iloc[train_size:]

    def calculate_metrics(y_true, y_pred, dataset_name):
        """Compute MAE, RMSE, MAPE and R2 on the finite pairs only."""
        y_true = np.asarray(y_true, dtype=float)
        y_pred = np.asarray(y_pred, dtype=float)

        # An autoregressive model has no prediction for the first rows (not
        # enough lag history), so those rows are NaN and must be dropped
        # before scoring; sklearn metrics reject NaN input.
        valid = np.isfinite(y_true) & np.isfinite(y_pred)
        if not valid.any():
            raise ValueError(
                f"No finite target/prediction pairs to evaluate for '{dataset_name}'"
            )
        y_true, y_pred = y_true[valid], y_pred[valid]

        mae = mean_absolute_error(y_true, y_pred)
        rmse = np.sqrt(mean_squared_error(y_true, y_pred))
        # MAPE is undefined where the target is zero; skip those rows.
        nonzero = y_true != 0
        if nonzero.any():
            mape = np.mean(np.abs((y_true[nonzero] - y_pred[nonzero]) / y_true[nonzero])) * 100
        else:
            mape = float('nan')
        r2 = r2_score(y_true, y_pred)

        metrics = {
            'MAE': mae,
            'RMSE': rmse,
            'MAPE': mape,
            'R¬≤': r2
        }

        print(f"\nüìä M√©tricas - {dataset_name}:")
        for metric, value in metrics.items():
            print(f"  ‚Ä¢ {metric}: {value:.4f}")

        return metrics

    # Training split
    train_metrics = calculate_metrics(
        df_train['y'].values,
        forecast_train['yhat1'].values,
        "Entrenamiento"
    )

    # Test split
    test_metrics = calculate_metrics(
        df_test['y'].values,
        forecast_test['yhat1'].values,
        "Prueba"
    )

    return forecast, forecast_train, forecast_test, train_metrics, test_metrics

# Predict over both splits and compute the evaluation metrics
forecast, forecast_train, forecast_test, train_metrics, test_metrics = generate_predictions_and_evaluate(
    model, df_train, df_test, key_regressors
)

# =============================================================================
# 7. VISUALIZACI√ìN DE RESULTADOS
# =============================================================================

def plot_comprehensive_results(df_train, df_test, forecast_train, forecast_test, model):
    """
    Build the results dashboard (predictions, residuals, error distribution).

    Parameters:
    - df_train, df_test: actual data with 'ds' and 'y'
    - forecast_train, forecast_test: forecasts with 'ds' and 'yhat1', aligned
      row by row with df_train / df_test
    - model: not used by this function; kept for interface compatibility

    Returns:
    - plotly Figure with five panels: full series (spanning the first row),
      zoom on the last 720 test rows, training residuals, test residuals and
      the histogram of test residuals
    """

    # Layout: row 1 is a single panel spanning both columns, so there are
    # five subplot cells. Titles are assigned to the cells in order, so
    # exactly five are supplied.
    fig = make_subplots(
        rows=3, cols=2,
        subplot_titles=[
            'üéØ Predicciones vs Realidad - Serie Completa',
            'üîç Zoom: √öltimos 30 d√≠as de Prueba',
            'üìä Residuales - Conjunto de Entrenamiento',
            'üìä Residuales - Conjunto de Prueba',
            '‚ö° Distribuci√≥n de Errores'
        ],
        specs=[[{"colspan": 2}, None],
               [{"secondary_y": False}, {"secondary_y": False}],
               [{"secondary_y": False}, {"secondary_y": False}]]
    )

    # 1. Full time series: actual values and predictions for both splits
    fig.add_trace(
        go.Scatter(x=df_train['ds'], y=df_train['y'],
                  name='Datos Reales (Train)', line=dict(color='blue')),
        row=1, col=1
    )
    fig.add_trace(
        go.Scatter(x=df_test['ds'], y=df_test['y'],
                  name='Datos Reales (Test)', line=dict(color='darkblue')),
        row=1, col=1
    )
    fig.add_trace(
        go.Scatter(x=forecast_train['ds'], y=forecast_train['yhat1'],
                  name='Predicciones (Train)', line=dict(color='red', width=1)),
        row=1, col=1
    )
    fig.add_trace(
        go.Scatter(x=forecast_test['ds'], y=forecast_test['yhat1'],
                  name='Predicciones (Test)', line=dict(color='orange', width=2)),
        row=1, col=1
    )

    # Interval band, drawn only when the forecast provides both bounds
    if 'yhat1_lower' in forecast_test.columns and 'yhat1_upper' in forecast_test.columns:
        fig.add_trace(
            go.Scatter(x=forecast_test['ds'], y=forecast_test['yhat1_upper'],
                      fill=None, mode='lines', line_color='rgba(0,0,0,0)',
                      showlegend=False),
            row=1, col=1
        )
        fig.add_trace(
            go.Scatter(x=forecast_test['ds'], y=forecast_test['yhat1_lower'],
                      fill='tonexty', mode='lines', line_color='rgba(0,0,0,0)',
                      name='Intervalo Confianza', fillcolor='rgba(255,165,0,0.2)'),
            row=1, col=1
        )

    # 2. Zoom on the last 720 test rows (30 days of hourly data)
    last_30_days = df_test.tail(720)
    forecast_30_days = forecast_test.tail(720)

    fig.add_trace(
        go.Scatter(x=last_30_days['ds'], y=last_30_days['y'],
                  name='Real (30d)', line=dict(color='blue', width=3)),
        row=2, col=1
    )
    fig.add_trace(
        go.Scatter(x=forecast_30_days['ds'], y=forecast_30_days['yhat1'],
                  name='Predicci√≥n (30d)', line=dict(color='red', width=2)),
        row=2, col=1
    )

    # 3. Training residuals
    train_residuals = df_train['y'].values - forecast_train['yhat1'].values
    fig.add_trace(
        go.Scatter(x=df_train['ds'], y=train_residuals,
                  mode='markers', name='Residuales Train',
                  marker=dict(color='green', size=2, opacity=0.6)),
        row=2, col=2
    )
    fig.add_hline(y=0, line_dash="dash", line_color="black", row=2, col=2)

    # 4. Test residuals
    test_residuals = df_test['y'].values - forecast_test['yhat1'].values
    fig.add_trace(
        go.Scatter(x=df_test['ds'], y=test_residuals,
                  mode='markers', name='Residuales Test',
                  marker=dict(color='red', size=3, opacity=0.7)),
        row=3, col=1
    )
    fig.add_hline(y=0, line_dash="dash", line_color="black", row=3, col=1)

    # 5. Distribution of test errors
    fig.add_trace(
        go.Histogram(x=test_residuals, name='Dist. Errores',
                    nbinsx=30, marker_color='lightcoral'),
        row=3, col=2
    )

    # Layout
    fig.update_layout(
        height=1200,
        title_text="üöÄ SENTINEL - Resultados del Modelo NeuralProphet",
        showlegend=True
    )

    # Axis labels
    fig.update_yaxes(title_text="Presi√≥n (PSI)", row=1, col=1)
    fig.update_yaxes(title_text="Presi√≥n (PSI)", row=2, col=1)
    fig.update_yaxes(title_text="Residuales", row=2, col=2)
    fig.update_yaxes(title_text="Residuales", row=3, col=1)
    fig.update_yaxes(title_text="Frecuencia", row=3, col=2)

    return fig

# Build and display the full results dashboard
results_fig = plot_comprehensive_results(df_train, df_test, forecast_train, forecast_test, model)
results_fig.show()

# === MODEL COMPONENTS ===
print("üìà Analizando componentes del modelo...")

# Plot the components straight from the forecast produced by `model.predict`.
# NeuralProphet has no `predict_components` method, and the `components`
# argument of `plot_components` selects component names rather than taking
# a DataFrame, so the forecast frame is the only input needed.
fig_components = model.plot_components(
    forecast.iloc[:len(df_train)]  # training rows only
)
fig_components.show()

# =============================================================================
# 8. AN√ÅLISIS DE IMPORTANCIA DE VARIABLES
# =============================================================================

def analyze_feature_importance(model, regressors):
    """
    Print which inputs the model uses and how many lags each one has.

    Parameters:
    - model: fitted model exposing `model.model.named_parameters()`, the
      autoregressive lag count (`n_lags`, or `config_ar.n_lags`) and a
      `config_lagged_regressors` mapping keyed by regressor name
    - regressors: iterable of regressor names to report (may be None)

    Returns:
    - whatever `model.model.named_parameters()` returns
    """

    def _lag_count(config):
        """Read `n_lags` from a config given as a dict or as an object."""
        if config is None:
            return 0
        if isinstance(config, dict):
            return config.get('n_lags', 0)
        return getattr(config, 'n_lags', 0)

    print("üîç Analizando importancia de variables...")

    # Underlying network parameters
    params = model.model.named_parameters()

    # Autoregressive lags belong to the model itself, not to the
    # lagged-regressor configuration.
    ar_lags = getattr(model, 'n_lags', None)
    if ar_lags is None:
        ar_lags = getattr(getattr(model, 'config_ar', None), 'n_lags', 0)

    # The regressor configuration may be a plain mapping or an object that
    # wraps the mapping in a `regressors` attribute.
    lagged_configs = getattr(model, 'config_lagged_regressors', None) or {}
    lagged_configs = getattr(lagged_configs, 'regressors', lagged_configs) or {}

    print("\nüìä Variables incluidas en el modelo:")
    print(f"  ‚Ä¢ Variable objetivo: y (presi√≥n)")
    print(f"  ‚Ä¢ Lags autorregresivos: {ar_lags}")

    for regressor in (regressors or []):
        n_lags = _lag_count(lagged_configs.get(regressor))
        print(f"  ‚Ä¢ {regressor}: {n_lags} lags")

    return params

# Report the model inputs; the returned value is what
# model.model.named_parameters() yields
importance_analysis = analyze_feature_importance(model, key_regressors)

# =============================================================================
# 9. PREDICCIONES FUTURAS
# =============================================================================

def generate_future_predictions(model, df_full, periods=168, regressors=None):
    """
    Append `periods` future hourly rows to the history and predict on them.

    Parameters:
    - model: fitted model exposing `predict(df)`
    - df_full: complete historical data ('ds', 'y' and regressor columns)
    - periods: number of future hourly rows (168 hours = 1 week)
    - regressors: optional list of exogenous column names to project

    Returns:
    - (future_predictions, df_future): the last `periods` rows of the model
      output, and the synthetic future frame that was appended

    NOTE(review): whether the model can really forecast `periods` steps ahead
    depends on how it was configured (for NeuralProphet, presumably on
    `n_forecasts`) - confirm before relying on values beyond the first step.
    """
    regressors = list(regressors or [])

    print(f"üîÆ Generando predicciones futuras para {periods} horas ({periods//24} d√≠as)...")

    # Future timestamps start one hour after the last observation. The
    # lowercase 'h' alias is used because the uppercase one is deprecated in
    # recent pandas releases.
    last_date = df_full['ds'].max()
    future_dates = pd.date_range(
        start=last_date + timedelta(hours=1),
        periods=periods,
        freq='h'
    )

    df_future = pd.DataFrame({'ds': future_dates})

    # Exogenous variables have no future observations, so they are projected
    # from the statistics of the last 24 rows plus controlled noise.
    if regressors:
        print("‚ö†Ô∏è  Proyectando variables ex√≥genas usando tendencias hist√≥ricas...")

        steps = np.arange(periods)
        for regressor in regressors:
            last_values = df_full[regressor].tail(24).values
            mean_val = np.mean(last_values)
            std_val = np.std(last_values)

            if regressor == 'temperature':
                # Daily sinusoidal pattern around the recent mean
                future_vals = mean_val + 3 * np.sin(2 * np.pi * steps / 24)
                future_vals += np.random.normal(0, std_val * 0.5, periods)
            elif regressor == 'vibration':
                # Slight upward drift (wear)
                wear = np.linspace(0, 0.5, periods)
                future_vals = mean_val + wear + np.random.normal(0, std_val * 0.2, periods)
            else:
                # Flow and any other regressor: stable around the recent mean
                future_vals = np.full(periods, mean_val) + np.random.normal(0, std_val * 0.3, periods)

            df_future[regressor] = future_vals

    # History followed by the future rows (their 'y' is NaN after the concat)
    df_predict = pd.concat([
        df_full[['ds', 'y'] + regressors],
        df_future
    ], ignore_index=True)

    future_forecast = model.predict(df_predict)

    # Keep only the future rows. An explicit start position is used because
    # `iloc[-periods:]` would return the whole frame when periods == 0.
    start = max(len(future_forecast) - periods, 0)
    future_predictions = future_forecast.iloc[start:].copy()

    return future_predictions, df_future

# Forecast one week (168 hourly rows) beyond the end of the full dataset
future_predictions, df_future = generate_future_predictions(
    model, df_raw, periods=168, regressors=key_regressors
)

print("‚úÖ Predicciones futuras generadas!")
print(f"üìÖ Per√≠odo futuro: {future_predictions['ds'].min()} a {future_predictions['ds'].max()}")

# Plot the future predictions
fig_future = go.Figure()

# Historical data: last 720 rows (30 days of hourly data)
recent_data = df_raw.tail(720)
fig_future.add_trace(
    go.Scatter(x=recent_data['ds'], y=recent_data['y'], 
              name='Datos Hist√≥ricos', line=dict(color='blue', width=2))
)

# Future predictions
fig_future.add_trace(
    go.Scatter(x=future_predictions['ds'], y=future_predictions['yhat1'], 
              name='Predicciones Futuras', line=dict(color='red', width=3))
)

# Interval band, drawn only when the forecast provides both bounds.
# The upper bound is added first as an invisible line so that the lower
# bound can fill up to it with fill='tonexty'.
if 'yhat1_lower' in future_predictions.columns and 'yhat1_upper' in future_predictions.columns:
    fig_future.add_trace(
        go.Scatter(x=future_predictions['ds'], y=future_predictions['yhat1_upper'], 
                  fill=None, mode='lines', line_color='rgba(0,0,0,0)', 
                  showlegend=False)
    )
    fig_future.add_trace(
        go.Scatter(x=future_predictions['ds'], y=future_predictions['yhat1_lower'], 
                  fill='tonexty', mode='lines', line_color='rgba(0,0,0,0)', 
                  name='Intervalo de Confianza', fillcolor='rgba(255,0,0,0.2)')
    )

# Divider between history and forecast. The line and its label are added
# separately: passing `annotation_text` to `add_vline` together with a
# Timestamp `x` makes plotly try to average the x values to place the label,
# which fails for datetimes.
last_historical_date = df_raw['ds'].max()
fig_future.add_vline(x=last_historical_date, line_dash="dash",
                    line_color="green", line_width=2)
fig_future.add_annotation(x=last_historical_date, y=1, yref="paper",
                          text="Inicio Predicciones", showarrow=False,
                          xanchor="left", yanchor="top")

fig_future.update_layout(
    title="üöÄ SENTINEL - Predicciones Futuras (1 Semana)",
    xaxis_title="Fecha",
    yaxis_title="Presi√≥n (PSI)",
    height=600,
    showlegend=True
)

fig_future.show()

# =============================================================================
# 10. DETECCI√ìN DE ANOMAL√çAS Y ALERTAS
# =============================================================================

def detect_anomalies_and_alerts(df_test, forecast_test, threshold_factor=2.5):
    """
    Flag rows whose residual (actual - predicted) falls outside
    mean +/- threshold_factor * std of the residuals.

    Parameters:
    - df_test: actual data with 'ds' and 'y'
    - forecast_test: predictions with 'yhat1', aligned row by row with df_test
    - threshold_factor: number of standard deviations that defines the band

    Returns:
    - (df_anomalies, upper_threshold, lower_threshold). df_anomalies always
      has the columns 'timestamp', 'valor_real', 'valor_predicho',
      'residual', 'severidad', 'tipo', 'descripcion', even when it is empty.

    Rows with a non-finite residual are ignored: they do not contribute to
    the thresholds and are never reported as anomalies.
    """
    anomaly_columns = ['timestamp', 'valor_real', 'valor_predicho', 'residual',
                       'severidad', 'tipo', 'descripcion']

    print("üö® Detectando anomal√≠as y generando alertas...")

    # Residuals
    actual = df_test['y'].to_numpy(dtype=float)
    predicted = forecast_test['yhat1'].to_numpy(dtype=float)
    residuals = actual - predicted

    # Statistical thresholds, computed on finite residuals only so that a
    # single NaN cannot turn both thresholds into NaN and disable detection.
    finite = np.isfinite(residuals)
    if finite.any():
        residual_mean = np.mean(residuals[finite])
        residual_std = np.std(residuals[finite])
    else:
        residual_mean = residual_std = float('nan')

    upper_threshold = residual_mean + threshold_factor * residual_std
    lower_threshold = residual_mean - threshold_factor * residual_std

    # Vectorized detection; comparisons against NaN evaluate to False.
    with np.errstate(invalid='ignore'):
        above = residuals > upper_threshold
        flagged = above | (residuals < lower_threshold)

    flagged_residuals = residuals[flagged]
    # Residuals beyond 1.5x the band width are rated as critical.
    critical_cut = threshold_factor * 1.5 * residual_std

    df_anomalies = pd.DataFrame({
        'timestamp': df_test['ds'].to_numpy()[flagged],
        'valor_real': actual[flagged],
        'valor_predicho': predicted[flagged],
        'residual': flagged_residuals,
        'severidad': ["CR√çTICA" if abs(r) > critical_cut else "ALTA"
                      for r in flagged_residuals],
        'tipo': ['ALTA' if is_high else 'BAJA' for is_high in above[flagged]],
        'descripcion': [f"Presi√≥n {'superior' if r > 0 else 'inferior'} a lo esperado"
                        for r in flagged_residuals],
    }, columns=anomaly_columns)

    print(f"üéØ Resultados de detecci√≥n de anomal√≠as:")
    print(f"  ‚Ä¢ Total de anomal√≠as detectadas: {len(df_anomalies)}")
    print(f"  ‚Ä¢ Umbral superior: +{upper_threshold:.2f}")
    print(f"  ‚Ä¢ Umbral inferior: {lower_threshold:.2f}")

    if len(df_anomalies) > 0:
        print(f"\n‚ö†Ô∏è  Distribuci√≥n por severidad:")
        severity_counts = df_anomalies['severidad'].value_counts()
        for severity, count in severity_counts.items():
            print(f"    ‚Ä¢ {severity}: {count} eventos")

        print(f"\nüîç Primeras 5 anomal√≠as detectadas:")
        for _, anomaly in df_anomalies.head().iterrows():
            print(f"    ‚Ä¢ {anomaly['timestamp']}: {anomaly['descripcion']} "
                  f"(Real: {anomaly['valor_real']:.2f}, Pred: {anomaly['valor_predicho']:.2f})")

    return df_anomalies, upper_threshold, lower_threshold

# Detect anomalies on the test split using a 2.5-sigma residual band
df_anomalies, upper_thresh, lower_thresh = detect_anomalies_and_alerts(
    df_test, forecast_test, threshold_factor=2.5
)

# Visualizar anomal√≠as
def plot_anomalies(df_test, forecast_test, df_anomalies, upper_thresh, lower_thresh):
    """
    Build a two-row figure: the test series with the detected anomalies
    marked on it, and the residuals together with the detection thresholds.

    Returns the plotly Figure.
    """
    top = dict(row=1, col=1)
    bottom = dict(row=2, col=1)

    fig = make_subplots(
        rows=2, cols=1,
        subplot_titles=['üéØ Serie Temporal con Anomal√≠as Detectadas', 
                       'üìä Residuales y Umbrales de Detecci√≥n'],
        vertical_spacing=0.1
    )

    # Actual values and predictions
    actual_trace = go.Scatter(x=df_test['ds'], y=df_test['y'],
                              name='Valores Reales', line=dict(color='blue'))
    fig.add_trace(actual_trace, **top)
    predicted_trace = go.Scatter(x=forecast_test['ds'], y=forecast_test['yhat1'],
                                 name='Predicciones', line=dict(color='red', dash='dot'))
    fig.add_trace(predicted_trace, **top)

    # Anomaly markers: one trace per severity level, critical first
    if len(df_anomalies) > 0:
        marker_specs = (
            ('CR√çTICA', 'Anomal√≠as CR√çTICAS', dict(color='red', size=12, symbol='x')),
            ('ALTA', 'Anomal√≠as ALTAS', dict(color='orange', size=8, symbol='triangle-up')),
        )
        for level, label, marker in marker_specs:
            subset = df_anomalies[df_anomalies['severidad'] == level]
            if len(subset) == 0:
                continue
            fig.add_trace(
                go.Scatter(x=subset['timestamp'],
                          y=subset['valor_real'],
                          mode='markers', name=label,
                          marker=marker),
                **top
            )

    # Residual scatter
    residual_values = df_test['y'].values - forecast_test['yhat1'].values
    fig.add_trace(
        go.Scatter(x=df_test['ds'], y=residual_values,
                  mode='markers', name='Residuales',
                  marker=dict(color='gray', size=4, opacity=0.6)),
        **bottom
    )

    # Threshold lines followed by the zero reference line
    for level, caption in ((upper_thresh, "Umbral Superior"),
                           (lower_thresh, "Umbral Inferior")):
        fig.add_hline(y=level, line_dash="dash", line_color="red",
                     annotation_text=caption, **bottom)
    fig.add_hline(y=0, line_color="black", line_width=1, **bottom)

    fig.update_layout(
        height=800,
        title_text="üö® SENTINEL - Detecci√≥n de Anomal√≠as",
        showlegend=True
    )

    fig.update_yaxes(title_text="Presi√≥n (PSI)", **top)
    fig.update_yaxes(title_text="Residuales", **bottom)
    fig.update_xaxes(title_text="Fecha", **bottom)

    return fig

# Build and show the anomaly figure only when at least one anomaly was found
if len(df_anomalies) > 0:
    anomalies_fig = plot_anomalies(df_test, forecast_test, df_anomalies, upper_thresh, lower_thresh)
    anomalies_fig.show()

# =============================================================================
# 11. RESUMEN EJECUTIVO Y M√âTRICAS FINALES
# =============================================================================

def generate_executive_summary(train_metrics, test_metrics, df_anomalies, df_raw, df_eval=None):
    """
    Print an executive summary of the SENTINEL model results and return the
    headline figures.

    Parameters:
    - train_metrics: mapping with the keys 'MAE', 'RMSE', 'MAPE' and 'R¬≤'
      for the training split
    - test_metrics: mapping with the same keys for the test split
    - df_anomalies: detected anomalies; its 'severidad' column is read when
      it is non-empty
    - df_raw: full dataset; its length and the min/max of its 'ds' column
      are reported
    - df_eval: optional evaluation set whose length is the denominator of the
      anomaly rate. When omitted, the module-level `df_test` is used, which
      keeps existing four-argument callers working.

    Returns:
    - dict with 'model_quality', 'test_mape', 'anomalies_detected' and
      'anomaly_rate' (a percentage; 0 when there are no anomalies or the
      evaluation set is empty)
    """

    print("\n" + "="*80)
    print("üöÄ SENTINEL - RESUMEN EJECUTIVO DEL MODELO NEURALPROPHET")
    print("="*80)

    print(f"\nüìä INFORMACI√ìN DEL DATASET:")
    print(f"    ‚Ä¢ Total de registros: {len(df_raw):,}")
    print(f"    ‚Ä¢ Per√≠odo analizado: {df_raw['ds'].min()} a {df_raw['ds'].max()}")
    print(f"    ‚Ä¢ Frecuencia: Horaria")
    print(f"    ‚Ä¢ Variables predictoras: Presi√≥n (objetivo), Temperatura, Flujo, Vibraci√≥n")

    print(f"\nüéØ DESEMPE√ëO DEL MODELO:")
    print(f"    üìà ENTRENAMIENTO:")
    print(f"        ‚Ä¢ MAE (Error Absoluto Medio): {train_metrics['MAE']:.4f} PSI")
    print(f"        ‚Ä¢ RMSE (Error Cuadr√°tico Medio): {train_metrics['RMSE']:.4f} PSI")
    print(f"        ‚Ä¢ MAPE (Error Porcentual): {train_metrics['MAPE']:.2f}%")
    print(f"        ‚Ä¢ R¬≤ (Coeficiente Determinaci√≥n): {train_metrics['R¬≤']:.4f}")

    print(f"    üß™ PRUEBA:")
    print(f"        ‚Ä¢ MAE (Error Absoluto Medio): {test_metrics['MAE']:.4f} PSI")
    print(f"        ‚Ä¢ RMSE (Error Cuadr√°tico Medio): {test_metrics['RMSE']:.4f} PSI")
    print(f"        ‚Ä¢ MAPE (Error Porcentual): {test_metrics['MAPE']:.2f}%")
    print(f"        ‚Ä¢ R¬≤ (Coeficiente Determinaci√≥n): {test_metrics['R¬≤']:.4f}")

    # Quality label is derived from the test MAPE only (thresholds 5 / 10 / 15).
    if test_metrics['MAPE'] < 5:
        quality = "EXCELENTE üåü"
    elif test_metrics['MAPE'] < 10:
        quality = "BUENA ‚úÖ"
    elif test_metrics['MAPE'] < 15:
        quality = "ACEPTABLE ‚ö†Ô∏è"
    else:
        quality = "NECESITA MEJORA ‚ùå"

    print(f"    üèÜ CALIDAD DEL MODELO: {quality}")

    # The rate is computed once and reused for both the printout and the
    # returned dict, so the two can never disagree.
    n_anomalies = len(df_anomalies)
    anomaly_rate = 0

    print(f"\nüö® DETECCI√ìN DE ANOMAL√çAS:")
    if n_anomalies > 0:
        print(f"    ‚Ä¢ Total de anomal√≠as detectadas: {n_anomalies}")
        severity_counts = df_anomalies['severidad'].value_counts()
        for severity, count in severity_counts.items():
            print(f"    ‚Ä¢ {severity}: {count} eventos")

        # Resolve the denominator lazily: the module-level df_test is only
        # touched when there are anomalies and no explicit df_eval was given.
        evaluation_set = df_eval if df_eval is not None else df_test
        n_evaluated = len(evaluation_set)
        if n_evaluated > 0:  # an empty evaluation set must not raise ZeroDivisionError
            anomaly_rate = (n_anomalies / n_evaluated) * 100
        print(f"    ‚Ä¢ Tasa de anomal√≠as: {anomaly_rate:.2f}%")
    else:
        print(f"    ‚Ä¢ No se detectaron anomal√≠as significativas")

    # NOTE(review): the configuration below is hard-coded text, not read from
    # the trained model; confirm it matches the actual training setup.
    print(f"\nüîß CONFIGURACI√ìN DEL MODELO:")
    print(f"    ‚Ä¢ Algoritmo: NeuralProphet")
    print(f"    ‚Ä¢ Lags autorregresivos: 48 horas")
    print(f"    ‚Ä¢ Variables ex√≥genas: Temperatura, Flujo, Vibraci√≥n")
    print(f"    ‚Ä¢ Estacionalidades: Diaria, Semanal, Anual")
    print(f"    ‚Ä¢ √âpocas de entrenamiento: 100")

    print(f"\n‚úÖ FORTALEZAS DEL MODELO:")
    print(f"    ‚Ä¢ Captura patrones estacionales complejos")
    print(f"    ‚Ä¢ Integra m√∫ltiples variables ex√≥genas")
    print(f"    ‚Ä¢ Genera intervalos de confianza")
    print(f"    ‚Ä¢ Detecci√≥n autom√°tica de anomal√≠as")
    print(f"    ‚Ä¢ Escalable para producci√≥n")

    print(f"\n‚ö†Ô∏è  √ÅREAS DE MEJORA:")
    print(f"    ‚Ä¢ Validaci√≥n con datos reales de sensores")
    print(f"    ‚Ä¢ Optimizaci√≥n de hiperpar√°metros")
    print(f"    ‚Ä¢ Incorporaci√≥n de m√°s variables contextuales")
    print(f"    ‚Ä¢ Evaluaci√≥n en diferentes escenarios operativos")

    print("="*80)

    return {
        'model_quality': quality,
        'test_mape': test_metrics['MAPE'],
        'anomalies_detected': n_anomalies,
        'anomaly_rate': anomaly_rate
    }

# Generate the executive summary; the returned dict is kept in
# executive_summary for later cells.
executive_summary = generate_executive_summary(train_metrics, test_metrics, df_anomalies, df_raw)

# =============================================================================
# 12. EXPORTACIÓN DE RESULTADOS Y MODELO
# =============================================================================

def export_results_and_model(model, forecast, df_anomalies, metrics):
    """
    Export predictions, anomalies, a metrics report and the model weights to
    the "sentinel_results" directory.

    Parameters:
    - model: fitted forecaster; `model.model.state_dict()` is passed to
      `torch.save`
    - forecast: DataFrame providing the 'ds' and 'yhat1' columns to export
    - df_anomalies: detected anomalies; written to CSV only when non-empty
    - metrics: test-split metrics, stored under 'test_metrics' in the report

    Returns:
    - the results directory name

    Notes:
    - 'train_metrics' in the report and 'regressors' in the model config are
      read from the module-level `train_metrics` and `key_regressors`.
    - A failure while saving the model is reported on stdout and does not
      abort the export (best-effort, as in the original design).
    """
    import json
    import os

    print("\nüíæ Exportando resultados...")

    # Create the results directory; exist_ok avoids the check-then-create race.
    results_dir = "sentinel_results"
    os.makedirs(results_dir, exist_ok=True)

    # Track what was really written so the final listing never advertises a
    # file that was skipped or failed.
    exported_files = []

    # Export predictions
    forecast_export = forecast[['ds', 'yhat1']].copy()
    forecast_export.columns = ['timestamp', 'predicted_pressure']
    forecast_export.to_csv(f"{results_dir}/sentinel_predictions.csv", index=False)
    exported_files.append("sentinel_predictions.csv")

    # Export anomalies (only when there is something to export)
    if len(df_anomalies) > 0:
        df_anomalies.to_csv(f"{results_dir}/sentinel_anomalies.csv", index=False)
        exported_files.append("sentinel_anomalies.csv")

    # Build the metrics report. The test metrics come from the `metrics`
    # argument rather than a module global, so the parameter is honoured.
    metrics_report = {
        'model_name': 'NeuralProphet_SENTINEL_Baseline',
        'training_date': datetime.now().isoformat(),
        'train_metrics': train_metrics,
        'test_metrics': metrics,
        'anomalies_count': len(df_anomalies)
    }

    # default=str stringifies any value json cannot encode natively.
    with open(f"{results_dir}/sentinel_metrics_report.json", 'w', encoding="utf-8") as f:
        json.dump(metrics_report, f, indent=2, default=str)
    exported_files.append("sentinel_metrics_report.json")

    # Save the model weights and a description of its configuration.
    try:
        import torch
        torch.save(model.model.state_dict(), f"{results_dir}/sentinel_model_weights.pth")
        exported_files.append("sentinel_model_weights.pth")

        # NOTE(review): these values are literals, not read from `model`;
        # confirm they match the configuration the model was trained with.
        model_config = {
            'n_lags': 48,
            'regressors': key_regressors,
            'seasonalities': ['yearly', 'weekly', 'daily'],
            'training_params': {
                'epochs': 100,
                'batch_size': 64,
                'learning_rate': 0.01
            }
        }

        with open(f"{results_dir}/sentinel_model_config.json", 'w', encoding="utf-8") as f:
            json.dump(model_config, f, indent=2)
        exported_files.append("sentinel_model_config.json")

        print(f"‚úÖ Modelo guardado en: {results_dir}/")

    except Exception as e:
        # Deliberate best-effort: the CSV/JSON exports above remain valid.
        print(f"‚ö†Ô∏è  Error al guardar modelo: {e}")

    print(f"üìÅ Archivos exportados en directorio: {results_dir}/")
    for filename in exported_files:
        print(f"    ‚Ä¢ {filename}")

    return results_dir

# Export results; test_metrics is passed as the function's `metrics` argument
# and the returned directory name is kept for the final summary printout.
results_directory = export_results_and_model(model, forecast, df_anomalies, test_metrics)

# =============================================================================
# 13. FUNCIÓN PARA PREDICCIÓN EN TIEMPO REAL
# =============================================================================

def create_realtime_prediction_function(model, regressors):
    """
    Build a prediction closure bound to `model` and `regressors`.

    Parameters:
    - model: object exposing `predict(DataFrame) -> DataFrame`
    - regressors: list of regressor column names required in the input

    Returns:
    - a function `predict_next_values(recent_data, hours_ahead=24)`
    """

    def predict_next_values(recent_data, hours_ahead=24):
        """
        Run the model on recent data and return its last predictions.

        Parameters:
        - recent_data: DataFrame with 'ds', 'y' and every regressor column,
          holding at least 48 rows
        - hours_ahead: number of trailing prediction rows to return

        Returns:
        - list of dicts with 'timestamp', 'predicted_pressure',
          'confidence_lower' and 'confidence_upper'; the two bounds are None
          when the model output has no 'yhat1_lower' / 'yhat1_upper' columns
        - on any failure, a dict {'error': <message>} instead of raising

        NOTE(review): the returned rows are the tail of `model.predict` over
        `recent_data` itself; whether they lie beyond the last input
        timestamp depends on the model - confirm against the model setup.
        """

        try:
            # Input validation: all columns present and enough history.
            required_cols = ['ds', 'y'] + regressors
            absent = [name for name in required_cols if name not in recent_data.columns]
            if absent:
                raise ValueError(f"Faltan columnas requeridas: {required_cols}")

            if len(recent_data) < 48:
                raise ValueError("Se requieren al menos 48 horas de datos hist√≥ricos")

            # Keep only the needed columns, in chronological order.
            ordered = (
                recent_data[required_cols]
                .copy()
                .sort_values('ds')
                .reset_index(drop=True)
            )

            # Predict, then keep the trailing `hours_ahead` rows.
            latest = model.predict(ordered).tail(hours_ahead)

            # Shape the output and attach the bounds when available.
            output = latest[['ds', 'yhat1']].copy()
            output.columns = ['timestamp', 'predicted_pressure']
            bound_columns = (
                ('confidence_lower', 'yhat1_lower'),
                ('confidence_upper', 'yhat1_upper'),
            )
            for target, source in bound_columns:
                output[target] = latest.get(source, None)

            return output.to_dict('records')

        except Exception as e:
            return {'error': str(e)}

    return predict_next_values

# Create the real-time prediction function bound to the trained model and
# the regressor list.
realtime_predictor = create_realtime_prediction_function(model, key_regressors)

# Announce the predictor and print a usage example. The triple-quoted text
# below is only printed, never executed.
print("\nüîß Funci√≥n de predicci√≥n en tiempo real creada!")
print("üí° Ejemplo de uso:")
print("""
# Datos recientes de sensores
recent_sensor_data = df_raw.tail(72)  # √öltimas 72 horas

# Generar predicci√≥n para pr√≥ximas 24 horas
predictions = realtime_predictor(recent_sensor_data, hours_ahead=24)

# Resultado: lista de diccionarios con timestamp y predicted_pressure
""")

# =============================================================================
# FINALIZACIÓN DEL NOTEBOOK
# =============================================================================

# Closing banner: the notebook ran to completion.
print("\n" + "üéâ" * 20)
print("‚úÖ NOTEBOOK SENTINEL - NEURALPROPHET COMPLETADO EXITOSAMENTE!")
print("üéâ" * 20)

# Final recap, read from the module-level results computed earlier
# (test_metrics, df_anomalies, results_directory).
print(f"\nüìã RESUMEN FINAL:")
print(f"    ‚Ä¢ Modelo baseline establecido con NeuralProphet")
print(f"    ‚Ä¢ MAPE en prueba: {test_metrics['MAPE']:.2f}%")
print(f"    ‚Ä¢ Anomal√≠as detectadas: {len(df_anomalies)}")
print(f"    ‚Ä¢ Funci√≥n de predicci√≥n en tiempo real creada")
print(f"    ‚Ä¢ Resultados exportados en: {results_directory}/")

# Static list of planned next steps.
print(f"\nüöÄ SIGUIENTE FASE:")
print(f"    ‚Ä¢ Integrar con API REST de SENTINEL")
print(f"    ‚Ä¢ Desarrollar dashboard de visualizaci√≥n")
print(f"    ‚Ä¢ Implementar sistema de alertas autom√°ticas")
print(f"    ‚Ä¢ Evaluar modelo TFT personalizado para comparaci√≥n")

print(f"\nüìß Para dudas o mejoras, contactar al equipo de desarrollo de SENTINEL")
print("="*80)