In [2]:
# ============================================================================
# STEP 1: PROPER DATA LOADING & INITIAL EXPLORATION
# ============================================================================

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime, timedelta
import warnings
warnings.filterwarnings('ignore')

def load_and_explore_data(data_dir='../revenue_center_data', center_ids=range(1, 10),
                          meal_periods=('Breakfast', 'Dinner', 'Lunch')):
    """
    Load all revenue center data and perform comprehensive exploration
    WITHOUT any premature transformations or outlier removal.

    Parameters
    ----------
    data_dir : str or path-like, default '../revenue_center_data'
        Directory that holds one ``RevenueCenter_<id>_data.csv`` file per center.
    center_ids : iterable of int, default range(1, 10)
        Revenue center ids to load; each id selects one CSV file and is written
        to the 'RevenueCenterID' column of its rows.
    meal_periods : iterable of str, default ('Breakfast', 'Dinner', 'Lunch')
        Values of the 'MealPeriod' column to summarise individually.

    Returns
    -------
    pandas.DataFrame
        All centers concatenated, with 'Date' parsed to datetime and the
        helper columns 'RevenueCenterID', 'DayOfYear' and 'WeekOfYear' added.
        'CheckTotal' is returned untouched (the summary is print-only).

    Raises
    ------
    ValueError
        If ``center_ids`` is empty or the loaded files lack one of the
        columns this function reads ('Date', 'CheckTotal', 'MealPeriod').
    """
    print("="*80)
    print("STEP 1: LOADING AND EXPLORING RAW DATA")
    print("="*80)

    # Load every requested revenue center and tag its rows with the center id
    revenue_centers = []
    for i in center_ids:
        df = pd.read_csv(f'{data_dir}/RevenueCenter_{i}_data.csv')
        df['RevenueCenterID'] = i
        revenue_centers.append(df)

    if not revenue_centers:
        raise ValueError("center_ids is empty: there is no revenue center file to load")

    # Combine all revenue centers
    df_all = pd.concat(revenue_centers, ignore_index=True)

    # Fail early with a readable message instead of a KeyError further down
    missing_columns = [col for col in ('Date', 'CheckTotal', 'MealPeriod') if col not in df_all.columns]
    if missing_columns:
        raise ValueError(f"Revenue center files are missing required columns: {missing_columns}")

    df_all['Date'] = pd.to_datetime(df_all['Date'])

    print(f"✓ Loaded data for {len(revenue_centers)} revenue centers")
    print(f"✓ Total records: {len(df_all):,}")
    print(f"✓ Date range: {df_all['Date'].min()} to {df_all['Date'].max()}")
    print(f"✓ Total days: {(df_all['Date'].max() - df_all['Date'].min()).days + 1}")

    # Basic data quality checks
    print(f"\n📊 Data Quality Overview:")
    print(f"  Missing values: {df_all.isnull().sum().sum()}")
    print(f"  Duplicate records: {df_all.duplicated().sum()}")
    print(f"  Zero revenue records: {(df_all['CheckTotal'] == 0).sum()}")
    print(f"  Negative revenue records: {(df_all['CheckTotal'] < 0).sum()}")

    # Revenue distribution analysis (HONEST - no manipulation)
    print(f"\n💰 Revenue Distribution (RAW - NO MANIPULATION):")
    print(f"  Overall range: ${df_all['CheckTotal'].min():.2f} - ${df_all['CheckTotal'].max():.2f}")
    print(f"  Mean: ${df_all['CheckTotal'].mean():.2f}")
    print(f"  Median: ${df_all['CheckTotal'].median():.2f}")
    print(f"  Std: ${df_all['CheckTotal'].std():.2f}")

    # Per meal period analysis
    print(f"\n🍽️ By Meal Period (RAW):")
    for meal in meal_periods:
        meal_data = df_all[df_all['MealPeriod'] == meal]['CheckTotal']
        if meal_data.empty:
            # Nothing to summarise; avoids printing a block of NaN statistics
            print(f"  {meal}: no records")
            continue
        print(f"  {meal}:")
        print(f"    Range: ${meal_data.min():.2f} - ${meal_data.max():.2f}")
        # CV = coefficient of variation (std relative to the mean)
        print(f"    Mean: ${meal_data.mean():.2f}, CV: {meal_data.std()/meal_data.mean():.3f}")

    # Temporal patterns
    print(f"\n📅 Temporal Patterns:")
    df_all['DayOfYear'] = df_all['Date'].dt.dayofyear
    df_all['WeekOfYear'] = df_all['Date'].dt.isocalendar().week

    # Check for seasonal patterns: per-calendar-month CV, averaged over months
    monthly_revenue = df_all.groupby(df_all['Date'].dt.month)['CheckTotal'].agg(['mean', 'std'])
    print(f"  Monthly revenue variation (CV): {(monthly_revenue['std'] / monthly_revenue['mean']).mean():.3f}")

    return df_all

# Execute data loading: df_raw is the combined, untransformed frame that the
# temporal split in the next step receives.
df_raw = load_and_explore_data()

STEP 1: LOADING AND EXPLORING RAW DATA
✓ Loaded data for 9 revenue centers
✓ Total records: 13,122
✓ Date range: 2023-01-01 00:00:00 to 2024-04-30 00:00:00
✓ Total days: 486

📊 Data Quality Overview:
  Missing values: 0
  Duplicate records: 0
  Zero revenue records: 4108
  Negative revenue records: 0

💰 Revenue Distribution (RAW - NO MANIPULATION):
  Overall range: $0.00 - $138400.00
  Mean: $2499.77
  Median: $225.00
  Std: $6740.14

🍽️ By Meal Period (RAW):
  Breakfast:
    Range: $0.00 - $35640.00
    Mean: $2202.28, CV: 2.595
  Dinner:
    Range: $0.00 - $138400.00
    Mean: $4029.75, CV: 2.197
  Lunch:
    Range: $0.00 - $100760.00
    Mean: $1267.29, CV: 3.645

📅 Temporal Patterns:
  Monthly revenue variation (CV): 2.526


In [3]:
# ============================================================================
# STEP 2: PROPER TEMPORAL TRAIN-TEST SPLIT (NO DATA LEAKAGE)
# ============================================================================

def create_proper_temporal_split(df, test_months=3):
    """
    Create proper temporal split for time series forecasting
    CRITICAL: No future data in training, no overlap between train/test

    Parameters
    ----------
    df : pandas.DataFrame
        Needs the columns 'Date' (datetime), 'RevenueCenterID', 'MealPeriod'
        and 'CheckTotal'.
    test_months : int, default 3
        Number of calendar months, counted back from the latest date, that
        are held out for testing.

    Returns
    -------
    (train_data, test_data, split_date)
        ``train_data`` holds rows with Date <= split_date, ``test_data`` rows
        with Date > split_date; both are independent copies sorted by
        ('Date', 'RevenueCenterID', 'MealPeriod').

    Raises
    ------
    ValueError
        If either side of the split is empty (for example ``test_months=0``
        or a hold-out window longer than the data).
    """
    print("="*80)
    print("STEP 2: PROPER TEMPORAL TRAIN-TEST SPLIT")
    print("="*80)

    # Sort by date to ensure temporal order
    df_sorted = df.sort_values(['Date', 'RevenueCenterID', 'MealPeriod']).reset_index(drop=True)

    # Calculate split date (last N months for testing)
    max_date = df_sorted['Date'].max()
    split_date = max_date - pd.DateOffset(months=test_months)

    print(f"📅 Temporal Split Strategy:")
    print(f"  Training period: {df_sorted['Date'].min()} to {split_date}")
    print(f"  Testing period: {split_date + pd.Timedelta(days=1)} to {max_date}")
    print(f"  Test period: {test_months} months ({(max_date - split_date).days} days)")

    # Create temporal split
    train_data = df_sorted[df_sorted['Date'] <= split_date].copy()
    test_data = df_sorted[df_sorted['Date'] > split_date].copy()

    # An empty side makes max()/min() return NaT; the leakage assert below would
    # then fail and blame "leakage" for what is really a bad split window.
    if train_data.empty or test_data.empty:
        raise ValueError(
            f"test_months={test_months} leaves {len(train_data)} training and "
            f"{len(test_data)} testing records; both sides must be non-empty"
        )

    print(f"\n✅ Split Results:")
    print(f"  Training records: {len(train_data):,}")
    print(f"  Testing records: {len(test_data):,}")
    print(f"  Training days: {train_data['Date'].nunique()}")
    print(f"  Testing days: {test_data['Date'].nunique()}")

    # Verify no temporal leakage
    assert train_data['Date'].max() < test_data['Date'].min(), "❌ TEMPORAL LEAKAGE DETECTED!"
    print(f"  ✅ No temporal leakage confirmed")

    # Check data balance
    train_revenue = train_data['CheckTotal'].sum()
    test_revenue = test_data['CheckTotal'].sum()
    print(f"  Training revenue: ${train_revenue:,.2f}")
    print(f"  Testing revenue: ${test_revenue:,.2f}")
    print(f"  Revenue ratio (test/train): {test_revenue/train_revenue:.3f}")

    return train_data, test_data, split_date

# Execute temporal split: the last 3 months of df_raw become the test set;
# split_date is the last date that still belongs to training.
train_df, test_df, split_date = create_proper_temporal_split(df_raw, test_months=3)

STEP 2: PROPER TEMPORAL TRAIN-TEST SPLIT
📅 Temporal Split Strategy:
  Training period: 2023-01-01 00:00:00 to 2024-01-30 00:00:00
  Testing period: 2024-01-31 00:00:00 to 2024-04-30 00:00:00
  Test period: 3 months (91 days)

✅ Split Results:
  Training records: 10,665
  Testing records: 2,457
  Training days: 395
  Testing days: 91
  ✅ No temporal leakage confirmed
  Training revenue: $23,956,016.15
  Testing revenue: $8,846,026.55
  Revenue ratio (test/train): 0.369


In [4]:
# ============================================================================
# STEP 3: CONSERVATIVE FEATURE ENGINEERING (NO DATA LEAKAGE)
# ============================================================================

def create_robust_features(train_df, test_df):
    """
    Create features using ONLY training data statistics
    Apply same transformations to test data (no leakage)

    Parameters
    ----------
    train_df, test_df : pandas.DataFrame
        Both frames are read for the columns 'Date', 'RevenueCenterID',
        'MealPeriod', 'CheckTotal', 'Month', 'DayOfWeek', 'IslamicPeriod',
        'TourismIntensity' and 'RevenueImpact'. Event flags ('IsRamadan',
        'IsEid', ...) are used only when present in both frames.

    Returns
    -------
    (train_features, test_features, available_features)
        The two frames restricted to the selected feature columns plus
        'CheckTotal', 'Date', 'RevenueCenterID' and 'MealPeriod', and the
        list of selected feature names.

    Notes
    -----
    Test rows receive one constant lag value per (center, meal) pair: the
    mean of that pair's training lag series. Pairs without usable training
    history fall back to the overall training mean of 'CheckTotal'; the test
    targets are never read to build a feature.
    """
    print("="*80)
    print("STEP 3: ROBUST FEATURE ENGINEERING")
    print("="*80)

    # Training-only fallback for (center, meal) pairs that have no usable lag
    # statistic. Using the test set's own CheckTotal here would leak the target.
    global_train_mean = train_df['CheckTotal'].mean()

    def engineer_features(df, is_training=True, train_stats=None):
        """Engineer features for a dataframe.

        Returns ``(features, lag_stats)`` when ``is_training`` is true and
        only ``features`` otherwise; ``train_stats`` is the ``lag_stats`` dict
        produced by the training pass.
        """
        df_features = df.copy()

        # 1. TEMPORAL FEATURES (no leakage): cyclical encoding of month / weekday
        df_features['Month_sin'] = np.sin(2 * np.pi * df_features['Month'] / 12)
        df_features['Month_cos'] = np.cos(2 * np.pi * df_features['Month'] / 12)
        df_features['DayOfWeek_sin'] = np.sin(2 * np.pi * df_features['DayOfWeek'] / 7)
        df_features['DayOfWeek_cos'] = np.cos(2 * np.pi * df_features['DayOfWeek'] / 7)

        centers = df_features['RevenueCenterID'].unique()
        meals = df_features['MealPeriod'].unique()

        # 2. LAG FEATURES (using only past data)
        if is_training:
            # Calculate lag statistics from training data only
            lag_stats = {}
            for center in centers:
                for meal in meals:
                    mask = (df_features['RevenueCenterID'] == center) & (df_features['MealPeriod'] == meal)
                    if not mask.any():
                        # Pair never occurs in training: store nothing so the
                        # test pass uses the fallback instead of a NaN statistic
                        continue
                    revenue = df_features.loc[mask].sort_values('Date')['CheckTotal']

                    # Rolling means shifted by one row, so a row only sees earlier rows
                    avg_7d = revenue.rolling(window=7, min_periods=1).mean().shift(1)
                    avg_30d = revenue.rolling(window=30, min_periods=1).mean().shift(1)

                    # Store statistics for test data
                    lag_stats[(center, meal)] = {
                        'mean_7d': avg_7d.mean(),
                        'mean_30d': avg_30d.mean()
                    }

                    # The first row of each group has no history; fill it with the group mean
                    group_mean = revenue.mean()
                    df_features.loc[mask, 'Revenue_7d_avg'] = avg_7d.fillna(group_mean)
                    df_features.loc[mask, 'Revenue_30d_avg'] = avg_30d.fillna(group_mean)

            return df_features, lag_stats

        # Apply training statistics to test data
        train_stats = train_stats or {}
        for center in centers:
            for meal in meals:
                mask = (df_features['RevenueCenterID'] == center) & (df_features['MealPeriod'] == meal)
                pair_stats = train_stats.get((center, meal), {})
                for column, key in (('Revenue_7d_avg', 'mean_7d'), ('Revenue_30d_avg', 'mean_30d')):
                    value = pair_stats.get(key, np.nan)
                    if pd.isna(value):
                        # Unseen pair, or a training group too short to have a
                        # lag statistic (single row -> all-NaN shifted series)
                        value = global_train_mean
                    df_features.loc[mask, column] = value

        return df_features

    # Engineer features for training data
    print("🔧 Engineering features for training data...")
    train_features, lag_stats = engineer_features(train_df, is_training=True)

    # Apply same transformations to test data (no leakage)
    print("🔧 Applying same transformations to test data...")
    test_features = engineer_features(test_df, is_training=False, train_stats=lag_stats)

    # 3. ONE-HOT ENCODING (consistent across train/test)
    print("🏷️ Creating one-hot encodings...")

    categorical_cols = ['MealPeriod', 'IslamicPeriod', 'TourismIntensity', 'RevenueImpact']

    for col in categorical_cols:
        # Categories come from training data only; a category that appears
        # only in the test set therefore gets no indicator column
        unique_values = train_features[col].unique()

        # Create one-hot encoding for both datasets
        for value in unique_values:
            new_col = f"{col}_{value}"
            train_features[new_col] = (train_features[col] == value).astype(int)
            test_features[new_col] = (test_features[col] == value).astype(int)

    # 4. FEATURE SELECTION (based on domain knowledge)
    # Only indicator columns whose name ends in one of these suffixes are kept.
    one_hot_suffixes = ('_Breakfast', '_Dinner', '_Lunch', '_Normal', '_High', '_Low',
                        '_Boost', '_Neutral', '_Decrease')
    feature_columns = [
        # Temporal features
        'Month_sin', 'Month_cos', 'DayOfWeek_sin', 'DayOfWeek_cos',
        # Event features (most important)
        'IsRamadan', 'IsEid', 'IsPreRamadan', 'IsPostRamadan', 'IsLast10Ramadan',
        'IsDSF', 'IsSummerEvent', 'IsNationalDay', 'IsNewYear', 'IsMarathon',
        'IsGITEX', 'IsFoodFestival', 'IsPreEvent', 'IsPostEvent',
        # Lag features
        'Revenue_7d_avg', 'Revenue_30d_avg',
        # One-hot encoded features
    ] + [col for col in train_features.columns
         if any(cat in col for cat in categorical_cols) and col.endswith(one_hot_suffixes)]

    # Ensure all features exist in both datasets
    available_features = [col for col in feature_columns if col in train_features.columns and col in test_features.columns]

    print(f"✅ Feature Engineering Complete:")
    print(f"  Total features available: {len(available_features)}")
    print(f"  Training samples: {len(train_features)}")
    print(f"  Test samples: {len(test_features)}")

    return train_features[available_features + ['CheckTotal', 'Date', 'RevenueCenterID', 'MealPeriod']], \
           test_features[available_features + ['CheckTotal', 'Date', 'RevenueCenterID', 'MealPeriod']], \
           available_features

# Execute feature engineering: returns the engineered train/test frames and
# the list of feature column names shared by both.
train_engineered, test_engineered, feature_list = create_robust_features(train_df, test_df)

STEP 3: ROBUST FEATURE ENGINEERING
🔧 Engineering features for training data...
🔧 Applying same transformations to test data...
🏷️ Creating one-hot encodings...
✅ Feature Engineering Complete:
  Total features available: 30
  Training samples: 10665
  Test samples: 2457


In [5]:
# ============================================================================
# STEP 4: PROPER SEQUENCE CREATION FOR TIME SERIES
# ============================================================================

def create_sequences_proper(train_df, test_df, feature_cols, 
                           lookback_days=21, forecast_days=7, 
                           revenue_center_id=1):
    """
    Create sequences for CNN-LSTM with proper temporal structure
    Focus on single revenue center first for simplicity

    Parameters
    ----------
    train_df, test_df : pandas.DataFrame
        Long-format frames with 'Date', 'RevenueCenterID', 'MealPeriod',
        'CheckTotal' and every column named in ``feature_cols``.
    feature_cols : list of str
        Per-row feature columns; each is pivoted into one column per meal.
    lookback_days : int, default 21
        Number of past days in every input window.
    forecast_days : int, default 7
        Number of future days in every target window.
    revenue_center_id : default 1
        Value of 'RevenueCenterID' to build sequences for.

    Returns
    -------
    (X_train, X_test, y_train, y_test, y_train_original, y_test_original, feature_cols_daily)
        X arrays have shape (sequences, lookback_days, daily features) and
        y arrays (sequences, forecast_days, 3). The ``*_original`` arrays are
        copies of the targets. A test set that is too short for one window
        yields arrays with zero sequences but the correct trailing shape.

    Raises
    ------
    ValueError
        If a window length is not positive, a target column is missing from
        the training data, or the training data is too short for one window.
    """
    if lookback_days < 1 or forecast_days < 1:
        raise ValueError("lookback_days and forecast_days must both be at least 1")

    print("="*80)
    print("STEP 4: CREATING PROPER TIME SERIES SEQUENCES")
    print("="*80)

    def prepare_center_data(df, center_id):
        """Prepare data for a specific revenue center (one row per date)."""
        center_data = df[df['RevenueCenterID'] == center_id].copy()

        # Pivot to daily format (one column per value/meal pair); gaps become 0
        daily_data = center_data.pivot_table(
            index='Date',
            columns='MealPeriod',
            values=['CheckTotal'] + feature_cols,
            fill_value=0
        )

        # Flatten column names to "<Meal>_<value>", e.g. "Dinner_CheckTotal"
        daily_data.columns = [f"{col[1]}_{col[0]}" for col in daily_data.columns]
        daily_data = daily_data.reset_index()

        # Sort by date
        daily_data = daily_data.sort_values('Date').reset_index(drop=True)

        return daily_data

    # Prepare data for specified revenue center
    print(f"📊 Preparing sequences for Revenue Center {revenue_center_id}")

    train_daily = prepare_center_data(train_df, revenue_center_id)
    test_daily = prepare_center_data(test_df, revenue_center_id)

    target_cols = ['Breakfast_CheckTotal', 'Dinner_CheckTotal', 'Lunch_CheckTotal']
    missing_targets = [col for col in target_cols if col not in train_daily.columns]
    if missing_targets:
        raise ValueError(f"Training data has no values for target columns: {missing_targets}")

    # The training layout defines the feature set. A meal period or feature
    # that never occurs in the test window would otherwise be a missing column
    # (KeyError); it is filled with 0, the same value the pivot uses for gaps.
    test_daily = test_daily.reindex(columns=train_daily.columns, fill_value=0)

    print(f"  Training days: {len(train_daily)}")
    print(f"  Test days: {len(test_daily)}")

    # Separate features and targets
    feature_cols_daily = [col for col in train_daily.columns if col != 'Date' and not col.startswith(('Breakfast_CheckTotal', 'Dinner_CheckTotal', 'Lunch_CheckTotal'))]

    print(f"  Features per day: {len(feature_cols_daily)}")
    print(f"  Target streams: {len(target_cols)}")

    # Create sequences
    def create_sequences_from_daily(daily_data, features_cols, target_cols, lookback, forecast):
        """Create sequences from daily data with a sliding window of stride 1."""
        features = daily_data[features_cols].values
        targets = daily_data[target_cols].values

        window_count = len(daily_data) - lookback - forecast + 1
        if window_count <= 0:
            # Keep the trailing dimensions so downstream reshape/scaling still works
            return (np.empty((0, lookback, features.shape[1]), dtype=features.dtype),
                    np.empty((0, forecast, targets.shape[1]), dtype=targets.dtype))

        X, y = [], []

        for i in range(lookback, len(daily_data) - forecast + 1):
            # Features: past 'lookback' days
            X.append(features[i-lookback:i])

            # Targets: next 'forecast' days
            y.append(targets[i:i+forecast])

        return np.array(X), np.array(y)

    # Create training sequences
    X_train, y_train = create_sequences_from_daily(
        train_daily, feature_cols_daily, target_cols, lookback_days, forecast_days
    )
    if X_train.shape[0] == 0:
        raise ValueError(
            f"Training data has {len(train_daily)} days but one sequence needs "
            f"{lookback_days + forecast_days} (lookback_days + forecast_days)"
        )

    # Create test sequences
    X_test, y_test = create_sequences_from_daily(
        test_daily, feature_cols_daily, target_cols, lookback_days, forecast_days
    )

    print(f"\n✅ Sequence Creation Complete:")
    print(f"  X_train shape: {X_train.shape}")  # (sequences, lookback_days, features)
    print(f"  y_train shape: {y_train.shape}")  # (sequences, forecast_days, revenue_streams)
    print(f"  X_test shape: {X_test.shape}")
    print(f"  y_test shape: {y_test.shape}")

    # Calculate parameters per sample ratio
    total_features = X_train.shape[1] * X_train.shape[2]
    samples = X_train.shape[0]

    print(f"\n📊 Model Complexity Analysis:")
    print(f"  Training sequences: {samples}")
    print(f"  Input dimensions: {X_train.shape[1]} days × {X_train.shape[2]} features = {total_features}")
    print(f"  Samples per feature: {samples / X_train.shape[2]:.1f}")

    # Store original targets for evaluation
    y_train_original = y_train.copy()
    y_test_original = y_test.copy()

    return X_train, X_test, y_train, y_test, y_train_original, y_test_original, feature_cols_daily

# Execute sequence creation for revenue center 1: 21 past days of features
# per input window, 7 future days of the three meal revenues per target window.
X_train, X_test, y_train, y_test, y_train_orig, y_test_orig, features_used = create_sequences_proper(
    train_engineered, test_engineered, feature_list, 
    lookback_days=21, forecast_days=7, revenue_center_id=1
)

STEP 4: CREATING PROPER TIME SERIES SEQUENCES
📊 Preparing sequences for Revenue Center 1
  Training days: 395
  Test days: 91
  Features per day: 90
  Target streams: 3

✅ Sequence Creation Complete:
  X_train shape: (368, 21, 90)
  y_train shape: (368, 7, 3)
  X_test shape: (64, 21, 90)
  y_test shape: (64, 7, 3)

📊 Model Complexity Analysis:
  Training sequences: 368
  Input dimensions: 21 days × 90 features = 1890
  Samples per feature: 4.1


In [6]:
# ============================================================================
# STEP 5: CONSERVATIVE NORMALIZATION (NO OUTLIER MANIPULATION)
# ============================================================================

from sklearn.preprocessing import StandardScaler, RobustScaler
import joblib

def normalize_data_conservatively(X_train, X_test, y_train, y_test):
    """
    Scale features and targets with statistics fitted on the training arrays only.

    Features go through a RobustScaler. Targets are log1p-transformed and then
    standardised with a StandardScaler. Both fitted scalers are written to
    'feature_scaler_robust.pkl' and 'target_scaler_conservative.pkl' in the
    working directory so predictions can be denormalized later.

    Returns
    -------
    (X_train_scaled, X_test_scaled, y_train_normalized, y_test_normalized)
        Arrays with the same shapes as the corresponding inputs.
    """
    divider = "=" * 80
    print(divider)
    print("STEP 5: CONSERVATIVE NORMALIZATION")
    print(divider)

    def as_rows(array):
        """Collapse all leading axes so every row is one step of the last axis."""
        return array.reshape(-1, array.shape[-1])

    # --- 1. Features: RobustScaler fitted on the training rows only ---
    print("🔧 Normalizing features using RobustScaler...")

    train_rows = as_rows(X_train)
    test_rows = as_rows(X_test)

    feature_scaler = RobustScaler()
    train_rows_scaled = feature_scaler.fit_transform(train_rows)
    test_rows_scaled = feature_scaler.transform(test_rows)

    # Restore the (sequences, days, features) layout
    X_train_scaled = train_rows_scaled.reshape(X_train.shape)
    X_test_scaled = test_rows_scaled.reshape(X_test.shape)

    print("  ✅ Features normalized using training data statistics")

    # --- 2. Targets: log1p followed by standard scaling ---
    print("🎯 Normalizing targets conservatively...")

    y_train_flat = as_rows(y_train)
    _y_test_flat = as_rows(y_test)  # not used further; kept from the original flow

    revenue_streams = ['Breakfast', 'Dinner', 'Lunch']

    print("  Original target ranges (training):")
    for idx in range(len(revenue_streams)):
        column = y_train_flat[:, idx]
        print(f"    {revenue_streams[idx]}: ${column.min():.2f} - ${column.max():.2f} (mean: ${column.mean():.2f})")

    print("  📊 Applying log1p transformation to handle revenue ranges...")

    # log1p maps zero revenue to zero and compresses the wide revenue range
    y_train_log = np.log1p(y_train)
    y_test_log = np.log1p(y_test)

    y_train_log_rows = as_rows(y_train_log)
    y_test_log_rows = as_rows(y_test_log)

    target_scaler = StandardScaler()
    y_train_rows_scaled = target_scaler.fit_transform(y_train_log_rows)
    y_test_rows_scaled = target_scaler.transform(y_test_log_rows)

    # Restore the (sequences, days, streams) layout
    y_train_normalized = y_train_rows_scaled.reshape(y_train.shape)
    y_test_normalized = y_test_rows_scaled.reshape(y_test.shape)

    print("  ✅ Targets normalized using log1p + StandardScaler")
    print("  Normalized target ranges:")
    for idx in range(len(revenue_streams)):
        column = y_train_normalized[:, :, idx].flatten()
        print(f"    {revenue_streams[idx]}: {column.min():.3f} - {column.max():.3f}")

    # Persist both scalers; denormalization reloads the target scaler from disk
    for scaler, filename in ((feature_scaler, 'feature_scaler_robust.pkl'),
                             (target_scaler, 'target_scaler_conservative.pkl')):
        joblib.dump(scaler, filename)

    print("  💾 Scalers saved for denormalization")

    return X_train_scaled, X_test_scaled, y_train_normalized, y_test_normalized

def denormalize_predictions_conservative(predictions_normalized, original_targets_for_reference,
                                         scaler_path='target_scaler_conservative.pkl',
                                         min_prediction=1.0):
    """
    Denormalize predictions using the conservative approach

    Reverses the target normalization step by step: inverse standard scaling
    with the scaler stored on disk, then ``expm1`` to undo ``log1p``.

    Parameters
    ----------
    predictions_normalized : numpy.ndarray
        Model output in normalized space; the last axis must match the number
        of columns the stored scaler was fitted on.
    original_targets_for_reference
        Unused; kept so existing call sites keep working.
    scaler_path : str, default 'target_scaler_conservative.pkl'
        File the fitted target scaler is loaded from.
    min_prediction : float, default 1.0
        Lower bound applied to the result. NOTE(review): the default of 1.0 is
        the original behavior, but it means a zero-revenue meal can never be
        predicted as 0; pass ``0.0`` to forbid only negative values.

    Returns
    -------
    numpy.ndarray
        Predictions in revenue units, same shape as ``predictions_normalized``.
    """
    # Load scalers
    # NOTE(review): joblib.load unpickles the file; only load scaler files
    # written by this notebook.
    target_scaler = joblib.load(scaler_path)

    predictions_normalized = np.asarray(predictions_normalized)

    # Reshape for denormalization: the scaler works on 2-D (rows, streams) data
    pred_reshaped = predictions_normalized.reshape(-1, predictions_normalized.shape[-1])

    # Inverse standard scaling
    pred_log = target_scaler.inverse_transform(pred_reshaped)

    # Inverse log transformation
    pred_actual = np.expm1(pred_log)  # expm1 is inverse of log1p

    # Reshape back
    pred_actual = pred_actual.reshape(predictions_normalized.shape)

    # Enforce the lower bound (business constraint: no negative revenue)
    pred_actual = np.maximum(pred_actual, min_prediction)

    return pred_actual

# Execute normalization: scalers are fitted on the training arrays and then
# applied to the test arrays.
X_train_norm, X_test_norm, y_train_norm, y_test_norm = normalize_data_conservatively(
    X_train, X_test, y_train, y_test
)

STEP 5: CONSERVATIVE NORMALIZATION
🔧 Normalizing features using RobustScaler...
  ✅ Features normalized using training data statistics
🎯 Normalizing targets conservatively...
  Original target ranges (training):
    Breakfast: $0.00 - $8210.80 (mean: $808.29)
    Dinner: $365.50 - $10052.50 (mean: $2519.56)
    Lunch: $0.00 - $4504.00 (mean: $664.09)
  📊 Applying log1p transformation to handle revenue ranges...
  ✅ Targets normalized using log1p + StandardScaler
  Normalized target ranges:
    Breakfast: -6.695 - 2.789
    Dinner: -3.674 - 3.057
    Lunch: -5.813 - 2.122
  💾 Scalers saved for denormalization


In [7]:
# ============================================================================
# STEP 6: APPROPRIATELY SIZED CNN-LSTM MODEL
# ============================================================================

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, LSTM, Dense, Dropout, BatchNormalization, Reshape
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint

def build_appropriately_sized_model(input_shape, output_shape, complexity='minimal', n_samples=None):
    """
    Build CNN-LSTM model with appropriate complexity for available data

    Parameters
    ----------
    input_shape : tuple
        Shape of one input sample, passed to the first Conv1D layer.
    output_shape : tuple
        Shape of one prediction; the last Dense layer has
        ``prod(output_shape)`` units and is reshaped to this shape.
    complexity : {'minimal', 'moderate'}, default 'minimal'
        Selects one of the two architectures defined below.
    n_samples : int, optional
        Number of training sequences, used only for the printed parameter
        budget (one parameter per 10 samples). When omitted, the notebook
        variable ``X_train_norm`` is used if it exists (the original
        behavior); otherwise the budget check is skipped.

    Returns
    -------
    keras Sequential model, compiled with adam/mse only so parameters can be
    counted; callers are expected to compile it again with their own settings.

    Raises
    ------
    ValueError
        If ``complexity`` is not one of the supported levels.
    """
    # Validate first: an unknown level used to fall through both branches and
    # fail later with an UnboundLocalError on `model`.
    supported_levels = ('minimal', 'moderate')
    if complexity not in supported_levels:
        raise ValueError(f"Unknown complexity {complexity!r}; expected one of {supported_levels}")

    print("="*80)
    print("STEP 6: BUILDING APPROPRIATELY SIZED MODEL")
    print("="*80)

    print(f"📐 Model specifications:")
    print(f"  Input shape: {input_shape}")
    print(f"  Output shape: {output_shape}")
    print(f"  Complexity level: {complexity}")

    # Calculate recommended model size based on data
    if n_samples is None:
        # Backward compatibility with callers that rely on the notebook global
        legacy_train_array = globals().get('X_train_norm')
        if legacy_train_array is not None:
            n_samples = legacy_train_array.shape[0]
    samples = n_samples
    max_params = samples // 10 if samples is not None else None  # Conservative: 10 samples per parameter

    if samples is not None:
        print(f"  Training samples: {samples}")
        print(f"  Recommended max parameters: {max_params:,}")
    else:
        print(f"  Training samples: unknown (pass n_samples to enable the parameter budget check)")

    # Dense expects a plain int; np.prod returns a numpy integer
    output_units = int(np.prod(output_shape))

    if complexity == 'minimal':
        # Minimal model for limited data
        model = Sequential([
            # Single CNN layer for local pattern detection
            Conv1D(filters=16, kernel_size=3, activation='relu', input_shape=input_shape),
            BatchNormalization(),
            Dropout(0.2),

            # Single LSTM layer for temporal dependencies
            LSTM(32, return_sequences=False),
            BatchNormalization(),
            Dropout(0.3),

            # Dense layers for prediction
            Dense(16, activation='relu'),
            Dropout(0.2),
            Dense(output_units, activation='linear'),
            Reshape(output_shape)
        ])

    else:  # complexity == 'moderate'
        # Moderate model if we have more data
        model = Sequential([
            # Two CNN layers
            Conv1D(filters=32, kernel_size=3, activation='relu', input_shape=input_shape),
            BatchNormalization(),
            Conv1D(filters=16, kernel_size=3, activation='relu'),
            Dropout(0.2),

            # LSTM layer
            LSTM(64, return_sequences=False),
            BatchNormalization(),
            Dropout(0.3),

            # Dense layers
            Dense(32, activation='relu'),
            Dropout(0.2),
            Dense(output_units, activation='linear'),
            Reshape(output_shape)
        ])

    # Count actual parameters
    model.compile(optimizer='adam', loss='mse')  # Temporary compilation to count params
    actual_params = model.count_params()

    print(f"\n📊 Model Architecture:")
    print(f"  Actual parameters: {actual_params:,}")

    if samples:
        print(f"  Parameters per sample: {actual_params / samples:.1f}")
        print(f"  Within recommended limit: {'✅' if actual_params <= max_params else '❌'}")

        if actual_params > max_params:
            print(f"  ⚠️ WARNING: Model may overfit with current data size")

    return model

def compile_model_conservatively(model):
    """
    Compile ``model`` with an Adam optimizer, Huber loss and an MAE metric.

    The model is compiled in place and also returned, so the call can be
    used in an assignment.
    """
    print("\n🔧 Compiling model with conservative settings...")

    # Optimizer hyper-parameters, spelled out explicitly
    adam_settings = {
        'learning_rate': 0.001,
        'beta_1': 0.9,
        'beta_2': 0.999,
        'epsilon': 1e-7,
    }
    conservative_adam = Adam(**adam_settings)

    # Huber loss is less sensitive to large errors than MSE
    model.compile(optimizer=conservative_adam, loss='huber', metrics=['mae'])

    print("  ✅ Model compiled with Huber loss and conservative Adam optimizer")

    return model

# Build and compile model
# input_shape  = (lookback days, features per day) of one input window
# output_shape = (forecast days, revenue streams) of one target window
input_shape = (X_train_norm.shape[1], X_train_norm.shape[2])
output_shape = (y_train_norm.shape[1], y_train_norm.shape[2])

# The builder compiles once to count parameters; the second call replaces
# that with the Huber-loss training configuration.
model = build_appropriately_sized_model(input_shape, output_shape, complexity='minimal')
model = compile_model_conservatively(model)

# Display model summary
print("\n📋 Model Summary:")
model.summary()

STEP 6: BUILDING APPROPRIATELY SIZED MODEL
📐 Model specifications:
  Input shape: (21, 90)
  Output shape: (7, 3)
  Complexity level: minimal
  Training samples: 368
  Recommended max parameters: 36

📊 Model Architecture:
  Actual parameters: 11,685
  Parameters per sample: 31.8
  Within recommended limit: ❌

🔧 Compiling model with conservative settings...
  ✅ Model compiled with Huber loss and conservative Adam optimizer

📋 Model Summary:


In [8]:
# ============================================================================
# STEP 7: ROBUST TRAINING WITH PROPER VALIDATION
# ============================================================================

def setup_robust_training():
    """
    Create the Keras callbacks and the fit() settings used for training.

    Returns
    -------
    (callbacks, training_config)
        ``callbacks`` is [EarlyStopping, ReduceLROnPlateau, ModelCheckpoint],
        all monitoring 'val_loss'; ``training_config`` is a dict with the keys
        'batch_size', 'epochs', 'validation_split', 'shuffle' and 'verbose'.
    """
    banner = "=" * 80
    print(banner)
    print("STEP 7: ROBUST TRAINING SETUP")
    print(banner)

    monitored = 'val_loss'

    # Stop once validation loss has not improved by 0.001 for 20 epochs and
    # roll back to the best weights seen
    early_stop = EarlyStopping(
        monitor=monitored,
        patience=20,
        restore_best_weights=True,
        verbose=1,
        min_delta=0.001,
    )
    # Halve the learning rate after 10 stagnant epochs, never below 1e-6
    lr_schedule = ReduceLROnPlateau(
        monitor=monitored,
        factor=0.5,
        patience=10,
        min_lr=1e-6,
        verbose=1,
    )
    # Keep only the best model on disk
    checkpoint = ModelCheckpoint(
        'best_conservative_model.h5',
        monitor=monitored,
        save_best_only=True,
        verbose=1,
    )
    callbacks = [early_stop, lr_schedule, checkpoint]

    training_config = dict(
        batch_size=8,
        epochs=100,
        validation_split=0.2,
        shuffle=True,
        verbose=1,
    )

    print("📋 Training Configuration:")
    print("\n".join(f"  {name}: {setting}" for name, setting in training_config.items()))

    print("\n🎯 Callbacks configured:")
    for description in ("  - Early stopping (patience: 20)",
                        "  - Learning rate reduction (patience: 10)",
                        "  - Model checkpointing"):
        print(description)

    return callbacks, training_config

def train_model_robustly(model, X_train, y_train, callbacks, config):
    """
    Fit ``model`` and report a short summary of the run.

    ``config`` supplies 'batch_size', 'epochs', 'validation_split', 'shuffle'
    and 'verbose' for ``model.fit``. After fitting, the weights stored in
    'best_conservative_model.h5' are loaded back into the model.

    Returns
    -------
    The history object returned by ``model.fit``.
    """
    print("\n🚀 Starting robust training...")

    # Collect the fit() keyword arguments in one place
    fit_options = {
        'batch_size': config['batch_size'],
        'epochs': config['epochs'],
        'validation_split': config['validation_split'],
        'callbacks': callbacks,
        'shuffle': config['shuffle'],
        'verbose': config['verbose'],
    }
    history = model.fit(X_train, y_train, **fit_options)

    print("\n✅ Training completed!")

    # Reload the checkpoint file written during training
    model.load_weights('best_conservative_model.h5')
    print("✅ Best model weights loaded")

    # Summary figures taken from the recorded loss curves
    train_losses = history.history['loss']
    epochs_run = len(train_losses)
    lowest_val_loss = min(history.history['val_loss'])
    last_train_loss = train_losses[-1]
    # Gap between the last training loss and the best validation loss
    loss_gap = abs(last_train_loss - lowest_val_loss)

    print("\n📈 Training Summary:")
    print(f"  Epochs completed: {epochs_run}")
    print(f"  Best validation loss: {lowest_val_loss:.4f}")
    print(f"  Final training loss: {last_train_loss:.4f}")
    print(f"  Overfitting check: {loss_gap:.4f}")

    return history

# Execute training on the normalized training sequences; validation data is
# taken from them via the 'validation_split' entry of training_config.
callbacks, training_config = setup_robust_training()
history = train_model_robustly(model, X_train_norm, y_train_norm, callbacks, training_config)

STEP 7: ROBUST TRAINING SETUP
📋 Training Configuration:
  batch_size: 8
  epochs: 100
  validation_split: 0.2
  shuffle: True
  verbose: 1

🎯 Callbacks configured:
  - Early stopping (patience: 20)
  - Learning rate reduction (patience: 10)
  - Model checkpointing

🚀 Starting robust training...
Epoch 1/100
[1m30/37[0m [32m━━━━━━━━━━━━━━━━[0m[37m━━━━[0m [1m0s[0m 7ms/step - loss: 0.5514 - mae: 0.9382
Epoch 1: val_loss improved from inf to 0.53404, saving model to best_conservative_model.h5




[1m37/37[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 19ms/step - loss: 0.5439 - mae: 0.9292 - val_loss: 0.5340 - val_mae: 0.9181 - learning_rate: 0.0010
Epoch 2/100
[1m34/37[0m [32m━━━━━━━━━━━━━━━━━━[0m[37m━━[0m [1m0s[0m 5ms/step - loss: 0.4397 - mae: 0.8052
Epoch 2: val_loss improved from 0.53404 to 0.52661, saving model to best_conservative_model.h5




[1m37/37[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.4365 - mae: 0.8014 - val_loss: 0.5266 - val_mae: 0.9093 - learning_rate: 0.0010
Epoch 3/100
[1m32/37[0m [32m━━━━━━━━━━━━━━━━━[0m[37m━━━[0m [1m0s[0m 5ms/step - loss: 0.3901 - mae: 0.7461
Epoch 3: val_loss did not improve from 0.52661
[1m37/37[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.3870 - mae: 0.7427 - val_loss: 0.5333 - val_mae: 0.9164 - learning_rate: 0.0010
Epoch 4/100
[1m36/37[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 5ms/step - loss: 0.3497 - mae: 0.6992
Epoch 4: val_loss did not improve from 0.52661
[1m37/37[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.3499 - mae: 0.6994 - val_loss: 0.5271 - val_mae: 0.9089 - learning_rate: 0.0010
Epoch 5/100
[1m31/37[0m [32m━━━━━━━━━━━━━━━━[0m[37m━━━━[0m [1m0s[0m 5ms/step - loss: 0.3505 - mae: 0.6940
Epoch 5: val_loss improved from 0.52661 to 0.52647, saving model to best_co



[1m37/37[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.3479 - mae: 0.6915 - val_loss: 0.5265 - val_mae: 0.9081 - learning_rate: 0.0010
Epoch 6/100
[1m35/37[0m [32m━━━━━━━━━━━━━━━━━━[0m[37m━━[0m [1m0s[0m 5ms/step - loss: 0.3500 - mae: 0.6943
Epoch 6: val_loss did not improve from 0.52647
[1m37/37[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.3485 - mae: 0.6926 - val_loss: 0.5272 - val_mae: 0.9088 - learning_rate: 0.0010
Epoch 7/100
[1m30/37[0m [32m━━━━━━━━━━━━━━━━[0m[37m━━━━[0m [1m0s[0m 6ms/step - loss: 0.3299 - mae: 0.6729
Epoch 7: val_loss did not improve from 0.52647
[1m37/37[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.3290 - mae: 0.6718 - val_loss: 0.5274 - val_mae: 0.9089 - learning_rate: 0.0010
Epoch 8/100
[1m29/37[0m [32m━━━━━━━━━━━━━━━[0m[37m━━━━━[0m [1m0s[0m 6ms/step - loss: 0.3216 - mae: 0.6545
Epoch 8: val_loss improved from 0.52647 to 0.51959, saving model to best_co



[1m37/37[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.3217 - mae: 0.6557 - val_loss: 0.5196 - val_mae: 0.8995 - learning_rate: 0.0010
Epoch 9/100
[1m35/37[0m [32m━━━━━━━━━━━━━━━━━━[0m[37m━━[0m [1m0s[0m 5ms/step - loss: 0.3157 - mae: 0.6569
Epoch 9: val_loss improved from 0.51959 to 0.51163, saving model to best_conservative_model.h5




[1m37/37[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.3158 - mae: 0.6569 - val_loss: 0.5116 - val_mae: 0.8902 - learning_rate: 0.0010
Epoch 10/100
[1m35/37[0m [32m━━━━━━━━━━━━━━━━━━[0m[37m━━[0m [1m0s[0m 5ms/step - loss: 0.3032 - mae: 0.6415
Epoch 10: val_loss improved from 0.51163 to 0.50823, saving model to best_conservative_model.h5




[1m37/37[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.3038 - mae: 0.6419 - val_loss: 0.5082 - val_mae: 0.8861 - learning_rate: 0.0010
Epoch 11/100
[1m34/37[0m [32m━━━━━━━━━━━━━━━━━━[0m[37m━━[0m [1m0s[0m 5ms/step - loss: 0.3082 - mae: 0.6460
Epoch 11: val_loss did not improve from 0.50823
[1m37/37[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.3079 - mae: 0.6453 - val_loss: 0.5102 - val_mae: 0.8886 - learning_rate: 0.0010
Epoch 12/100
[1m31/37[0m [32m━━━━━━━━━━━━━━━━[0m[37m━━━━[0m [1m0s[0m 5ms/step - loss: 0.3042 - mae: 0.6385
Epoch 12: val_loss did not improve from 0.50823
[1m37/37[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.3030 - mae: 0.6371 - val_loss: 0.5133 - val_mae: 0.8923 - learning_rate: 0.0010
Epoch 13/100
[1m30/37[0m [32m━━━━━━━━━━━━━━━━[0m[37m━━━━[0m [1m0s[0m 5ms/step - loss: 0.2918 - mae: 0.6229
Epoch 13: val_loss did not improve from 0.50823
[1m37/37[0m [32m━━━

In [12]:
# ============================================================================
# CORRECTED EVALUATION CODE - ADAPTS TO YOUR CURRENT SETUP
# ============================================================================

import numpy as np
import pandas as pd
from sklearn.metrics import mean_absolute_error, mean_squared_error
from scipy.stats import pearsonr
import matplotlib.pyplot as plt

def calculate_robust_mape(y_true, y_pred, min_threshold=50):
    """
    Revenue-weighted, capped MAPE restricted to sufficiently large actuals.

    Entries whose actual value is below ``min_threshold`` are ignored, each
    remaining percentage error is capped at 500%, and the errors are averaged
    with weights proportional to the actual values.

    Parameters
    ----------
    y_true : array-like
        Actual values. Lists, tuples, pandas Series and ndarrays are accepted.
    y_pred : array-like
        Predicted values, aligned element-wise with ``y_true``.
    min_threshold : float, default 50
        Only entries with ``y_true >= min_threshold`` contribute.

    Returns
    -------
    float
        Weighted MAPE in percent, or ``np.nan`` when no actual value reaches
        ``min_threshold``.
    """
    # Coerce up front so plain Python sequences work: comparing a list with a
    # number (``[..] >= 50``) raises TypeError, and lists cannot be mask-indexed.
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)

    # Only calculate MAPE for values above threshold
    mask = y_true >= min_threshold

    if np.sum(mask) == 0:
        return np.nan

    true_filtered = y_true[mask]
    pred_filtered = y_pred[mask]

    # Per-sample absolute percentage error
    mape_values = np.abs((true_filtered - pred_filtered) / true_filtered) * 100

    # Cap extreme values at 500% to prevent outliers from dominating
    mape_values = np.minimum(mape_values, 500)

    # Weight by revenue size (larger revenues get more weight); weights sum to 1
    weights = true_filtered / np.sum(true_filtered)
    weighted_mape = np.sum(mape_values * weights)

    return weighted_mape

def calculate_smape(y_true, y_pred):
    """
    Symmetric Mean Absolute Percentage Error - more robust than MAPE

    Each sample contributes ``|y_true - y_pred| / ((|y_true| + |y_pred|) / 2) * 100``.
    Samples whose denominator is effectively zero (<= 1e-8) contribute 0.

    Parameters
    ----------
    y_true : array-like
        Actual values.
    y_pred : array-like
        Predicted values, aligned element-wise with ``y_true``.

    Returns
    -------
    float
        Mean SMAPE in percent, or ``np.nan`` for empty input.
    """
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)

    numerator = np.abs(y_true - y_pred)
    denominator = (np.abs(y_true) + np.abs(y_pred)) / 2

    if numerator.size == 0:
        # Nothing to average; avoid numpy's "mean of empty slice" warning.
        return np.nan

    # Avoid division by zero
    mask = denominator > 1e-8

    # The buffer must be float: with integer inputs, zeros_like(numerator)
    # would be an integer array and the assignment below would silently
    # truncate every percentage (e.g. 9.52 -> 9).
    smape_values = np.zeros_like(numerator, dtype=float)
    smape_values[mask] = (numerator[mask] / denominator[mask]) * 100

    return np.mean(smape_values)

def denormalize_predictions_safe(y_pred_norm, scaler_or_stats):
    """
    Map normalized predictions back to the original scale.

    Three kinds of ``scaler_or_stats`` are understood, checked in this order:

    * an object exposing ``inverse_transform``: predictions are flattened to
      2-D (rows x last axis), passed through it, and restored to their shape;
    * a dict with ``'std'`` and ``'mean'`` entries: ``pred * std + mean``;
    * anything else (e.g. ``None``): predictions are returned untouched.
    """
    # Scaler-like object: it receives a 2-D matrix whose columns are the last axis.
    if hasattr(scaler_or_stats, 'inverse_transform'):
        shape_before = y_pred_norm.shape
        as_matrix = y_pred_norm.reshape(-1, shape_before[-1])
        restored = scaler_or_stats.inverse_transform(as_matrix)
        return restored.reshape(shape_before)

    # Manual statistics: undo a (value - mean) / std style scaling.
    if isinstance(scaler_or_stats, dict):
        return y_pred_norm * scaler_or_stats['std'] + scaler_or_stats['mean']

    # No scaling information supplied: pass through as-is.
    return y_pred_norm

def _revenue_accuracy_pct(total_true, total_pred):
    """
    Percentage of the true total recovered by the predicted total.

    Returns NaN when the true total is zero, where the ratio is undefined
    (a plain numpy division would produce inf/NaN plus a runtime warning).
    """
    if total_true == 0:
        return np.nan
    return 100 * (1 - abs(total_true - total_pred) / abs(total_true))


def evaluate_model_auto_detect(model, X_test, y_test_original, 
                              scaler_or_stats=None,
                              stream_names=['Breakfast', 'Dinner', 'Lunch']):
    """
    Auto-detecting evaluation that works with your current setup

    Runs ``model.predict`` on ``X_test``, brings the predictions back to the
    scale of ``y_test_original``, prints a metrics report and returns the
    computed values.

    Parameters
    ----------
    model : object
        Anything exposing ``predict(X_test, verbose=0)``.
    X_test : array-like with ``.shape``
        Model inputs for the evaluation set.
    y_test_original : array-like
        Actual targets in original units. A 3-D array is sliced per stream as
        ``[:, :, i]``; any other rank is sliced as ``[:, i]``.
    scaler_or_stats : object, dict or None, default None
        Forwarded to ``denormalize_predictions_safe``. When ``None``, a
        heuristic decides whether the predictions need rescaling (see the
        note in the body).
    stream_names : list of str
        One name per index on the last axis of the targets. The default list
        is only read, never mutated.

    Returns
    -------
    dict
        Keys: ``overall_mae``, ``overall_mape_robust``, ``overall_smape``,
        ``overall_correlation``, ``revenue_accuracy``, ``stream_results``,
        ``predictions`` (clamped, denormalized), ``actuals``, ``assessment``.
    """
    # Accept lists / pandas objects as well; an ndarray passes through unchanged.
    y_test_original = np.asarray(y_test_original)

    print("="*80)
    print("CORRECTED MODEL EVALUATION (AUTO-DETECTING SETUP)")
    print("="*80)
    
    # Auto-detect variable types and shapes
    print("🔍 Auto-detecting your setup...")
    print(f"  Model type: {type(model)}")
    print(f"  X_test shape: {X_test.shape}")
    print(f"  y_test_original shape: {y_test_original.shape}")
    
    # Generate predictions
    print("🔮 Generating predictions...")
    y_pred_normalized = np.asarray(model.predict(X_test, verbose=0))
    print(f"  Raw prediction shape: {y_pred_normalized.shape}")
    print(f"  Raw prediction range: {y_pred_normalized.min():.4f} - {y_pred_normalized.max():.4f}")
    
    # Denormalize predictions
    print("🔄 Denormalizing predictions...")
    if scaler_or_stats is not None:
        y_pred_denorm = denormalize_predictions_safe(y_pred_normalized, scaler_or_stats)
        print(f"  Using provided scaler/stats for denormalization")
    else:
        # Try to auto-detect if denormalization is needed
        pred_mean = np.mean(y_pred_normalized)
        actual_mean = np.mean(y_test_original)
        
        # Compare against the magnitude of the actual mean so the heuristic
        # also behaves sensibly when that mean is negative.
        if abs(pred_mean - actual_mean) > abs(actual_mean) * 0.5:  # Predictions are in different scale
            print(f"  ⚠️  Predictions seem to be in different scale than actuals")
            print(f"  Prediction mean: {pred_mean:.2f}, Actual mean: {actual_mean:.2f}")
            print(f"  Assuming predictions are normalized, attempting auto-denormalization...")
            
            # NOTE(review): this fallback rescales with the global mean/std of
            # the evaluation targets themselves, which is not the inverse of
            # whatever scaling was applied at training time. Prefer passing
            # the real scaler or stats through `scaler_or_stats`.
            actual_std = np.std(y_test_original)
            y_pred_denorm = y_pred_normalized * actual_std + actual_mean
        else:
            y_pred_denorm = y_pred_normalized
            print(f"  Predictions appear to be in same scale as actuals")
    
    # Record negativity BEFORE clamping; checked afterwards it could never be True.
    had_negative_predictions = (y_pred_denorm < 0).any()
    
    # Ensure no negative predictions (revenue can't be negative); the floor is 1.0
    y_pred_denorm = np.maximum(y_pred_denorm, 1.0)
    
    print(f"  ✅ Predictions denormalized")
    print(f"  Final prediction range: ${y_pred_denorm.min():.2f} - ${y_pred_denorm.max():.2f}")
    
    # Data quality checks
    print(f"\n🔍 Data Quality Checks:")
    print(f"  True values range: ${y_test_original.min():.2f} - ${y_test_original.max():.2f}")
    print(f"  Any NaN in true values: {np.isnan(y_test_original).any()}")
    print(f"  Any NaN in predictions: {np.isnan(y_pred_denorm).any()}")
    print(f"  Any negative true values: {(y_test_original < 0).any()}")
    print(f"  Any negative predictions: {had_negative_predictions}")
    
    # =====================================
    # CORRECTED METRICS CALCULATION
    # =====================================
    
    print("\n📊 CORRECTED EVALUATION METRICS:")
    
    # Flatten for overall metrics
    y_true_flat = y_test_original.reshape(-1)
    y_pred_flat = y_pred_denorm.reshape(-1)
    
    # Remove any remaining NaN/inf values
    mask = np.isfinite(y_true_flat) & np.isfinite(y_pred_flat)
    y_true_clean = y_true_flat[mask]
    y_pred_clean = y_pred_flat[mask]
    
    print(f"  Clean samples: {len(y_true_clean)} / {len(y_true_flat)}")
    
    # Overall Performance
    overall_mae = np.mean(np.abs(y_true_clean - y_pred_clean))
    overall_rmse = np.sqrt(np.mean((y_true_clean - y_pred_clean) ** 2))
    overall_corr = np.corrcoef(y_true_clean, y_pred_clean)[0, 1]
    
    # CORRECTED MAPE - Only for meaningful values
    overall_mape_robust = calculate_robust_mape(y_true_clean, y_pred_clean, min_threshold=100)
    overall_smape = calculate_smape(y_true_clean, y_pred_clean)
    
    # Revenue accuracy (business-meaningful); NaN when the true total is zero
    total_true = np.sum(y_true_clean)
    total_pred = np.sum(y_pred_clean)
    revenue_accuracy = _revenue_accuracy_pct(total_true, total_pred)
    
    print(f"\n🎯 Overall Performance:")
    print(f"  MAE: ${overall_mae:.2f}")
    if not np.isnan(overall_mape_robust):
        print(f"  Robust MAPE: {overall_mape_robust:.1f}% (weighted, capped, >$100 only)")
    else:
        print(f"  Robust MAPE: N/A (insufficient high-value samples)")
    print(f"  SMAPE: {overall_smape:.1f}%")
    print(f"  Correlation: {overall_corr:.3f}")
    print(f"  Revenue Accuracy: {revenue_accuracy:.1f}%")
    
    # Per-stream evaluation with corrected metrics
    print(f"\n🍽️ Per-Stream Performance:")
    stream_results = {}
    
    for i, stream in enumerate(stream_names):
        if y_test_original.ndim == 3:
            stream_true = y_test_original[:, :, i].flatten()
            stream_pred = y_pred_denorm[:, :, i].flatten()
        else:
            # Handle 2D case
            stream_true = y_test_original[:, i].flatten()
            stream_pred = y_pred_denorm[:, i].flatten()
        
        # Clean data
        mask = np.isfinite(stream_true) & np.isfinite(stream_pred)
        stream_true_clean = stream_true[mask]
        stream_pred_clean = stream_pred[mask]
        
        # Calculate corrected metrics (per-stream MAPE uses a lower $50 threshold)
        stream_mae = np.mean(np.abs(stream_true_clean - stream_pred_clean))
        stream_mape_robust = calculate_robust_mape(stream_true_clean, stream_pred_clean, min_threshold=50)
        stream_smape = calculate_smape(stream_true_clean, stream_pred_clean)
        stream_corr = np.corrcoef(stream_true_clean, stream_pred_clean)[0, 1]
        
        # Revenue accuracy for this stream
        stream_total_true = np.sum(stream_true_clean)
        stream_total_pred = np.sum(stream_pred_clean)
        stream_revenue_acc = _revenue_accuracy_pct(stream_total_true, stream_total_pred)
        
        stream_results[stream] = {
            'MAE': stream_mae,
            'MAPE_Robust': stream_mape_robust,
            'SMAPE': stream_smape,
            'Correlation': stream_corr,
            'Revenue_Accuracy': stream_revenue_acc,
            'Sample_Count': len(stream_true_clean),
            'Value_Range': f"${stream_true_clean.min():.0f}-${stream_true_clean.max():.0f}"
        }
        
        print(f"  {stream}:")
        print(f"    MAE: ${stream_mae:.2f}")
        if not np.isnan(stream_mape_robust):
            print(f"    Robust MAPE: {stream_mape_robust:.1f}%")
        else:
            print(f"    Robust MAPE: N/A (low values)")
        print(f"    SMAPE: {stream_smape:.1f}%")
        print(f"    Correlation: {stream_corr:.3f}")
        print(f"    Revenue Accuracy: {stream_revenue_acc:.1f}%")
    
    # Business Metrics
    print(f"\n💼 Business Metrics:")
    print(f"  Total revenue error: {100 - revenue_accuracy:.1f}%")
    print(f"  True total: ${total_true:,.2f}")
    print(f"  Predicted total: ${total_pred:,.2f}")
    
    # Prediction Accuracy Distribution (with capped errors)
    # The 1e-8 term keeps the division finite when an actual value is exactly 0.
    percentage_errors = np.abs((y_true_clean - y_pred_clean) / (y_true_clean + 1e-8)) * 100
    percentage_errors_capped = np.minimum(percentage_errors, 200)  # Cap at 200%
    
    print(f"\n📈 Prediction Accuracy Distribution:")
    print(f"  Within 10% error: {(percentage_errors_capped <= 10).mean() * 100:.1f}%")
    print(f"  Within 20% error: {(percentage_errors_capped <= 20).mean() * 100:.1f}%")
    print(f"  Within 30% error: {(percentage_errors_capped <= 30).mean() * 100:.1f}%")
    print(f"  Within 50% error: {(percentage_errors_capped <= 50).mean() * 100:.1f}%")
    
    # Model Assessment: bucket the overall SMAPE into a qualitative label
    print(f"\n🎯 Model Reliability Assessment:")
    if overall_smape < 30:
        assessment = "✅ EXCELLENT (< 30% SMAPE)"
    elif overall_smape < 50:
        assessment = "✅ GOOD (< 50% SMAPE)"
    elif overall_smape < 70:
        assessment = "⚠️ MODERATE (< 70% SMAPE)"
    else:
        assessment = "❌ NEEDS IMPROVEMENT (> 70% SMAPE)"
    
    print(f"  {assessment}")
    
    return {
        'overall_mae': overall_mae,
        'overall_mape_robust': overall_mape_robust,
        'overall_smape': overall_smape,
        'overall_correlation': overall_corr,
        'revenue_accuracy': revenue_accuracy,
        'stream_results': stream_results,
        'predictions': y_pred_denorm,
        'actuals': y_test_original,
        'assessment': assessment
    }

# ============================================================================
# USAGE - REPLACE YOUR CURRENT EVALUATION CALL
# ============================================================================

# Inventory the public names currently defined in the notebook namespace
print("Available variables in your namespace:")
available_vars = [name for name in dir() if not name.startswith('_')]

# Group the inventory by the keyword each name contains (case-insensitive)
model_vars, test_vars, scaler_vars = (
    [name for name in available_vars if keyword in name.lower()]
    for keyword in ('model', 'test', 'scaler')
)

print(f"Model variables: {model_vars}")
print(f"Test variables: {test_vars}")
print(f"Scaler variables: {scaler_vars}")


def _first_defined(candidates):
    """Return the first name in `candidates` that exists in globals(), else None."""
    namespace = globals()
    for candidate in candidates:
        if candidate in namespace:
            return candidate
    return None


# Resolve the evaluation inputs by trying conventional variable names, then
# run the evaluation if the three mandatory ones (model, X, y) were located.
try:
    # Candidate names, in priority order
    possible_models = ['model', 'best_model', 'trained_model', 'cnn_lstm_model']
    possible_x_test = ['X_test', 'X_test_norm', 'X_test_normalized', 'test_X']
    possible_y_test = ['y_test_orig', 'y_test_original', 'y_test_actual', 'test_y']
    possible_scalers = ['target_scaler', 'y_scaler', 'scaler', 'output_scaler']

    # Nothing resolved yet
    model_var = x_test_var = y_test_var = scaler_var = None

    found_name = _first_defined(possible_models)
    if found_name is not None:
        model_var = globals()[found_name]
        print(f"✓ Found model: {found_name}")

    found_name = _first_defined(possible_x_test)
    if found_name is not None:
        x_test_var = globals()[found_name]
        print(f"✓ Found X_test: {found_name}")

    found_name = _first_defined(possible_y_test)
    if found_name is not None:
        y_test_var = globals()[found_name]
        print(f"✓ Found y_test: {found_name}")

    found_name = _first_defined(possible_scalers)
    if found_name is not None:
        scaler_var = globals()[found_name]
        print(f"✓ Found scaler: {found_name}")

    # The scaler is optional; model, inputs and targets are not
    if model_var is None or x_test_var is None or y_test_var is None:
        print("\n❌ Could not auto-detect all required variables")
        print("Please run manually with your variable names:")
        print("""
        evaluation_results = evaluate_model_auto_detect(
            model=YOUR_MODEL_VARIABLE,
            X_test=YOUR_X_TEST_VARIABLE,
            y_test_original=YOUR_Y_TEST_VARIABLE,
            scaler_or_stats=YOUR_SCALER_OR_None,
            stream_names=['Breakfast', 'Dinner', 'Lunch']
        )
        """)
    else:
        print("\n🚀 Running auto-detected evaluation...")
        evaluation_results = evaluate_model_auto_detect(
            model=model_var,
            X_test=x_test_var,
            y_test_original=y_test_var,
            scaler_or_stats=scaler_var,  # Can be None
            stream_names=['Breakfast', 'Dinner', 'Lunch']
        )

except Exception as e:
    # Best-effort cell: report the failure instead of aborting the notebook run
    print(f"Auto-detection failed: {e}")
    print("Please run manually with your specific variable names")

Available variables in your namespace:
Model variables: ['ModelCheckpoint', 'build_appropriately_sized_model', 'compile_model_conservatively', 'evaluate_model_auto_detect', 'evaluate_model_corrected', 'evaluate_model_honestly', 'model', 'train_model_robustly']
Test variables: ['X_test', 'X_test_norm', 'test_df', 'test_engineered', 'y_test', 'y_test_norm', 'y_test_orig']
Scaler variables: ['RobustScaler', 'StandardScaler']
✓ Found model: model
✓ Found X_test: X_test
✓ Found y_test: y_test_orig

🚀 Running auto-detected evaluation...
CORRECTED MODEL EVALUATION (AUTO-DETECTING SETUP)
🔍 Auto-detecting your setup...
  Model type: <class 'keras.src.models.sequential.Sequential'>
  X_test shape: (64, 21, 90)
  y_test_original shape: (64, 7, 3)
🔮 Generating predictions...
  Raw prediction shape: (64, 7, 3)
  Raw prediction range: -0.6010 - 0.8989
🔄 Denormalizing predictions...
  ⚠️  Predictions seem to be in different scale than actuals
  Prediction mean: 0.23, Actual mean: 1701.30
  Assuming p