In [31]:
# Cell 1: Import required libraries
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, LSTM, Dense, Dropout, MaxPooling1D
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint
from sklearn.metrics import mean_absolute_error, mean_squared_error
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings('ignore')

print("✓ All libraries imported successfully")
print(f"✓ TensorFlow version: {tf.__version__}")

✓ All libraries imported successfully
✓ TensorFlow version: 2.19.0


In [32]:
# Cell 2: Load the transformed datasets
print("="*50)
print("LOADING CNN-LSTM READY DATASET")
print("="*50)

# Load the datasets
df_transformed = pd.read_csv('cnn_lstm_ready_dataset.csv')
target_data = pd.read_csv('target_data_for_sequences.csv')

print(f"✓ Transformed data shape: {df_transformed.shape}")
print(f"✓ Target data shape: {target_data.shape}")
print(f"✓ Transformed data columns: {len(df_transformed.columns)} features")
print(f"✓ Target data columns: {len(target_data.columns)} target variables")

# Display first few rows
print(f"\nFirst 3 rows of transformed data:")
print(df_transformed.head(3))

LOADING CNN-LSTM READY DATASET
✓ Transformed data shape: (1458, 65)
✓ Target data shape: (1458, 4)
✓ Transformed data columns: 65 features
✓ Target data columns: 4 target variables

First 3 rows of transformed data:
       Year  CheckTotal   is_zero  IsRamadan     IsEid  IsPreRamadan  \
0 -0.575766    0.015624 -0.083103  -0.645685 -0.184506     -0.307562   
1 -0.575766    2.072745 -0.083103  -0.645685 -0.184506     -0.307562   
2 -0.575766   -0.155666 -0.083103  -0.645685 -0.184506     -0.307562   

   IsPostRamadan  IsLast10Ramadan     IsDSF  IsSummerEvent  ...  \
0      -0.307562        -0.207168 -0.261021      -0.389434  ...   
1      -0.307562        -0.207168 -0.261021      -0.389434  ...   
2      -0.307562        -0.207168 -0.261021      -0.389434  ...   

   Event_Ramadan-First10Days  Event_Ramadan-Last10Days  Event_Ramadan-Middle  \
0                  -0.207168                 -0.207168             -0.201706   
1                  -0.207168                 -0.207168            

In [33]:
def create_sequences_for_cnn_lstm(df_transformed, target_data, sequence_length=30, forecast_horizon=7):
    """
    Create sequences for CNN-LSTM training from loaded CSV files
    """
    print("="*50)
    print("CREATING SEQUENCES FOR CNN-LSTM")
    print("="*50)
    
    # Parameters
    SEQ_LENGTH = sequence_length  # Look back 30 days
    FORECAST_HORIZON = forecast_horizon  # Predict next 7 days
    
    # Sort by date to ensure proper sequence order
    df_transformed_sorted = df_transformed.sort_values('Date').reset_index(drop=True)
    target_data_sorted = target_data.sort_values('Date').reset_index(drop=True)
    
    # Pivot target data to wide format
    target_pivot = target_data_sorted.pivot_table(
        index='Date', 
        columns=['RevenueCenterName', 'MealPeriod'], 
        values='CheckTotal', 
        fill_value=0
    ).reset_index()
    
    # Create column names for revenue streams
    target_pivot.columns = ['Date'] + [f"{col[0]}_{col[1]}" for col in target_pivot.columns[1:]]
    
    # Ensure same date range
    common_dates = set(df_transformed_sorted['Date']).intersection(set(target_pivot['Date']))
    df_transformed_sorted = df_transformed_sorted[df_transformed_sorted['Date'].isin(common_dates)].reset_index(drop=True)
    target_pivot = target_pivot[target_pivot['Date'].isin(common_dates)].reset_index(drop=True)
    
    # Remove Date column from features
    feature_columns = [col for col in df_transformed_sorted.columns if col != 'Date']
    features = df_transformed_sorted[feature_columns].values
    
    # Target columns (revenue targets)
    target_columns = [col for col in target_pivot.columns if col != 'Date']
    targets = target_pivot[target_columns].values
    
    print(f"✓ Feature shape: {features.shape}")
    print(f"✓ Target shape: {targets.shape}")
    print(f"✓ Number of feature columns: {len(feature_columns)}")
    print(f"✓ Number of target columns: {len(target_columns)}")
    print(f"✓ Target columns: {target_columns}")
    print(f"✓ Sequence length: {SEQ_LENGTH} days")
    print(f"✓ Forecast horizon: {FORECAST_HORIZON} days")
    
    # Create sequences
    X, y = [], []
    
    for i in range(SEQ_LENGTH, len(features) - FORECAST_HORIZON + 1):
        # Features: past 30 days
        X.append(features[i-SEQ_LENGTH:i])
        
        # Targets: next 7 days
        y.append(targets[i:i+FORECAST_HORIZON])
    
    X = np.array(X)
    y = np.array(y)
    
    print(f"✓ Final X shape: {X.shape}")  # (samples, 30, features)
    print(f"✓ Final y shape: {y.shape}")  # (samples, 7, revenue_targets)
    print(f"✓ Total sequences created: {len(X)}")
    
    return X, y, feature_columns, target_columns

In [34]:
# Cell 3A: Target Normalization Functions
from sklearn.preprocessing import StandardScaler
import joblib

def normalize_targets(y_train, y_test, save_scaler=True):
    """
    Normalize target values for better training stability
    """
    print("="*50)
    print("NORMALIZING TARGET VALUES")
    print("="*50)
    
    # Original data info
    print(f"📊 Original target ranges:")
    print(f"  y_train: ${y_train.min():.2f} - ${y_train.max():.2f}")
    print(f"  y_test: ${y_test.min():.2f} - ${y_test.max():.2f}")
    
    # Reshape for normalization: (samples, days, streams) -> (samples*days, streams)
    original_train_shape = y_train.shape
    original_test_shape = y_test.shape
    
    y_train_reshaped = y_train.reshape(-1, y_train.shape[-1])  # (samples*days, 3)
    y_test_reshaped = y_test.reshape(-1, y_test.shape[-1])     # (samples*days, 3)
    
    print(f"✓ Reshaped for scaling:")
    print(f"  y_train: {original_train_shape} -> {y_train_reshaped.shape}")
    print(f"  y_test: {original_test_shape} -> {y_test_reshaped.shape}")
    
    # Fit scaler on training data only
    target_scaler = StandardScaler()
    y_train_normalized = target_scaler.fit_transform(y_train_reshaped)
    y_test_normalized = target_scaler.transform(y_test_reshaped)
    
    # Reshape back to original format
    y_train_normalized = y_train_normalized.reshape(original_train_shape)
    y_test_normalized = y_test_normalized.reshape(original_test_shape)
    
    print(f"✓ Normalized target ranges:")
    print(f"  y_train: {y_train_normalized.min():.3f} - {y_train_normalized.max():.3f}")
    print(f"  y_test: {y_test_normalized.min():.3f} - {y_test_normalized.max():.3f}")
    print(f"  Mean: {y_train_normalized.mean():.3f}, Std: {y_train_normalized.std():.3f}")
    
    # Save scaler for later denormalization
    if save_scaler:
        joblib.dump(target_scaler, 'target_scaler.pkl')
        print(f"✅ Target scaler saved to 'target_scaler.pkl'")
    
    return y_train_normalized, y_test_normalized, target_scaler

def denormalize_predictions(predictions_normalized, target_scaler):
    """
    Convert normalized predictions back to actual dollar amounts
    """
    original_shape = predictions_normalized.shape
    
    # Reshape for denormalization
    pred_reshaped = predictions_normalized.reshape(-1, predictions_normalized.shape[-1])
    
    # Denormalize
    pred_actual = target_scaler.inverse_transform(pred_reshaped)
    
    # Reshape back
    pred_actual = pred_actual.reshape(original_shape)
    
    return pred_actual

print("✅ Target normalization functions defined")

✅ Target normalization functions defined


In [35]:
# Cell 4: Corrected - Handle data without Date column in features
def clean_and_prepare_data_fixed(df_transformed, target_data):
    """
    Clean dataframes when features don't have Date column
    """
    print("="*50)
    print("CLEANING AND PREPARING DATA FOR CNN-LSTM")
    print("="*50)
    
    # Step 1: Check original data
    print("Original data info:")
    print(f"df_transformed shape: {df_transformed.shape}")
    print(f"df_transformed columns: {list(df_transformed.columns)}")
    print(f"target_data shape: {target_data.shape}")
    print(f"target_data columns: {list(target_data.columns)}")
    
    # Check if data is already aligned by length
    if len(df_transformed) == len(target_data):
        print("✓ Data lengths match - assuming already aligned by row index")
        
        # Step 2: Pivot target data from long to wide format
        print("\n🔄 Pivoting target data to wide format...")
        
        # Add row index to help with pivoting
        target_with_index = target_data.copy()
        target_with_index['row_index'] = target_with_index.index
        
        # Create a day identifier (since we know there are 3 meal periods per day)
        target_with_index['day_id'] = target_with_index['row_index'] // 3
        
        target_pivot = target_with_index.pivot_table(
            index='day_id', 
            columns='MealPeriod', 
            values='CheckTotal', 
            fill_value=0
        ).reset_index()
        
        print(f"✓ Pivoted target shape: {target_pivot.shape}")
        print(f"✓ Pivoted target columns: {list(target_pivot.columns)}")
        
        # Step 3: Aggregate features to day level (average of 3 meal periods per day)
        print("\n📊 Aggregating features to day level...")
        
        # Add day_id to features
        df_features_with_day = df_transformed.copy()
        df_features_with_day['day_id'] = df_features_with_day.index // 3
        
        # Aggregate features by day (mean of the 3 meal periods)
        df_features_daily = df_features_with_day.groupby('day_id').mean().reset_index()
        df_features_daily = df_features_daily.drop('day_id', axis=1)
        
        print(f"✓ Aggregated features shape: {df_features_daily.shape}")
        
        # Step 4: Align the data
        target_values = target_pivot.drop('day_id', axis=1)
        
        # Ensure same number of rows
        min_rows = min(len(df_features_daily), len(target_values))
        df_features_final = df_features_daily.iloc[:min_rows]
        target_values_final = target_values.iloc[:min_rows]
        
        print(f"✓ Final aligned shapes:")
        print(f"Features: {df_features_final.shape}")
        print(f"Targets: {target_values_final.shape}")
        
    else:
        raise ValueError(f"Data length mismatch: features={len(df_transformed)}, targets={len(target_data)}")
    
    # Step 5: Clean data types and handle missing values
    print("\n🧹 Cleaning data types...")
    
    # Features: ensure all numeric
    df_features_clean = df_features_final.select_dtypes(include=[np.number])
    df_features_clean = df_features_clean.fillna(0).astype(np.float32)
    
    # Targets: ensure all numeric
    df_targets_clean = target_values_final.fillna(0).astype(np.float32)
    
    print(f"✅ Final cleaned data:")
    print(f"Features shape: {df_features_clean.shape}")
    print(f"Targets shape: {df_targets_clean.shape}")
    print(f"Target columns: {list(df_targets_clean.columns)}")
    print(f"Data lengths match: {len(df_features_clean) == len(df_targets_clean)}")
    
    return df_features_clean, df_targets_clean

def create_sequences_for_cnn_lstm_corrected(df_features, df_targets, sequence_length=30, forecast_horizon=7):
    """
    Create sequences from properly aligned and cleaned data
    """
    print("\n" + "="*50)
    print("CREATING SEQUENCES FOR CNN-LSTM")
    print("="*50)
    
    # Parameters
    SEQ_LENGTH = sequence_length
    FORECAST_HORIZON = forecast_horizon
    
    # Convert to arrays
    features = df_features.values
    targets = df_targets.values
    feature_columns = df_features.columns.tolist()
    target_columns = df_targets.columns.tolist()
    
    print(f"✓ Feature shape: {features.shape}")
    print(f"✓ Target shape: {targets.shape}")
    print(f"✓ Sequence length: {SEQ_LENGTH} days")
    print(f"✓ Forecast horizon: {FORECAST_HORIZON} days")
    print(f"✓ Target columns: {target_columns}")
    
    # Verify we have enough data
    min_data_needed = SEQ_LENGTH + FORECAST_HORIZON
    if len(features) < min_data_needed:
        raise ValueError(f"Not enough data. Need at least {min_data_needed} rows, got {len(features)}")
    
    # Create sequences
    X, y = [], []
    
    for i in range(SEQ_LENGTH, len(features) - FORECAST_HORIZON + 1):
        # Features: past SEQ_LENGTH days
        X.append(features[i-SEQ_LENGTH:i])
        
        # Targets: next FORECAST_HORIZON days
        y.append(targets[i:i+FORECAST_HORIZON])
    
    X = np.array(X, dtype=np.float32)
    y = np.array(y, dtype=np.float32)
    
    print(f"✓ Final X shape: {X.shape}")  # (samples, sequence_length, features)
    print(f"✓ Final y shape: {y.shape}")  # (samples, forecast_horizon, revenue_streams)
    print(f"✓ X dtype: {X.dtype}")
    print(f"✓ y dtype: {y.dtype}")
    print(f"✓ Total sequences created: {len(X)}")
    
    # Show example of what each dimension means
    print(f"\n📊 Shape interpretation:")
    print(f"X: ({X.shape[0]} sequences, {X.shape[1]} days history, {X.shape[2]} features)")
    print(f"y: ({y.shape[0]} sequences, {y.shape[1]} days forecast, {y.shape[2]} revenue streams)")
    
    return X, y, feature_columns, target_columns

# Execute the corrected pipeline
try:
    # Step 1: Clean and prepare data without Date column dependency
    df_features_clean, df_targets_clean = clean_and_prepare_data_fixed(df_transformed, target_data)
    
    # Step 2: Create sequences
    X, y, feature_cols, target_cols = create_sequences_for_cnn_lstm_corrected(
        df_features_clean, df_targets_clean
    )
    
    print(f"\n🎉 SUCCESS! Sequences created successfully!")
    print(f"✓ Input sequences (X): {X.shape}")
    print(f"✓ Output sequences (y): {y.shape}")
    print(f"✓ Feature columns: {len(feature_cols)}")
    print(f"✓ Target columns: {target_cols}")
    print(f"✓ Data types: X={X.dtype}, y={y.dtype}")
    
except Exception as e:
    print(f"❌ Error: {e}")
    import traceback
    traceback.print_exc()

CLEANING AND PREPARING DATA FOR CNN-LSTM
Original data info:
df_transformed shape: (1458, 65)
df_transformed columns: ['Year', 'CheckTotal', 'is_zero', 'IsRamadan', 'IsEid', 'IsPreRamadan', 'IsPostRamadan', 'IsLast10Ramadan', 'IsDSF', 'IsSummerEvent', 'IsNationalDay', 'IsNewYear', 'IsMarathon', 'IsGITEX', 'IsAirshow', 'IsFoodFestival', 'IsPreEvent', 'IsPostEvent', 'Month_sin', 'Month_cos', 'DayOfWeek_sin', 'DayOfWeek_cos', 'Meal_Breakfast', 'Meal_Dinner', 'Meal_Lunch', 'Event_Dubai-Airshow', 'Event_Dubai-Food-Festival', 'Event_Dubai-Marathon', 'Event_Dubai-Shopping-Festival', 'Event_Dubai-Summer-Surprises', 'Event_Eid-Adha', 'Event_Flag-Day', 'Event_GITEX-Technology-Week', 'Event_New-Year-Celebrations', 'Event_Normal', 'Event_Post-Dubai-Airshow', 'Event_Post-Dubai-Marathon', 'Event_Post-Eid-Adha', 'Event_Post-Flag-Day', 'Event_Post-GITEX-Technology-Week', 'Event_Post-New-Year-Celebrations', 'Event_Post-Ramadan-Recovery', 'Event_Post-Ramadan-Week1', 'Event_Post-Summer-Event', 'Event_Pre

In [36]:
print(X)
print("----------------------------------------------------------")
print(y)

[[[-0.5757663   0.6442341  -0.08310281 ... -0.3115533  -2.6150928
    4.957716  ]
  [-0.5757663   0.20041224 -0.08310281 ... -0.3115533   0.38239563
   -0.20170578]
  [-0.5757663   0.10293791 -0.08310281 ... -0.3115533   0.38239563
   -0.20170578]
  ...
  [-0.5757663   0.01185533 -0.08310281 ... -0.3115533   0.38239563
   -0.20170578]
  [-0.5757663   0.11598377 -0.08310281 ... -0.3115533   0.38239563
   -0.20170578]
  [-0.5757663  -0.17047678 -0.08310281 ... -0.3115533   0.38239563
   -0.20170578]]

 [[-0.5757663   0.20041224 -0.08310281 ... -0.3115533   0.38239563
   -0.20170578]
  [-0.5757663   0.10293791 -0.08310281 ... -0.3115533   0.38239563
   -0.20170578]
  [-0.5757663   0.44010323 -0.08310281 ... -0.3115533   0.38239563
   -0.20170578]
  ...
  [-0.5757663   0.11598377 -0.08310281 ... -0.3115533   0.38239563
   -0.20170578]
  [-0.5757663  -0.17047678 -0.08310281 ... -0.3115533   0.38239563
   -0.20170578]
  [-0.5757663   0.15700552 -0.08310281 ... -0.3115533   0.38239563
   -0.2

In [37]:
# Cell 5: Train-Test Split with clean data
print("="*30)
print("TRAIN-TEST SPLIT")
print("="*30)

# Time-based split (80% train, 20% test)
split_ratio = 0.8
split_index = int(len(X) * split_ratio)

X_train, X_test = X[:split_index], X[split_index:]
y_train, y_test = y[:split_index], y[split_index:]

print(f"✓ Training sequences: {X_train.shape[0]}")
print(f"✓ Testing sequences: {X_test.shape[0]}")
print(f"✓ Input shape per sample: {X_train.shape[1:]}")
print(f"✓ Output shape per sample: {y_train.shape[1:]}")

# Verify data types
print(f"✓ X_train dtype: {X_train.dtype}")
print(f"✓ y_train dtype: {y_train.dtype}")
print(f"✓ X_test dtype: {X_test.dtype}")
print(f"✓ y_test dtype: {y_test.dtype}")

# Check for any problematic values
print(f"\n✓ Data quality check:")
print(f"X_train NaN count: {np.isnan(X_train).sum()}")
print(f"y_train NaN count: {np.isnan(y_train).sum()}")
print(f"X_train Inf count: {np.isinf(X_train).sum()}")
print(f"y_train Inf count: {np.isinf(y_train).sum()}")

print(f"\n✅ Data is ready for training!")

TRAIN-TEST SPLIT
✓ Training sequences: 360
✓ Testing sequences: 90
✓ Input shape per sample: (30, 65)
✓ Output shape per sample: (7, 3)
✓ X_train dtype: float32
✓ y_train dtype: float32
✓ X_test dtype: float32
✓ y_test dtype: float32

✓ Data quality check:
X_train NaN count: 0
y_train NaN count: 0
X_train Inf count: 0
y_train Inf count: 0

✅ Data is ready for training!


In [38]:
# Cell 6: Define CNN-LSTM model architecture
def build_cnn_lstm_model(input_shape, output_shape):
    """
    Build CNN-LSTM hybrid model for hotel revenue forecasting
    """
    print(f"✓ Building model with input shape: {input_shape}")
    print(f"✓ Output shape: {output_shape}")
    
    model = Sequential([
        # CNN layers for feature extraction
        Conv1D(filters=64, kernel_size=3, activation='relu', input_shape=input_shape, name='conv1d_1'),
        Conv1D(filters=64, kernel_size=3, activation='relu', name='conv1d_2'),
        MaxPooling1D(pool_size=2, name='maxpool_1'),
        Dropout(0.2, name='dropout_1'),
        
        # More CNN layers
        Conv1D(filters=32, kernel_size=3, activation='relu', name='conv1d_3'),
        MaxPooling1D(pool_size=2, name='maxpool_2'),
        Dropout(0.2, name='dropout_2'),
        
        # LSTM layers for temporal patterns
        LSTM(100, return_sequences=True, name='lstm_1'),
        Dropout(0.3, name='dropout_3'),
        LSTM(50, return_sequences=False, name='lstm_2'),
        Dropout(0.3, name='dropout_4'),
        
        # Dense layers for final prediction
        Dense(100, activation='relu', name='dense_1'),
        Dropout(0.2, name='dropout_5'),
        Dense(np.prod(output_shape), activation='linear', name='dense_output'),
    ])
    
    # Reshape output to (forecast_days, revenue_streams)
    model.add(tf.keras.layers.Reshape(output_shape, name='reshape_output'))
    
    return model

print("✓ Model building function defined")

✓ Model building function defined


In [39]:
# Cell 6A: Apply Target Normalization
print("="*40)
print("APPLYING TARGET NORMALIZATION")
print("="*40)

# Store original targets for comparison
y_train_original = y_train.copy()
y_test_original = y_test.copy()

# Apply normalization
y_train_norm, y_test_norm, target_scaler = normalize_targets(y_train, y_test, save_scaler=True)

# Update variables for training
y_train = y_train_norm
y_test = y_test_norm

print(f"✅ Target normalization applied!")
print(f"✅ Training will use normalized targets")
print(f"✅ Original targets preserved for comparison")

# Show the difference
print(f"\n📊 Comparison:")
print(f"Original y_train range: ${y_train_original.min():.2f} - ${y_train_original.max():.2f}")
print(f"Normalized y_train range: {y_train.min():.3f} - {y_train.max():.3f}")

APPLYING TARGET NORMALIZATION
NORMALIZING TARGET VALUES
📊 Original target ranges:
  y_train: $5.00 - $10052.50
  y_test: $66.00 - $9657.00
✓ Reshaped for scaling:
  y_train: (360, 7, 3) -> (2520, 3)
  y_test: (90, 7, 3) -> (630, 3)
✓ Normalized target ranges:
  y_train: -1.588 - 9.795
  y_test: -1.334 - 9.494
  Mean: -0.000, Std: 1.000
✅ Target scaler saved to 'target_scaler.pkl'
✅ Target normalization applied!
✅ Training will use normalized targets
✅ Original targets preserved for comparison

📊 Comparison:
Original y_train range: $5.00 - $10052.50
Normalized y_train range: -1.588 - 9.795


In [40]:
# Cell 7: Build and compile the model
print("="*30)
print("BUILDING MODEL")
print("="*30)

# Define input and output shapes
input_shape = (X_train.shape[1], X_train.shape[2])  # (30, features)
output_shape = (y_train.shape[1], y_train.shape[2])  # (7, revenue_streams)

# Build model
model = build_cnn_lstm_model(input_shape, output_shape)

# Compile model
model.compile(
    optimizer=Adam(learning_rate=0.001),
    loss='mse',
    metrics=['mae']
)

print("\n" + "="*30)
print("MODEL ARCHITECTURE")
print("="*30)
model.summary()

# Count parameters
total_params = model.count_params()
print(f"\n✓ Total parameters: {total_params:,}")

BUILDING MODEL
✓ Building model with input shape: (30, 65)
✓ Output shape: (7, 3)

MODEL ARCHITECTURE



✓ Total parameters: 121,693


In [41]:
# Cell 8: Setup training callbacks
print("="*30)
print("TRAINING SETUP")
print("="*30)

# Define callbacks
callbacks = [
    EarlyStopping(
        monitor='val_loss', 
        patience=15, 
        restore_best_weights=True,
        verbose=1
    ),
    ReduceLROnPlateau(
        monitor='val_loss', 
        factor=0.5, 
        patience=5, 
        min_lr=1e-7,
        verbose=1
    ),
    ModelCheckpoint(
        'best_cnn_lstm_model.h5', 
        save_best_only=True, 
        monitor='val_loss',
        verbose=1
    )
]

# Training parameters
BATCH_SIZE = 32
EPOCHS = 100

print("✓ Callbacks configured:")
print("  - Early stopping (patience=15)")
print("  - Learning rate reduction (factor=0.5, patience=5)")
print("  - Model checkpoint (best_cnn_lstm_model.h5)")
print(f"✓ Batch size: {BATCH_SIZE}")
print(f"✓ Max epochs: {EPOCHS}")

TRAINING SETUP
✓ Callbacks configured:
  - Early stopping (patience=15)
  - Learning rate reduction (factor=0.5, patience=5)
  - Model checkpoint (best_cnn_lstm_model.h5)
✓ Batch size: 32
✓ Max epochs: 100


In [42]:
# Cell 9 Alternative: Comprehensive data cleaning and training
import tensorflow as tf

print("="*40)
print("COMPREHENSIVE DATA PREPARATION")
print("="*40)

def clean_and_prepare_data(X_train, y_train, X_test, y_test):
    """
    Comprehensive data cleaning for CNN-LSTM training
    """
    print("🔧 Cleaning and preparing data...")
    
    # Convert to numpy arrays if not already
    X_train = np.array(X_train)
    y_train = np.array(y_train)
    X_test = np.array(X_test)
    y_test = np.array(y_test)
    
    # Check for object dtype issues
    if X_train.dtype == 'object':
        print("⚠️  X_train has object dtype - converting...")
        X_train = X_train.astype(np.float64)
    
    if y_train.dtype == 'object':
        print("⚠️  y_train has object dtype - converting...")
        y_train = y_train.astype(np.float64)
    
    if X_test.dtype == 'object':
        print("⚠️  X_test has object dtype - converting...")
        X_test = X_test.astype(np.float64)
    
    if y_test.dtype == 'object':
        print("⚠️  y_test has object dtype - converting...")
        y_test = y_test.astype(np.float64)
    
    # Handle NaN and infinite values
    print("🧹 Handling NaN and infinite values...")
    X_train = np.nan_to_num(X_train, nan=0.0, posinf=1e6, neginf=-1e6)
    y_train = np.nan_to_num(y_train, nan=0.0, posinf=1e6, neginf=-1e6)
    X_test = np.nan_to_num(X_test, nan=0.0, posinf=1e6, neginf=-1e6)
    y_test = np.nan_to_num(y_test, nan=0.0, posinf=1e6, neginf=-1e6)
    
    # Convert to float32 (TensorFlow's preferred type)
    X_train = X_train.astype(np.float32)
    y_train = y_train.astype(np.float32)
    X_test = X_test.astype(np.float32)
    y_test = y_test.astype(np.float32)
    
    # Final verification
    print(f"✓ Final data types:")
    print(f"  X_train: {X_train.dtype}, shape: {X_train.shape}")
    print(f"  y_train: {y_train.dtype}, shape: {y_train.shape}")
    print(f"  X_test: {X_test.dtype}, shape: {X_test.shape}")
    print(f"  y_test: {y_test.dtype}, shape: {y_test.shape}")
    
    # Check data ranges
    print(f"✓ Data ranges:")
    print(f"  X_train: [{X_train.min():.3f}, {X_train.max():.3f}]")
    print(f"  y_train: [{y_train.min():.3f}, {y_train.max():.3f}]")
    
    return X_train, y_train, X_test, y_test

# Clean the data
X_train_clean, y_train_clean, X_test_clean, y_test_clean = clean_and_prepare_data(
    X_train, y_train, X_test, y_test
)

print("\n" + "="*30)
print("STARTING TRAINING")
print("="*30)

# Train with cleaned data
try:
    history = model.fit(
        X_train_clean, y_train_clean,
        batch_size=BATCH_SIZE,
        epochs=EPOCHS,
        validation_data=(X_test_clean, y_test_clean),
        callbacks=callbacks,
        verbose=1
    )
    
    print("\n✅ Training completed successfully!")
    
    # Update variables for next cells
    X_train, y_train = X_train_clean, y_train_clean
    X_test, y_test = X_test_clean, y_test_clean
    
except Exception as e:
    print(f"❌ Training still failed: {e}")
    print("\n🔍 Additional debugging:")
    
    # More detailed debugging
    print(f"X_train unique dtypes: {set(str(x.dtype) for x in X_train.flatten()[:100])}")
    print(f"Sample X_train values: {X_train[0, 0, :10]}")
    print(f"Sample y_train values: {y_train[0, 0, :10]}")
    
    # Check if data contains any strings
    sample_x = X_train[0, 0, :]
    print(f"Sample X contains strings: {any(isinstance(x, str) for x in sample_x.flatten())}")

COMPREHENSIVE DATA PREPARATION
🔧 Cleaning and preparing data...
🧹 Handling NaN and infinite values...
✓ Final data types:
  X_train: float32, shape: (360, 30, 65)
  y_train: float32, shape: (360, 7, 3)
  X_test: float32, shape: (90, 30, 65)
  y_test: float32, shape: (90, 7, 3)
✓ Data ranges:
  X_train: [-2.615, 22.023]
  y_train: [-1.588, 9.795]

STARTING TRAINING
Epoch 1/100
[1m10/12[0m [32m━━━━━━━━━━━━━━━━[0m[37m━━━━[0m [1m0s[0m 11ms/step - loss: 0.9150 - mae: 0.6489
Epoch 1: val_loss improved from inf to 3.45716, saving model to best_cnn_lstm_model.h5




[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 91ms/step - loss: 0.9346 - mae: 0.6528 - val_loss: 3.4572 - val_mae: 1.3137 - learning_rate: 0.0010
Epoch 2/100
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - loss: 0.8669 - mae: 0.6297
Epoch 2: val_loss improved from 3.45716 to 3.23934, saving model to best_cnn_lstm_model.h5




[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step - loss: 0.8725 - mae: 0.6306 - val_loss: 3.2393 - val_mae: 1.2468 - learning_rate: 0.0010
Epoch 3/100
[1m 6/12[0m [32m━━━━━━━━━━[0m[37m━━━━━━━━━━[0m [1m0s[0m 11ms/step - loss: 0.8015 - mae: 0.5953
Epoch 3: val_loss improved from 3.23934 to 2.74908, saving model to best_cnn_lstm_model.h5




[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step - loss: 0.8233 - mae: 0.6006 - val_loss: 2.7491 - val_mae: 1.1306 - learning_rate: 0.0010
Epoch 4/100
[1m 8/12[0m [32m━━━━━━━━━━━━━[0m[37m━━━━━━━[0m [1m0s[0m 7ms/step - loss: 0.9111 - mae: 0.6246 
Epoch 4: val_loss improved from 2.74908 to 2.70749, saving model to best_cnn_lstm_model.h5




[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step - loss: 0.8503 - mae: 0.6109 - val_loss: 2.7075 - val_mae: 1.1235 - learning_rate: 0.0010
Epoch 5/100
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - loss: 0.6866 - mae: 0.5781
Epoch 5: val_loss did not improve from 2.70749
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step - loss: 0.6882 - mae: 0.5786 - val_loss: 2.8854 - val_mae: 1.1613 - learning_rate: 0.0010
Epoch 6/100
[1m 8/12[0m [32m━━━━━━━━━━━━━[0m[37m━━━━━━━[0m [1m0s[0m 8ms/step - loss: 0.6817 - mae: 0.5653 
Epoch 6: val_loss did not improve from 2.70749
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step - loss: 0.6904 - mae: 0.5679 - val_loss: 2.7517 - val_mae: 1.1256 - learning_rate: 0.0010
Epoch 7/100
[1m 8/12[0m [32m━━━━━━━━━━━━━[0m[37m━━━━━━━[0m [1m0s[0m 7ms/step - loss: 0.7296 - mae: 0.5729 
Epoch 7: val_loss did not improve from 2.70749
[1m12/12[0m [32m━━━



[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step - loss: 0.6511 - mae: 0.5498 - val_loss: 2.5752 - val_mae: 1.0838 - learning_rate: 0.0010
Epoch 10/100
[1m 7/12[0m [32m━━━━━━━━━━━[0m[37m━━━━━━━━━[0m [1m0s[0m 9ms/step - loss: 0.7231 - mae: 0.5658 
Epoch 10: val_loss did not improve from 2.57517
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step - loss: 0.7034 - mae: 0.5646 - val_loss: 2.6387 - val_mae: 1.1019 - learning_rate: 0.0010
Epoch 11/100
[1m 6/12[0m [32m━━━━━━━━━━[0m[37m━━━━━━━━━━[0m [1m0s[0m 11ms/step - loss: 0.6717 - mae: 0.5625
Epoch 11: val_loss did not improve from 2.57517
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step - loss: 0.6653 - mae: 0.5619 - val_loss: 2.6595 - val_mae: 1.0966 - learning_rate: 0.0010
Epoch 12/100
[1m10/12[0m [32m━━━━━━━━━━━━━━━━[0m[37m━━━━[0m [1m0s[0m 7ms/step - loss: 0.7415 - mae: 0.5768 
Epoch 12: val_loss did not improve from 2.57517
[1m12/12[0m [



[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step - loss: 0.7484 - mae: 0.5729 - val_loss: 2.5468 - val_mae: 1.0686 - learning_rate: 0.0010
Epoch 15/100
[1m 7/12[0m [32m━━━━━━━━━━━[0m[37m━━━━━━━━━[0m [1m0s[0m 8ms/step - loss: 0.6204 - mae: 0.5312 
Epoch 15: val_loss improved from 2.54685 to 2.39348, saving model to best_cnn_lstm_model.h5




[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step - loss: 0.6424 - mae: 0.5395 - val_loss: 2.3935 - val_mae: 1.0381 - learning_rate: 0.0010
Epoch 16/100
[1m 8/12[0m [32m━━━━━━━━━━━━━[0m[37m━━━━━━━[0m [1m0s[0m 8ms/step - loss: 0.6491 - mae: 0.5565 
Epoch 16: val_loss did not improve from 2.39348
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step - loss: 0.6476 - mae: 0.5540 - val_loss: 2.4804 - val_mae: 1.0550 - learning_rate: 0.0010
Epoch 17/100
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - loss: 0.6420 - mae: 0.5501
Epoch 17: val_loss did not improve from 2.39348
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step - loss: 0.6434 - mae: 0.5502 - val_loss: 2.5618 - val_mae: 1.0740 - learning_rate: 0.0010
Epoch 18/100
[1m 6/12[0m [32m━━━━━━━━━━[0m[37m━━━━━━━━━━[0m [1m0s[0m 10ms/step - loss: 0.7651 - mae: 0.5822
Epoch 18: val_loss did not improve from 2.39348
[1m12/12[0m [



[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step - loss: 0.6380 - mae: 0.5407 - val_loss: 2.3586 - val_mae: 1.0329 - learning_rate: 2.5000e-04
Epoch 27/100
[1m 7/12[0m [32m━━━━━━━━━━━[0m[37m━━━━━━━━━[0m [1m0s[0m 9ms/step - loss: 0.5526 - mae: 0.5213 
Epoch 27: val_loss improved from 2.35862 to 2.26065, saving model to best_cnn_lstm_model.h5




[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step - loss: 0.5869 - mae: 0.5312 - val_loss: 2.2606 - val_mae: 1.0133 - learning_rate: 2.5000e-04
Epoch 28/100
[1m 7/12[0m [32m━━━━━━━━━━━[0m[37m━━━━━━━━━[0m [1m0s[0m 9ms/step - loss: 0.5678 - mae: 0.5221 
Epoch 28: val_loss did not improve from 2.26065
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step - loss: 0.5876 - mae: 0.5288 - val_loss: 2.3077 - val_mae: 1.0244 - learning_rate: 2.5000e-04
Epoch 29/100
[1m 7/12[0m [32m━━━━━━━━━━━[0m[37m━━━━━━━━━[0m [1m0s[0m 9ms/step - loss: 0.6760 - mae: 0.5511 
Epoch 29: val_loss did not improve from 2.26065
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step - loss: 0.6542 - mae: 0.5481 - val_loss: 2.2607 - val_mae: 1.0152 - learning_rate: 2.5000e-04
Epoch 30/100
[1m10/12[0m [32m━━━━━━━━━━━━━━━━[0m[37m━━━━[0m [1m0s[0m 7ms/step - loss: 0.7040 - mae: 0.5603 
Epoch 30: val_loss did not improve from 2.26065
[1m

In [43]:
# Cell 10: Model Evaluation with Denormalization
print("="*30)
print("COMPREHENSIVE MODEL EVALUATION")
print("="*30)

# IMPORTANT: Model was trained on NORMALIZED targets
# We need to denormalize predictions for evaluation
print("📊 NOTE: Model trained on normalized targets")
print("📊 Denormalizing predictions to actual dollar amounts")

# Make predictions on normalized test set
y_pred_normalized = model.predict(X_test, verbose=0)

# Load scaler and denormalize predictions
try:
    target_scaler = joblib.load('target_scaler.pkl')
    y_pred_actual = denormalize_predictions(y_pred_normalized, target_scaler)
    y_test_actual = y_test_original  # Use original non-normalized test targets
    
    print(f"✅ Predictions denormalized successfully")
    
except Exception as e:
    print(f"⚠️  Could not load scaler: {e}")
    print(f"📊 Using normalized predictions for evaluation")
    y_pred_actual = y_pred_normalized
    y_test_actual = y_test

# Debug shapes
print(f"\n🔍 Shape Debugging:")
print(f"X_test shape: {X_test.shape}")
print(f"y_test_actual shape: {y_test_actual.shape}")
print(f"y_pred_actual shape: {y_pred_actual.shape}")

# Define revenue stream names
revenue_streams = ['Breakfast', 'Dinner', 'Lunch']
print(f"\n📊 Revenue streams: {revenue_streams}")

# Show data ranges (should be in dollars after denormalization)
print(f"\n💰 Denormalized Value Ranges:")
print(f"  Actual revenue: ${y_test_actual.min():.2f} - ${y_test_actual.max():.2f}")
print(f"  Predicted revenue: ${y_pred_actual.min():.2f} - ${y_pred_actual.max():.2f}")

# Calculate metrics on actual dollar amounts
y_test_flat = y_test_actual.reshape(-1)
y_pred_flat = y_pred_actual.reshape(-1)

mae = mean_absolute_error(y_test_flat, y_pred_flat)
mse = mean_squared_error(y_test_flat, y_pred_flat)
rmse = np.sqrt(mse)
mape = np.mean(np.abs((y_test_flat - y_pred_flat) / (np.abs(y_test_flat) + 1e-8))) * 100

print(f"\n✅ Overall Test Metrics (in USD):")
print(f"  MAE: ${mae:.2f}")
print(f"  RMSE: ${rmse:.2f}")
print(f"  MAPE: {mape:.2f}%")

# Performance by revenue stream
print(f"\n✅ Performance by Revenue Stream:")
for stream_idx, stream_name in enumerate(revenue_streams):
    stream_mae = mean_absolute_error(
        y_test_actual[:, :, stream_idx].reshape(-1), 
        y_pred_actual[:, :, stream_idx].reshape(-1)
    )
    stream_corr = np.corrcoef(
        y_test_actual[:, :, stream_idx].reshape(-1),
        y_pred_actual[:, :, stream_idx].reshape(-1)
    )[0, 1]
    print(f"  {stream_name}: MAE = ${stream_mae:.2f}, Correlation = {stream_corr:.3f}")

# Sample predictions
print(f"\n✅ Sample Predictions (First sequence - in USD):")
print("Day | Breakfast_Actual | Breakfast_Pred | Dinner_Actual | Dinner_Pred | Lunch_Actual | Lunch_Pred")
print("-" * 95)
for day in range(min(7, y_test_actual.shape[1])):
    print(f"{day+1:2d}  | ${y_test_actual[0, day, 0]:11.2f}     | ${y_pred_actual[0, day, 0]:9.2f}     | "
          f"${y_test_actual[0, day, 1]:8.2f}     | ${y_pred_actual[0, day, 1]:6.2f}     | "
          f"${y_test_actual[0, day, 2]:7.2f}     | ${y_pred_actual[0, day, 2]:5.2f}")

print(f"\n✅ Evaluation complete with denormalized predictions!")

COMPREHENSIVE MODEL EVALUATION
📊 NOTE: Model trained on normalized targets
📊 Denormalizing predictions to actual dollar amounts








✅ Predictions denormalized successfully

🔍 Shape Debugging:
X_test shape: (90, 30, 65)
y_test_actual shape: (90, 7, 3)
y_pred_actual shape: (90, 7, 3)

📊 Revenue streams: ['Breakfast', 'Dinner', 'Lunch']

💰 Denormalized Value Ranges:
  Actual revenue: $66.00 - $9657.00
  Predicted revenue: $299.30 - $6406.11

✅ Overall Test Metrics (in USD):
  MAE: $859.11
  RMSE: $1286.34
  MAPE: 69.04%

✅ Performance by Revenue Stream:
  Breakfast: MAE = $804.04, Correlation = 0.431
  Dinner: MAE = $1185.78, Correlation = 0.647
  Lunch: MAE = $587.50, Correlation = 0.549

✅ Sample Predictions (First sequence - in USD):
Day | Breakfast_Actual | Breakfast_Pred | Dinner_Actual | Dinner_Pred | Lunch_Actual | Lunch_Pred
-----------------------------------------------------------------------------------------------
 1  | $    2466.00     | $  2360.19     | $ 6548.00     | $5794.64     | $2912.00     | $1564.16
 2  | $    2586.80     | $  2358.25     | $ 4300.00     | $5949.73     | $2686.00     | $1637.47
