## Step 1: Import Libraries

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings('ignore')

from prophet import Prophet
from xgboost import XGBRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error

%matplotlib inline

## Step 2: Load Data

In [None]:
# Read the prepared dataset. The first CSV column is used as the index and
# pandas is asked to parse it as dates.
df = pd.read_csv(
    'data/prepared_data.csv',
    parse_dates=True,
    index_col=0,
)

# Summarise what was loaded: row count, covered period and column names.
print(
    f"Loaded {len(df)} rows",
    f"Date range: {df.index.min()} to {df.index.max()}",
    f"Columns: {list(df.columns)}",
    sep="\n",
)

## Model 1: Facebook Prophet

### Step 3: Train Prophet Model

In [None]:
# Build the frame Prophet is fitted on: 'ds' holds the timestamps and 'y' the
# target (demand).
# NOTE(review): this relies on the index of `df` being named 'Date' so that
# reset_index() exposes it under that column name - confirm against the CSV.
prophet_df = (
    df.reset_index()[['Date', 'demand']]
    .rename(columns={'Date': 'ds', 'demand': 'y'})
)

print(f"Training Prophet on {len(prophet_df)} samples...")

# Yearly + weekly seasonality, combined multiplicatively with the trend.
model_prophet = Prophet(
    yearly_seasonality=True,
    weekly_seasonality=True,
    daily_seasonality=False,
    seasonality_mode='multiplicative',
    changepoint_prior_scale=0.05,
)

# Use temperature as an external regressor when the dataset provides it.
if 'temp' in df.columns:
    # temp_data is kept as its own frame because the forecast step merges it
    # into the future dataframe as well.
    temp_data = df.reset_index()[['Date', 'temp']].rename(columns={'Date': 'ds'})
    prophet_df = prophet_df.merge(temp_data, on='ds', how='left')
    model_prophet.add_regressor('temp')
    print("✓ Added temperature as external regressor")

# Fit on the full history (no hold-out is taken in this cell).
model_prophet.fit(prophet_df)
print("✓ Model trained successfully")

### Step 4: Generate Prophet Forecast

In [None]:
# Extend the training timeline by 365 periods (one year of days, as the
# original comment states) to obtain the frame Prophet will predict on.
future = model_prophet.make_future_dataframe(periods=365)

# The model was fitted with 'temp' as a regressor, so the future frame needs
# a 'temp' value on every row.
if 'temp' in df.columns:
    last_temp = df['temp'].iloc[-1]
    future = future.merge(temp_data, on='ds', how='left')
    # Assign the filled column back instead of calling
    # fillna(..., inplace=True) on the column selection: the in-place form
    # acts on a chained selection, which pandas warns about and which does
    # not update `future` under Copy-on-Write. The warning would be hidden
    # here because warnings are globally ignored at the top of the notebook.
    future['temp'] = future['temp'].fillna(last_temp)
    # NOTE(review): every future day gets the last observed temperature, so
    # the forecast carries no temperature variation over the horizon.
    # Consider a seasonal (e.g. day-of-year average) temperature instead.

# Predict over history + future rows.
forecast_prophet = model_prophet.predict(future)
print(f"✓ Forecast generated for {len(forecast_prophet)} periods")

# Show the last rows of the forecast with its uncertainty interval
# (kept as the final expression so the notebook renders it).
forecast_prophet[['ds', 'yhat', 'yhat_lower', 'yhat_upper']].tail(10)

### Step 5: Plot Prophet Forecast

In [None]:
# Render Prophet's forecast figure, then label it.
fig = model_prophet.plot(forecast_prophet, figsize=(14, 8))
plt.title(
    'Prophet Forecast: AP Electricity Demand (Next 365 Days)',
    fontweight='bold',
    fontsize=16,
)
plt.ylabel('Energy Required (MU)')
plt.xlabel('Date')
plt.tight_layout()
plt.show()

print("\nForecast Statistics:")

# Headline numbers: the last observed demand, the first forecast row dated
# after the end of the history, and the very last forecast row.
yhat = forecast_prophet['yhat']
after_history = forecast_prophet['ds'] > df.index[-1]
last_actual = df['demand'].iloc[-1]
first_forecast = yhat[after_history].iloc[0]
last_forecast = yhat.iloc[-1]

print(f"  Last actual demand: {last_actual:.2f} MU")
print(f"  First forecasted: {first_forecast:.2f} MU")
print(f"  Final forecasted (1 year ahead): {last_forecast:.2f} MU")
print(f"  Expected change: {((last_forecast - last_actual) / last_actual * 100):.1f}%")

### Step 6: Plot Prophet Components

In [None]:
# Draw the component plots Prophet derives from the forecast.
fig = model_prophet.plot_components(
    forecast_prophet,
    figsize=(14, 10),
)
plt.tight_layout()
plt.show()

# Short legend for the panels above.
print(
    "Seasonality components shown above:",
    "  - Trend: Long-term trend direction",
    "  - Yearly: Annual seasonal patterns",
    "  - Weekly: Weekly demand patterns",
    sep="\n",
)

## Model 2: XGBoost

### Step 7: Feature Engineering for XGBoost

In [None]:
# Work on a copy so the frame used by the Prophet cells stays untouched.
df_ml = df.copy()

# Lag features: demand observed 1, 7 and 30 rows earlier.
print("Creating lag features...")
df_ml['lag_1'] = df_ml['demand'].shift(1)
df_ml['lag_7'] = df_ml['demand'].shift(7)   # Weekly lag
df_ml['lag_30'] = df_ml['demand'].shift(30) # Monthly lag

# Rolling statistics are computed on demand shifted by one row, so each
# window ends at the previous observation. Rolling directly over 'demand'
# would include the current row's value - the very target being predicted -
# and leak it into the features, making the evaluation look better than it is.
previous_demand = df_ml['demand'].shift(1)
df_ml['rolling_mean_7'] = previous_demand.rolling(window=7).mean()
df_ml['rolling_mean_30'] = previous_demand.rolling(window=30).mean()
df_ml['rolling_std_7'] = previous_demand.rolling(window=7).std()

# Calendar features taken from the datetime index.
df_ml['month'] = df_ml.index.month
df_ml['quarter'] = df_ml.index.quarter
df_ml['day_of_year'] = df_ml.index.dayofyear

# The lags and rolling windows leave NaNs in the leading rows; drop every
# row that has a missing value in any column.
df_ml = df_ml.dropna()

print(f"✓ Features created. Data shape: {df_ml.shape}")
print(f"Features: {[col for col in df_ml.columns if col != 'demand']}")

### Step 8: Train-Test Split

In [None]:
# Chronological train-test split (90-10): the first 90% of rows are used for
# training and the remaining 10% are held out. Rows are not shuffled, so the
# test set is the most recent part of the series.
train_fraction = 0.9
train_size = int(len(df_ml) * train_fraction)
train = df_ml.iloc[:train_size]
test = df_ml.iloc[train_size:]

print(f"Train set: {len(train)} samples")
print(f"Test set: {len(test)} samples")

# Every column other than the target 'demand' is used as a model input.
feature_cols = [col for col in df_ml.columns if col != 'demand']
print(f"\nNumber of features: {len(feature_cols)}")

### Step 9: Train XGBoost Model

In [None]:
# Fit a gradient-boosted regressor on the training rows.
print("Training XGBoost model...")
model_xgb = XGBRegressor(
    n_estimators=100,
    max_depth=7,
    learning_rate=0.1,
    subsample=0.8,
    colsample_bytree=0.8,
    random_state=42,  # fixed seed for the row/column subsampling
)

# Inputs are the engineered feature columns; the target is 'demand'.
model_xgb.fit(train[feature_cols], train['demand'])
print("✓ Model trained successfully")

### Step 10: Evaluate XGBoost Model

In [None]:
# Predict on both splits so train and test error can be compared.
train_pred = model_xgb.predict(train[feature_cols])
test_pred = model_xgb.predict(test[feature_cols])

# Error metrics, in the units of 'demand' (MU).
train_mae = mean_absolute_error(train['demand'], train_pred)
train_rmse = np.sqrt(mean_squared_error(train['demand'], train_pred))
test_mae = mean_absolute_error(test['demand'], test_pred)
test_rmse = np.sqrt(mean_squared_error(test['demand'], test_pred))
# Test MAE expressed as a percentage of the mean test demand.
test_mae_pct = (test_mae / test['demand'].mean()) * 100

print("\n" + "="*50)
print("MODEL PERFORMANCE")
print("="*50)
print(f"\nTrain Metrics:")
print(f"  MAE: {train_mae:.2f} MU")
print(f"  RMSE: {train_rmse:.2f} MU")
print(f"\nTest Metrics:")
print(f"  MAE: {test_mae:.2f} MU")
print(f"  RMSE: {test_rmse:.2f} MU")
print(f"  MAE %: {test_mae_pct:.2f}%")
# Only claim good performance when the measured MAE % is actually under the
# 4% target; otherwise report that the target was missed.
if test_mae_pct < 4:
    print("\n✓ Model performance is good (MAE < 4%)")
else:
    print(f"\n✗ Model performance is below target (MAE {test_mae_pct:.2f}% >= 4%)")
print("="*50)

### Step 11: Feature Importance

In [None]:
# Pair each feature name with the importance score reported by the fitted
# model, ordered from most to least important.
importance = model_xgb.feature_importances_
feature_importance_df = pd.DataFrame(
    {'feature': feature_cols, 'importance': importance}
)
feature_importance_df = feature_importance_df.sort_values(
    by='importance',
    ascending=False,
)

# Horizontal bar chart of the scores.
fig, ax = plt.subplots(figsize=(10, 6))
bar_labels = feature_importance_df['feature']
bar_widths = feature_importance_df['importance']
ax.barh(bar_labels, bar_widths)
ax.set_title('XGBoost Feature Importance', fontweight='bold', fontsize=14)
ax.set_xlabel('Importance')
plt.tight_layout()
plt.show()

# Text view of the five highest-ranked features.
print("\nTop 5 Most Important Features:")
print(feature_importance_df.head(5))

## Model Comparison

In [None]:
# Print a side-by-side qualitative summary of the two models. Only the
# XGBoost test MAE line is computed; the other bullets are fixed text.
print("\n" + "="*60)
print("MODEL COMPARISON")
print("="*60)

print("\n📊 PROPHET:")
print("  ✓ Better for long-term trends and seasonality")
print("  ✓ Handles holidays and special events well")
print("  ✓ Provides uncertainty intervals")
print("  ✓ Easy to interpret components")
print("  ✗ Less suitable for short-term high-frequency data")

print("\n🤖 XGBOOST:")
print(f"  ✓ Test MAE: {test_mae:.2f} MU ({test_mae_pct:.2f}%)")
print("  ✓ Better for short-term forecasting")
print("  ✓ Captures complex non-linear patterns")
print("  ✓ Uses feature importance for insights")
print("  ✗ Requires more features and tuning")

print("\n🎯 RECOMMENDATION:")
print("   Use PROPHET for strategic planning (6-12 months)")
print("   Use XGBOOST for operational planning (1-4 weeks)")
print("\n" + "="*60)