# UK Electricity Demand Forecasting - AWS SageMaker DeepAR
**Author:** Abdul Salam Aldabik

## 1. Setup and Imports

In [None]:
# Install required packages (uncomment only if they are missing from the SageMaker kernel)
# %pip install sagemaker boto3 pandas

In [None]:
import boto3
import sagemaker
from sagemaker import get_execution_role
import pandas as pd
import numpy as np
import json
import io
import os
from datetime import datetime, timedelta
import matplotlib.pyplot as plt

# Report the SDK versions in use so a run can be tied to its environment.
print(
    f"SageMaker version: {sagemaker.__version__}",
    f"Boto3 version: {boto3.__version__}",
    sep="\n",
)

## 2. SageMaker Session and Role Setup

In [None]:
# Resolve the SageMaker session, the execution role and the S3 location
# (default bucket + project prefix) that the later cells read from.
sess = sagemaker.Session()
role = get_execution_role()
bucket, prefix = sess.default_bucket(), 'electricity-demand-forecasting'

# Echo the resolved settings in one call; output is one setting per line.
print(
    f"SageMaker Role: {role}",
    f"S3 Bucket: {bucket}",
    f"S3 Prefix: {prefix}",
    f"Region: {sess.boto_region_name}",
    sep="\n",
)

## 3. Load Data

**STEP 1:** Upload `neso_historic_demand_combined.csv` to SageMaker:
- Drag and drop the file into the JupyterLab file browser (left sidebar)
- Or click the Upload button (↑ icon) and select the file

**STEP 2:** Run the cell below to load the data

In [None]:
# Read the CSV, parse the settlement date column and order the rows by
# (date, period) in a single method chain.
df = (
    pd.read_csv('neso_historic_demand_combined.csv')
    .assign(SETTLEMENT_DATE=lambda frame: pd.to_datetime(frame['SETTLEMENT_DATE']))
    .sort_values(['SETTLEMENT_DATE', 'SETTLEMENT_PERIOD'])
)

print(
    f"✅ Dataset loaded: {df.shape}",
    f"Date range: {df['SETTLEMENT_DATE'].min()} to {df['SETTLEMENT_DATE'].max()}",
    sep="\n",
)
# Last expression of the cell: rich preview of the first rows.
df.head()

## 4. Prepare Time Series for DeepAR

DeepAR expects its input in JSON Lines format (one JSON object per time series) with these fields:
- `start` (required): timestamp of the first observation
- `target` (required): array of time series values
- `cat` (optional): categorical features
- `dynamic_feat` (optional): time-varying features

In [None]:
# Use ND (National Demand) as target
target_column = 'ND'

# Build one timestamp per row: the settlement date plus 30 minutes for every
# period after the first. This is done column-wise with pd.to_timedelta
# instead of a row-by-row df.apply, which is far slower on a long
# half-hourly history and yields the same values.
# NOTE(review): any day with more or fewer than 48 periods (e.g. clock-change
# days) would produce duplicate or missing timestamps here - confirm against
# the data.
df['datetime'] = df['SETTLEMENT_DATE'] + pd.to_timedelta(
    (df['SETTLEMENT_PERIOD'] - 1) * 30, unit='min'
)

# Set datetime as index and get target series
df = df.set_index('datetime').sort_index()
ts_data = df[target_column].values

print(f"Time series length: {len(ts_data)}")
print(f"Time series range: {df.index.min()} to {df.index.max()}")
print(f"Target column: {target_column}")
print(f"Missing values: {np.isnan(ts_data).sum()}")

In [None]:
# Fill missing values
# Count the gaps BEFORE filling; counting afterwards always reports 0.
missing_count = int(np.isnan(ts_data).sum())
if missing_count > 0:
    # Forward-fill first, then back-fill to cover any leading gaps.
    # .ffill()/.bfill() replace fillna(method=...), which is deprecated in
    # recent pandas releases.
    ts_data = pd.Series(ts_data).ffill().bfill().values
    print(f"Filled {missing_count} missing values")

In [None]:
# Hold out the final week for evaluation: 7 days * 48 half-hours = 336 steps.
prediction_length = 336

# The training series stops before the hold-out window; the "test" series is
# the full history, hold-out window included.
train_data, test_data = ts_data[:-prediction_length], ts_data

print(
    f"Training series length: {len(train_data)}",
    f"Full series length: {len(test_data)}",
    f"Prediction length: {prediction_length} (7 days)",
    sep="\n",
)

## 5. Create DeepAR Format (JSON Lines)

In [None]:
def series_to_json_obj(ts, start_date, cat=None):
    """Build the DeepAR JSON record for a single time series.

    Args:
        ts: Array-like exposing ``tolist()``; becomes the ``target`` field.
        start_date: Object exposing ``strftime``; formatted as
            ``YYYY-MM-DD HH:MM:SS`` for the ``start`` field.
        cat: Optional value stored under ``cat``; the key is omitted when
            this is ``None``.

    Returns:
        dict with keys ``start`` and ``target``, plus ``cat`` when supplied.
    """
    record = dict(
        start=start_date.strftime('%Y-%m-%d %H:%M:%S'),
        target=ts.tolist(),
    )
    if cat is None:
        return record
    record["cat"] = cat
    return record

# Both records share the timestamp of the first observation.
start_date = df.index[0]

# Build the training and test records with the same helper.
train_json, test_json = (
    series_to_json_obj(series, start_date) for series in (train_data, test_data)
)

# Show the record structure with the target truncated to its first 10 values.
preview = dict(train_json, target=train_json['target'][:10] + ['...'])
print("Sample training JSON:")
print(json.dumps(preview, indent=2))

In [None]:
# Save to JSON Lines files
def write_json_lines(data, filename):
    """Write one or more JSON records to a file in JSON Lines format.

    Args:
        data: Either a single JSON-serialisable record (e.g. a dict), written
            as one line, or a list/tuple of records, written one per line.
        filename: Path of the file to create or overwrite.
    """
    # A lone record is wrapped so both cases share the same write loop.
    records = data if isinstance(data, (list, tuple)) else [data]
    with open(filename, 'w', encoding='utf-8') as f:
        for record in records:
            # DeepAR expects one JSON object per line
            json.dump(record, f)
            f.write('\n')

# Local file names for the two DeepAR channels.
train_file, test_file = 'train.json', 'test.json'

write_json_lines(train_json, train_file)
write_json_lines(test_json, test_file)

print(f"Created {train_file}", f"Created {test_file}", sep="\n")

## 6. Upload Data to S3

In [None]:
# Push both local files to the session bucket, each under its own key prefix;
# upload_data returns the S3 URI of the uploaded object.
train_s3_path = sess.upload_data(train_file, bucket=bucket, key_prefix=f'{prefix}/train')
test_s3_path = sess.upload_data(test_file, bucket=bucket, key_prefix=f'{prefix}/test')

print(
    f"Training data: {train_s3_path}",
    f"Test data: {test_s3_path}",
    sep="\n",
)

## 7. Configure SageMaker DeepAR

In [None]:
# Look up the DeepAR algorithm container for the session's region.
from sagemaker.image_uris import retrieve

image_uri = retrieve(
    framework='forecasting-deepar',
    region=sess.boto_region_name,
    version='latest',
)
print(f"DeepAR container: {image_uri}")

In [None]:
# Hyperparameters for the DeepAR training job, gathered in one place.
# time_freq matches the half-hourly data; context_length is 336 steps
# (7 days), and the forecast horizon reuses prediction_length.
deepar_hyperparameters = dict(
    time_freq='30min',
    context_length=336,
    prediction_length=prediction_length,
    num_cells=40,
    num_layers=2,
    likelihood='gaussian',
    epochs=100,
    mini_batch_size=32,
    learning_rate=0.001,
    dropout_rate=0.1,
    early_stopping_patience=10,
)

# Single-instance estimator; artifacts go under the project prefix.
estimator = sagemaker.estimator.Estimator(
    image_uri=image_uri,
    role=role,
    instance_count=1,
    instance_type='ml.m4.xlarge',
    output_path=f's3://{bucket}/{prefix}/output',
    sagemaker_session=sess,
)
estimator.set_hyperparameters(**deepar_hyperparameters)

print("DeepAR estimator configured")

## 8. Train the Model

**⚠️ This will take 10-20 minutes and incur AWS charges!**

In [None]:
# One TrainingInput per channel, both declared as JSON content.
train_input, test_input = (
    sagemaker.inputs.TrainingInput(s3_data=s3_path, content_type='application/json')
    for s3_path in (train_s3_path, test_s3_path)
)

print(
    "Starting DeepAR training job...",
    "This will take approximately 10-20 minutes.",
    "\n" + "="*50,
    sep="\n",
)

# Launch the training job with both channels attached.
data_channels = {'train': train_input, 'test': test_input}
estimator.fit(data_channels)

print("\n" + "="*50, "✅ Training complete!", sep="\n")

## 9. Deploy the Model

**⚠️ Deploying creates an endpoint that incurs hourly charges!**

In [None]:
# Host the trained model on an endpoint that sends and receives JSON.
from sagemaker.serializers import JSONSerializer
from sagemaker.deserializers import JSONDeserializer

print("Deploying DeepAR model...", "This will take 5-10 minutes.", sep="\n")

# Endpoint settings: a single ml.m4.xlarge instance with JSON (de)serialisers.
endpoint_settings = dict(
    initial_instance_count=1,
    instance_type='ml.m4.xlarge',
    serializer=JSONSerializer(),
    deserializer=JSONDeserializer(),
)
predictor = estimator.deploy(**endpoint_settings)

print(f"\n✅ Model deployed to endpoint: {predictor.endpoint_name}")

## 10. Make Predictions

In [None]:
# Ask for the mean plus the 10th/50th/90th percentiles, from 100 samples.
forecast_configuration = {
    "num_samples": 100,
    "output_types": ["mean", "quantiles"],
    "quantiles": ["0.1", "0.5", "0.9"],
}

# The training series is sent as the only instance in the request.
request_data = {"instances": [train_json], "configuration": forecast_configuration}

print("Making predictions...")
results = predictor.predict(request_data)
print("✅ Predictions complete")

In [None]:
# Only one series was submitted, so read the first prediction entry.
predictions = results['predictions'][0]
mean_forecast = np.array(predictions['mean'])

# Pull the three requested quantile paths out as arrays.
quantile_10, quantile_50, quantile_90 = (
    np.array(predictions['quantiles'][level]) for level in ('0.1', '0.5', '0.9')
)

print(
    f"Forecast length: {len(mean_forecast)}",
    f"Mean forecast range: {mean_forecast.min():.2f} to {mean_forecast.max():.2f}",
    sep="\n",
)

## 11. Evaluate Model Performance

In [None]:
# Held-out values: the final prediction_length observations of the series.
actual_test = ts_data[-prediction_length:]

from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

# Point-forecast accuracy of the mean forecast against the held-out values.
mse = mean_squared_error(actual_test, mean_forecast)
rmse = np.sqrt(mse)
mae = mean_absolute_error(actual_test, mean_forecast)
r2 = r2_score(actual_test, mean_forecast)

# MAPE: mean absolute error relative to the actual value, as a percentage.
forecast_error = actual_test - mean_forecast
mape = np.mean(np.abs(forecast_error / actual_test)) * 100

# Emit the metrics report in a single call, one line per item.
print(
    "="*60,
    "AWS SageMaker DeepAR - Model Performance",
    "="*60,
    f"Mean Squared Error (MSE):     {mse:,.2f}",
    f"Root Mean Squared Error (RMSE): {rmse:,.2f} MW",
    f"Mean Absolute Error (MAE):     {mae:,.2f} MW",
    f"Mean Absolute % Error (MAPE):  {mape:.2f}%",
    f"R² Score:                      {r2:.4f}",
    "="*60,
    sep="\n",
)

## 12. Visualize Forecast

In [None]:
# Timestamps for the forecast window: prediction_length half-hourly steps
# starting at the first held-out index entry.
forecast_dates = pd.date_range(
    df.index[-prediction_length], periods=prediction_length, freq='30min'
)

plt.figure(figsize=(15, 6))

# History shown ahead of the forecast window: 1440 half-hours = 30 days.
historical_window = 1440
history = slice(-historical_window - prediction_length, -prediction_length)
plt.plot(df.index[history], ts_data[history], label='Historical', color='blue', alpha=0.7)

# Held-out actuals and the mean forecast are drawn over the same dates.
plt.plot(forecast_dates, actual_test, label='Actual', color='green', linewidth=2)
plt.plot(forecast_dates, mean_forecast, label='DeepAR Forecast (Mean)', color='red', linewidth=2)

# Shade the band between the 10th and 90th percentile forecasts.
plt.fill_between(
    forecast_dates, quantile_10, quantile_90,
    alpha=0.3, color='red', label='80% Confidence Interval',
)

# Labels, legend and layout.
plt.xlabel('Date')
plt.ylabel('Electricity Demand (MW)')
plt.title('DeepAR Electricity Demand Forecast (7-day)')
plt.legend()
plt.grid(True, alpha=0.3)
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()

## 13. Save Results for Model Comparison

In [None]:
# Save metrics to file
# A dedicated name is used here: rebinding `results` would overwrite the
# endpoint response held under that name, so re-running the prediction
# extraction cell would fail with a KeyError on 'predictions'.
results_summary = {
    'model': 'AWS SageMaker DeepAR',
    'mse': float(mse),
    'rmse': float(rmse),
    'mae': float(mae),
    'mape': float(mape),
    'r2': float(r2),
    'forecast_horizon': prediction_length,
    # 48 half-hour periods per day; derived so it tracks prediction_length.
    'forecast_horizon_days': prediction_length // 48,
    'training_samples': len(train_data),
    'test_samples': len(actual_test),
    'endpoint_name': predictor.endpoint_name
}

# Save to file
with open('aws_deepar_results.json', 'w', encoding='utf-8') as f:
    json.dump(results_summary, f, indent=2)

print("Results saved to aws_deepar_results.json")
print("\nResults:")
print(json.dumps(results_summary, indent=2))

## 14. ⚠️ IMPORTANT: Clean Up Resources

In [None]:
# Tear down the hosted endpoint so it stops accruing hourly charges.
print(f"Deleting endpoint: {predictor.endpoint_name}")
predictor.delete_endpoint()
print(
    "✅ Endpoint deleted successfully!",
    "\n⚠️ Remember to also stop your notebook instance in the SageMaker console!",
    sep="\n",
)

## Summary

This notebook:
1. ✅ Loaded UK electricity demand time series data
2. ✅ Prepared data in DeepAR JSON format
3. ✅ Uploaded to S3
4. ✅ Trained AWS SageMaker DeepAR model
5. ✅ Generated probabilistic forecasts with confidence intervals
6. ✅ Evaluated model performance
7. ✅ Saved results for comparison
8. ✅ Cleaned up resources

**Next Steps:**
- Download this notebook from SageMaker
- Save to GitHub as `07_AWS_SageMaker_Model.ipynb`
- Add results to model comparison notebook
- Stop your SageMaker notebook instance!