In [1]:
!pip install pydmd

Collecting pydmd
  Downloading pydmd-2025.8.1-py3-none-any.whl.metadata (22 kB)
Downloading pydmd-2025.8.1-py3-none-any.whl (152 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m152.7/152.7 kB[0m [31m7.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pydmd
Successfully installed pydmd-2025.8.1


In [3]:
import warnings
warnings.filterwarnings("ignore")

In [4]:
import numpy as np
import pandas as pd
from pydmd import HODMD
from sklearn.metrics import mean_squared_error

# Read and process the new August dataset
data = pd.read_csv('DEMAND_08_NSW_2004.csv')
data['SETTLEMENTDATE'] = pd.to_datetime(data['SETTLEMENTDATE'])
data = data.sort_values('SETTLEMENTDATE')
data.set_index('SETTLEMENTDATE', inplace=True)

# Choose the prediction target day (any date between 2004-08-09 and 2004-08-31 should work)
target_day = pd.to_datetime("2004-08-22")  # Example: predicting for August 22

# Define the 4 input days
day_minus_1 = target_day - pd.Timedelta(days=1)
day_minus_2 = target_day - pd.Timedelta(days=2)
week_same_day = target_day - pd.Timedelta(days=7)
week_day_before = target_day - pd.Timedelta(days=8)

# Extract 48 half-hourly data points from each of the four days
def get_day_values(day):
    try:
        return data.loc[day.strftime('%Y-%m-%d'), 'TOTALDEMAND'].values
    except KeyError:
        print(f"Missing data for {day.strftime('%Y-%m-%d')}")
        return np.zeros(48)

training_days = [day_minus_2, day_minus_1, week_day_before, week_same_day]
snapshots = np.concatenate([get_day_values(day) for day in training_days])

# Define error metrics
def calculate_mape(actual, predicted):
    return np.mean(np.abs((actual - predicted) / actual)) * 100

def evaluate_d(d):
    d = int(d)
    hodmd = HODMD(svd_rank=0, exact=True, opt=True, d=d).fit(snapshots[None])

    future_time_steps = 48  # Predict next day
    hodmd.dmd_time["tend"] = snapshots.shape[0] + future_time_steps

    snapshots_actual = get_day_values(target_day)
    dmd_predictions = hodmd.reconstructed_data[0].real[-future_time_steps:]

    mae = np.mean(np.abs(snapshots_actual - dmd_predictions))
    mse = mean_squared_error(snapshots_actual, dmd_predictions)
    rmse = np.sqrt(mse)
    mape = calculate_mape(snapshots_actual, dmd_predictions)

    return mae, rmse, mape

# Search for best d
coarse_range = range(10, 192, 50)
best_d = min(coarse_range, key=lambda d: evaluate_d(d)[1])

fine_range = range(max(1, best_d - 20), min(192, best_d + 20), 5)
best_d = min(fine_range, key=lambda d: evaluate_d(d)[1])

final_range = range(max(1, best_d - 5), min(192, best_d + 5), 2)
best_d = min(final_range, key=lambda d: evaluate_d(d)[1])

# Final evaluation
best_mae, best_rmse, best_mape = evaluate_d(best_d)
print(f"Optimal d: {best_d}, RMSE: {best_rmse}, MAE: {best_mae}, MAPE: {best_mape}")


Optimal d: 41, RMSE: 379.84814410928345, MAE: 305.38540511585524, MAPE: 3.858076832912496
