In [1]:
import pandas as pd
import datetime 
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import os 
from datetime import datetime
from astral.sun import sun
from astral import Observer
from astral import LocationInfo
from astral.location import Location

In [2]:
# Remember the directory the kernel is running in; the CSV inputs are resolved against it
current_directory = os.getcwd()

# Print floats in fixed-point notation (no scientific notation) with 5 decimal places,
# for NumPy arrays as well as pandas objects
np.set_printoptions(precision=5, suppress=True)
pd.set_option("display.float_format", "{:.5f}".format)

# Silence every warning for the rest of the session.
# NOTE(review): this also hides deprecation / convergence warnings from the modelling libraries.
import warnings
warnings.simplefilter("ignore")

In [3]:
# Feature matrix: the first CSV column is used as the row index
X = pd.read_csv(os.path.join(current_directory, 'X.csv'), index_col=0)

# Target frame, read the same way; `file_path` ends up pointing at y.csv
file_path = os.path.join(current_directory, 'y.csv')
y = pd.read_csv(file_path, index_col=0)

In [4]:
# Preview the feature matrix (the rendered output is truncated to the first/last rows and columns)
X

Unnamed: 0,ghi,temp,wind,year,month,day,hour,is_monday,is_tuesday,is_wednesday,...,wind_lag_23,wind_lag_24,wind_diff,ghi_x_temp,ghi_x_wind,temp_x_wind,Temperature_Index,CDD,HDD,wind_power_density
0,733.01000,15.20000,0.76000,2022,4,13,10,0,0,1,...,0.00000,0.00000,0.00000,11141.75200,557.08760,11.55200,6.80000,0.00000,6.80000,0.26887
1,677.00000,16.13000,1.10000,2022,4,13,11,0,0,1,...,0.00000,0.00000,0.34000,10920.01000,744.70000,17.74300,5.87000,0.00000,12.67000,0.81524
2,651.00000,17.11000,1.45000,2022,4,13,12,0,0,1,...,0.00000,0.00000,0.35000,11138.61000,943.95000,24.80950,4.89000,0.00000,17.56000,1.86728
3,475.00000,17.69000,1.38000,2022,4,13,13,0,0,1,...,0.00000,0.00000,-0.07000,8402.75000,655.50000,24.41220,4.31000,0.00000,21.87000,1.60969
4,584.00000,17.95000,1.24000,2022,4,13,14,0,0,1,...,0.00000,0.00000,-0.14000,10482.80000,724.16000,22.25800,4.05000,0.00000,25.92000,1.16781
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
19095,0.00000,11.24000,1.20000,2024,6,17,1,1,0,0,...,1.08000,0.88000,-0.02000,0.00000,0.00000,13.48800,10.76000,5737.47000,202614.07000,1.05840
19096,0.00000,10.98000,1.21000,2024,6,17,2,1,0,0,...,1.02000,1.08000,0.01000,0.00000,0.00000,13.28580,11.02000,5737.47000,202625.09000,1.08508
19097,0.00000,11.09000,1.18000,2024,6,17,3,1,0,0,...,0.88000,1.02000,-0.03000,0.00000,0.00000,13.08620,10.91000,5737.47000,202636.00000,1.00636
19098,41.32000,12.11000,0.97000,2024,6,17,4,1,0,0,...,1.17000,0.88000,-0.21000,500.38520,40.08040,11.74670,9.89000,5737.47000,202645.89000,0.55901


In [5]:
# Preview the target frame.
# NOTE(review): every power_consumption value rendered below (0.76, 1.10, 1.45, 1.38, 1.24, ...,
# 1.20, 1.21, 1.18, 0.97) is identical to the `wind` column of X in the same rows. Verify that
# y.csv was exported from the intended column before trusting any model trained on it.
y

Unnamed: 0,power_consumption
0,0.76000
1,1.10000
2,1.45000
3,1.38000
4,1.24000
...,...
19095,1.20000
19096,1.21000
19097,1.18000
19098,0.97000


In [None]:
from pmdarima import auto_arima
import pandas as pd
import numpy as np
from statsmodels.tsa.statespace.sarimax import SARIMAX
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt

# Example data: simulated hourly power consumption and temperature.
# The legacy global seed is kept so the generated values stay exactly the same between runs.
np.random.seed(42)
# "h" is the current pandas alias for hourly frequency (the upper-case "H" spelling is deprecated)
date_range = pd.date_range(start="2023-01-01", periods=1000, freq="h")
power_consumption = 500 + 10 * np.sin(np.linspace(0, 10 * np.pi, 1000)) + np.random.normal(scale=5, size=1000)
temperature = 20 + 5 * np.cos(np.linspace(0, 10 * np.pi, 1000)) + np.random.normal(scale=2, size=1000)

# Build the frame and index it by timestamp in one expression (no in-place mutation,
# so re-running the cell always starts from a fresh frame)
df = pd.DataFrame(
    {"timestamp": date_range, "power_consumption": power_consumption, "temperature": temperature}
).set_index("timestamp")

# Step 1: chronological train/test split — first 80% for fitting, remaining 20% held out
train_size = int(len(df) * 0.8)
train, test = df.iloc[:train_size], df.iloc[train_size:]

# Step 2: Auto-ARIMA for Parameter Selection
def determine_best_parameters(train, exog_train, seasonal_period=24):
    """
    Automatically determine the best SARIMAX parameters using auto_arima.

    Parameters
    ----------
    train : array-like
        Target series the search is run on.
    exog_train : array-like or None
        Exogenous regressors aligned with ``train``. A 1-d input (e.g. a single
        pandas Series) is promoted to a one-column frame, because pmdarima
        documents the regressors as a 2-d ``(n_obs, n_vars)`` structure.
    seasonal_period : int, default 24
        Number of time steps in one seasonal cycle (``m`` in auto_arima).

    Returns
    -------
    tuple
        ``(order, seasonal_order)`` of the model selected by the stepwise search.
    """
    # Promote a single regressor to 2-d while keeping its index/column name
    if exog_train is not None and getattr(exog_train, "ndim", 2) == 1:
        exog_train = pd.DataFrame(exog_train)

    model = auto_arima(
        train,  # Target variable
        X=exog_train,  # Exogenous variables (current pmdarima keyword; formerly `exogenous`)
        seasonal=True,  # Enable seasonal ARIMA
        m=seasonal_period,  # Number of time steps in a seasonal period
        stepwise=True,  # Use stepwise search to reduce computation
        suppress_warnings=True,  # Suppress warnings
        trace=True  # Print progress
    )
    print(f"Best order: {model.order}")
    print(f"Best seasonal order: {model.seasonal_order}")
    return model.order, model.seasonal_order

# Run the automatic search and keep its suggestion under its own names, so it stays
# available for inspection instead of being overwritten by the manual choice below
auto_order, auto_seasonal_order = determine_best_parameters(
    train["power_consumption"],
    exog_train=train["temperature"]
)

# Manual override: the model is fitted with these hand-picked orders,
# not with the auto_arima suggestion stored above
order = (1, 1, 1)
seasonal_order = (1, 1, 1, 24)

# Step 3: Fit SARIMAX Model
def train_sarimax(train, exog_train, order, seasonal_order):
    """
    Fit a SARIMAX model on ``train`` and return the fitted results object.

    ``exog_train`` supplies the exogenous regressors, ``order`` and
    ``seasonal_order`` the (seasonal) ARIMA orders. Stationarity and
    invertibility are not enforced, and optimizer output is silenced.
    """
    sarimax_options = {
        "exog": exog_train,
        "order": order,
        "seasonal_order": seasonal_order,
        "enforce_stationarity": False,
        "enforce_invertibility": False,
    }
    return SARIMAX(train, **sarimax_options).fit(disp=False)

# Fit SARIMAX on the training split using the current `order` / `seasonal_order` values,
# with temperature as the exogenous regressor
fitted_model = train_sarimax(
    train["power_consumption"],
    train["temperature"],
    order,
    seasonal_order,
)

# Step 4: Rolling Forecast
def rolling_forecast(fitted_model, test, exog_test, forecast_horizon=24):
    """
    Generate rolling day-ahead forecasts with overlapping windows.

    Window ``i`` forecasts ``forecast_horizon`` steps starting at ``test.index[i]``.
    Before moving on to the next window the model state is advanced by the
    observation that has just become available (``append(..., refit=False)``:
    the data is extended, the parameters are not re-estimated). Without that
    step every call to ``forecast`` starts from the end of the training sample,
    so all windows would predict the same timestamps while receiving
    exogenous values from different ones.

    Each forecast is labelled with the timestamps it targets, and forecasts that
    target the same timestamp are averaged.

    Parameters
    ----------
    fitted_model : fitted results object
        Must provide ``forecast(steps=..., exog=...)`` and
        ``append(endog, exog=..., refit=False)`` (statsmodels state-space results do).
    test : pandas.Series
        Held-out target values; its index supplies the forecast timestamps.
    exog_test : pandas.Series / pandas.DataFrame or None
        Exogenous regressors aligned row-for-row with ``test``.
    forecast_horizon : int, default 24
        Number of steps predicted per window.

    Returns
    -------
    pandas.Series
        Mean forecast per target timestamp, indexed by the timestamps of ``test``
        covered by at least one window. Empty when ``test`` is shorter than
        ``forecast_horizon``.
    """
    n_windows = len(test) - forecast_horizon + 1
    if n_windows <= 0:
        # Not enough held-out data for a single full window
        return pd.Series(dtype=float)

    model_state = fitted_model
    window_forecasts = []

    for start in range(n_windows):  # Overlapping windows, shifted by one step
        stop = start + forecast_horizon
        exog_window = None if exog_test is None else exog_test.iloc[start:stop]

        forecast = model_state.forecast(steps=forecast_horizon, exog=exog_window)
        # Label the values with the timestamps this window actually targets
        window_forecasts.append(
            pd.Series(pd.Series(forecast).to_numpy(), index=test.index[start:stop])
        )

        if start + 1 < n_windows:
            # Reveal the next observation to the model before forecasting the next window
            exog_step = None if exog_test is None else exog_test.iloc[start:start + 1]
            model_state = model_state.append(
                test.iloc[start:start + 1], exog=exog_step, refit=False
            )

    # Average all forecasts that refer to the same timestamp
    aggregated_predictions = pd.concat(window_forecasts).groupby(level=0).mean()

    return aggregated_predictions

# Rolling forecast over the held-out period
forecast_horizon = 24  # hours predicted per window
predictions = rolling_forecast(
    fitted_model,
    test["power_consumption"],
    test["temperature"],
    forecast_horizon,
)

# Compare against the first len(predictions) held-out observations (positional slice)
actual = test["power_consumption"].iloc[:len(predictions)]
mse = mean_squared_error(actual, predictions)
print(f"Mean Squared Error: {mse}")

# Step 5: actual vs. predicted on a shared time axis
plt.figure(figsize=(12, 6))
plt.plot(actual.index, actual, label="Actual")
plt.plot(actual.index, predictions, linestyle="--", label="Predicted")
plt.title("Day-Ahead Power Consumption Forecast")
plt.xlabel("Time")
plt.ylabel("Power Consumption")
plt.grid()
plt.legend()
plt.show()


Performing stepwise search to minimize aic
 ARIMA(2,1,2)(1,0,1)[24] intercept   : AIC=4945.708, Time=2.67 sec
 ARIMA(0,1,0)(0,0,0)[24] intercept   : AIC=5388.222, Time=0.03 sec
 ARIMA(1,1,0)(1,0,0)[24] intercept   : AIC=5152.417, Time=0.51 sec
 ARIMA(0,1,1)(0,0,1)[24] intercept   : AIC=4944.001, Time=0.58 sec
 ARIMA(0,1,0)(0,0,0)[24]             : AIC=5386.222, Time=0.02 sec
 ARIMA(0,1,1)(0,0,0)[24] intercept   : AIC=4942.623, Time=0.06 sec
 ARIMA(0,1,1)(1,0,0)[24] intercept   : AIC=4943.976, Time=0.49 sec
 ARIMA(0,1,1)(1,0,1)[24] intercept   : AIC=4945.905, Time=1.45 sec
 ARIMA(1,1,1)(0,0,0)[24] intercept   : AIC=4942.380, Time=0.10 sec
