In [15]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from pmdarima import auto_arima
from sklearn.metrics import mean_absolute_error, mean_squared_error
import os

# Set plot style
plt.style.use('seaborn-v0_8-whitegrid')

In [16]:
# Load the dataset
data_path = r'C:\Users\kowsh\PycharmProjects\NLP\data\ML471_S4_Datafile_Concept.csv'
df = pd.read_csv(data_path)

# Data Preprocessing
df['Datetime'] = pd.to_datetime(df['Datetime'])
df.set_index('Datetime', inplace=True)
df = df.dropna()  # Remove missing values

# Split the data into training (80%) and testing (20%)
train_size = int(len(df) * 0.8)
train, test = df.iloc[:train_size], df.iloc[train_size:]

print(f"Total size: {len(df)}")
print(f"Train size: {len(train)}")
print(f"Test size: {len(test)}")
df.head()

Unnamed: 0_level_0,Consumption,Festivals/Special_events,Power_Consumption_diff
Datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1988-02-01,105.672,1,-1.8332
1988-03-01,97.4502,1,-8.2218
1988-04-01,92.4714,1,-4.9788
1988-05-01,90.3151,1,-2.1563
1988-06-01,93.0904,1,2.7753


In [17]:
from statsmodels.ts_api import SARIMAX

# Fit SARIMA model using specified orders
# SARIMA((1, 0, 2) × (0, 1, 1, 12))
model = SARIMAX(train['Consumption'], 
                order=(1, 0, 2), 
                seasonal_order=(0, 1, 1, 12),
                enforce_stationarity=False,
                enforce_invertibility=False)
results = model.fit(disp=False)

print(results.summary())

ModuleNotFoundError: No module named 'statsmodels.ts_api'

In [None]:
# Generate forecasts for the test period
forecast, conf_int = model.predict(n_periods=len(test), return_conf_int=True)
forecast_series = pd.Series(forecast, index=test.index)

# Visualization
plt.figure(figsize=(12, 6))
plt.plot(train.index, train['Consumption'], label='Train', color='blue')
plt.plot(test.index, test['Consumption'], label='Actual', color='orange', linestyle='--')
plt.plot(test.index, forecast_series, label='Forecast', color='green', linestyle='--')

plt.title('SARIMA Forecast of Monthly Power Consumption')
plt.xlabel('Datetime')
plt.ylabel('Consumption')
plt.legend()
plt.show()

In [None]:
# Evaluation Metrics
mae = mean_absolute_error(test['Consumption'], forecast_series)
mape = np.mean(np.abs((test['Consumption'] - forecast_series) / test['Consumption'])) * 100
rmse = np.sqrt(mean_squared_error(test['Consumption'], forecast_series))

print(f"MAE: {mae:.4f}")
print(f"MAPE: {mape:.4f}%")
print(f"RMSE: {rmse:.4f}")

In [None]:
# Residual Analysis
residuals = model.resid()

plt.figure(figsize=(12, 6))
plt.plot(residuals)
plt.title('Residuals of SARIMA Model')
plt.axhline(0, color='red', linestyle='--')
plt.show()