In [3]:
# ============================================
# ARMA(2,2) Forecast & Residual Visualization
# ============================================

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from statsmodels.tsa.arima.model import ARIMA
from statsmodels.stats.diagnostic import acorr_ljungbox

# --------------------------------------------
# 1. Load Dataset
# --------------------------------------------
df = pd.read_csv("ML470_S3_Diabetes_Data_Preprocessed_Concept.csv")

df['Date'] = pd.to_datetime(df['Date'])
df.set_index('Date', inplace=True)

series = df['Power_Consumption_diff']

# --------------------------------------------
# 2. Train-Test Split (80-20)
# --------------------------------------------
train_size = int(len(series) * 0.8)
train = series[:train_size]
test = series[train_size:]

# --------------------------------------------
# 3. Fit ARMA(2,2)
# --------------------------------------------
model = ARIMA(train, order=(2, 0, 2))
model_fit = model.fit()

# --------------------------------------------
# 4. Forecast
# --------------------------------------------
forecast = model_fit.forecast(steps=len(test))

# --------------------------------------------
# 5. Plot 1: ARMA Forecast Plot
# --------------------------------------------
plt.figure(figsize=(12,6))

plt.plot(train.index, train, label='Train', color='blue')
plt.plot(test.index, test, label='Actual', linestyle='--', color='orange')
plt.plot(test.index, forecast, label='Forecast', linestyle='--', color='green')

plt.title("ARMA Forecast Plot")
plt.xlabel("Date")
plt.ylabel("Power Consumption (Differenced)")
plt.legend()
plt.show()

# --------------------------------------------
# 6. Plot 2: Residuals
# --------------------------------------------
residuals = model_fit.resid

plt.figure(figsize=(12,4))
plt.plot(residuals, color='blue')
plt.axhline(0, linestyle='--', color='black')

plt.title("Residuals")
plt.xlabel("Date")
plt.ylabel("Residuals")
plt.show()

# --------------------------------------------
# 7. Ljung-Box Test (Optional)
# --------------------------------------------
lb_test = acorr_ljungbox(residuals, lags=[10], return_df=True)
print(lb_test)


KeyError: 'Date'