Import Libraries

In [9]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import matplotlib.pyplot as plt
import numpy as np
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.tsa.stattools import adfuller
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from statsmodels.stats.diagnostic import acorr_ljungbox
from statsmodels.tsa.holtwinters import ExponentialSmoothing

Clean & Filter the DataFrame

Set Date as Index

Plot Timeseries

In [2]:
def plot_timeseries_data(grain, df):
    """Draw an interactive line chart of the ``value`` column against the index.

    Parameters
    ----------
    grain : str
        Label for the sampling granularity (for example ``"Weekly"``). It is
        interpolated into the chart title and the x-axis title.
    df : pandas.DataFrame
        Frame containing a ``value`` column; ``df.index`` supplies the x values.

    Returns
    -------
    None
        The figure is rendered with ``fig.show()`` and not returned.
    """
    # Resolve the label once, outside the f-strings: re-using the enclosing
    # quote character inside an f-string replacement field
    # (f'{df['value'].name}') is a SyntaxError on Python versions before 3.12.
    series_name = df["value"].name

    fig = go.Figure()
    fig.add_trace(go.Scatter(x=df.index, y=df["value"], mode="lines"))
    fig.update_layout(
        title=f"{grain} {series_name} Time Series",
        xaxis_title=f"Date ({grain})",
        yaxis_title=f"{series_name}",
        template="plotly_dark",
    )

    return fig.show()

ACF Plot

In [4]:
def acf_plot(df, lag):
    """Plot the autocorrelation function of the ``value`` column.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing a ``value`` column.
    lag : int or array-like
        Forwarded to ``plot_acf`` as its ``lags`` argument.

    Returns
    -------
    None
        The figure is displayed with ``plt.show()``.
    """
    # Plot the same column that the title names. The previous version read
    # df['total'] while labelling the chart with df['value'].
    series = df["value"]

    # Hand plot_acf an explicit Axes so the 8x5 size applies to the ACF chart;
    # without ``ax`` plot_acf opens a figure of its own and the one created
    # here would be shown empty.
    fig, ax = plt.subplots(figsize=(8, 5))
    plot_acf(series, lags=lag, ax=ax)
    ax.set_title(f"ACF of {series.name}")
    return plt.show()

Decomposition

In [None]:
def decomposition(df, period=12):
    """Plot additive and multiplicative seasonal decompositions side by side.

    The figure is a 4x2 grid: the left column holds the additive model, the
    right column the multiplicative model; rows are the original series,
    trend, seasonal and residual components.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing a ``value`` column; ``df.index`` is used as the x axis.
    period : int, default 12
        Seasonal period forwarded to ``seasonal_decompose`` for both models.

    Returns
    -------
    None
        The figure is displayed with ``plt.show()``.
    """
    series = df["value"]

    # Fit both models up front so a failure in either one happens before any
    # figure is created.
    fits = {
        model: seasonal_decompose(series, model=model, period=period)
        for model in ("additive", "multiplicative")
    }

    fig, axes = plt.subplots(4, 2, figsize=(14, 10), sharex=True)

    for col, (model, result) in enumerate(fits.items()):
        label = model.capitalize()
        panels = (
            (series, "Original Time Series", "black"),
            (result.trend, f"{label} Trend Component", "blue"),
            (result.seasonal, f"{label} Seasonal Component", "green"),
            (result.resid, f"{label} Residual Component", "red"),
        )
        for row, (values, heading, color) in enumerate(panels):
            ax = axes[row, col]
            ax.plot(df.index, values, color=color)
            ax.set_title(heading)

    plt.tight_layout()
    return plt.show()

ADF Test

In [None]:
def adf_test(df, add_mul, period=12):
    """Run an Augmented Dickey-Fuller test on seasonal-decomposition residuals.

    The ``value`` column is decomposed with ``seasonal_decompose``; the
    residual component (with missing values dropped) is passed to
    ``adfuller`` and the test statistic and p-value are printed.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing a ``value`` column.
    add_mul : {'add', 'mul'}
        Selects the additive or the multiplicative decomposition model.
    period : int, default 12
        Seasonal period forwarded to ``seasonal_decompose``.

    Returns
    -------
    pandas.Series
        The residual component with missing values removed.

    Raises
    ------
    ValueError
        If ``add_mul`` is neither ``'add'`` nor ``'mul'``.
    """
    models = {"add": "additive", "mul": "multiplicative"}
    # Reject unknown modes explicitly; previously they fell through both
    # branches and failed later with an UnboundLocalError on adf_result.
    if add_mul not in models:
        raise ValueError(f"add_mul must be 'add' or 'mul', got {add_mul!r}")

    decomposed = seasonal_decompose(df["value"], model=models[add_mul], period=period)
    residuals = decomposed.resid.dropna()
    adf_result = adfuller(residuals)

    print(f"ADF Statistic: {adf_result[0]:.4f}")
    print(f"p-value: {adf_result[1]:.4f}")
    return residuals

ACF & PACF Plot Residuals

In [8]:
def acf_pacf_residuals(residuals, lag):
    """Show the ACF and PACF of a residual series in one two-panel figure.

    Parameters
    ----------
    residuals : array-like
        Series handed unchanged to ``plot_acf`` and ``plot_pacf``.
    lag : int or array-like
        Forwarded to both plotting functions as ``lags``.

    Returns
    -------
    None
        The figure is displayed with ``plt.show()``.
    """
    _, (acf_ax, pacf_ax) = plt.subplots(1, 2, figsize=(12, 5))

    # (plotting function, target axes, panel heading): left panel first.
    panels = (
        (plot_acf, acf_ax, "ACF of Residuals"),
        (plot_pacf, pacf_ax, "PACF of Residuals"),
    )
    for draw, target_ax, heading in panels:
        draw(residuals, lags=lag, ax=target_ax)
        # Set after drawing so this heading is the one left on the panel.
        target_ax.set_title(heading)

    plt.tight_layout()
    plt.show()

Ljung-Box Test

In [None]:
def ljung_box(residuals):
    """Print the Ljung-Box p-value of ``residuals`` at lag 20.

    Parameters
    ----------
    residuals : array-like
        Series passed to ``acorr_ljungbox``.

    Returns
    -------
    None
        The p-value and a one-line reading hint are printed.
    """
    # A single lag (20) is requested, so the result table has one row.
    lb_table = acorr_ljungbox(residuals, lags=[20], return_df=True)
    lag20_pvalue = lb_table["lb_pvalue"].values[0]

    report = (
        f"Ljung-Box p-value: {lag20_pvalue:.4f}",
        "Tests if data is whitenoise (if it is then no correllation) low p = not whitenoise",
    )
    for line in report:
        print(line)

Forecast

In [10]:
def _forecast_index(observed_index, holdout_index, steps):
    """Return ``steps`` timestamps starting at the first hold-out date."""
    if steps <= len(holdout_index):
        return holdout_index[:steps]

    # The horizon runs past the observed data, so the dates have to be
    # generated; that needs a regular frequency on the observed index.
    freq = pd.infer_freq(observed_index) if len(observed_index) >= 3 else None
    if freq is None:
        raise ValueError(
            "steps_to_forecast exceeds the hold-out window and no regular "
            "frequency could be inferred from the index to extend it"
        )
    return pd.date_range(start=holdout_index[0], periods=steps, freq=freq)


def holt_winters(
    df,
    test,
    add_mul,
    season,
    steps_to_forecast,
    steps_to_plot,
    title="Holt-Winters Forecast for Weekly Vehicle Thefts (Last 2 Years Shown)",
):
    """Fit a Holt-Winters model on all but the last ``test`` rows and plot the forecast.

    The chart shows the last ``steps_to_plot`` observations, the forecast
    starting at the first held-out date, and a constant-width band of
    +/- 1.96 standard deviations of the in-sample residuals around it.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing a ``value`` column and an index that
        ``pd.to_datetime`` can parse. The frame is not modified.
    test : int
        Number of trailing rows held out of training; must satisfy
        ``0 < test < len(df)``.
    add_mul : str
        Passed to ``ExponentialSmoothing`` as both ``trend`` and ``seasonal``.
    season : int
        Passed to ``ExponentialSmoothing`` as ``seasonal_periods``.
    steps_to_forecast : int
        Forecast horizon (at least 1). It may differ from ``test``; a horizon
        longer than the hold-out window is dated by extending the index at
        its inferred frequency.
    steps_to_plot : int
        Number of trailing observations drawn as the "actual" line.
    title : str, optional
        Chart title; the default is the title this function always used.

    Returns
    -------
    None
        The figure is rendered with ``fig.show()``.

    Raises
    ------
    ValueError
        If ``test`` or ``steps_to_forecast`` is out of range, or the horizon
        exceeds the hold-out window and the index has no inferable frequency.
    """
    # df.iloc[:-0] is empty, so test=0 would silently train on nothing.
    if not 0 < test < len(df):
        raise ValueError(
            f"test must be between 1 and len(df) - 1 ({len(df) - 1}), got {test!r}"
        )
    if steps_to_forecast < 1:
        raise ValueError(f"steps_to_forecast must be at least 1, got {steps_to_forecast!r}")

    # Convert the index on a private copy: assigning to df.index would change
    # the caller's frame as a side effect of plotting.
    series = df["value"].copy()
    series.index = pd.to_datetime(series.index)
    series_name = series.name

    # Train/test split: the last `test` observations are held out.
    train = series.iloc[:-test]
    holdout = series.iloc[-test:]

    # The same mode is used for the trend and the seasonal component.
    hw_model = ExponentialSmoothing(
        train,
        trend=add_mul,
        seasonal=add_mul,
        seasonal_periods=season,
    ).fit()

    forecast_values = hw_model.forecast(steps=steps_to_forecast)

    # Approximate 95% band from the spread of the in-sample residuals; its
    # width is the same at every forecast step.
    residuals = train - hw_model.fittedvalues
    confidence_interval = 1.96 * np.std(residuals)

    # Date the forecast from the first held-out observation onwards.
    forecast_series = pd.Series(
        np.asarray(forecast_values),
        index=_forecast_index(series.index, holdout.index, steps_to_forecast),
    )

    recent = series.iloc[-steps_to_plot:]

    fig = go.Figure()

    # 1. Actual data: blue line.
    fig.add_trace(go.Scatter(
        x=recent.index,
        y=recent,
        mode='lines',
        name=f"Actual {series_name}",
        line=dict(color='blue')
    ))

    # 2. Forecast: red dashed line.
    fig.add_trace(go.Scatter(
        x=forecast_series.index,
        y=forecast_series,
        mode='lines',
        name='Holt-Winters Forecast',
        line=dict(color='red', dash='dash')
    ))

    # 3. Band: upper edge left-to-right, then lower edge right-to-left, so the
    #    points trace a closed polygon that 'toself' can fill.
    upper = forecast_series + confidence_interval
    lower = forecast_series - confidence_interval
    fig.add_trace(go.Scatter(
        x=forecast_series.index.tolist() + forecast_series.index[::-1].tolist(),
        y=upper.tolist() + lower[::-1].tolist(),
        fill='toself',
        fillcolor='rgba(255, 0, 0, 0.2)',  # light red shading
        line=dict(color='rgba(255,255,255,0)'),
        name='95% Confidence Interval'
    ))

    fig.update_layout(
        title=title,
        xaxis_title="Date",
        yaxis_title=f"{series_name}",
        template="plotly_dark",
        xaxis=dict(
            tickformat="%b %Y",  # month and year on the x axis
            tickangle=45,
            tickmode="auto"
        ),
        yaxis=dict(showgrid=True),
        hovermode="x unified"  # one hover box listing every trace
    )

    fig.show()