In [49]:
from statsmodels.tsa.statespace.sarimax import SARIMAX
from sklearn.metrics import mean_absolute_error, mean_squared_error, mean_absolute_percentage_error, r2_score
from sklearn.model_selection import TimeSeriesSplit
import warnings 
import numpy as np
import pandas as pd
import itertools
from rich.progress import Progress
from rich.console import Console
from itertools import product

# Suppress warnings
warnings.filterwarnings("ignore")

console = Console()

In [50]:
# Hyperparameter Grid Search for SARIMAX
def sarimax_grid_search(train, param_grid):
    best_score, best_params = float("inf"), None
    for params in param_grid:
        order, seasonal_order = params[:3], (params[3], params[4], params[5], params[6])

        model = SARIMAX(train, order=order, seasonal_order=seasonal_order, enforce_stationarity=False, enforce_invertibility=False)
        results = model.fit(disp=False)
        score = results.aic  # Using AIC as evaluation metric
        if score < best_score:
            best_score, best_params = score, params
    return best_params

# Walk-Forward Validation
def walk_forward_validation(series, param_grid):
    train_size = int(len(series) * 0.8)
    train, test = series[:train_size], series[train_size:]
    history = list(train)
    predictions = []

    best_params = sarimax_grid_search(train, param_grid)
    order, seasonal_order = best_params[:3], (best_params[3], best_params[4], best_params[5], best_params[6])
    
    for t in range(len(test)):
        model = SARIMAX(history, order=order, seasonal_order=seasonal_order, enforce_stationarity=False, enforce_invertibility=False)
        results = model.fit(disp=False)
        yhat = results.forecast(steps=1)[0]
        predictions.append(yhat)
        history.append(test.iloc[t])

    mae = mean_absolute_error(test, predictions)
    mse = mean_squared_error(test, predictions)
    r2 = r2_score(test, predictions)
    
    print(f"Walk-Forward Validation MAE: {mae:.3f}")
    print(f"Walk-Forward Validation MSE: {mse:.3f}")
    print(f"Walk-Forward Validation R2 Score: {r2:.3f}")
    
    return mae, mse, r2

# Naive Forecast Benchmark
def naive_forecast(series):
    return series.shift(1)  # Predicts the last value as the next value

def evaluate_naive(series):
    train_size = int(len(series) * 0.8)
    train, test = series[:train_size], series[train_size:]
    naive_preds = naive_forecast(test)
    naive_mae = mean_absolute_error(test[1:], naive_preds[1:])  # Skip NaN value
    naive_mse = mean_squared_error(test[1:], naive_preds[1:])
    naive_r2 = r2_score(test[1:], naive_preds[1:])
    
    print(f"Naive Forecast MAE: {naive_mae:.3f}")
    print(f"Naive Forecast MSE: {naive_mse:.3f}")
    print(f"Naive Forecast R2 Score: {naive_r2:.3f}")
    
    return naive_mae, naive_mse, naive_r2





In [51]:
# Re-load the dataset (user needs to re-upload the file)
df = pd.read_csv(f"../data/Top_12_German_Companies_Financial_Data.csv") # Load the dataset

# Filter dataset for SAP SE only
sap_df = df[df["Company"] == "SAP SE"].copy()

# Convert Period column to datetime format
sap_df["Period"] = pd.to_datetime(sap_df["Period"], format="%m/%d/%Y")

# Sort data by Period for proper time-series analysis
sap_df = sap_df.sort_values(by="Period")

# Convert revenue to numeric
sap_df["Revenue"] = pd.to_numeric(sap_df["Revenue"], errors="coerce")
sap_df= sap_df[["Revenue", 'Period']]


In [52]:
# Convert 'Year' column to datetime if it's not already
sap_df['ds'] = pd.to_datetime(sap_df['Period'], format='%Y')
#sap_df['unique_id'] = "all"
sap_df = sap_df.rename(columns={"Revenue": "y"}) 
# sap_df = sap_df[["unique_id", "ds", "y"]]
sap_df = sap_df[[ "ds", "y"]] 
sap_df = sap_df.set_index("ds")
sap_df

Unnamed: 0_level_0,y
ds,Unnamed: 1_level_1
2017-03-31,6568715630
2017-06-30,6644029236
2017-09-30,18227487487
2017-12-31,17560385805
2018-03-31,19774628627
2018-06-30,5282345417
2018-09-30,8133295785
2018-12-31,10184334320
2019-03-31,18300753695
2019-06-30,13878289168


In [53]:
# Define hyperparameter grid
p_values = [0, 1, 2]
d_values = [0, 1]
q_values = [0, 1, 2]
P_values = [0, 1]
D_values = [0, 1]
Q_values = [0, 1]
sp_values = [4]
param_grid = list(product(p_values, d_values, q_values, P_values, D_values, Q_values, sp_values))

walk_forward_validation(sap_df, param_grid)
# time_series_cv(sap_df, param_grid)
evaluate_naive(sap_df)

TypeError: ufunc 'isnan' not supported for the input types, and the inputs could not be safely coerced to any supported types according to the casting rule ''safe''