In [1]:
import pandas as pd
import numpy as np
from numpy import linalg as LA
from matplotlib import pyplot as plt
from src import dmd

# Data

In [2]:
# Read both input tables in one pass: the historical sales table first,
# then the submission template that `submission_format` copies.
sales_train_evaluation, sample_submission = (
    pd.read_csv(csv_path)
    for csv_path in ('data/sales_train_evaluation.csv', 'data/sample_submission.csv')
)

# Functions

In [3]:
def submission_format(predicted_sales_validation, predicted_sales_evaluation, num_forecasts=28):
    """Fill a copy of the sample submission with the given forecasts.

    The validation forecasts are stacked on top of the evaluation forecasts
    (row-wise) and written into the ``num_forecasts`` columns that directly
    follow the first column of the module-level ``sample_submission`` frame.

    Parameters
    ----------
    predicted_sales_validation, predicted_sales_evaluation : array-like
        2-D forecasts that are concatenated along axis 0. Presumably each has
        ``num_forecasts`` columns and their combined row count equals the
        number of rows of ``sample_submission`` -- the assignment raises
        otherwise.
    num_forecasts : int, default 28
        Number of forecast columns to overwrite. The default reproduces the
        previously hard-coded horizon, so existing two-argument calls behave
        exactly as before.

    Returns
    -------
    pandas.DataFrame
        A new frame; ``sample_submission`` itself is left untouched because
        only its copy is written to.
    """
    submission = sample_submission.copy()
    stacked_forecasts = np.concatenate(
        (predicted_sales_validation, predicted_sales_evaluation), axis=0
    )
    # Column 0 is skipped; the next `num_forecasts` columns receive the values.
    forecast_columns = submission.columns[1:num_forecasts + 1]
    submission[forecast_columns] = stacked_forecasts
    return submission

# Forecasting

In [4]:
# Settings for the forecasting experiment.
rank = 10                   # passed to dmd.forecast as its rank argument
num_forecasts = 28          # length of the forecast horizon, in columns
num_days_evaluation = 1941  # number of trailing columns considered

# Training window: the trailing `num_days_evaluation` columns with the final
# `num_forecasts` columns removed.
training_columns = slice(-num_days_evaluation, -num_forecasts)
sales_validation = sales_train_evaluation.iloc[:, training_columns].to_numpy()

In [None]:
# CUR-based forecast; entries below zero are overwritten with zero in place.
predicted_sales_cur_dmd = dmd.forecast(sales_validation, rank, num_forecasts, 'cur')
negative_cur = predicted_sales_cur_dmd < 0
predicted_sales_cur_dmd[negative_cur] = 0

# SVD-based forecast, post-processed the same way.
predicted_sales_svd_dmd = dmd.forecast(sales_validation, rank, num_forecasts, 'svd')
negative_svd = predicted_sales_svd_dmd < 0
predicted_sales_svd_dmd[negative_svd] = 0

# Error Analysis

In [None]:
# Ground truth: the final `num_forecasts` columns of the sales table,
# converted to a NumPy array for the error computations.
held_out_columns = sales_train_evaluation.iloc[:, -num_forecasts:]
true_sales = held_out_columns.to_numpy()

In [None]:
# Relative error of the CUR forecast: norm of the residual divided by the
# norm of the actual values (LA.norm with its default arguments).
residual_cur_dmd = true_sales - predicted_sales_cur_dmd
relative_error_cur_dmd = LA.norm(residual_cur_dmd) / LA.norm(true_sales)
print(f"The relative error for the CUR DMD is {relative_error_cur_dmd}.")

In [None]:
# Relative error of the SVD forecast: norm of the residual divided by the
# norm of the actual values (LA.norm with its default arguments).
residual_svd_dmd = true_sales - predicted_sales_svd_dmd
relative_error_svd_dmd = LA.norm(residual_svd_dmd) / LA.norm(true_sales)
print(f"The relative error for the SVD DMD is {relative_error_svd_dmd}.")

In [None]:
# Display the dimensions of the ground-truth array.
np.shape(true_sales)

# Plots

In [None]:
# Compare both DMD forecasts with the actual values over the forecast horizon.
# Every curve is the column-wise sum (axis=0), i.e. the total over all rows
# for each forecast day.
forecast_days = np.arange(1, num_forecasts + 1)

fig, ax = plt.subplots()
ax.plot(forecast_days, predicted_sales_svd_dmd.sum(axis=0), label="predicted by SVD DMD")
ax.plot(forecast_days, predicted_sales_cur_dmd.sum(axis=0), label="predicted by CUR DMD")
ax.plot(forecast_days, true_sales.sum(axis=0), label="actual")
# Title and axis labels let the figure stand alone when the notebook is skimmed.
ax.set(
    title="Summed sales over the forecast horizon",
    xlabel="Forecast day",
    ylabel="Sales summed over all rows",
)
ax.legend()
plt.show()