In [None]:
# !pip uninstall plotly pandas protobuf==3.20.0 tensorflow==2.6.2 scikit-learn numpy
# !pip install plotly pandas scikit-learn numpy statsmodels

In [None]:
import os

import numpy as np
import pandas as pd
import plotly.express as px
import statsmodels.api as sm
from sklearn.metrics import mean_absolute_error
from sklearn.model_selection import TimeSeriesSplit

from data_util_common import canopy_dataset

In [None]:
if __name__ == '__main__':
    # Walk-forward evaluation of a SARIMAX model on the canopy dataset.
    #
    # The target column and one exogenous column are cut into consecutive,
    # non-overlapping windows of T samples. TimeSeriesSplit then yields
    # expanding groups of training windows followed by a group of test
    # windows; one model is fitted per fold, the test span is forecast
    # out-of-sample, and the per-fold MAE is saved and shown as a box plot.

    # Window length in samples; also used as the seasonal period `s` below.
    # NOTE(review): 24 * 7 presumably means one week of hourly samples --
    # confirm against the sampling rate of canopy_dataset().
    T = 24 * 7
    N_SPLITS = 12
    EXOG_COLUMN = 'Irradiance_1_Wm2'
    OUTPUT_PATH = 'sarima/sarima_cavl_mae.npy'

    # Model orders do not depend on the fold, so they are defined once.
    order = (2, 0, 2)  # (p, d, q)
    seasonal_order = (1, 0, 1, T)  # (P, D, Q, s)

    # The date/time values returned by the loader are not needed here.
    data, _date, _time, label = canopy_dataset()
    print(data.columns)

    # Column 0 is the target series, column 1 the exogenous regressor.
    values = data[[label, EXOG_COLUMN]].to_numpy()

    # Consecutive full windows of T rows. The "+ 1" keeps the final window
    # when the series length is an exact multiple of T; a trailing partial
    # window is still discarded.
    X = [values[i:i + T] for i in range(0, len(values) - T + 1, T)]

    # Make sure the output directory exists before the expensive fits start,
    # so a missing folder cannot discard the results at the very end.
    os.makedirs(os.path.dirname(OUTPUT_PATH), exist_ok=True)

    mae = []

    tscv = TimeSeriesSplit(gap=0, n_splits=N_SPLITS)
    for i, (train_index, test_index) in enumerate(tscv.split(X)):
        print(f"Fold {i}:")

        # Stitch the selected windows back into contiguous (rows, 2) arrays.
        train_data = np.concatenate([X[t] for t in train_index])
        test_data = np.concatenate([X[t] for t in test_index])

        print(f"TRAIN - LEN {len(train_data)}")

        print(f"TRAIN SIZE {len(train_data)} TEST SIZE {len(test_data)}")
        train, exogenous_train = train_data[:, 0], train_data[:, 1]
        test, exogenous_test = test_data[:, 0], test_data[:, 1]

        model = sm.tsa.SARIMAX(endog=train,
                               exog=exogenous_train,
                               order=order,
                               seasonal_order=seasonal_order,
                               initialization='approximate_diffuse')
        # `method` selects the optimizer and is an argument of fit(), not of
        # the SARIMAX constructor; 'cg' requests conjugate gradient.
        sarima = model.fit(method='cg', low_memory=True)

        # Out-of-sample forecast covering exactly the test span; the
        # exogenous values for that span must be supplied explicitly.
        predictions = sarima.predict(start=len(train),
                                     end=len(train) + len(test) - 1,
                                     exog=exogenous_test)
        print(sarima.summary())

        mae.append(mean_absolute_error(test, predictions))

    print("MAE: {}".format(np.mean(mae)))
    with open(OUTPUT_PATH, 'wb') as f:
        np.save(f, mae)

    fig = px.box(np.asarray(mae))
    fig.show()