In [28]:
#!pip install -U aeon
#!pip install aeon[all_extras]
# Import order is kept as in the original cell: names imported after the
# star import intentionally take precedence over anything it exports.
import ast
import warnings
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from aeon.visualisation import plot_series
from sklearn.metrics import mean_absolute_percentage_error as mape
from all_functions import *
import os
from aeon.clustering.averaging import elastic_barycenter_average
from sklearn.neighbors import KNeighborsRegressor
from statsmodels.tsa.holtwinters import ExponentialSmoothing
from sklearn.linear_model import LinearRegression
from statsmodels.tsa.seasonal import seasonal_decompose
from VotingCombination import VotingCombination
from sklearn.svm import SVR
from tslearn.barycenters import \
    euclidean_barycenter, \
    dtw_barycenter_averaging, \
    dtw_barycenter_averaging_subgradient, \
    softdtw_barycenter
warnings.filterwarnings("ignore")
%matplotlib inline

def get_predictions_csv(path, format, start_index):
    """Load the stored forecast and parameter string for one ``DATA`` value.

    Reads the ``;``-separated CSV at ``path``, keeps the rows whose ``DATA``
    column equals ``format`` and flattens their ``P1`` .. ``P12`` columns into
    a single series.

    Parameters
    ----------
    path : str or path-like
        CSV file to read.
    format : str
        Value to match against the ``DATA`` column.
    start_index : index-like
        Index assigned to the returned series; its length must equal the
        number of prediction values found.

    Returns
    -------
    tuple
        ``(results, params)`` where ``results`` is a ``pd.Series`` of the
        predictions indexed by ``start_index`` and ``params`` is the raw
        ``PARAMS`` cell of the first matching row.

    Raises
    ------
    ValueError
        If the number of prediction values found (for example none, when no
        row matches, or too many, when several rows match) differs from
        ``len(start_index)``.
    """
    df = pd.read_csv(path, sep=";")
    matching_rows = df[df['DATA'] == format]

    # Every matching row contributes its P1..P12 values, in row order.
    values_list = matching_rows.loc[:, 'P1':'P12'].values.flatten().tolist()

    # pandas would raise a generic length-mismatch ValueError here; raise the
    # same exception type with enough context to locate the offending file.
    if len(values_list) != len(start_index):
        raise ValueError(
            f"{path}: found {len(values_list)} prediction value(s) in "
            f"{len(matching_rows)} row(s) with DATA == {format!r}, "
            f"but the index has {len(start_index)} entries"
        )

    results = pd.Series(values_list, index=start_index)
    params = matching_rows['PARAMS'].iloc[0]

    return results, params

def get_params_model(caminho_arquivo, transformation):
    """Return the parameter dictionary stored for one ``DATA`` value.

    Reads the ``;``-separated CSV at ``caminho_arquivo``, selects the rows
    whose ``DATA`` column equals ``transformation`` and parses the ``PARAMS``
    cell of the first such row with ``ast.literal_eval``.

    Parameters
    ----------
    caminho_arquivo : str or path-like
        CSV file to read.
    transformation : str
        Value to match against the ``DATA`` column.

    Returns
    -------
    object
        The Python literal encoded in the ``PARAMS`` cell (a dict in the
        files this notebook writes — TODO confirm for other producers).

    Raises
    ------
    IndexError
        If no row has ``DATA == transformation``. The exception type matches
        what the positional lookup raised before; only the message is new.
    """
    df = pd.read_csv(caminho_arquivo, sep=';')
    df_filtrado = df[df['DATA'] == transformation]

    if df_filtrado.empty:
        raise IndexError(
            f"{caminho_arquivo}: no row with DATA == {transformation!r}"
        )

    # literal_eval only accepts Python literals, so the cell cannot run code.
    params_dict = ast.literal_eval(df_filtrado['PARAMS'].iloc[0])

    return params_dict

def convert_to_list(series_str):
    """Evaluate ``series_str`` as a Python expression and return the result.

    NOTE(review): this relies on ``eval``, which executes arbitrary code. If
    the strings come from files that are not fully trusted, and they only
    ever contain plain literals, ``ast.literal_eval`` would be the safer
    parser — confirm the stored format before switching.
    """
    parsed = eval(series_str)
    return parsed

def get_preds_hybrid(path, test_date, start_index):
    """Load the stored forecast for one test range as a series.

    Reads the ``;``-separated CSV at ``path``, keeps the rows whose
    ``test_range`` column equals ``test_date`` and flattens their ``P1`` ..
    ``P12`` columns into a single series.

    Parameters
    ----------
    path : str or path-like
        CSV file to read.
    test_date : str
        Value to match against the ``test_range`` column.
    start_index : index-like
        Index assigned to the returned series; its length must equal the
        number of prediction values found.

    Returns
    -------
    pd.Series
        The prediction values indexed by ``start_index``.

    Raises
    ------
    ValueError
        If the number of prediction values found (for example none, when no
        row matches, or too many, when several rows match) differs from
        ``len(start_index)``.
    """
    df = pd.read_csv(path, sep=";")
    matching_rows = df[df['test_range'] == test_date]

    # Every matching row contributes its P1..P12 values, in row order.
    values_list = matching_rows.loc[:, 'P1':'P12'].values.flatten().tolist()

    # pandas would raise a generic length-mismatch ValueError here; raise the
    # same exception type with enough context to locate the offending file.
    if len(values_list) != len(start_index):
        raise ValueError(
            f"{path}: found {len(values_list)} prediction value(s) in "
            f"{len(matching_rows)} row(s) with test_range == {test_date!r}, "
            f"but the index has {len(start_index)} entries"
        )

    return pd.Series(values_list, index=start_index)

def save_csv(nome, csv_file, uf, derivado, models, series, test, preds, horizon=12, window=12):
    """Score a forecast and append the result as one row to ``csv_file``.

    The row is appended with ``;`` as separator and without a header, so the
    file is expected to already contain a matching header line.

    Parameters
    ----------
    nome : str
        Label written to the ``DATA`` column.
    csv_file : str or path-like
        Destination file, opened in append mode.
    uf, derivado : str
        Written to the ``UF`` and ``PRODUCT`` columns.
    models : iterable of str
        Joined with ``_`` to form the ``MODEL`` column.
    series : pd.Series
        Full series; ``series[-2 * horizon:-horizon]`` is handed to ``mase``
        as its baseline argument.
    test, preds : array-like
        Actual and predicted values passed to the metric helpers. ``preds``
        must hold at least 12 values: exactly ``P1`` .. ``P12`` are written,
        independently of ``horizon``.
    horizon, window : int
        Written to the ``HORIZON`` and ``WINDOW`` columns; ``horizon`` also
        selects the baseline slice described above.
    """
    y_baseline = series[-horizon*2:-horizon].values
    rmse_result = rmse(test, preds)
    mape_result = mape(test, preds)
    pocid_result = pocid(test, preds)
    pbe_result = pbe(test, preds)
    # NOTE(review): the MCPM value is computed but never written to the row;
    # the call is kept so behavior is unchanged — confirm whether it should
    # become a column or be dropped.
    mcpm_result = mcpm(rmse_result, mape_result, pocid_result)
    mase_result = mase(test, preds, y_baseline)

    # Integer keys on a label-indexed Series (e.g. a date index) only work
    # through pandas' deprecated positional fallback, so read the values
    # positionally instead. Lists and arrays are indexed as before.
    pred_values = preds.to_numpy() if isinstance(preds, pd.Series) else preds

    row = {
        'DATA': nome, 'UF': uf, 'PRODUCT': derivado, 'MODEL': '_'.join(models),
        'PARAMS': str({}), 'WINDOW': window, 'HORIZON': horizon,
        'RMSE': rmse_result, 'MAPE': mape_result, 'POCID': pocid_result,
        'PBE': pbe_result, 'MASE': mase_result,
    }
    for step in range(1, 13):
        row[f'P{step}'] = pred_values[step - 1]

    df_result = pd.DataFrame(row, index=[0])
    df_result.to_csv(csv_file, sep=';', mode='a', header=False, index=False)

In [29]:
# ---------------------------------------------------------------------------
# Experiment configuration: which products, transformations and base models
# are combined, and where inputs and outputs live.
# ---------------------------------------------------------------------------
derivados = ["gasolinac"]
transformations = ["normal", "deseasonal", "log"]
models = ["knn", "rf", "deepar", "svr", "catboost", "arima"]

# When True, the "<model>_noresid" result folders are combined as well.
noResid = False

# Value looked up in the `test_range` column of the per-model result files.
test_date = '2023-03_2024-02'

# Root folder for the combined-forecast CSV files.
results_file = './combination'

horizon = window = 12

# Input folders scanned for series files; the other products are disabled.
dirs = [
    '../datasets/venda/mensal/uf/gasolinac/',
    # '../datasets/venda/mensal/uf/etanolhidratado/',
    # '../datasets/venda/mensal/uf/glp/',
    # '../datasets/venda/mensal/uf/oleodiesel/',
    # '../datasets/venda/mensal/uf/querosenedeaviacao/',
]

# Header of the output CSV: run metadata, error metrics, then P1..P12.
colunas = [
    'DATA', 'UF', 'PRODUCT', 'MODEL', 'PARAMS', 'WINDOW', 'HORIZON',
    'RMSE', 'MAPE', 'POCID', 'PBE', 'MASE',
] + [f'P{step}' for step in range(1, 13)]
# For every series file in the configured folders: load the stored forecasts
# of each model/transformation pair, combine them with six different methods
# and append one scored row per method to the output CSV for that file.
#
# NOTE(review): rows are appended and the header is only written when the
# file does not exist yet, so re-running this cell adds duplicate rows.
for directory in dirs:
    for file in os.listdir(directory) :
        if file.endswith('.csv'):
            # The file name is split on "_": the second token (upper-cased)
            # is used as `uf`, the third token up to its first "." as `derivado`.
            uf = file.split("_")[1].upper()
            derivado = file.split("_")[2].split(".")[0]
            full_path = os.path.join(directory, file)
            series = read_series(full_path)
            # Only the second return value is used; presumably the hold-out
            # part of the series — TODO confirm in train_test_stats.
            _, test = train_test_stats(series, horizon)

            # The same forecasts are kept in three layouts, one per consumer:
            #   predictions      -> dict of Series for VotingCombination
            #   preds_barycenter -> list of 1-D arrays for the tslearn barycenters
            #   preds_elastic    -> list of (1, 1, n) arrays, stacked later for aeon
            predictions = {}
            preds_elastic = []
            preds_barycenter = []
            
            for model in models:
                for transform in transformations:
                    series_preds = get_preds_hybrid(f'./results_hybrid/{model}/{derivado}/{transform}/transform_{uf}.csv', test_date, test.index)
                    predictions[f'{model}_{transform}'] = series_preds
                    preds_elastic.append(np.array([[series_preds.values]]))
                    preds_barycenter.append(series_preds.values)

                    # Optionally add the forecasts stored under "<model>_noresid".
                    if noResid:
                        series_noresid = get_preds_hybrid(f'./results_hybrid/{model}_noresid/{derivado}/{transform}/transform_{uf}.csv', test_date, test.index)
                        predictions[f'{model}_noresid_{transform}'] = series_noresid
                        preds_elastic.append(np.array([[series_noresid.values]]))
                        preds_barycenter.append(series_noresid.values)


            # 1) Median combination.
            voting = VotingCombination(predictions, combination='median')
            preds_median = voting.predict()

            # Create the output folder and, on first use, a CSV holding only the
            # header; save_csv appends rows without writing a header itself.
            path_derivado = results_file + f'/{derivado}'
            csv_file = path_derivado + f"/transform_{uf.upper()}.csv"
            os.makedirs(path_derivado, exist_ok=True)
            if not os.path.exists(csv_file):
                pd.DataFrame(columns=colunas).to_csv(csv_file, sep=';', index=False)
                
            # `models` is passed unchanged for every method, so the MODEL column
            # is identical across rows; the method is identified by the first argument.
            save_csv("median",csv_file, uf, derivado, models, series, test, preds_median, horizon, window)

            # 2) Mean combination.
            voting = VotingCombination(predictions, combination='mean')
            preds_mean = voting.predict()
            save_csv("mean",csv_file, uf, derivado, models, series, test, preds_mean, horizon, window)

            # 3) Soft-DTW barycenter; the result is flattened and re-indexed by the test index.
            preds_softdtw = softdtw_barycenter(preds_barycenter, max_iter=35, gamma=0.01)
            preds_softdtw = pd.Series(preds_softdtw.flatten().tolist(), index=test.index)
            save_csv("softdtw_barycenter",csv_file, uf, derivado, models, series, test, preds_softdtw, horizon, window)

            # 4) DTW barycenter averaging, subgradient variant.
            preds_dtw_subgradient = dtw_barycenter_averaging_subgradient(preds_barycenter, max_iter=35)
            preds_dtw_subgradient = pd.Series(preds_dtw_subgradient.flatten().tolist(), index=test.index)
            save_csv("DBA_subgradient", csv_file, uf, derivado, models, series, test, preds_dtw_subgradient, horizon, window)

            # 5) DTW barycenter averaging.
            preds_dtw_avg = dtw_barycenter_averaging(preds_barycenter, max_iter=35)
            preds_dtw_avg = pd.Series(preds_dtw_avg.flatten().tolist(), index=test.index)
            save_csv("DBA", csv_file, uf, derivado, models, series, test, preds_dtw_avg, horizon, window)

            # 6) Elastic barycenter average with the "twe" distance.
            # vstack turns the list of (1, 1, n) arrays into one (k, 1, n) array.
            # NOTE(review): confirm that `reach` is a keyword the "twe" distance accepts.
            capt = np.vstack(preds_elastic)
            elastic = elastic_barycenter_average(capt, distance="twe", reach=15)
            # Only the first row of the result is used — assumes it has shape (1, n); TODO confirm.
            # NOTE(review): `preds_elastic` is rebound here from the list of arrays to a Series.
            preds_elastic = pd.Series(elastic[0].tolist(), index=test.index)
            save_csv("EBA_twe", csv_file, uf, derivado, models, series, test, preds_elastic, horizon, window)
