In [86]:
from EAExperiment import EAExperiment
import pandas as pd
import functools
from skfeature.function.statistical_based import CFS
import numpy as np
from metrics import evaluate
import math
import pprint
from sklearn.tree import DecisionTreeRegressor

In [62]:
def create_propositional_table_dataframe(df, w, target, h):
    """Return an empty DataFrame that only carries the propositional-table columns.

    The column layout is:

    * every column of ``df`` suffixed with ``_lag{k}``, for ``k`` running from
      ``w - 1`` down to ``0`` (oldest lag first);
    * for each name in ``target``, ``{name}_ahead1`` .. ``{name}_ahead{h}``.

    Parameters
    ----------
    df : pandas.DataFrame
        Source frame; only its column names are read.
    w : int
        Number of lagged rows.
    target : iterable of str
        Names used as prefixes for the "ahead" columns.
    h : int
        Number of steps ahead per target.
    """
    source_names = df.columns

    # Lag blocks are emitted from the oldest lag to the most recent one.
    lag_labels = [
        name + "_lag" + str(lag)
        for lag in range(w - 1, -1, -1)
        for name in source_names
    ]

    # One group of h "ahead" labels per target, in the order given.
    ahead_labels = [
        name + "_ahead" + str(step)
        for name in target
        for step in range(1, h + 1)
    ]

    return pd.DataFrame(columns=lag_labels + ahead_labels)
    
def create_propositional_table(df, w, h, target):
    """Turn a time-ordered frame into a sliding-window (propositional) table.

    Each output row ``i`` holds the ``w`` consecutive rows of ``df`` starting at
    position ``i`` flattened row by row (the ``_lag`` columns, oldest first),
    followed, for every name in ``target``, by the next ``h`` values of that
    column (the ``_ahead`` columns). The row is labelled with the index value
    of the last row of its window.

    Parameters
    ----------
    df : pandas.DataFrame
        Source data, one row per time step.
    w : int
        Window length (number of lagged rows); must be at least 1.
    h : int
        Number of future steps appended per target; must not be negative.
    target : iterable of str
        Column names of ``df`` to forecast.

    Returns
    -------
    pandas.DataFrame
        ``len(df) - w - h + 1`` rows (none if that is not positive) and
        ``w * len(df.columns) + h * len(target)`` columns.

    Raises
    ------
    ValueError
        If ``w < 1`` or ``h < 0``.
    """
    if w < 1 or h < 0:
        raise ValueError("w must be >= 1 and h must be >= 0")

    columns = [
        name + "_lag" + str(lag)
        for lag in range(w - 1, -1, -1)
        for name in df.columns
    ]
    columns.extend(
        name + "_ahead" + str(step)
        for name in target
        for step in range(1, h + 1)
    )

    # Pull the raw arrays once instead of slicing the frame on every iteration.
    values = df.values
    target_values = [df[name].values for name in target]

    rows = []
    indexes = []
    for start in range(len(df) - w - h + 1):
        stop = start + w
        parts = [values[start:stop].ravel()]  # row-major flatten of the window
        parts.extend(series[stop:stop + h] for series in target_values)
        rows.append(np.concatenate(parts))
        indexes.append(df.index[stop - 1])

    # Build the frame in one go: assigning rows one at a time with .loc is
    # quadratic and can leave the columns with object dtype.
    data = np.vstack(rows) if rows else np.empty((0, len(columns)))
    return pd.DataFrame(data, columns=columns, index=indexes)

In [157]:
# Baseline regressor. random_state is pinned so that ties between equally good
# splits are broken the same way on every run (reproducible Restart & Run All).
regrt_model = DecisionTreeRegressor(max_depth=5, random_state=0)

# Registry of the models to evaluate: display name -> estimator instance.
models = {
    "decision_tree_regressor": regrt_model
}

In [181]:
def calculate_metrics(y_test, y_pred, target, model_name, metrics = ('mae', 'mape', 'rmse','mse')):
    """Build a metrics table for one model, with an extra row holding the column means.

    Parameters
    ----------
    y_test : pandas.DataFrame
        True values; its column names label the rows of the result.
    y_pred : array-like
        Predictions, forwarded unchanged to ``evaluate``.
    target : str
        Used to label the mean row as ``"<target>_mean"``.
    model_name : str
        First level of the resulting two-level row index.
    metrics : sequence of str
        Metric names forwarded to ``evaluate``.

    Returns
    -------
    pandas.DataFrame
        Rows indexed by ``(model_name, horizon column)`` plus a final
        ``(model_name, "<target>_mean")`` row.

    Notes
    -----
    Assumes ``evaluate`` yields one row per column of ``y_test`` on a default
    0..n-1 index -- TODO confirm against ``metrics.evaluate``.
    """
    horizons = y_test.columns.values
    # Derive the sizes from the data instead of assuming exactly six horizons.
    n_horizons = len(horizons)

    row_labels = np.append(horizons, target + "_mean")
    index = [np.array([model_name] * (n_horizons + 1)), row_labels]

    # A distinct local name avoids shadowing the ``metrics`` argument.
    metrics_frame = pd.DataFrame(evaluate(y_test, y_pred, metrics=metrics))
    metrics_frame.loc[n_horizons] = metrics_frame.values.mean(axis=0)

    return metrics_frame.set_index(index)

In [182]:
def execute_baseline(df_propositional_table, models, target, train_split=0.75, horizon=None):
    """Fit each model on a chronological split and collect metrics and predictions.

    Parameters
    ----------
    df_propositional_table : pandas.DataFrame
        Table whose trailing ``horizon`` columns are the values to predict and
        whose remaining columns are the features (the layout produced by
        ``create_propositional_table`` for a single target).
    models : dict
        Maps a model name to an estimator exposing ``fit(X, y)`` and ``predict(X)``.
    target : str
        Name of the forecast variable; forwarded to ``calculate_metrics`` and
        used to infer ``horizon`` when it is not given.
    train_split : float
        Fraction of the rows, taken from the top, used for training.
    horizon : int, optional
        Number of trailing target columns. When omitted it is the number of
        columns named ``"<target>_ahead..."``; if there are none, 6 is used.

    Returns
    -------
    tuple
        ``(train_metrics, test_metrics, results)``. ``results`` has columns
        indexed by (Model, Type, Horizon), with Type being "Real" or "Pred".
        All three are ``None`` when ``models`` is empty.
    """
    split_at = int(df_propositional_table.shape[0] * train_split)

    if horizon is None:
        prefix = target + "_ahead"
        horizon = sum(
            1 for column in df_propositional_table.columns
            if str(column).startswith(prefix)
        )
        if horizon == 0:
            horizon = 6  # legacy default when the columns do not follow the naming scheme

    features = df_propositional_table.iloc[:, :-horizon]
    targets = df_propositional_table.iloc[:, -horizon:]

    X_train, X_test = features.iloc[:split_at], features.iloc[split_at:]
    y_train, y_test = targets.iloc[:split_at], targets.iloc[split_at:]

    train_parts, test_parts, result_parts = [], [], []
    for name, model in models.items():
        model.fit(X_train, y_train)

        # Predict with the model being iterated, not a module-level estimator.
        test_pred = model.predict(X_test)
        train_pred = model.predict(X_train)

        test_parts.append(calculate_metrics(y_test, test_pred, target, name))
        train_parts.append(calculate_metrics(y_train, train_pred, target, name))

        # Give the raw prediction array the same labels as the true values so
        # the two can be placed side by side.
        pred_frame = pd.DataFrame(test_pred, index=y_test.index, columns=y_test.columns)
        per_model = pd.concat(
            {"Real": y_test, "Pred": pred_frame}, axis=1, names=["Type", "Horizon"]
        )
        # Label by the model's own name so several models stay distinguishable.
        result_parts.append(
            pd.concat({name: per_model}, axis=1, names=["Model", "Type", "Horizon"])
        )

    if not result_parts:
        return None, None, None

    train_metrics_global = pd.concat(train_parts)
    test_metrics_global = pd.concat(test_parts)
    results_global = pd.concat(result_parts, axis=1, join='inner')

    return train_metrics_global, test_metrics_global, results_global
            

In [183]:
def execute(df, window, horizon, targets, models, train_split=0.75):
    """Run the baseline pipeline once per target and write the outputs to disk.

    For every name in ``targets`` a propositional table is built from ``df``
    using ``window`` lagged rows and ``horizon`` steps ahead, the models are
    evaluated with ``execute_baseline`` and three files are written:
    ``metrics/train_metrics_<target>``, ``metrics/test_metrics_<target>`` and
    ``results/results_<target>``.

    Parameters
    ----------
    df : pandas.DataFrame
        Source time series, one row per time step.
    window : int
        Number of lagged rows per example.
    horizon : int
        Number of steps ahead to forecast.
    targets : iterable of str
        Columns of ``df`` to forecast, each handled independently.
    models : dict
        Maps a model name to an estimator.
    train_split : float
        Fraction of rows used for training.

    Notes
    -----
    Nothing here creates the ``metrics/`` and ``results/`` directories.
    NOTE(review): ``execute_baseline`` and ``calculate_metrics`` must cope with
    the chosen ``horizon`` -- verify they do not assume a fixed number of
    target columns.
    """
    for target in targets:
        # Use the caller's window/horizon rather than fixed values.
        df_propositional = create_propositional_table(df, window, horizon, [target])
        train_metrics, test_metrics, results = execute_baseline(
            df_propositional, models, target, train_split
        )

        train_metrics.to_csv("metrics/train_metrics_" + target)
        test_metrics.to_csv("metrics/test_metrics_" + target)
        # The results file gets the predictions table, not the train metrics.
        results.to_csv("results/results_" + target)

In [184]:
# Load the raw series and index it by the PERIODO column in a single expression.
df = pd.read_csv('data.csv').set_index("PERIODO")

In [185]:
# Run the baseline for each weight series: 12 lagged rows, 6 steps ahead.
execute(
    df,
    window=12,
    horizon=6,
    targets=["PESO_7", "PESO_8", "PESO_20"],
    models=models,
)