In [1]:
import datetime
import math
import os
import statistics

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from pyFTS.benchmarks import Measures
from pyFTS.common import Util
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import r2_score

## Dataset: Appliance Energy

In [2]:
# Location of the appliance-energy CSV. The original machine-specific path is
# kept as the default so existing runs are unaffected; set the ENERGY_DATA_CSV
# environment variable to point the notebook at a copy stored elsewhere.
DATASET_PATH = os.environ.get(
    'ENERGY_DATA_CSV',
    '/home/hugo/projetos-doutorado/imp_doutorado/datasets/energydata_complete.csv',
)
data = pd.read_csv(DATASET_PATH, sep=',')

# Target series for the forecasting experiments: the 'Appliances' column as an array.
y_appliance = data['Appliances'].values

## Persistence method - Naive forecasting

In [3]:
# Build the lagged frame used by the persistence baseline: column 't' holds the
# previous observation (first row is NaN after the shift) and 't+1' holds the
# current observation.
var = pd.DataFrame(y_appliance)
dataframe = pd.DataFrame({'t': var[0].shift(1), 't+1': var[0]})

In [4]:
# Create a baseline model (Naive model)
def model_persistence(x):
    """Naive (persistence) model: the forecast is the input value itself.

    Args:
        x: The observation passed in by the caller.

    Returns:
        The same object ``x``, unchanged.
    """
    return x

def test_persistence(test_X):
    """Apply the persistence model to every element of ``test_X``.

    Args:
        test_X: Iterable of input observations.

    Returns:
        list: One forecast per element of ``test_X``, in iteration order.
    """
    return [model_persistence(observation) for observation in test_X]

## Sliding-window evaluation (pyFTS): Persistence

In [5]:
# Per-window accumulator: one list per column of the final `measures` table.
result = {column: [] for column in ("window", "rmse", "mape", "smape", "mae", "r2")}

# Window length chosen so the series divides into `n_windows` windows.
tam = len(dataframe)
n_windows = 30
windows_length = tam // n_windows

# Row 0 is left out because its lagged column 't' is NaN after shift(1).
lagged_rows = dataframe.iloc[1:tam]

# NOTE(review): 0.75 is presumably the train fraction of each window and inc=1
# a step of one full window — confirm against the pyFTS sliding_window docs.
for ct, ttrain, ttest in Util.sliding_window(lagged_rows, windows_length, 0.75, inc=1):
    # Nothing to score when this window has no test rows.
    if len(ttest) == 0:
        continue

    print('-' * 20)
    print(f'training window {(ct)}')

    # Column 0 is 't' (previous observation), column 1 is 't+1' (target).
    # The training split is extracted but not used by the persistence forecast.
    train_X, train_y = ttrain.iloc[:, 0], ttrain.iloc[:, 1]
    test_X, test_y = ttest.iloc[:, 0], ttest.iloc[:, 1]

    forecast = test_persistence(test_X)

    # Error metrics of the forecast against the observed targets.
    rmse = Measures.rmse(test_y, forecast)
    mape = Measures.mape(test_y, forecast)
    smape = Measures.smape(test_y, forecast)
    mae = mean_absolute_error(test_y, forecast)
    r2 = r2_score(test_y, forecast)

    # Record this window's scores under the matching column.
    for column, value in (
        ("rmse", rmse),
        ("mape", mape),
        ("smape", smape),
        ("mae", mae),
        ("r2", r2),
        ("window", ct),
    ):
        result[column].append(value)

measures = pd.DataFrame(result)

--------------------
training window 0
--------------------
training window 657
--------------------
training window 1314
--------------------
training window 1971
--------------------
training window 2628
--------------------
training window 3285
--------------------
training window 3942
--------------------
training window 4599
--------------------
training window 5256
--------------------
training window 5913
--------------------
training window 6570
--------------------
training window 7227
--------------------
training window 7884
--------------------
training window 8541
--------------------
training window 9198
--------------------
training window 9855
--------------------
training window 10512
--------------------
training window 11169
--------------------
training window 11826
--------------------
training window 12483
--------------------
training window 13140
--------------------
training window 13797
--------------------
training window 14454
--------------------
training w

In [6]:
# Display the per-window error metrics collected by the sliding-window loop.
measures

Unnamed: 0,window,rmse,mape,smape,mae,r2
0,0,83.578501,31.577383,12.910968,40.0,0.494281
1,657,94.045267,30.324414,13.827792,42.865854,0.533654
2,1314,85.916863,31.462692,13.637417,42.439024,0.742491
3,1971,11.868178,21.664819,10.09731,8.353659,0.658185
4,2628,71.410015,26.855716,11.742493,31.646341,0.323434
5,3285,52.358986,25.370156,11.609699,23.170732,0.401356
6,3942,111.04273,27.215359,11.983,46.95122,0.295036
7,4599,61.604561,24.611911,11.655565,25.731707,0.558625
8,5256,47.311887,22.788527,10.801119,20.792683,0.409725
9,5913,95.67794,27.998723,12.309146,39.207317,0.434133


### Mean statistics: Persistence

In [7]:
# Average every error metric over all evaluated windows; each value is wrapped
# in a one-element list so the summary becomes a single-row table.
summary_metrics = ("rmse", "mape", "smape", "mae", "r2")
final_result = {
    metric: [statistics.mean(measures[metric])] for metric in summary_metrics
}

final_measures_persistence = pd.DataFrame(final_result)

print("Mean Statistics Persistence: ")
final_measures_persistence

Mean Statistics Persistence: 


Unnamed: 0,rmse,mape,smape,mae,r2
0,64.749228,24.828392,11.160746,29.107724,0.40926
