In [1]:
import pandas as pd
import math
import datetime
import statistics
from pyFTS.common import Util
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import r2_score
import numpy as np
import matplotlib.pyplot as plt
from pyFTS.benchmarks import Measures

In [2]:
def cal_nrmse(rmse, y):
    """Normalize an RMSE value by the range of the observed series.

    Args:
        rmse: Root-mean-square error to normalize.
        y: Non-empty iterable of observed values; its spread
            (``max(y) - min(y)``) is used as the normalization factor.

    Returns:
        ``rmse / (max(y) - min(y))``, or NaN when all values in ``y`` are
        identical, since the normalized error is undefined for a zero range.

    Raises:
        ValueError: If ``y`` is empty (propagated from ``max``).
    """
    value_range = max(y) - min(y)
    # A constant series has zero range; dividing by it would raise
    # ZeroDivisionError for plain Python numbers, so report NaN instead.
    if value_range == 0:
        return float("nan")
    return rmse / value_range

## Dataset: Appliance Energy

In [3]:
# Read the energy CSV and keep the 'Appliances' column as the target series.
# NOTE(review): the path is an absolute, machine-specific location; consider
# moving it to a configurable data directory.
data = pd.read_csv(
    '/home/hugo/projetos-doutorado/imp_doutorado/datasets/energydata_complete.csv',
    sep=',',
)
y_appliance = data['Appliances'].values

## Persistence method - Naive forecasting

In [4]:
# Build the lag feature: pair every observation ('t+1') with the value that
# precedes it ('t'). The first row has no predecessor, so its 't' is NaN.
var = pd.DataFrame(y_appliance)
dataframe = pd.concat(
    objs=[var.shift(periods=1), var],
    axis='columns',
)
dataframe.columns = ['t', 't+1']

In [6]:
# Create a baseline model (Naive model)
def model_persistence(x):
    """Naive (persistence) forecast: predict the value that was just observed.

    Args:
        x: The most recent observation.

    Returns:
        ``x`` itself, unchanged.
    """
    forecast_value = x
    return forecast_value

def test_persistence(test_X):
    """Run the persistence model over every lagged observation.

    Args:
        test_X: Iterable of lagged values, one per step to forecast.

    Returns:
        A list with one ``model_persistence`` forecast per element of
        ``test_X``, in the same order.
    """
    return [model_persistence(observation) for observation in test_X]

## Sliding Window (pyFTS): Persistence

In [9]:
# Per-window error metrics; every key collects one value per evaluated window.
result = {
    key: []
    for key in ("window", "rmse", "mape", "smape", "mae", "r2", "nrmse")
}

# Window length is the series length split into n_windows parts.
tam = len(dataframe)
n_windows = 30
windows_length = math.floor(tam / n_windows)

# Row 0 is left out: its lagged column 't' is NaN after the shift.
# 0.75 is the train fraction handed to Util.sliding_window.
for ct, ttrain, ttest in Util.sliding_window(dataframe[1:tam], windows_length, 0.75, inc=1):
    # Nothing to evaluate when the window yields no test rows.
    if len(ttest) == 0:
        continue

    print('-' * 20)
    print(f'training window {(ct)}')

    # Column 0 is the lagged value 't', column 1 the target 't+1'.
    train_X, train_y = ttrain.iloc[:, 0], ttrain.iloc[:, 1]
    test_X, test_y = ttest.iloc[:, 0], ttest.iloc[:, 1]

    forecast = test_persistence(test_X)

    rmse = Measures.rmse(test_y, forecast)
    mape = Measures.mape(test_y, forecast)
    smape = Measures.smape(test_y, forecast)
    mae = mean_absolute_error(test_y, forecast)
    r2 = r2_score(test_y, forecast)
    nrmse = cal_nrmse(rmse, test_y)

    # Store every metric rounded to three decimals, keyed like `result`.
    window_scores = (
        ("rmse", rmse),
        ("mape", mape),
        ("smape", smape),
        ("mae", mae),
        ("r2", r2),
        ("nrmse", nrmse),
    )
    for metric_name, metric_value in window_scores:
        result[metric_name].append(round(metric_value, 3))
    result["window"].append(ct)

measures = pd.DataFrame(result)

--------------------
training window 0
--------------------
training window 657
--------------------
training window 1314
--------------------
training window 1971
--------------------
training window 2628
--------------------
training window 3285
--------------------
training window 3942
--------------------
training window 4599
--------------------
training window 5256
--------------------
training window 5913
--------------------
training window 6570
--------------------
training window 7227
--------------------
training window 7884
--------------------
training window 8541
--------------------
training window 9198
--------------------
training window 9855
--------------------
training window 10512
--------------------
training window 11169
--------------------
training window 11826
--------------------
training window 12483
--------------------
training window 13140
--------------------
training window 13797
--------------------
training window 14454
--------------------
training w

In [10]:
# Display the per-window metrics table (one row per evaluated window).
measures

Unnamed: 0,window,rmse,mape,smape,mae,r2,nrmse
0,0,83.579,31.577,12.911,40.0,0.494,0.174
1,657,94.045,30.324,13.828,42.866,0.534,0.116
2,1314,85.917,31.463,13.637,42.439,0.742,0.104
3,1971,11.868,21.665,10.097,8.354,0.658,0.119
4,2628,71.41,26.856,11.742,31.646,0.323,0.125
5,3285,52.359,25.37,11.61,23.171,0.401,0.128
6,3942,111.043,27.215,11.983,46.951,0.295,0.152
7,4599,61.605,24.612,11.656,25.732,0.559,0.092
8,5256,47.312,22.789,10.801,20.793,0.41,0.097
9,5913,95.678,27.999,12.309,39.207,0.434,0.126


In [11]:
# Save the per-window metrics to CSV: header row written, index column omitted.
measures.to_csv(r'win_persistence_aec.csv', header=True, index=False)

### Mean Statistics Persistence:

In [13]:
# Summary over all windows: mean and sample standard deviation per metric.
# Key order fixes the column order of the resulting DataFrame.
final_result = {
    key: []
    for key in (
        "rmse", "mape", "smape", "mae", "nrmse",
        "rmse_std", "mae_std", "mape_std", "smape_std", "nrmse_std",
    )
}

# First every mean, then every standard deviation; each value is rounded to
# three decimals and stored under "<metric>" or "<metric>_std".
for suffix, aggregate in (("", statistics.mean), ("_std", statistics.stdev)):
    for metric in ("rmse", "mape", "smape", "mae", "nrmse"):
        final_result[metric + suffix].append(round(aggregate(measures[metric]), 3))

final_measures_persistence = pd.DataFrame(final_result)

print("Mean Statistics Persistence: ")
final_measures_persistence

Mean Statistics Persistence: 


Unnamed: 0,rmse,mape,smape,mae,nrmse,rmse_std,mae_std,mape_std,smape_std,nrmse_std
0,64.749,24.828,11.161,29.108,0.125,28.836,12.824,4.742,1.76,0.028
