In [5]:
import pandas as pd
import math
import datetime
import statistics
from pyFTS.common import Util
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import r2_score
import numpy as np
import matplotlib.pyplot as plt
from pyFTS.benchmarks import Measures

In [6]:
def clean_dataset(df):
    """Return a float64 copy of ``df`` without rows holding NaN or +/-inf.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to clean. It is not modified; the cleaning works on a copy.

    Returns
    -------
    pandas.DataFrame
        The surviving rows (original index labels kept), every column cast
        to ``numpy.float64``.

    Raises
    ------
    AssertionError
        If ``df`` is not a ``pandas.DataFrame``.
    """
    assert isinstance(df, pd.DataFrame)
    # Non-inplace dropna: the caller's frame must not change as a side effect.
    without_nan = df.dropna()
    # `axis` is passed by keyword; recent pandas versions reject it positionally.
    has_bad_value = without_nan.isin([np.nan, np.inf, -np.inf]).any(axis=1)
    return without_nan[~has_bad_value].astype(np.float64)

In [7]:
def cal_nrmse(rmse, y):
    """Normalise an RMSE by the range of the observed values.

    Parameters
    ----------
    rmse : number
        Root-mean-square error to normalise.
    y : iterable of numbers
        Observed values; the normaliser is ``max(y) - min(y)``.

    Returns
    -------
    number
        ``rmse / (max(y) - min(y))``. There is no guard for a constant ``y``
        (zero range); the division is performed as-is.
    """
    highest = max(y)
    lowest = min(y)
    return rmse / (highest - lowest)

## Dataset: Household Power Consumption

In [25]:
# NOTE(review): absolute, machine-specific path; the notebook only runs where
# this exact file exists. Consider a configurable data directory.
filepath = '/home/hugo/projetos-doutorado/Appliance-Energy-Prediction/data/household_power_consumption.csv'
# Treat "?" as a missing-value marker and read the file in a single pass so the
# columns are not inferred chunk-by-chunk with mixed types (presumably the
# cause of the warning emitted by the plain read -- confirm against the file).
data = pd.read_csv(filepath, sep=";", na_values="?", low_memory=False)
# The timestamp columns are not numeric and are not used by the forecasts.
data = data.drop(labels=['Time', 'Date'], axis=1)
#data = data.loc[0: : 30] # 30 minutes
# clean_dataset already drops NaN/inf rows and casts to float64, so a separate
# dropna() call here would be redundant.
data = clean_dataset(data)
# Target series as a 1-D NumPy array.
y_appliance = data['Global_active_power'].values

  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,


## Persistence method - Naive forecasting

In [26]:
# Create a lag feature
# Pair each observation with its predecessor: 't' is the series shifted down
# by one step (so its first row is NaN) and 't+1' is the series itself.
var = pd.DataFrame(y_appliance)
dataframe = pd.DataFrame({
    't': var[0].shift(1),
    't+1': var[0],
})

In [27]:
# Show the lagged frame; 't' is NaN in the first row because shift(1) has no
# earlier value to fill in.
dataframe

Unnamed: 0,t,t+1
0,,4.216
1,4.216,5.360
2,5.360,5.374
3,5.374,5.388
4,5.388,3.666
...,...,...
2049275,0.946,0.946
2049276,0.946,0.944
2049277,0.944,0.938
2049278,0.938,0.934


In [28]:
# Baseline (naive) model
def model_persistence(x):
    """Persistence forecast: the prediction is the input value itself.

    Parameters
    ----------
    x : any
        The value at step ``t``.

    Returns
    -------
    any
        ``x`` unchanged, used as the forecast for step ``t+1``.
    """
    return x

def test_persistence(test_X):
    """Apply the persistence model to every element of ``test_X``.

    Parameters
    ----------
    test_X : iterable
        Lagged values, one per forecast step.

    Returns
    -------
    list
        ``model_persistence(x)`` for each ``x``, in iteration order.
    """
    return [model_persistence(x) for x in test_X]

## Sliding Window (pyFTS): Persistence

In [29]:
# Per-window error metrics; each key becomes one column of `measures`.
result = {
    "window": [],
    "rmse": [],
    "mape": [],
    "smape": [],
    "mae": [],
    "nrmse": [],
}

tam = len(dataframe)
n_windows = 30
windows_length = tam // n_windows  # whole rows per window

# Row 0 is skipped because its 't' value is NaN (no predecessor).
# NOTE(review): 0.75 is presumably the train share of each window, and inc=1
# makes consecutive windows start one window length apart (the recorded window
# starts differ by exactly `windows_length`) -- confirm against pyFTS.
for ct, ttrain, ttest in Util.sliding_window(dataframe[1:tam], windows_length, 0.75, inc=1):
    if len(ttest) == 0:
        continue

    print('-' * 20)
    print(f'training window {ct}')

    # The persistence model needs no fitting; the train split is only unpacked.
    train_X, train_y = ttrain.iloc[:, 0], ttrain.iloc[:, 1]
    test_X, test_y = ttest.iloc[:, 0], ttest.iloc[:, 1]

    forecast = test_persistence(test_X)

    rmse = Measures.rmse(test_y, forecast)
    mape = Measures.mape(test_y, forecast)
    smape = Measures.smape(test_y, forecast)
    mae = mean_absolute_error(test_y, forecast)
    nrmse = cal_nrmse(rmse, test_y)

    # Store every metric rounded to three decimals; the window start is stored as-is.
    window_scores = {
        "rmse": rmse,
        "mape": mape,
        "smape": smape,
        "mae": mae,
        "nrmse": nrmse,
    }
    for metric_name, metric_value in window_scores.items():
        result[metric_name].append(round(metric_value, 3))
    result["window"].append(ct)

measures = pd.DataFrame(result)

--------------------
training window 0
--------------------
training window 68309
--------------------
training window 136618
--------------------
training window 204927
--------------------
training window 273236
--------------------
training window 341545
--------------------
training window 409854
--------------------
training window 478163
--------------------
training window 546472
--------------------
training window 614781
--------------------
training window 683090
--------------------
training window 751399
--------------------
training window 819708
--------------------
training window 888017
--------------------
training window 956326
--------------------
training window 1024635
--------------------
training window 1092944
--------------------
training window 1161253
--------------------
training window 1229562
--------------------
training window 1297871
--------------------
training window 1366180
--------------------
training window 1434489
--------------------
training w

In [30]:
# Per-window error metrics collected by the sliding-window evaluation.
measures

Unnamed: 0,window,rmse,mape,smape,mae,nrmse
0,0,0.393,11.197,4.303,0.141,0.05
1,68309,0.324,6.799,3.148,0.109,0.042
2,136618,0.325,10.481,4.313,0.105,0.043
3,204927,0.267,8.771,3.875,0.076,0.037
4,273236,0.193,7.48,3.505,0.054,0.026
5,341545,0.282,7.543,3.473,0.089,0.038
6,409854,0.294,6.258,2.901,0.094,0.032
7,478163,0.329,6.109,2.83,0.115,0.035
8,546472,0.256,5.818,2.712,0.081,0.038
9,614781,0.289,6.258,2.9,0.089,0.043


In [31]:
#measures.to_csv (r'win_persistence_hpc_30min.csv', index = False, header=True)

In [32]:
# Persist the per-window metrics (1-minute resolution run), header row included, no index column.
measures.to_csv('win_persistence_hpc_1min.csv', index=False, header=True)

### Mean Statistics Persistence:

In [33]:
# Aggregate the per-window metrics into one row: the mean and the sample
# standard deviation of each metric, both rounded to three decimals.
metric_names = ("rmse", "mape", "smape", "mae", "nrmse")
# The key order fixes the column order of the resulting frame.
std_columns = ("rmse_std", "mae_std", "mape_std", "smape_std", "nrmse_std")
final_result = {column: [] for column in metric_names + std_columns}

for metric_name in metric_names:
    per_window = measures[metric_name]
    final_result[metric_name].append(round(statistics.mean(per_window), 3))
    # statistics.stdev is the sample standard deviation and needs >= 2 windows.
    final_result[metric_name + "_std"].append(round(statistics.stdev(per_window), 3))

final_measures_persistence = pd.DataFrame(final_result)

print("Mean Statistics Persistence: ")
final_measures_persistence

Mean Statistics Persistence: 


Unnamed: 0,rmse,mape,smape,mae,nrmse,rmse_std,mae_std,mape_std,smape_std,nrmse_std
0,0.256,7.648,3.418,0.084,0.037,0.065,0.024,1.885,0.702,0.01


In [34]:
#final_measures_persistence.to_csv (r'persistence_hpc_30min.csv', index = False, header=True)

In [35]:
# Persist the aggregated statistics (1-minute resolution run), header row included, no index column.
final_measures_persistence.to_csv('persistence_hpc_1min.csv', index=False, header=True)