In [3]:
import pandas as pd
import math
import datetime
import statistics
from pyFTS.common import Util
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import r2_score
import numpy as np
import matplotlib.pyplot as plt
from pyFTS.benchmarks import Measures

In [4]:
def clean_dataset(df):
    """Return a float64 copy of ``df`` without rows holding NaN or +/-inf.

    Parameters
    ----------
    df : pd.DataFrame
        Frame whose values can all be cast to ``np.float64``.

    Returns
    -------
    pd.DataFrame
        The surviving rows (original index labels preserved), cast to
        ``np.float64``. The input frame itself is left untouched.

    Raises
    ------
    AssertionError
        If ``df`` is not a ``pd.DataFrame``.
    """
    assert isinstance(df, pd.DataFrame)
    # Work on a filtered copy instead of dropna(inplace=True) so the caller's
    # frame is not silently modified.
    without_nan = df.dropna()
    # `axis` must be passed by keyword: pandas 2.0 made it keyword-only, so
    # the former `.any(1)` raises TypeError there.
    has_non_finite = without_nan.isin([np.nan, np.inf, -np.inf]).any(axis=1)
    return without_nan[~has_non_finite].astype(np.float64)

## Dataset: Household Power Consumption

In [5]:
# Load the semicolon-separated household power CSV, discard the two timestamp
# columns, keep every 30th row (original note: 30 minutes), and drop incomplete rows.
filepath = '/home/hugo/projetos-doutorado/Appliance-Energy-Prediction/data/household_power_consumption.csv'
raw = pd.read_csv(filepath, sep=";")
subsampled = raw.drop(columns=['Time', 'Date']).loc[0::30].dropna()

# clean_dataset also removes +/-inf rows and casts every column to float64.
data = clean_dataset(subsampled)

# Target series for the forecasting experiments, as a plain NumPy array.
y_appliance = data['Global_active_power'].to_numpy()

  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,


## Persistence method - Naive forecasting

In [6]:
# Lag feature: pair every observation ('t+1') with the one before it ('t').
var = pd.DataFrame(y_appliance)
current = var.iloc[:, 0]
# shift(1) moves values down one row, so the first 't' entry is NaN.
dataframe = pd.DataFrame({'t': current.shift(1), 't+1': current})

In [8]:
# Show the lagged frame; row 0 has no 't' value because shift(1) leaves
# the first row without a predecessor.
dataframe

Unnamed: 0,t,t+1
0,,4.216
1,4.216,2.720
2,2.720,3.452
3,3.452,4.298
4,4.298,3.262
...,...,...
68303,1.716,1.556
68304,1.556,1.120
68305,1.120,1.468
68306,1.468,1.082


In [7]:
# Baseline (naive) model
def model_persistence(x):
    """Persistence forecast: hand back the input value unchanged."""
    return x

def test_persistence(test_X):
    """Apply ``model_persistence`` to each element of ``test_X``.

    Returns a new list holding one forecast per input element, in order.
    """
    return [model_persistence(observation) for observation in test_X]

## Sliding Window (pyFTS): Persistence

In [9]:
# One list per reported column; every evaluated window appends one entry to each.
result = {column: [] for column in ("window", "rmse", "mape", "smape", "mae", "r2")}

tam = len(dataframe)
n_windows = 30
windows_length = tam // n_windows

# Row 0 is excluded from the evaluation (its lagged 't' value is NaN).
# NOTE(review): 0.75 is presumably the train share of each window - confirm
# against pyFTS Util.sliding_window.
for ct, ttrain, ttest in Util.sliding_window(dataframe[1:tam], windows_length, 0.75, inc=1):
    if len(ttest) == 0:
        continue

    print('-' * 20)
    print(f'training window {(ct)}')

    # Column 0 is 't' (input), column 1 is 't+1' (target). The training slices
    # are unpacked here but not used by the persistence forecast below.
    train_X, train_y = ttrain.iloc[:, 0], ttrain.iloc[:, 1]
    test_X, test_y = ttest.iloc[:, 0], ttest.iloc[:, 1]

    forecast = test_persistence(test_X)

    rmse = Measures.rmse(test_y, forecast)
    mape = Measures.mape(test_y, forecast)
    smape = Measures.smape(test_y, forecast)
    mae = mean_absolute_error(test_y, forecast)
    r2 = r2_score(test_y, forecast)

    window_scores = (
        ("window", ct),
        ("rmse", rmse),
        ("mape", mape),
        ("smape", smape),
        ("mae", mae),
        ("r2", r2),
    )
    for metric_name, metric_value in window_scores:
        result[metric_name].append(metric_value)

measures = pd.DataFrame(result)

--------------------
training window 0
--------------------
training window 2276
--------------------
training window 4552
--------------------
training window 6828
--------------------
training window 9104
--------------------
training window 11380
--------------------
training window 13656
--------------------
training window 15932
--------------------
training window 18208
--------------------
training window 20484
--------------------
training window 22760
--------------------
training window 25036
--------------------
training window 27312
--------------------
training window 29588
--------------------
training window 31864
--------------------
training window 34140
--------------------
training window 36416
--------------------
training window 38692
--------------------
training window 40968
--------------------
training window 43244
--------------------
training window 45520
--------------------
training window 47796
--------------------
training window 50072
-------------------

In [10]:
# Per-window error metrics: one row per evaluated window, keyed by the
# 'window' value printed during the loop.
measures

Unnamed: 0,window,rmse,mape,smape,mae,r2
0,0,1.173253,63.832047,21.851777,0.715174,0.215967
1,2276,1.09707,56.162451,20.805278,0.638647,0.252628
2,4552,0.911143,80.697309,26.758724,0.532366,0.170269
3,6828,0.855796,86.786904,28.997663,0.443613,0.171121
4,9104,0.659613,62.948013,20.000334,0.309694,0.321457
5,11380,0.971584,74.072213,25.069597,0.557627,0.205047
6,13656,1.11765,58.117336,20.848932,0.636766,0.200619
7,15932,1.079677,51.232242,18.788346,0.666896,0.384027
8,18208,0.959426,55.898488,20.936509,0.558538,0.274738
9,20484,1.009275,59.799137,21.541467,0.571599,0.132012


### Mean Statistics Persistence:

In [11]:
# Average each metric over all evaluated windows; each value is wrapped in a
# one-element list so the resulting frame has a single row.
final_result = {
    metric: [statistics.mean(measures[metric])]
    for metric in ("rmse", "mape", "smape", "mae", "r2")
}

final_measures_persistence = pd.DataFrame(final_result)

print("Mean Statistics Persistence: ")
final_measures_persistence

Mean Statistics Persistence: 


Unnamed: 0,rmse,mape,smape,mae,r2
0,0.898652,63.793864,22.511609,0.514393,0.068228
