We propose a small framework for evaluating forecasts on time series data.

In [2]:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt



In [63]:
class Evaluator:

    data = None

    def get_data_wind(self,points_for_predicting=20, nb_samples=100):
        data_wind = pd.read_csv("wind_data_fr_2021.csv", sep=";")
        data = pd.DataFrame(columns = range(nb_samples), index= range(points_for_predicting+1))
        n = points_for_predicting+1
        self.data = data.apply(lambda c: data_wind["Eolien (MW)"].iloc[(x:=np.random.randint(0, 17520-n)): x+n], result_type="broadcast")
        

    def get_data_sine(self, points_for_predicting=20, a=1, nb_samples=100):
        data = pd.DataFrame(columns = range(nb_samples), index= range(points_for_predicting+1))
        self.data = data.apply(lambda x: [np.sin(100*np.random.rand() + a*phi) for phi in (np.linspace(0,1,num=points_for_predicting+1))], axis=0)

    def __init__(self, nb_samples=100, seed=13, eval_mode = "sine"):
        
        if eval_mode not in ["sine", "wind"]:
            raise TypeError("Please provide as 'eval_mode' argument either 'sine' or 'wind'.")
        np.random.seed(seed)

        if eval_mode == "sine":
            self.get_data_sine(points_for_predicting=20, nb_samples=nb_samples)
        else:
            self.get_data_wind(points_for_predicting=20, nb_samples=nb_samples)

    def get_points_for_predicting(self):
        return self.data.iloc[:20]

    def get_labels(self):
        return self.data.iloc[20]
    
    def eval_rmse(self, forecasts: np.array ):
        ''' 
            evaluate the root mean squared error of the given forecasts
         '''
        rmse = sum(np.square(forecasts - self.data.iloc[-1].to_numpy()))
        return np.sqrt(rmse)

    def eval_rel_err(self, forecasts: np.array ):
        ''' 
            evaluate the relative error of the given forecasts
         '''
        realized = self.data.iloc[-1].to_numpy()
        rel_err = np.mean(np.abs((forecasts - realized)/realized))
        return rel_err

In [73]:
### test evaluators with stupid forecasts that take the last value as forecast
wind_eval = Evaluator(eval_mode="wind")
points_training = wind_eval.data.iloc[:-1] ## all points of each column except the last one can be used to forecast this last one

stupid_forecasts = points_training.iloc[-1].to_numpy()

print("RMSE of stupid forecasts : {err:.2f}".format(err=wind_eval.eval_rmse(stupid_forecasts)))
print("relative error of stupid forecasts : {err:.2f}%.".format(err= wind_eval.eval_rel_err(stupid_forecasts)*100))

RMSE of stupid forecasts : 2124.56
relative error of stupid forecasts : 5.80%.
