# Creating a Baseline using ENTSO-E's forecast as a feature
We create baseline models that use only the time series, with ENTSO-E's own forecasts included as features.

In [1]:
import pandas as pd
from sklearn import tree
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
import datetime 
import numpy as np
import pickle 

from utils import train_val_test_split
from utils import get_model_metrics

In [2]:
# Load the raw CSV (first column parsed as dates) and keep only the date,
# the four measured series and their four "predicted_" counterparts.
df = pd.read_csv("Complete_daily_data.csv", parse_dates=[0]).loc[
    :,
    [
        "Date",
        "System total load in MAW",
        "Wind Offshore in MAW",
        "Wind Onshore in MAW",
        "Solar in MAW",
        "predicted_Wind Offshore in MAW",
        "predicted_Wind Onshore in MAW",
        "predicted_System total load in MAW",
        "predicted_Solar in MAW",
    ],
]

In [3]:
# Measured series to predict; one model pair is trained per entry.
target_vars = [
    "System total load in MAW",
    "Wind Offshore in MAW",
    "Wind Onshore in MAW",
    "Solar in MAW",
]

In [4]:
# Drop every row that has a missing value in any of the selected columns.
# Rebinding instead of `inplace=True` avoids mutating the frame in place.
df = df.dropna()

In [5]:
# Split the cleaned frame into train / validation / test features and targets.
# NOTE(review): the two 90s presumably size the validation and test windows
# (the recorded output shows 8640 rows in each) — confirm against
# utils.train_val_test_split.
(
    X_train, y_train,
    X_val, y_val,
    X_test, y_test,
) = train_val_test_split(df, target_vars, 90, 90)

The shape of the data set is: (202513, 9)

--------------------------------------------
The shape of the train set is: (185233, 4)
The shape of the target variable is: (185233, 4)
--------------------------------------------

--------------------------------------------
The shape of the validation set is: (8640, 4)
The shape of the target variable for the validation set is: (8640, 4)
--------------------------------------------

--------------------------------------------
The shape of the test set is: (8640, 4)
The shape of the target variable for the test set is: (8640, 5)
--------------------------------------------


In [6]:
# ENTSO-E's forecast columns, listed in the same order as `target_vars` so
# that column k of `y_entso_e` is the forecast for `target_vars[k]`.
list_entso = [
    "predicted_System total load in MAW",
    "predicted_Wind Offshore in MAW",
    "predicted_Wind Onshore in MAW",
    "predicted_Solar in MAW",
]

# Re-run the split with the forecasts as "targets"; only the test-set slice
# is needed, the other five return values are discarded.
_, _, _, _, _, y_entso = train_val_test_split(df, list_entso, 90, 90)

# Select by label rather than by position: the returned frame has a fifth
# column (the evaluation cell reads "Date" from the equivalent y_test), and
# positional indexing would silently depend on where the helper puts it and
# on the order in which it returns the forecast columns.
y_entso_e = y_entso.loc[:, list_entso].to_numpy()

The shape of the data set is: (202513, 9)

--------------------------------------------
The shape of the train set is: (185233, 4)
The shape of the target variable is: (185233, 4)
--------------------------------------------

--------------------------------------------
The shape of the validation set is: (8640, 4)
The shape of the target variable for the validation set is: (8640, 4)
--------------------------------------------

--------------------------------------------
The shape of the test set is: (8640, 4)
The shape of the target variable for the test set is: (8640, 5)
--------------------------------------------


In [7]:
def decision_tree(X_train, y_train, target, random_state=42):
    """Fit a decision-tree regressor on the given training data.

    Parameters
    ----------
    X_train
        Training features, passed unchanged to ``fit``.
    y_train
        Training target values, passed unchanged to ``fit``.
    target : str
        Name of the target variable. Currently unused: it only served to build
        the file name while pickling the model to ``./models/`` was enabled.
    random_state : int or None, default 42
        Seed forwarded to ``DecisionTreeRegressor`` so that repeated runs
        produce the same tree. Pass ``None`` to get unseeded behaviour.

    Returns
    -------
    The fitted ``DecisionTreeRegressor``.
    """
    model = tree.DecisionTreeRegressor(random_state=random_state)
    # ``fit`` returns the estimator itself, so it can be returned directly.
    return model.fit(X_train, y_train)

def random_forest(X_train, y_train, target, random_state=42):
    """Fit a random-forest regressor on the given training data.

    Parameters
    ----------
    X_train
        Training features, passed unchanged to ``fit``.
    y_train
        Training target values, passed unchanged to ``fit``.
    target : str
        Name of the target variable. Currently unused: it only served to build
        the file name while pickling the model to ``./models/`` was enabled.
    random_state : int or None, default 42
        Seed forwarded to ``RandomForestRegressor`` so that repeated runs
        produce the same forest. Pass ``None`` to get unseeded behaviour.

    Returns
    -------
    The fitted ``RandomForestRegressor``.
    """
    # n_jobs=-1 asks scikit-learn to use all available cores.
    model = RandomForestRegressor(n_jobs=-1, random_state=random_state)
    # ``fit`` returns the estimator itself, so it can be returned directly.
    return model.fit(X_train, y_train)

In [8]:
# For every target series: train a decision tree and a random forest on the
# training split, predict on the test split, and print the metrics of both
# models next to those of ENTSO-E's own forecast for the same series.
for target_idx, target in enumerate(target_vars):
    y_train_specific = y_train.loc[:, target]
    # get_model_metrics is handed the "Date" column together with the target.
    y_test_specific = y_test.loc[:, ["Date", target]]

    tree_ = decision_tree(X_train, y_train_specific, target)
    RF = random_forest(X_train, y_train_specific, target)

    y_tree_pred = tree_.predict(X_test)
    y_rf_pred = RF.predict(X_test)

    # Column `target_idx` of y_entso_e is taken as the ENTSO-E forecast for
    # `target`; this relies on y_entso_e's columns following target_vars' order.
    reports = (
        ("Tree:", y_tree_pred),
        ("Random Forest:", y_rf_pred),
        ("Entso-e:", y_entso_e[:, target_idx]),
    )
    for label, y_pred in reports:
        print(label)
        print(target)
        print("______======________")
        get_model_metrics(y_test_specific, y_pred)

Tree:
System total load in MAW
----------------------------------------------
The overall mean absolute error of the model in MW is: 2158.43
----------------------------------------------
----------------------------------------------
The overall mean absolute scaled error of the model in MW is: 4.0836
Please note: to calculate the MASE, the prediction for the first observation was omitted
----------------------------------------------
Random Forest:
System total load in MAW
----------------------------------------------
The overall mean absolute error of the model in MW is: 1458.25
----------------------------------------------
----------------------------------------------
The overall mean absolute scaled error of the model in MW is: 2.7585
Please note: to calculate the MASE, the prediction for the first observation was omitted
----------------------------------------------
Entso-e:
System total load in MAW
----------------------------------------------
The overall mean absolute erro