## Library

In [42]:
import pickle
import pandas as pd
import numpy as np
from pathlib import Path
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MaxAbsScaler
from sklearn.linear_model import LinearRegression

In [43]:
import cufflinks
# NOTE(review): presumably switches cufflinks/plotly to offline rendering for the
# DataFrame `.iplot()` calls used later in this notebook — confirm against the
# cufflinks documentation.
cufflinks.go_offline()

In [44]:
%run ..\..\Data\triangulars.ipynb

## Function

In [45]:
def predict1(X, y, model, test_size=365):
    """Predict the whole sample with a pre-fitted model on max-abs-scaled features.

    The rows are split in their given order (no shuffling) into a training part
    and a hold-out part. One ``MaxAbsScaler`` is fitted on the training features
    and one on the training target. The model predicts both parts from the
    scaled features; the two prediction vectors are concatenated and passed
    through the target scaler's inverse transform, i.e. the model is treated as
    predicting in scaled-target units.

    Parameters
    ----------
    X : pandas.DataFrame
        Feature matrix, one row per observation.
    y : pandas.DataFrame
        Single-column target with the same number of rows as ``X``. Its index
        becomes the index of the returned frame.
    model : object
        Already-fitted estimator exposing ``predict`` (the models loaded in
        this notebook are gplearn regressors).
    test_size : int or float, default 365
        Forwarded to ``train_test_split``; the last ``test_size`` rows form the
        hold-out part.

    Returns
    -------
    pandas.DataFrame
        Columns ``"predict"`` (model output in the original units of ``y``) and
        ``"real"`` (the values of ``y``), indexed like ``y``.
    """
    from sklearn.preprocessing import MaxAbsScaler

    # Ordered split; only the training target is needed (to fit its scaler).
    X_train, X_test, y_train, _ = train_test_split(
        X, y, test_size=test_size, shuffle=False
    )
    y_scaler = MaxAbsScaler().fit(y_train)
    x_scaler = MaxAbsScaler().fit(X_train)

    # Predict train and hold-out parts, then stitch them back in row order.
    y_pred = np.hstack([
        model.predict(x_scaler.transform(X_train)),
        model.predict(x_scaler.transform(X_test)),
    ])
    y_pred = y_scaler.inverse_transform(y_pred.reshape(-1, 1))

    # Index by the target that was passed in, not by a notebook-level global,
    # so the function does not depend on hidden kernel state.
    plotting = pd.DataFrame(y_pred, index=y.index, columns=["predict"])
    plotting["real"] = y.to_numpy().reshape(-1)
    return plotting

In [46]:
def predict2(X, y, model, test_size=365):
    """Predict the whole sample with a pre-fitted model on unscaled features.

    The rows are split in their given order (no shuffling) into a training part
    and a hold-out part. The features are handed to the model as plain NumPy
    arrays without scaling; only the target gets a ``MaxAbsScaler`` (fitted on
    the training part). The concatenated predictions are passed through that
    scaler's inverse transform, i.e. the model is treated as predicting in
    scaled-target units.

    Parameters
    ----------
    X : pandas.DataFrame
        Feature matrix, one row per observation.
    y : pandas.DataFrame
        Single-column target with the same number of rows as ``X``. Its index
        becomes the index of the returned frame.
    model : object
        Already-fitted estimator exposing ``predict`` (the models loaded in
        this notebook are gplearn regressors).
    test_size : int or float, default 365
        Forwarded to ``train_test_split``; the last ``test_size`` rows form the
        hold-out part.

    Returns
    -------
    pandas.DataFrame
        Columns ``"predict"`` (model output in the original units of ``y``) and
        ``"real"`` (the values of ``y``), indexed like ``y``.
    """
    from sklearn.preprocessing import MaxAbsScaler

    # Ordered split; only the training target is needed (to fit its scaler).
    X_train, X_test, y_train, _ = train_test_split(
        X, y, test_size=test_size, shuffle=False
    )
    y_scaler = MaxAbsScaler().fit(y_train)

    # Predict train and hold-out parts, then stitch them back in row order.
    y_pred = np.hstack([
        model.predict(X_train.to_numpy()),
        model.predict(X_test.to_numpy()),
    ])
    y_pred = y_scaler.inverse_transform(y_pred.reshape(-1, 1))

    # Index by the target that was passed in, not by a notebook-level global,
    # so the function does not depend on hidden kernel state.
    plotting = pd.DataFrame(y_pred, index=y.index, columns=["predict"])
    plotting["real"] = y.to_numpy().reshape(-1)
    return plotting

In [47]:
def mape_comp(y):
    """Return the mean absolute percentage error (in percent) of ``y``.

    ``y`` must provide a ``real`` column (observed values) and a ``predict``
    column (model output). Rows where ``real`` is zero are not special-cased.
    """
    observed = y["real"]
    relative_error = (observed - y["predict"]) / observed
    return np.mean(np.abs(relative_error)) * 100

## Load Data

In [48]:
# Workbook lives two directory levels above the working directory, under Data/.
path_consumo = Path().resolve().parents[1].joinpath("Data", "Data1.xlsx")
# Index by the "fecha" column and keep the 2007-01-01 .. 2020-03-30 window
# (label slicing with .loc includes both endpoints).
consumo = (
    pd.read_excel(path_consumo)
    .set_index("fecha")
    .loc["2007-01-01":"2020-03-30"]
)

## Create X

In [49]:
# Two holiday ("festivos") tables; both are handed to triangulars().festivos
# in the next cell.
festivos = pd.read_excel(Path().resolve().parents[1].joinpath("Data", "Festivos.xlsx"))
festivos2 = pd.read_excel(Path().resolve().parents[1].joinpath("Data", "Festivos2.xlsx"))

In [50]:
# Holiday-derived columns produced by the external `triangulars` helper; they
# are appended to the design matrices X and X1 further down.
fest = triangulars().festivos(
    X=consumo,
    festivos=festivos,
    festivos2=festivos2,
)

In [51]:
# 1-based time index as a column vector, one entry per row of `consumo`.
# len() counts rows; `.size` counts rows x columns and would give a
# wrong-length vector if `consumo` ever carried more than one column.
t = np.arange(1, len(consumo) + 1).reshape(-1, 1)
day = consumo.index.dayofyear
# pandas weekday convention: Monday=0 ... Sunday=6. The same codes seed three
# columns below: the raw code plus two recoded indicators.
wend = consumo.index.weekday
wday = consumo.index.weekday
weekd = consumo.index.weekday
month = consumo.index.month

# One-hot weekday columns (first level dropped), re-indexed to match `consumo`.
weekd1 = pd.get_dummies(weekd, prefix="wday", drop_first=True)
weekd1.index = consumo.index

# Design matrix: seasonal features from the external helper, then weekday
# dummies, then holiday features. Column order is kept as-is because the
# loaded models receive these columns positionally.
X = triangulars().diffseason(consumo)
X = pd.concat([X, weekd1], axis=1)
X = pd.concat([X, fest], axis=1)

X["t"] = t
X["day"] = day
X["month"] = month
X["weekd"] = weekd
X["wend"] = wend
X["wday"] = wday
# Soft weekend indicator: Sat/Sun -> 1, Mon/Fri -> 0.5, Tue-Thu -> 0.
X["wend"] = X["wend"].replace([0, 1, 2, 3, 4, 5, 6], [.5, 0, 0, 0, .5, 1, 1])
# Soft working-day indicator (the complement): Tue-Thu -> 1, Mon/Fri -> 0.5, Sat/Sun -> 0.
X["wday"] = X["wday"].replace([0, 1, 2, 3, 4, 5, 6], [.5, 1, 1, 1, .5, 0, 0])

In [52]:
# Smaller design matrix used by the linear-regression cells further down:
# seasonal features, holiday features and weekday dummies side by side.
X1 = pd.concat([triangulars().diffseason(consumo), fest, weekd1], axis=1)
X1["t"] = t
# Day of year rescaled by its largest observed value.
X1["day"] = day / day.max()

In [53]:
# X_train, X_test, y_train, y_test = train_test_split(X, consumo, test_size = 365, shuffle = False)
# t_train, t_test = train_test_split(t, test_size = 365, shuffle = False)

## Load Model

In [54]:
def open_model(path):
    """Unpickle and return the object stored under ``Models/GRegressor``.

    The directory is resolved relative to the current working directory:
    two levels up, then ``Models/GRegressor/<path>``.

    Caution: ``pickle.load`` can execute arbitrary code, so only point this at
    files from a trusted source.
    """
    models_dir = Path().resolve().parents[1].joinpath("Models", "GRegressor")
    with (models_dir / str(path)).open("rb") as handle:
        return pickle.load(handle)

## Plot

### gpX_FouriergpX

Model trained with X on GPlearn, followed by a second GPlearn model trained with Fourier variables as functions and X as input.

In [55]:
# model1 is applied to the scaled design matrix X (via predict1); model2 is
# applied to model1's residuals with the unscaled X as input (via predict2).
model1 = open_model("GR_gp_dwsmwds.pkl")
model2 = open_model("gpX_FouriergpX")

Unnamed: 0_level_0,0
fecha,Unnamed: 1_level_1
2007-01-01,-37454.430672
2007-01-02,-9642.448242
2007-01-03,1386.745427
2007-01-04,1956.042686
2007-01-05,4241.768185
...,...
2020-03-26,8145.732652
2020-03-27,7066.854435
2020-03-28,-1501.531140
2020-03-29,-5482.826036


In [65]:
# Stage 1: model1 on the scaled features.
first = predict1(X, consumo, model1)
# Stage 2: model2 on what stage 1 left unexplained (observed minus predicted).
second = predict2(X, (first["real"] - first["predict"]).to_frame(), model2)

In [66]:
# Stage-1 result: model1 prediction next to the observed series.
first.iplot()

In [67]:
# Combined forecast = stage-1 prediction + stage-2 prediction of its residual,
# placed next to the observed series and scored with MAPE.
plott = pd.concat(
    [first[["predict"]].add(second[["predict"]]), first["real"]],
    axis=1,
)
mape = mape_comp(plott)

In [68]:
# Combined forecast vs. observed series, with its MAPE in the title.
plott.iplot(title=f"Mape: {mape}")

### gpX_gpt

Model trained with X on GPlearn, followed by a second GPlearn model trained with Fourier variables as functions and the time index `t` as input.

In [69]:
# model1 is applied to the scaled design matrix X (via predict1); model2 is
# applied to model1's residuals with only the time index t as input (via predict2).
model1 = open_model("GR_gp_dwsmwds.pkl")
model2 = open_model("gpX_gpt")

In [70]:
# Stage 1: model1 on the scaled features.
first = predict1(X, consumo, model1)
# Stage 2: model2 on the stage-1 residual, with the time index as its only input.
second = predict2(
    pd.DataFrame(t),
    (first["real"] - first["predict"]).to_frame(),
    model2,
)

In [71]:
# Stage 1: observed series vs. model1 prediction.
first[["real", "predict"]].iplot()

In [72]:
# Stage 2: here "real" is the stage-1 residual and "predict" is model2's estimate of it.
second[["real", "predict"]].iplot()

In [73]:
# Combined forecast = stage-1 prediction + stage-2 prediction of its residual,
# placed next to the observed series and scored with MAPE.
plott = pd.concat(
    [first[["predict"]].add(second[["predict"]]), first["real"]],
    axis=1,
)
mape = mape_comp(plott)

In [74]:
# Combined forecast vs. observed series, with its MAPE in the title.
plott.iplot(title=f"Mape: {mape}")

### Fourier 1

In [75]:
# Pickled model that is applied further down (via predict2) to the
# linear-regression residuals, with the time index t as its only input.
model = open_model("fourier1_t")

In [85]:
# Ordered split: the last 365 rows are held out, nothing is shuffled.
X_train, X_test, y_train, y_test = train_test_split(
    X1, consumo, test_size=365, shuffle=False
)
# Scale the target with a scaler fitted on the training part only.
scaler1 = MaxAbsScaler().fit(y_train)
y_train = scaler1.transform(y_train).ravel()
y_test = scaler1.transform(y_test).ravel()

# Linear regression on X1 in scaled-target units.
run = LinearRegression().fit(X_train, y_train)
y_res_tr, y_res_te = run.predict(X_train), run.predict(X_test)
# Train fit followed by hold-out forecast, in row order.
y_res = np.concatenate([y_res_tr, y_res_te])

# Back to the original units of `consumo`.
y_pred = scaler1.inverse_transform(y_res.reshape(-1, 1))

plott = pd.DataFrame(
    {"predict": y_pred.ravel(), "real": consumo.to_numpy().reshape(-1)},
    index=consumo.index,
)

# What the linear stage leaves unexplained; this is the target of the next stage.
y_residual = plott["real"] - plott["predict"]

In [87]:
# Second stage: the loaded model predicts the linear-regression residual from t alone.
pre = predict2(pd.DataFrame(t), y_residual.to_frame(), model)

In [88]:
# Combined forecast = linear-regression prediction + second-stage prediction
# of its residual, next to the observed series.
plotting = pd.concat(
    [plott[["predict"]] + pre[["predict"]], plott["real"]],
    axis=1,
)
# Score the combined forecast (`plotting`), which is the frame plotted with
# this MAPE in its title. Scoring `plott` would report the error of the
# linear stage alone.
mape = mape_comp(plotting)

In [89]:
# Combined forecast vs. observed series, with the MAPE in the title.
plotting.iplot(title=f"Mape: {mape}")

### Fourier2

In [97]:
# NOTE(review): this cell repeats the same steps as the linear stage of the
# "Fourier 1" section; a shared function would remove the duplication.
# Ordered split: the last 365 rows are held out, nothing is shuffled.
X_train, X_test, y_train, y_test = train_test_split(
    X1, consumo, test_size=365, shuffle=False
)
# Scale the target with a scaler fitted on the training part only.
scaler1 = MaxAbsScaler().fit(y_train)
y_train = scaler1.transform(y_train).ravel()
y_test = scaler1.transform(y_test).ravel()

# Linear regression on X1 in scaled-target units.
run = LinearRegression().fit(X_train, y_train)
y_res_tr, y_res_te = run.predict(X_train), run.predict(X_test)
# Train fit followed by hold-out forecast, in row order.
y_res = np.concatenate([y_res_tr, y_res_te])

# Back to the original units of `consumo`.
y_pred = scaler1.inverse_transform(y_res.reshape(-1, 1))

plott = pd.DataFrame(
    {"predict": y_pred.ravel(), "real": consumo.to_numpy().reshape(-1)},
    index=consumo.index,
)

# What the linear stage leaves unexplained; this is the target of the next stage.
y_residual = plott["real"] - plott["predict"]

In [100]:
# NOTE(review): the saved output of the predict2 call that uses this model shows
# "NotFittedError: SymbolicRegressor not fitted", so this pickle appears to
# hold an estimator that was saved before fitting — verify the file.
model_ = open_model("fourier2_t")

In [102]:
# Second stage: the loaded model predicts the linear-regression residual from t alone.
pre = predict2(pd.DataFrame(t), y_residual.to_frame(), model_)

NotFittedError: SymbolicRegressor not fitted.

In [103]:
# NOTE(review): in the saved run the preceding predict2 call raised
# NotFittedError, so `pre` here may still hold the result of the "Fourier 1"
# section rather than a Fourier 2 prediction — re-check after fixing the model.
# Combined forecast = linear-regression prediction + second-stage prediction
# of its residual, next to the observed series.
plotting = pd.concat(
    [plott[["predict"]] + pre[["predict"]], plott["real"]],
    axis=1,
)
# Score the combined forecast (`plotting`), which is the frame plotted with
# this MAPE in its title. Scoring `plott` would report the error of the
# linear stage alone.
mape = mape_comp(plotting)

In [104]:
# Combined forecast vs. observed series, with the MAPE in the title.
plotting.iplot(title=f"Mape: {mape}")