In [2]:
# Additional imports are required
import os 

import warnings
warnings.filterwarnings('ignore')

import numpy as np
import pandas as pd
import seaborn as sns
from statsmodels.tsa.seasonal import STL
from scipy.stats import norm, kstest

from sklearn import linear_model
from sklearn.metrics import mean_absolute_error, mean_squared_error

# Plots
import matplotlib.pyplot as plt
from pylab import rcParams
rcParams['figure.figsize'] = 18, 7

# Preprocessing for FEDOT
from fedot.core.data.data import InputData
from fedot.core.repository.dataset_types import DataTypesEnum
from fedot.core.repository.tasks import Task, TaskTypesEnum, TsForecastingParams

# FEDOT 
from fedot.core.chains.node import PrimaryNode, SecondaryNode
from fedot.core.chains.ts_chain import TsForecastingChain

In [5]:
def _remove_gaussian_residuals(frame, period=80, alpha=0.05):
    """Mean-centre every data column and strip STL residuals that look Gaussian.

    The frame is modified in place. For each column other than 'dates':

    1. the column mean is subtracted;
    2. the column is decomposed with STL using the given ``period``;
    3. a normal distribution is fitted to the STL residual and a
       Kolmogorov-Smirnov test is run against that fitted distribution;
    4. if the p-value is at least ``alpha`` the residual is subtracted from
       the column; otherwise the column name is printed and the column is
       left mean-centred only.

    Parameters
    ----------
    frame : pandas.DataFrame
        Table with a 'dates' column and one numeric column per series.
    period : int, default 80
        Seasonal period passed to ``STL``.
    alpha : float, default 0.05
        Smallest KS p-value for which the residual is removed.
    """
    for column in list(frame.columns.values):
        if column == 'dates':
            continue
        frame[column] = frame[column] - np.mean(frame[column])
        res = STL(frame[column], period=period).fit()
        params = norm.fit(res.resid)
        # N is kept from the original call; per the SciPy documentation it is
        # only used when the first argument is a distribution name or callable.
        ks = kstest(res.resid, 'norm', params, N=1000)
        if ks[1] >= alpha:
            frame[column] = frame[column] - res.resid
        else:
            # Residual rejected as Gaussian: report the column and keep it as is.
            print(column)


# Reanalysis series (dates stored as YYYY-MM-DD).
df_rean = pd.read_csv('./data_time_series/2002_ARCTIC_reanalysis_GRID.csv', sep=';')
df_rean['dates'] = pd.to_datetime(df_rean['dates'], format='%Y-%m-%d')
_remove_gaussian_residuals(df_rean)

# NEMO series (dates stored as DD.MM.YYYY).
df_nemo = pd.read_csv('./data_time_series/2002_NEMO_SPITZ_GRID.csv', sep=';')
df_nemo['dates'] = pd.to_datetime(df_nemo['dates'], format='%d.%m.%Y')
_remove_gaussian_residuals(df_nemo)

In [6]:
def make_forecast(chain, train_data, len_forecast: int, max_window_size: int):
    """Fit ``chain`` on ``train_data`` and return its forecast.

    Parameters
    ----------
    chain
        Object exposing ``fit_from_scratch`` and ``forecast`` (the notebook
        passes a ``TsForecastingChain``).
    train_data
        Sequence of observations used as the training target.
    len_forecast : int
        Forwarded as ``forecast_length`` and used as the length of the
        placeholder input handed to ``chain.forecast``.
    max_window_size : int
        Forwarded as ``max_window_size`` of the forecasting task.

    Returns
    -------
    The ``predict`` attribute of the object returned by ``chain.forecast``.
    """
    # Both hyperparameters of the forecasting task live in its parameter object.
    forecasting_params = TsForecastingParams(forecast_length=len_forecast,
                                             max_window_size=max_window_size,
                                             return_all_steps=False,
                                             make_future_prediction=True)
    task = Task(TaskTypesEnum.ts_forecasting, forecasting_params)

    # Training sample: the series itself is the target, there are no features.
    history = InputData(idx=np.arange(0, len(train_data)),
                        features=None,
                        target=train_data,
                        task=task,
                        data_type=DataTypesEnum.ts)

    # Empty placeholder that only carries the requested forecast length.
    horizon = InputData(idx=np.arange(0, len_forecast),
                        features=None,
                        target=None,
                        task=task,
                        data_type=DataTypesEnum.ts)

    chain.fit_from_scratch(history)

    forecast = chain.forecast(initial_data=history,
                              supplementary_data=horizon)
    return forecast.predict

In [7]:
def get_hybrid_coeffs(nemo_tr, arima_tr, real_tr):
    """Fit ``real_tr ~ nemo_tr + arima_tr`` with ordinary least squares.

    Returns
    -------
    tuple
        ``(intercept, coefficients)`` where ``coefficients[0]`` is the weight
        of ``nemo_tr`` and ``coefficients[1]`` the weight of ``arima_tr``.
    """
    # Columns are assigned one by one on purpose: the first assignment fixes
    # the frame index and later pandas inputs are aligned to it.
    samples = pd.DataFrame()
    for name, values in (('x1', nemo_tr), ('x2', arima_tr), ('y', real_tr)):
        samples[name] = values

    model = linear_model.LinearRegression()
    model.fit(samples[['x1', 'x2']], samples['y'])

    return (model.intercept_, model.coef_)

## Earliness of forecast (lead time) - 30 days

In [53]:
# Length of the forecast window (number of trailing samples) and the number of
# leading forecast samples reserved for fitting the hybrid regression weights.
forcact_window=30
coeff_train_window=10

# Positional windows, counted from the end of each series.
forecast_part = slice(-forcact_window, None)
coef_part = slice(-forcact_window, -forcact_window+coeff_train_window)
score_part = slice(-forcact_window+coeff_train_window, None)

# One dict per grid point. The frame is built once after the loop because
# DataFrame.append copies the frame on every call and was removed in pandas 2.0.
error_records = []

for column in list(df_rean.columns.values):
    if column=='dates':
        continue

    # Reanalysis: samples used for scoring and samples used to train the chain.
    test = df_rean[column].iloc[score_part]
    test.index = df_rean['dates'].iloc[score_part]
    train = df_rean[column].iloc[:-forcact_window]
    train.index = df_rean['dates'].iloc[:-forcact_window]

    # NEMO values over the whole forecast window.
    nemo_pr = df_nemo[column].iloc[forecast_part]
    nemo_pr.index = df_nemo['dates'].iloc[forecast_part]

    # Calibration window used to fit the hybrid weights.
    nemo_coef_tr = df_nemo[column].iloc[coef_part]
    nemo_coef_tr.index = df_nemo['dates'].iloc[coef_part]
    real_coef_tr = df_rean[column].iloc[coef_part]
    real_coef_tr.index = df_rean['dates'].iloc[coef_part]

    # Two branches (trend and residual data models, each followed by ridge)
    # joined by an SVR root node that produces the final prediction.
    node_first = PrimaryNode('trend_data_model')
    node_second = PrimaryNode('residual_data_model')
    node_trend_model = SecondaryNode('ridge', nodes_from=[node_first])
    node_residual_model = SecondaryNode('ridge', nodes_from=[node_second])
    node_final = SecondaryNode('svr', nodes_from=[node_trend_model, node_residual_model])
    final_chain = TsForecastingChain(node_final)

    predicted_values = pd.Series(make_forecast(chain=final_chain,
                                               train_data=train,
                                               len_forecast=forcact_window,
                                               max_window_size=60))
    predicted_values.index = df_nemo['dates'].iloc[forecast_part]

    ridge_coef_tr = predicted_values.iloc[:coeff_train_window]
    ridge_coef_tr.index = df_nemo['dates'].iloc[coef_part]

    # Fit the regression once; the original refitted it for every coefficient.
    const, (nemo_coef, ridge_coef) = get_hybrid_coeffs(nemo_coef_tr, ridge_coef_tr, real_coef_tr)

    hyb_forecast = nemo_coef*nemo_pr+ridge_coef*predicted_values+const
    hyb_forecast.index = df_nemo['dates'].iloc[forecast_part]

    # Score only the part of the forecast that was not used for calibration.
    ridge_score = predicted_values.iloc[coeff_train_window:]
    hyb_score = hyb_forecast.iloc[coeff_train_window:]

    # NOTE: MAPE divides by the observed values, which were mean-centred when
    # the data were loaded, so near-zero observations can dominate it.
    error_records.append({
        'POINT': column,
        'MSE_RIDGE': mean_squared_error(test, ridge_score),
        'MAE_RIDGE': mean_absolute_error(test, ridge_score),
        'MAPE_RIDGE': round(np.mean(np.abs((test-ridge_score)/test))*100,3),
        'MSE_HYB': mean_squared_error(test, hyb_score),
        'MAE_HYB': mean_absolute_error(test, hyb_score),
        'MAPE_HYB': round(np.mean(np.abs((test-hyb_score)/test))*100,3),
    })

errors_df = pd.DataFrame(error_records,
                         columns=['POINT', 'MSE_RIDGE', 'MAE_RIDGE', 'MAPE_RIDGE',
                                  'MSE_HYB', 'MAE_HYB', 'MAPE_HYB'])

# Hybrid error minus chain error: negative values mean the hybrid scored better.
errors_df['MSE_DIF']=abs(errors_df['MSE_HYB'])-abs(errors_df['MSE_RIDGE'])
errors_df['MAE_DIF']=abs(errors_df['MAE_HYB'])-abs(errors_df['MAE_RIDGE'])
errors_df['MAPE_DIF']=abs(errors_df['MAPE_HYB'])-abs(errors_df['MAPE_RIDGE'])

# Split each column name on its first underscore into the 'x' and 'y' parts.
new = errors_df['POINT'].str.split("_", n = 1, expand = True)
errors_df['x']=new[0]
errors_df['y']=new[1]

# to_csv does not create missing directories.
os.makedirs('./errors_for_grid', exist_ok=True)
errors_df.to_csv('./errors_for_grid/30_10_RIDGE_HYB_errors.csv', sep=';', index=False)

## Earliness of forecast (lead time) - 90 days

In [57]:
# Length of the forecast window (number of trailing samples) and the number of
# leading forecast samples reserved for fitting the hybrid regression weights.
forcact_window=90
coeff_train_window=30

# Positional windows, counted from the end of each series.
forecast_part = slice(-forcact_window, None)
coef_part = slice(-forcact_window, -forcact_window+coeff_train_window)
score_part = slice(-forcact_window+coeff_train_window, None)

# One dict per grid point. The frame is built once after the loop because
# DataFrame.append copies the frame on every call and was removed in pandas 2.0.
error_records = []

for column in list(df_rean.columns.values):
    if column=='dates':
        continue

    # Reanalysis: samples used for scoring and samples used to train the chain.
    test = df_rean[column].iloc[score_part]
    test.index = df_rean['dates'].iloc[score_part]
    train = df_rean[column].iloc[:-forcact_window]
    train.index = df_rean['dates'].iloc[:-forcact_window]

    # NEMO values over the whole forecast window.
    nemo_pr = df_nemo[column].iloc[forecast_part]
    nemo_pr.index = df_nemo['dates'].iloc[forecast_part]

    # Calibration window used to fit the hybrid weights.
    nemo_coef_tr = df_nemo[column].iloc[coef_part]
    nemo_coef_tr.index = df_nemo['dates'].iloc[coef_part]
    real_coef_tr = df_rean[column].iloc[coef_part]
    real_coef_tr.index = df_rean['dates'].iloc[coef_part]

    # Two branches (trend and residual data models, each followed by ridge)
    # joined by an SVR root node that produces the final prediction.
    node_first = PrimaryNode('trend_data_model')
    node_second = PrimaryNode('residual_data_model')
    node_trend_model = SecondaryNode('ridge', nodes_from=[node_first])
    node_residual_model = SecondaryNode('ridge', nodes_from=[node_second])
    node_final = SecondaryNode('svr', nodes_from=[node_trend_model, node_residual_model])
    final_chain = TsForecastingChain(node_final)

    predicted_values = pd.Series(make_forecast(chain=final_chain,
                                               train_data=train,
                                               len_forecast=forcact_window,
                                               max_window_size=60))
    predicted_values.index = df_nemo['dates'].iloc[forecast_part]

    ridge_coef_tr = predicted_values.iloc[:coeff_train_window]
    ridge_coef_tr.index = df_nemo['dates'].iloc[coef_part]

    # Fit the regression once; the original refitted it for every coefficient.
    const, (nemo_coef, ridge_coef) = get_hybrid_coeffs(nemo_coef_tr, ridge_coef_tr, real_coef_tr)

    hyb_forecast = nemo_coef*nemo_pr+ridge_coef*predicted_values+const
    hyb_forecast.index = df_nemo['dates'].iloc[forecast_part]

    # Score only the part of the forecast that was not used for calibration.
    ridge_score = predicted_values.iloc[coeff_train_window:]
    hyb_score = hyb_forecast.iloc[coeff_train_window:]

    # NOTE: MAPE divides by the observed values, which were mean-centred when
    # the data were loaded, so near-zero observations can dominate it.
    error_records.append({
        'POINT': column,
        'MSE_RIDGE': mean_squared_error(test, ridge_score),
        'MAE_RIDGE': mean_absolute_error(test, ridge_score),
        'MAPE_RIDGE': round(np.mean(np.abs((test-ridge_score)/test))*100,3),
        'MSE_HYB': mean_squared_error(test, hyb_score),
        'MAE_HYB': mean_absolute_error(test, hyb_score),
        'MAPE_HYB': round(np.mean(np.abs((test-hyb_score)/test))*100,3),
    })

errors_df = pd.DataFrame(error_records,
                         columns=['POINT', 'MSE_RIDGE', 'MAE_RIDGE', 'MAPE_RIDGE',
                                  'MSE_HYB', 'MAE_HYB', 'MAPE_HYB'])

# Hybrid error minus chain error: negative values mean the hybrid scored better.
errors_df['MSE_DIF']=abs(errors_df['MSE_HYB'])-abs(errors_df['MSE_RIDGE'])
errors_df['MAE_DIF']=abs(errors_df['MAE_HYB'])-abs(errors_df['MAE_RIDGE'])
errors_df['MAPE_DIF']=abs(errors_df['MAPE_HYB'])-abs(errors_df['MAPE_RIDGE'])

# Split each column name on its first underscore into the 'x' and 'y' parts.
new = errors_df['POINT'].str.split("_", n = 1, expand = True)
errors_df['x']=new[0]
errors_df['y']=new[1]

# to_csv does not create missing directories.
os.makedirs('./errors_for_grid', exist_ok=True)
errors_df.to_csv('./errors_for_grid/90_30_RIDGE_HYB_errors.csv', sep=';', index=False)

## Earliness of forecast (lead time) - 60 days

In [72]:
# Length of the forecast window (number of trailing samples) and the number of
# leading forecast samples reserved for fitting the hybrid regression weights.
forcact_window=60
coeff_train_window=20

# Positional windows, counted from the end of each series.
forecast_part = slice(-forcact_window, None)
coef_part = slice(-forcact_window, -forcact_window+coeff_train_window)
score_part = slice(-forcact_window+coeff_train_window, None)

# One dict per grid point. The frame is built once after the loop because
# DataFrame.append copies the frame on every call and was removed in pandas 2.0.
error_records = []

for column in list(df_rean.columns.values):
    if column=='dates':
        continue

    # Reanalysis: samples used for scoring and samples used to train the chain.
    test = df_rean[column].iloc[score_part]
    test.index = df_rean['dates'].iloc[score_part]
    train = df_rean[column].iloc[:-forcact_window]
    train.index = df_rean['dates'].iloc[:-forcact_window]

    # NEMO values over the whole forecast window.
    nemo_pr = df_nemo[column].iloc[forecast_part]
    nemo_pr.index = df_nemo['dates'].iloc[forecast_part]

    # Calibration window used to fit the hybrid weights.
    nemo_coef_tr = df_nemo[column].iloc[coef_part]
    nemo_coef_tr.index = df_nemo['dates'].iloc[coef_part]
    real_coef_tr = df_rean[column].iloc[coef_part]
    real_coef_tr.index = df_rean['dates'].iloc[coef_part]

    # Two branches (trend and residual data models, each followed by ridge)
    # joined by an SVR root node that produces the final prediction.
    node_first = PrimaryNode('trend_data_model')
    node_second = PrimaryNode('residual_data_model')
    node_trend_model = SecondaryNode('ridge', nodes_from=[node_first])
    node_residual_model = SecondaryNode('ridge', nodes_from=[node_second])
    node_final = SecondaryNode('svr', nodes_from=[node_trend_model, node_residual_model])
    final_chain = TsForecastingChain(node_final)

    predicted_values = pd.Series(make_forecast(chain=final_chain,
                                               train_data=train,
                                               len_forecast=forcact_window,
                                               max_window_size=60))
    predicted_values.index = df_nemo['dates'].iloc[forecast_part]

    ridge_coef_tr = predicted_values.iloc[:coeff_train_window]
    ridge_coef_tr.index = df_nemo['dates'].iloc[coef_part]

    # Fit the regression once; the original refitted it for every coefficient.
    const, (nemo_coef, ridge_coef) = get_hybrid_coeffs(nemo_coef_tr, ridge_coef_tr, real_coef_tr)

    hyb_forecast = nemo_coef*nemo_pr+ridge_coef*predicted_values+const
    hyb_forecast.index = df_nemo['dates'].iloc[forecast_part]

    # Score only the part of the forecast that was not used for calibration.
    ridge_score = predicted_values.iloc[coeff_train_window:]
    hyb_score = hyb_forecast.iloc[coeff_train_window:]

    # NOTE: MAPE divides by the observed values, which were mean-centred when
    # the data were loaded, so near-zero observations can dominate it.
    error_records.append({
        'POINT': column,
        'MSE_RIDGE': mean_squared_error(test, ridge_score),
        'MAE_RIDGE': mean_absolute_error(test, ridge_score),
        'MAPE_RIDGE': round(np.mean(np.abs((test-ridge_score)/test))*100,3),
        'MSE_HYB': mean_squared_error(test, hyb_score),
        'MAE_HYB': mean_absolute_error(test, hyb_score),
        'MAPE_HYB': round(np.mean(np.abs((test-hyb_score)/test))*100,3),
    })

errors_df = pd.DataFrame(error_records,
                         columns=['POINT', 'MSE_RIDGE', 'MAE_RIDGE', 'MAPE_RIDGE',
                                  'MSE_HYB', 'MAE_HYB', 'MAPE_HYB'])

# Hybrid error minus chain error: negative values mean the hybrid scored better.
errors_df['MSE_DIF']=abs(errors_df['MSE_HYB'])-abs(errors_df['MSE_RIDGE'])
errors_df['MAE_DIF']=abs(errors_df['MAE_HYB'])-abs(errors_df['MAE_RIDGE'])
errors_df['MAPE_DIF']=abs(errors_df['MAPE_HYB'])-abs(errors_df['MAPE_RIDGE'])

# Split each column name on its first underscore into the 'x' and 'y' parts.
new = errors_df['POINT'].str.split("_", n = 1, expand = True)
errors_df['x']=new[0]
errors_df['y']=new[1]

# to_csv does not create missing directories.
os.makedirs('./errors_for_grid', exist_ok=True)
errors_df.to_csv('./errors_for_grid/60_20_RIDGE_HYB_errors.csv', sep=';', index=False)

## Earliness of forecast (lead time) - 40 days

In [None]:
# Length of the forecast window (number of trailing samples) and the number of
# leading forecast samples reserved for fitting the hybrid regression weights.
forcact_window=40
coeff_train_window=10

# Positional windows, counted from the end of each series.
forecast_part = slice(-forcact_window, None)
coef_part = slice(-forcact_window, -forcact_window+coeff_train_window)
score_part = slice(-forcact_window+coeff_train_window, None)

# One dict per grid point. The frame is built once after the loop because
# DataFrame.append copies the frame on every call and was removed in pandas 2.0.
error_records = []

for column in list(df_rean.columns.values):
    if column=='dates':
        continue

    # Reanalysis: samples used for scoring and samples used to train the chain.
    test = df_rean[column].iloc[score_part]
    test.index = df_rean['dates'].iloc[score_part]
    train = df_rean[column].iloc[:-forcact_window]
    train.index = df_rean['dates'].iloc[:-forcact_window]

    # NEMO values over the whole forecast window.
    nemo_pr = df_nemo[column].iloc[forecast_part]
    nemo_pr.index = df_nemo['dates'].iloc[forecast_part]

    # Calibration window used to fit the hybrid weights.
    nemo_coef_tr = df_nemo[column].iloc[coef_part]
    nemo_coef_tr.index = df_nemo['dates'].iloc[coef_part]
    real_coef_tr = df_rean[column].iloc[coef_part]
    real_coef_tr.index = df_rean['dates'].iloc[coef_part]

    # Two branches (trend and residual data models, each followed by ridge)
    # joined by an SVR root node that produces the final prediction.
    node_first = PrimaryNode('trend_data_model')
    node_second = PrimaryNode('residual_data_model')
    node_trend_model = SecondaryNode('ridge', nodes_from=[node_first])
    node_residual_model = SecondaryNode('ridge', nodes_from=[node_second])
    node_final = SecondaryNode('svr', nodes_from=[node_trend_model, node_residual_model])
    final_chain = TsForecastingChain(node_final)

    predicted_values = pd.Series(make_forecast(chain=final_chain,
                                               train_data=train,
                                               len_forecast=forcact_window,
                                               max_window_size=60))
    predicted_values.index = df_nemo['dates'].iloc[forecast_part]

    ridge_coef_tr = predicted_values.iloc[:coeff_train_window]
    ridge_coef_tr.index = df_nemo['dates'].iloc[coef_part]

    # Fit the regression once; the original refitted it for every coefficient.
    const, (nemo_coef, ridge_coef) = get_hybrid_coeffs(nemo_coef_tr, ridge_coef_tr, real_coef_tr)

    hyb_forecast = nemo_coef*nemo_pr+ridge_coef*predicted_values+const
    hyb_forecast.index = df_nemo['dates'].iloc[forecast_part]

    # Score only the part of the forecast that was not used for calibration.
    ridge_score = predicted_values.iloc[coeff_train_window:]
    hyb_score = hyb_forecast.iloc[coeff_train_window:]

    # NOTE: MAPE divides by the observed values, which were mean-centred when
    # the data were loaded, so near-zero observations can dominate it.
    error_records.append({
        'POINT': column,
        'MSE_RIDGE': mean_squared_error(test, ridge_score),
        'MAE_RIDGE': mean_absolute_error(test, ridge_score),
        'MAPE_RIDGE': round(np.mean(np.abs((test-ridge_score)/test))*100,3),
        'MSE_HYB': mean_squared_error(test, hyb_score),
        'MAE_HYB': mean_absolute_error(test, hyb_score),
        'MAPE_HYB': round(np.mean(np.abs((test-hyb_score)/test))*100,3),
    })

errors_df = pd.DataFrame(error_records,
                         columns=['POINT', 'MSE_RIDGE', 'MAE_RIDGE', 'MAPE_RIDGE',
                                  'MSE_HYB', 'MAE_HYB', 'MAPE_HYB'])

# Hybrid error minus chain error: negative values mean the hybrid scored better.
errors_df['MSE_DIF']=abs(errors_df['MSE_HYB'])-abs(errors_df['MSE_RIDGE'])
errors_df['MAE_DIF']=abs(errors_df['MAE_HYB'])-abs(errors_df['MAE_RIDGE'])
errors_df['MAPE_DIF']=abs(errors_df['MAPE_HYB'])-abs(errors_df['MAPE_RIDGE'])

# Split each column name on its first underscore into the 'x' and 'y' parts.
new = errors_df['POINT'].str.split("_", n = 1, expand = True)
errors_df['x']=new[0]
errors_df['y']=new[1]

# to_csv does not create missing directories.
os.makedirs('./errors_for_grid', exist_ok=True)
errors_df.to_csv('./errors_for_grid/40_10_RIDGE_HYB_errors.csv', sep=';', index=False)

## Earliness of forecast (lead time) - 50 days

In [69]:
# Length of the forecast window (number of trailing samples) and the number of
# leading forecast samples reserved for fitting the hybrid regression weights.
forcact_window=50
coeff_train_window=20

# Positional windows, counted from the end of each series.
forecast_part = slice(-forcact_window, None)
coef_part = slice(-forcact_window, -forcact_window+coeff_train_window)
score_part = slice(-forcact_window+coeff_train_window, None)

# One dict per grid point. The frame is built once after the loop because
# DataFrame.append copies the frame on every call and was removed in pandas 2.0.
error_records = []

for column in list(df_rean.columns.values):
    if column=='dates':
        continue

    # Reanalysis: samples used for scoring and samples used to train the chain.
    test = df_rean[column].iloc[score_part]
    test.index = df_rean['dates'].iloc[score_part]
    train = df_rean[column].iloc[:-forcact_window]
    train.index = df_rean['dates'].iloc[:-forcact_window]

    # NEMO values over the whole forecast window.
    nemo_pr = df_nemo[column].iloc[forecast_part]
    nemo_pr.index = df_nemo['dates'].iloc[forecast_part]

    # Calibration window used to fit the hybrid weights.
    nemo_coef_tr = df_nemo[column].iloc[coef_part]
    nemo_coef_tr.index = df_nemo['dates'].iloc[coef_part]
    real_coef_tr = df_rean[column].iloc[coef_part]
    real_coef_tr.index = df_rean['dates'].iloc[coef_part]

    # Two branches (trend and residual data models, each followed by ridge)
    # joined by an SVR root node that produces the final prediction.
    node_first = PrimaryNode('trend_data_model')
    node_second = PrimaryNode('residual_data_model')
    node_trend_model = SecondaryNode('ridge', nodes_from=[node_first])
    node_residual_model = SecondaryNode('ridge', nodes_from=[node_second])
    node_final = SecondaryNode('svr', nodes_from=[node_trend_model, node_residual_model])
    final_chain = TsForecastingChain(node_final)

    predicted_values = pd.Series(make_forecast(chain=final_chain,
                                               train_data=train,
                                               len_forecast=forcact_window,
                                               max_window_size=60))
    predicted_values.index = df_nemo['dates'].iloc[forecast_part]

    ridge_coef_tr = predicted_values.iloc[:coeff_train_window]
    ridge_coef_tr.index = df_nemo['dates'].iloc[coef_part]

    # Fit the regression once; the original refitted it for every coefficient.
    const, (nemo_coef, ridge_coef) = get_hybrid_coeffs(nemo_coef_tr, ridge_coef_tr, real_coef_tr)

    hyb_forecast = nemo_coef*nemo_pr+ridge_coef*predicted_values+const
    hyb_forecast.index = df_nemo['dates'].iloc[forecast_part]

    # Score only the part of the forecast that was not used for calibration.
    ridge_score = predicted_values.iloc[coeff_train_window:]
    hyb_score = hyb_forecast.iloc[coeff_train_window:]

    # NOTE: MAPE divides by the observed values, which were mean-centred when
    # the data were loaded, so near-zero observations can dominate it.
    error_records.append({
        'POINT': column,
        'MSE_RIDGE': mean_squared_error(test, ridge_score),
        'MAE_RIDGE': mean_absolute_error(test, ridge_score),
        'MAPE_RIDGE': round(np.mean(np.abs((test-ridge_score)/test))*100,3),
        'MSE_HYB': mean_squared_error(test, hyb_score),
        'MAE_HYB': mean_absolute_error(test, hyb_score),
        'MAPE_HYB': round(np.mean(np.abs((test-hyb_score)/test))*100,3),
    })

# The comma between 'MAPE_RIDGE' and 'MSE_HYB' was missing in the original
# list, which fused the two literals into a spurious 'MAPE_RIDGEMSE_HYB' column.
errors_df = pd.DataFrame(error_records,
                         columns=['POINT', 'MSE_RIDGE', 'MAE_RIDGE', 'MAPE_RIDGE',
                                  'MSE_HYB', 'MAE_HYB', 'MAPE_HYB'])

# Hybrid error minus chain error: negative values mean the hybrid scored better.
errors_df['MSE_DIF']=abs(errors_df['MSE_HYB'])-abs(errors_df['MSE_RIDGE'])
errors_df['MAE_DIF']=abs(errors_df['MAE_HYB'])-abs(errors_df['MAE_RIDGE'])
errors_df['MAPE_DIF']=abs(errors_df['MAPE_HYB'])-abs(errors_df['MAPE_RIDGE'])

# Split each column name on its first underscore into the 'x' and 'y' parts.
new = errors_df['POINT'].str.split("_", n = 1, expand = True)
errors_df['x']=new[0]
errors_df['y']=new[1]

# to_csv does not create missing directories.
os.makedirs('./errors_for_grid', exist_ok=True)
errors_df.to_csv('./errors_for_grid/50_20_RIDGE_HYB_errors.csv', sep=';', index=False)

## Earliness of forecast (lead time) - 70 days

In [70]:
# Length of the forecast window (number of trailing samples) and the number of
# leading forecast samples reserved for fitting the hybrid regression weights.
forcact_window=70
coeff_train_window=20

# Positional windows, counted from the end of each series.
forecast_part = slice(-forcact_window, None)
coef_part = slice(-forcact_window, -forcact_window+coeff_train_window)
score_part = slice(-forcact_window+coeff_train_window, None)

# One dict per grid point. The frame is built once after the loop because
# DataFrame.append copies the frame on every call and was removed in pandas 2.0.
error_records = []

for column in list(df_rean.columns.values):
    if column=='dates':
        continue

    # Reanalysis: samples used for scoring and samples used to train the chain.
    test = df_rean[column].iloc[score_part]
    test.index = df_rean['dates'].iloc[score_part]
    train = df_rean[column].iloc[:-forcact_window]
    train.index = df_rean['dates'].iloc[:-forcact_window]

    # NEMO values over the whole forecast window.
    nemo_pr = df_nemo[column].iloc[forecast_part]
    nemo_pr.index = df_nemo['dates'].iloc[forecast_part]

    # Calibration window used to fit the hybrid weights.
    nemo_coef_tr = df_nemo[column].iloc[coef_part]
    nemo_coef_tr.index = df_nemo['dates'].iloc[coef_part]
    real_coef_tr = df_rean[column].iloc[coef_part]
    real_coef_tr.index = df_rean['dates'].iloc[coef_part]

    # Two branches (trend and residual data models, each followed by ridge)
    # joined by an SVR root node that produces the final prediction.
    node_first = PrimaryNode('trend_data_model')
    node_second = PrimaryNode('residual_data_model')
    node_trend_model = SecondaryNode('ridge', nodes_from=[node_first])
    node_residual_model = SecondaryNode('ridge', nodes_from=[node_second])
    node_final = SecondaryNode('svr', nodes_from=[node_trend_model, node_residual_model])
    final_chain = TsForecastingChain(node_final)

    predicted_values = pd.Series(make_forecast(chain=final_chain,
                                               train_data=train,
                                               len_forecast=forcact_window,
                                               max_window_size=60))
    predicted_values.index = df_nemo['dates'].iloc[forecast_part]

    ridge_coef_tr = predicted_values.iloc[:coeff_train_window]
    ridge_coef_tr.index = df_nemo['dates'].iloc[coef_part]

    # Fit the regression once; the original refitted it for every coefficient.
    const, (nemo_coef, ridge_coef) = get_hybrid_coeffs(nemo_coef_tr, ridge_coef_tr, real_coef_tr)

    hyb_forecast = nemo_coef*nemo_pr+ridge_coef*predicted_values+const
    hyb_forecast.index = df_nemo['dates'].iloc[forecast_part]

    # Score only the part of the forecast that was not used for calibration.
    ridge_score = predicted_values.iloc[coeff_train_window:]
    hyb_score = hyb_forecast.iloc[coeff_train_window:]

    # NOTE: MAPE divides by the observed values, which were mean-centred when
    # the data were loaded, so near-zero observations can dominate it.
    error_records.append({
        'POINT': column,
        'MSE_RIDGE': mean_squared_error(test, ridge_score),
        'MAE_RIDGE': mean_absolute_error(test, ridge_score),
        'MAPE_RIDGE': round(np.mean(np.abs((test-ridge_score)/test))*100,3),
        'MSE_HYB': mean_squared_error(test, hyb_score),
        'MAE_HYB': mean_absolute_error(test, hyb_score),
        'MAPE_HYB': round(np.mean(np.abs((test-hyb_score)/test))*100,3),
    })

errors_df = pd.DataFrame(error_records,
                         columns=['POINT', 'MSE_RIDGE', 'MAE_RIDGE', 'MAPE_RIDGE',
                                  'MSE_HYB', 'MAE_HYB', 'MAPE_HYB'])

# Hybrid error minus chain error: negative values mean the hybrid scored better.
errors_df['MSE_DIF']=abs(errors_df['MSE_HYB'])-abs(errors_df['MSE_RIDGE'])
errors_df['MAE_DIF']=abs(errors_df['MAE_HYB'])-abs(errors_df['MAE_RIDGE'])
errors_df['MAPE_DIF']=abs(errors_df['MAPE_HYB'])-abs(errors_df['MAPE_RIDGE'])

# Split each column name on its first underscore into the 'x' and 'y' parts.
new = errors_df['POINT'].str.split("_", n = 1, expand = True)
errors_df['x']=new[0]
errors_df['y']=new[1]

# to_csv does not create missing directories.
os.makedirs('./errors_for_grid', exist_ok=True)
errors_df.to_csv('./errors_for_grid/70_20_RIDGE_HYB_errors.csv', sep=';', index=False)

## Earliness of forecast (lead time) - 80 days

In [71]:
# Length of the forecast window (number of trailing samples) and the number of
# leading forecast samples reserved for fitting the hybrid regression weights.
forcact_window=80
coeff_train_window=20

# Positional windows, counted from the end of each series.
forecast_part = slice(-forcact_window, None)
coef_part = slice(-forcact_window, -forcact_window+coeff_train_window)
score_part = slice(-forcact_window+coeff_train_window, None)

# One dict per grid point. The frame is built once after the loop because
# DataFrame.append copies the frame on every call and was removed in pandas 2.0.
error_records = []

for column in list(df_rean.columns.values):
    if column=='dates':
        continue

    # Reanalysis: samples used for scoring and samples used to train the chain.
    test = df_rean[column].iloc[score_part]
    test.index = df_rean['dates'].iloc[score_part]
    train = df_rean[column].iloc[:-forcact_window]
    train.index = df_rean['dates'].iloc[:-forcact_window]

    # NEMO values over the whole forecast window.
    nemo_pr = df_nemo[column].iloc[forecast_part]
    nemo_pr.index = df_nemo['dates'].iloc[forecast_part]

    # Calibration window used to fit the hybrid weights.
    nemo_coef_tr = df_nemo[column].iloc[coef_part]
    nemo_coef_tr.index = df_nemo['dates'].iloc[coef_part]
    real_coef_tr = df_rean[column].iloc[coef_part]
    real_coef_tr.index = df_rean['dates'].iloc[coef_part]

    # Two branches (trend and residual data models, each followed by ridge)
    # joined by an SVR root node that produces the final prediction.
    node_first = PrimaryNode('trend_data_model')
    node_second = PrimaryNode('residual_data_model')
    node_trend_model = SecondaryNode('ridge', nodes_from=[node_first])
    node_residual_model = SecondaryNode('ridge', nodes_from=[node_second])
    node_final = SecondaryNode('svr', nodes_from=[node_trend_model, node_residual_model])
    final_chain = TsForecastingChain(node_final)

    predicted_values = pd.Series(make_forecast(chain=final_chain,
                                               train_data=train,
                                               len_forecast=forcact_window,
                                               max_window_size=60))
    predicted_values.index = df_nemo['dates'].iloc[forecast_part]

    ridge_coef_tr = predicted_values.iloc[:coeff_train_window]
    ridge_coef_tr.index = df_nemo['dates'].iloc[coef_part]

    # Fit the regression once; the original refitted it for every coefficient.
    const, (nemo_coef, ridge_coef) = get_hybrid_coeffs(nemo_coef_tr, ridge_coef_tr, real_coef_tr)

    hyb_forecast = nemo_coef*nemo_pr+ridge_coef*predicted_values+const
    hyb_forecast.index = df_nemo['dates'].iloc[forecast_part]

    # Score only the part of the forecast that was not used for calibration.
    ridge_score = predicted_values.iloc[coeff_train_window:]
    hyb_score = hyb_forecast.iloc[coeff_train_window:]

    # NOTE: MAPE divides by the observed values, which were mean-centred when
    # the data were loaded, so near-zero observations can dominate it.
    error_records.append({
        'POINT': column,
        'MSE_RIDGE': mean_squared_error(test, ridge_score),
        'MAE_RIDGE': mean_absolute_error(test, ridge_score),
        'MAPE_RIDGE': round(np.mean(np.abs((test-ridge_score)/test))*100,3),
        'MSE_HYB': mean_squared_error(test, hyb_score),
        'MAE_HYB': mean_absolute_error(test, hyb_score),
        'MAPE_HYB': round(np.mean(np.abs((test-hyb_score)/test))*100,3),
    })

errors_df = pd.DataFrame(error_records,
                         columns=['POINT', 'MSE_RIDGE', 'MAE_RIDGE', 'MAPE_RIDGE',
                                  'MSE_HYB', 'MAE_HYB', 'MAPE_HYB'])

# Hybrid error minus chain error: negative values mean the hybrid scored better.
errors_df['MSE_DIF']=abs(errors_df['MSE_HYB'])-abs(errors_df['MSE_RIDGE'])
errors_df['MAE_DIF']=abs(errors_df['MAE_HYB'])-abs(errors_df['MAE_RIDGE'])
errors_df['MAPE_DIF']=abs(errors_df['MAPE_HYB'])-abs(errors_df['MAPE_RIDGE'])

# Split each column name on its first underscore into the 'x' and 'y' parts.
new = errors_df['POINT'].str.split("_", n = 1, expand = True)
errors_df['x']=new[0]
errors_df['y']=new[1]

# to_csv does not create missing directories.
os.makedirs('./errors_for_grid', exist_ok=True)
errors_df.to_csv('./errors_for_grid/80_20_RIDGE_HYB_errors.csv', sep=';', index=False)