# Installs

In [None]:
!pip install numpy==1.19.5
!pip install pandas==1.3.4
!pip install statsmodels==0.13.2
!pip install scikit-learn==1.0.2
!pip install xgboost==1.5.2
!pip install catboost==1.0.5
!pip install deep-forest==0.1.5
!pip install tensorflow==2.7.0
!pip install keras-tuner --upgrade
!pip install shap --upgrade

# Imports

In [None]:
from datetime import datetime, date, time
# import onetick.py as otp
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# from IPython.core.interactiveshell import InteractiveShell

from statsmodels.graphics.tsaplots import plot_acf, plot_pacf

from sklearn.model_selection import train_test_split, GridSearchCV, ParameterGrid
from sklearn.preprocessing import MinMaxScaler
from sklearn.decomposition import PCA

from statsmodels.tsa.seasonal import STL

from xgboost import XGBRegressor
from catboost import CatBoostRegressor, Pool, cv, sum_models
from deepforest import CascadeForestRegressor
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping
import keras_tuner as kt
from sklearn.ensemble import StackingRegressor

from sklearn.metrics import r2_score, mean_absolute_error, mean_absolute_percentage_error
import shap

# InteractiveShell.ast_node_interactivity = "all"
# pd.set_option('display.max_rows', 200)

# Loading data

## ETF

In [None]:
# db = 'NYSE_TAQ'
# tick_type = 'TRD'
# symbols = ['QQQ']
# start = otp.dt(2021, 4, 1, 9, 30)
# end = otp.dt(2022, 4, 1, 16, 0)
# bucket = 600
# timezone='EST5EDT'

# data = otp.DataSource(db = db, tick_type = tick_type, symbol = symbols, start = start, end = end, identify_input_ts=True)

# data = data.agg({'VOLUME': otp.agg.sum(data['SIZE']),
#                  'TICK_VOLUME': otp.agg.count()},
#                  group_by = ['TICKER'],
#                  bucket_interval = bucket)

# data['hhmm'] = data['Time'].dt.strftime(format='%H:%M')

# etf_df = otp.run(data, apply_times_daily=True, timezone=timezone)

In [None]:
# etf_df = etf_df.groupby(['Time', 'hhmm'], as_index=False)['VOLUME'].sum()
# etf_df = etf_df[(etf_df['Time'].dt.dayofweek!=5) & (etf_df['Time'].dt.dayofweek!=6)].reset_index(drop=True)
# etf_ts = etf_df.set_index('Time').copy(deep=True)

# etf_ts

In [None]:
# etf_ts.to_csv('./data/etf_nq.csv')

In [None]:
# Load the cached ETF volume table and index it by its 'Time' column.
etf_ts = pd.read_csv('../data/etf_nq.csv', index_col='Time')
etf_ts

## Futures

In [None]:
# db = 'CME'
# tick_type = 'TRD'
# symbols = ['NQ\H21', 'NQ\M21', r'NQ\U21', 'NQ\Z21', 'NQ\H22', 'NQ\M22'] #H M U Z
# start = otp.dt(2021, 4, 1, 9, 30)
# end = otp.dt(2022, 4, 1, 16, 0)
# bucket = 600
# timezone='EST5EDT'

# data = otp.DataSource(db = db, tick_type = tick_type, symbol = symbols, start = start, end = end, identify_input_ts=True)
# data['TICKER'] = data['SYMBOL_NAME']
# data = data.agg({'VOLUME': otp.agg.sum(data['SIZE']),
#                  'TICK_VOLUME': otp.agg.count()},
#                  group_by = ['TICKER'],
#                  bucket_interval = bucket)

# data['hhmm'] = data['Time'].dt.strftime(format='%H:%M')

# fut_df = otp.run(data, apply_times_daily=True, timezone=timezone)

In [None]:
# fut_df = fut_df.groupby(['Time', 'hhmm'], as_index=False)['VOLUME'].sum()
# fut_df = fut_df[(fut_df['Time'].dt.dayofweek!=5) & (fut_df['Time'].dt.dayofweek!=6)].reset_index(drop=True)
# fut_ts = fut_df.set_index('Time').copy(deep=True)

# fut_ts

In [None]:
# fut_ts.to_csv('./data/fut_nq.csv')

In [None]:
# Load the cached futures volume table and index it by its 'Time' column.
fut_ts = pd.read_csv('../data/fut_nq.csv', index_col='Time')
fut_ts

## Options

In [None]:
# db = 'US_OPTIONS'
# tick_type = 'TRD'
# start = otp.dt(2021, 4, 1)
# end = otp.dt(2022, 4, 1)
# bucket = 600
# timezone='EST5EDT'

# data = otp.DataSource(db=db, tick_type=tick_type, start=start, end=end, identify_input_ts=True)
# data['Date'] = data['Time'].dt.date()

# volume_date = data.agg({'VOLUME': otp.agg.sum(data['SIZE'])}, group_by=['Date'])
# volume_10min = data.agg({'VOLUME': otp.agg.sum(data['SIZE'])}, bucket_interval = bucket)

# # all_symbols = otp.Symbols(db='US_OPTIONS', date=start, keep_db=True, pattern='QQQ   220302%')
# all_symbols = otp.Symbols(db='US_OPTIONS', start=start, end=end, keep_db=True, pattern='QQQ%')

# symbols_date = otp.funcs.merge([volume_date], symbols=all_symbols, identify_input_ts=True)

# most_traded_by_days = symbols_date.high('VOLUME', n=5, group_by=['Date'])
# most_traded_by_days = otp.run(most_traded_by_days, timezone=timezone)

# symbols = list(pd.unique(most_traded_by_days['SYMBOL_NAME']))
# symbols_10min = otp.funcs.merge([volume_10min], symbols=symbols, identify_input_ts=True)
# symbols_10min['hhmm'] = symbols_10min['Time'].dt.strftime(format='%H:%M')

# symbols_10min['Date'] = symbols_10min['Time'].dt.date()
# symbols_10min = symbols_10min.agg({'VOLUME': otp.agg.sum(symbols_10min['VOLUME'])}, group_by = ['Date', 'hhmm'])

# opt_df = otp.run(symbols_10min, timezone=timezone)
# opt_df

In [None]:
# opt_df['Time'] = pd.to_datetime(opt_df['Date'].astype(str) + ' ' + opt_df['hhmm'])
# opt_df = opt_df[(opt_df['Time'].dt.dayofweek!=5) & (opt_df['Time'].dt.dayofweek!=6)].reset_index(drop=True)
# opt_ts = opt_df.loc[(opt_df['Time'].dt.time > time(9, 30)) & (opt_df['Time'].dt.time <= time(16, 0))]
# opt_ts = opt_ts.drop('Date', axis=1).set_index('Time').copy(deep=True)
# opt_ts

In [None]:
# opt_ts.to_csv('./data/opt_nq.csv')

In [None]:
# Load the cached options volume table and index it by its 'Time' column.
opt_ts = pd.read_csv('../data/opt_nq.csv', index_col='Time')
opt_ts

## Join together

In [None]:
# Align the three sources on their shared index labels: the inner joins keep
# only the timestamps present in every table. The futures/ETF 'hhmm' copies
# are dropped, leaving the un-suffixed 'hhmm' that arrives with the options table.
ts = (
    fut_ts
    .join(etf_ts, how='inner', lsuffix='_fut', rsuffix='_etf')
    .join(opt_ts, how='inner')
    .drop(['hhmm_fut', 'hhmm_etf'], axis=1)
    .rename(columns={'VOLUME': 'VOLUME_opt'})
)

columns = ['VOLUME_fut', 'VOLUME_etf', 'VOLUME_opt']

# Fractions of the whole sample reserved for testing and validation.
test_size = 0.1
val_size = 0.1

# Partition lengths in rows, rounded half-up; training takes the remainder.
n_rows = ts.shape[0]
val_len = int(n_rows * val_size + 0.5)
test_len = int(n_rows * test_size + 0.5)
train_len = n_rows - val_len - test_len

# Positional indexes of the chronological train / validation / test rows.
train_indexes = list(range(train_len))
val_indexes = list(range(train_len, train_len + val_len))
test_indexes = list(range(train_len + val_len, n_rows))

ts

# Data overview

## Autocorrelation

In [None]:
# Autocorrelation of the futures volume for lags 0..149.
acf_lags = np.arange(150)
fig, ax = plt.subplots(figsize=(20, 8))
plt_pacf = plot_acf(ts['VOLUME_fut'], lags=acf_lags, auto_ylims=True, ax=ax)

# Preprocessing

## Clip strong outliers

In [None]:
# Winsorise each volume series to mean +/- std_num standard deviations.
# The bounds are estimated on the training rows only, so the validation and
# test rows do not influence them.
std_num = 4

# Statistics are computed once for all target columns instead of once per
# loop iteration over the whole frame.
train_stats = ts.iloc[train_indexes][columns].agg(['mean', 'std'])

for column in columns:
    mean = train_stats.loc['mean', column]
    std = train_stats.loc['std', column]
    up_border = mean + std_num * std
    down_border = mean - std_num * std
    # clip() caps values outside [down_border, up_border] and leaves missing
    # values missing; the previous pair of where() calls replaced NaN with
    # the upper border.
    ts[column] = ts[column].clip(lower=down_border, upper=up_border)

## Remove seasonality

### Intraday averaging

In [None]:
# Rolling intraday profile: for every block of `bins` rows, each row receives
# the mean volume observed at the same 'hhmm' label over the preceding
# `window_days` blocks. The first `window_days` blocks keep NaN.
# NOTE(review): the block arithmetic assumes every trading day contributes
# exactly `bins` rows - confirm that shortened sessions are absent from the data.
bins = 39
window_days = 5
window_len = bins * window_days

agg_columns = [f'{column}_agg' for column in columns]
ts[agg_columns] = np.nan
# Not needed by the vectorised loop below; retained in case other cells use it.
all_hhmm = pd.unique(ts['hhmm'])

# Integer positions of the *_agg columns so results can be written with one
# positional assignment. The earlier `ts.iloc[...].loc[...] = value` form was a
# chained assignment, which may write to a temporary copy instead of `ts`.
agg_positions = ts.columns.get_indexer(agg_columns)

# Step by `bins` (previously a hard-coded 39) so the window stays block-aligned.
for i in range(0, ts.shape[0] - bins * (window_days + 1) + 1, bins):
    target_start = i + window_len
    target_stop = target_start + bins

    # Mean per time-of-day label over the look-back window.
    history_means = ts.iloc[i:target_start].groupby('hhmm')[columns].mean()

    # Look the means up by the labels of the block being filled; labels that
    # do not occur in the window yield NaN.
    target_hhmm = ts['hhmm'].iloc[target_start:target_stop].to_numpy()
    ts.iloc[target_start:target_stop, agg_positions] = history_means.reindex(target_hhmm).to_numpy()

In [None]:
# Mean of every numeric column per time-of-day label, over the whole sample.
ts_agg = ts.groupby('hhmm').mean()

n_slots = ts_agg.shape[0]
tick_positions = list(range(n_slots))
tick_labels = list(ts_agg.index)

# One figure per volume series showing its average intraday profile.
for column in columns:
    fig, ax = plt.subplots(figsize=(20, 8))
    ts_agg[column].plot(ax=ax, title=column)
    ax.set_xticks(tick_positions, tick_labels, rotation='vertical')
    ax.grid()
    ax.set_xlim(0, n_slots - 1)

### Calculate: VOLUME-INTRADAY_AVERAGE

In [None]:
# ts_unseason = ts.join(ts_agg, on='hhmm', rsuffix='_agg').copy(deep=True)
# Subtract the rolling intraday average from each volume series, then drop
# the rows that contain missing values.
ts_unseason = ts.assign(
    **{name: ts[name] - ts[f'{name}_agg'] for name in columns}
).dropna()
ts_unseason

# Features

## Partial autocorrelation

In [None]:
# Partial autocorrelation (lags 0..149) of each de-seasonalised series,
# one figure per series.
pacf_lags = np.arange(150)
for column in columns:
    fig, ax = plt.subplots(figsize=(20, 8))
    plt_pacf = plot_pacf(
        ts_unseason[column],
        lags=pacf_lags,
        method='ywm',
        title=column,
        auto_ylims=True,
        ax=ax,
    )

## Features

### Add lags

In [None]:
# Modelling frame: the de-seasonalised volumes plus 'hhmm' and the futures
# intraday average.
df = ts_unseason[columns + ['hhmm', 'VOLUME_fut_agg']].copy(deep=True)

periods = [1, 2, 3, 4, 37, 38, 39, 40]
target = ['VOLUME_fut']

# One feature per (series, lag) pair, ordered series-major.
features_columns = [f'{column}_lag_{lag}' for column in columns for lag in periods]

for column in columns:
    for lag in periods:
        df[f'{column}_lag_{lag}'] = df[column].shift(lag)

# Rows containing missing values (e.g. the first rows, whose lags are
# undefined) are removed and the index is renumbered from zero.
df = df.dropna().reset_index(drop=True)
df

### Feature importance

In [None]:
# Rank the lag features by total absolute SHAP value and keep the ten strongest.
# The ranking model is fitted only on rows that precede the hold-out test
# partition; fitting on the full frame would let test rows influence which
# features are selected.
# The row count mirrors train_test_split(test_size=<float>, shuffle=False),
# which takes ceil(test_size * n_samples) rows from the end as the test set.
n_test_rows = int(np.ceil(test_size * df.shape[0]))
importance_df = df.iloc[:df.shape[0] - n_test_rows]

cat = CatBoostRegressor(iterations=100)
cat.fit(importance_df[features_columns], importance_df[target], verbose=0, plot=False)

explainer = shap.TreeExplainer(cat)
shap_values = explainer.shap_values(Pool(importance_df[features_columns], importance_df[target]))
shap.summary_plot(shap_values, importance_df[features_columns])

top_features = (
    pd.DataFrame(shap_values, columns=features_columns)
    .abs()
    .sum()
    .sort_values(ascending=False)
    .index[:10]
)
top_features

# Splitting

In [None]:
# Chronological (unshuffled) split: the last `test_size` share of rows is the
# test set, then validation is carved from the end of what remains.
# `val_size` is a share of the whole sample, so it is rescaled to a share of
# the non-test rows. The rescaled value lives in its own variable: the earlier
# in-place `val_size /= ...` changed the split every time this cell was re-run.
val_share_of_rest = val_size / (1 - test_size)

x_train, x_test, y_train, y_test = train_test_split(
    df[top_features], df[target], test_size=test_size, shuffle=False
)
x_train, x_val, y_train, y_val = train_test_split(
    x_train, y_train, test_size=val_share_of_rest, shuffle=False
)

In [None]:
# Display the training features produced by the split.
x_train

In [None]:
# Display the training target produced by the split.
y_train

In [None]:
# Show the row indexes of the train, validation and test partitions.
x_train.index, x_val.index, x_test.index

# Scaling

In [None]:
# Min-max scaling: the scaler learns its ranges from the training features
# only and is then applied unchanged to all three partitions.
scaler = MinMaxScaler().fit(x_train)


def _scale(frame):
    """Return `frame` transformed by `scaler`, keeping its index and column labels."""
    return pd.DataFrame(data=scaler.transform(frame), index=frame.index, columns=frame.columns)


x_train, x_val, x_test = (_scale(part) for part in (x_train, x_val, x_test))

In [None]:
# Display the training features after the scaling step.
x_train

# Training - Prediction

## XGBoost

In [None]:
# Gradient-boosted stumps (max_depth=1); the validation set is passed as an
# evaluation set during fitting.
xgb_params = dict(
    nthread=4,
    n_estimators=300,
    max_depth=1,
    min_child_weight=6,
    learning_rate=0.08,
)
xgb = XGBRegressor(**xgb_params)
xgb.fit(x_train, y_train, eval_set=[(x_val, y_val)], verbose=0)

In [None]:
# Predict the de-seasonalised futures volume on the test rows.
prediction_xgb = pd.DataFrame(xgb.predict(x_test), index=x_test.index, columns=['prediction'])

# Add the intraday average back to both the prediction and the target so the
# comparison is made on the original volume scale.
tdf_test = prediction_xgb.join(df[['VOLUME_fut_agg', 'VOLUME_fut']]).assign(
    VOLUME_prediction=lambda t: t['prediction'] + t['VOLUME_fut_agg'],
    VOLUME_original=lambda t: t['VOLUME_fut'] + t['VOLUME_fut_agg'],
)

tdf_test[['VOLUME_original', 'VOLUME_prediction']].plot(figsize=[20, 8], legend=True)

actual = tdf_test['VOLUME_original']
predicted = tdf_test['VOLUME_prediction']

std_orig = actual.std()
r2_xgb = r2_score(actual, predicted)
mae_xgb = mean_absolute_error(actual, predicted)
mape_xgb = mean_absolute_percentage_error(actual, predicted)

print('StdDev:', std_orig)
print('MAE/StdDev', mae_xgb/std_orig*100)
print('R2, MAE, MAPE:')
(r2_xgb, mae_xgb, mape_xgb)

In [None]:
# Zoom in on the first 500 test rows: actual versus predicted volume.
tdf_test.iloc[:500][['VOLUME_original', 'VOLUME_prediction']].plot(figsize=[20, 8], legend=True)

In [None]:
# Optimization
# xgb = XGBRegressor(nthread=8)
# param_grid = {
#     'max_depth': [1],
#     'min_child_weight': [7],
#     'n_estimators': [250],
#     'learning_rate': [0.09]
# }
# # 100, 600, 50
# # 0.01, 0.02, 0.05, 0.1, 0.15
# gs = GridSearchCV(
#     estimator=xgb,
#     param_grid=param_grid,
#     cv=5, 
#     n_jobs=8, 
#     scoring='neg_root_mean_squared_error',
#     verbose=2
# )

# fitted_model = gs.fit(x_train, y_train, eval_set=[(x_val, y_val)], verbose=0)

In [None]:
# pd.DataFrame(gs.cv_results_).sort_values('mean_test_score', ascending=False)

## CatBoost

In [None]:
# cat = CatBoostRegressor(learning_rate=0.08, depth=4, l2_leaf_reg=1, iterations=1000)
# cat.fit(x_train, y_train, eval_set=Pool(x_val, y_val), use_best_model=True, verbose=0, plot=True)

param_grid = {'depth': [4], 'l2_leaf_reg': [1], 'learning_rate': [0.1], 'models_num': [10], 'early_stopping_rounds': [30]}

# Train and validation rows are pooled because cv() produces its own folds.
# The pool does not depend on the grid point, so it is built once.
cv_dataset = Pool(data=pd.concat([x_train, x_val]), label=pd.concat([y_train, y_val]))

# Cross-validate every grid point. After the loop `res` and `models_num` hold
# the values of the last grid point evaluated.
for params in ParameterGrid(param_grid):
    models_num = params['models_num']

    cat_params = {"iterations": 1000,
                  "depth": params['depth'],
                  "l2_leaf_reg": params['l2_leaf_reg'],
                  "learning_rate": params['learning_rate'],
                  "loss_function": "MAE",
                  "custom_metric": 'R2',
                  "use_best_model": True,
                  "verbose": False}

    # The documented split schemes are 'Classical', 'Inverted' and
    # 'TimeSeries'; the value used here previously carried a trailing space
    # ('TimeSeries ') and therefore matched none of them.
    res = cv(cv_dataset,
             cat_params,
             fold_count=models_num,
             shuffle=False,
             early_stopping_rounds=params['early_stopping_rounds'],
             type='TimeSeries',
             return_models=True,
             plot=True,
             logging_level='Silent')
    print(params)
    # With return_models=True the result is indexed as (metrics table, models).
    print(res[0]['test-MAE-mean'].min())

In [None]:
# Blend the cross-validation fold models into one predictor with equal weights.
fold_weights = [1/models_num]*models_num
cat = sum_models(res[1], fold_weights)

prediction_cat = pd.DataFrame(cat.predict(x_test), index=x_test.index, columns=['prediction'])

# Add the intraday average back to both the prediction and the target so the
# comparison is made on the original volume scale.
tdf_test = prediction_cat.join(df[['VOLUME_fut_agg', 'VOLUME_fut']]).assign(
    VOLUME_prediction=lambda t: t['prediction'] + t['VOLUME_fut_agg'],
    VOLUME_original=lambda t: t['VOLUME_fut'] + t['VOLUME_fut_agg'],
)

tdf_test[['VOLUME_original', 'VOLUME_prediction']].plot(figsize=[20, 8], legend=True)

actual = tdf_test['VOLUME_original']
predicted = tdf_test['VOLUME_prediction']

std_orig = actual.std()
r2_cat = r2_score(actual, predicted)
mae_cat = mean_absolute_error(actual, predicted)
mape_cat = mean_absolute_percentage_error(actual, predicted)

print('StdDev:', std_orig)
print('MAE/StdDev', mae_cat/std_orig*100)
print('R2, MAE, MAPE:')
print(r2_cat, mae_cat, mape_cat)

In [None]:
# Zoom in on the first 500 test rows: actual versus predicted volume.
tdf_test.iloc[:500][['VOLUME_original', 'VOLUME_prediction']].plot(figsize=[20, 8], legend=True)

In [None]:
# cat = CatBoostRegressor()
# grid = {'learning_rate': [0.01, 0.02, 0.03, 0.04, 0.05, 0.06, 0.07, 0.08, 0.09, 0.1],
#         'depth': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
#         'l2_leaf_reg': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]}

# grid_search_result = cat.grid_search(grid, 
#                                        X=pd.concat([x_train, x_val]), 
#                                        y=pd.concat([y_train, y_val]),
#                                        cv=5,
#                                        shuffle=False,
#                                        verbose=False,
#                                        plot=True)

In [None]:
# pd.DataFrame(grid_search_result)

## DNN

In [None]:
class DNN(kt.HyperModel):
    """Keras Tuner hypermodel: two tunable hidden Dense layers and one linear output.

    Layer widths and activations are chosen in ``build``; the number of epochs
    and the batch size are chosen in ``fit``.
    """

    def build(self, hp):
        """Create and compile the network for one trial.

        Args:
            hp: hyperparameter container of the current trial; ``hp.Choice``
                is used to pick the width and activation of each hidden layer.

        Returns:
            The compiled ``Sequential`` model (mean-squared-error loss,
            RMSprop optimizer).
        """
        # Sequential and Dense come from the notebook's import cell; the
        # duplicate function-scope imports (including an unused Dropout)
        # were removed.
        model = Sequential([
            Dense(units=hp.Choice('units1', [4, 8, 16, 32]),
                  activation=hp.Choice('activation1', ['relu', 'elu'])),
            Dense(units=hp.Choice('units2', [4, 8, 16, 32]),
                  activation=hp.Choice('activation2', ['relu', 'elu'])),
            Dense(1),
        ])
        model.compile(loss='mean_squared_error', optimizer='RMSprop')
        return model

    def fit(self, hp, model, *args, **kwargs):
        """Train ``model`` with a tuned epoch count and batch size.

        Args:
            hp: hyperparameter container of the current trial.
            model: the model returned by ``build``.
            *args: positional arguments forwarded to ``model.fit``.
            **kwargs: keyword arguments forwarded to ``model.fit``; they must
                not repeat ``epochs``, ``batch_size``, ``shuffle`` or
                ``verbose``, which are set here.

        Returns:
            Whatever ``model.fit`` returns.
        """
        epochs = hp.Choice('epochs', [256, 512, 1024])
        batch_size = hp.Choice('batch_size', [16, 32, 64, 128])
        return model.fit(
            *args,
            epochs=epochs,
            batch_size=batch_size,
            shuffle=False,  # batches are drawn in row order
            verbose=0,
            **kwargs
        )

# A trial stops once the validation loss has not improved for 32 epochs;
# the best weights seen are then restored.
early_stopping = EarlyStopping(monitor='val_loss', patience=32, restore_best_weights=True)

# Random search over the DNN hypermodel, scored by validation loss.
# Previous results in the checkpoint directory are overwritten.
search_settings = dict(
    objective='val_loss',
    max_trials=100,
    directory='./dnn_checkpoints/',
    overwrite=True,
)
tuner = kt.RandomSearch(DNN(), **search_settings)
tuner.search(x_train, y_train, validation_data=(x_val, y_val), callbacks=[early_stopping])

In [None]:
# Print the tuner's summary of the search results.
tuner.results_summary()

In [None]:
# Retrieve the three best models from the search; their predictions are
# averaged in the evaluation cell.
dnn_models = tuner.get_best_models(num_models=3)

In [None]:
# Average the test predictions of the selected networks row by row.
member_predictions = [pd.DataFrame(dnn.predict(x_test), index=x_test.index) for dnn in dnn_models]
ensemble_mean = pd.concat(member_predictions, axis=1).mean(axis=1)
prediction_dnn = pd.DataFrame(ensemble_mean, columns=['prediction'])

# Add the intraday average back to both the prediction and the target so the
# comparison is made on the original volume scale.
tdf_test = prediction_dnn.join(df[['VOLUME_fut_agg', 'VOLUME_fut']]).assign(
    VOLUME_prediction=lambda t: t['prediction'] + t['VOLUME_fut_agg'],
    VOLUME_original=lambda t: t['VOLUME_fut'] + t['VOLUME_fut_agg'],
)

tdf_test[['VOLUME_original', 'VOLUME_prediction']].plot(figsize=[20, 8], legend=True)

actual = tdf_test['VOLUME_original']
predicted = tdf_test['VOLUME_prediction']

std_orig = actual.std()
r2_dnn = r2_score(actual, predicted)
mae_dnn = mean_absolute_error(actual, predicted)
mape_dnn = mean_absolute_percentage_error(actual, predicted)

print('StdDev:', std_orig)
print('MAE/StdDev', mae_dnn/std_orig*100)
print('R2, MAE, MAPE:')
(r2_dnn, mae_dnn, mape_dnn)

In [None]:
# Zoom in on the first 500 test rows: actual versus predicted volume.
tdf_test.iloc[:500][['VOLUME_original', 'VOLUME_prediction']].plot(figsize=[20, 8], legend=True)

## Deep Forest (Cascade Forest)

In [None]:
# Cascade forest fitted on the pooled train + validation rows, passed as
# plain NumPy arrays with a flattened target.
ccd = CascadeForestRegressor(n_jobs=8, n_estimators=3, n_trees=300, max_layers=15)
fit_features = pd.concat([x_train, x_val]).to_numpy()
fit_target = pd.concat([y_train, y_val]).to_numpy().ravel()
ccd.fit(fit_features, fit_target)

In [None]:
# Predict the de-seasonalised futures volume on the test rows.
prediction_ccd = pd.DataFrame(ccd.predict(x_test), index=x_test.index, columns=['prediction'])

# Add the intraday average back to both the prediction and the target so the
# comparison is made on the original volume scale.
tdf_test = prediction_ccd.join(df[['VOLUME_fut_agg', 'VOLUME_fut']]).assign(
    VOLUME_prediction=lambda t: t['prediction'] + t['VOLUME_fut_agg'],
    VOLUME_original=lambda t: t['VOLUME_fut'] + t['VOLUME_fut_agg'],
)

tdf_test[['VOLUME_original', 'VOLUME_prediction']].plot(figsize=[20, 8], legend=True)

actual = tdf_test['VOLUME_original']
predicted = tdf_test['VOLUME_prediction']

std_orig = actual.std()
r2_ccd = r2_score(actual, predicted)
mae_ccd = mean_absolute_error(actual, predicted)
mape_ccd = mean_absolute_percentage_error(actual, predicted)

print('StdDev:', std_orig)
print('MAE/StdDev', mae_ccd/std_orig*100)
print('R2, MAE, MAPE:')
(r2_ccd, mae_ccd, mape_ccd)

In [None]:
# # Optimization
# res = {'params':[],
#       'metrics':[]}

# param_grid = {'n_estimators': [1,2,3], 'n_trees': [250,300,350], 'max_layers': [15, 25, 35]}
# for idx, params in enumerate(ParameterGrid(param_grid)):
#     print(params)
#     r2_ccd, mae_ccd, mape_ccd = 0, 0, 0
#     for j in range(1,11):
#         ccd = CascadeForestRegressor(n_jobs=8, n_estimators=params['n_estimators'], n_trees=params['n_trees'], max_layers=params['max_layers'], verbose=0)
#         ccd.fit(pd.concat([x_train]).values, np.ravel(pd.concat([y_train]).values))

#         prediction_ccd = pd.DataFrame(ccd.predict(x_val), index=x_val.index, columns=['prediction'])

#         tdf_test = prediction_ccd.join(df)[['prediction','VOLUME_fut_agg', 'VOLUME_fut']].copy(deep=True)
#         tdf_test['VOLUME_prediction'] = tdf_test['prediction'] + tdf_test['VOLUME_fut_agg']
#         tdf_test['VOLUME_original'] = tdf_test['VOLUME_fut'] + tdf_test['VOLUME_fut_agg']

#         r2_ccd += r2_score(tdf_test['VOLUME_original'], tdf_test['VOLUME_prediction'])
#         mae_ccd += mean_absolute_error(tdf_test['VOLUME_original'], tdf_test['VOLUME_prediction'])
#         mape_ccd += mean_absolute_percentage_error(tdf_test['VOLUME_original'], tdf_test['VOLUME_prediction'])

#     r2_ccd /= 10
#     mae_ccd /=10
#     mape_ccd /= 10
#     print('R2, MAE, MAPE:')
#     print(r2_ccd, mae_ccd, mape_ccd)

## Totals

In [None]:
# Side-by-side summary of the test metrics (R2, MAE, MAPE) of every model.
model_scores = {
    'XGBoost': (r2_xgb, mae_xgb, mape_xgb),
    'CatBoost': (r2_cat, mae_cat, mape_cat),
    'DNN': (r2_dnn, mae_dnn, mape_dnn),
    'Cascade Forest': (r2_ccd, mae_ccd, mape_ccd),
}
for model_name, scores in model_scores.items():
    print(f'{model_name}:', *scores)

# Ensemble model