In [1]:
import pandas as pd
import numpy as np

from datetime import datetime as dt
import datetime
from dateutil.relativedelta import relativedelta

from tqdm import tqdm_notebook
import tqdm

import pickle

import os
import warnings

# Suppress every warning for the rest of the session. This also hides
# deprecation notices from the libraries imported in this notebook, so API
# removals will only show up as hard errors after an upgrade.
warnings.filterwarnings('ignore')

In [13]:
from xgboost import XGBRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import Lasso, LassoCV, Ridge, RidgeCV

from sklearn.model_selection import train_test_split, cross_val_score, cross_validate
from sklearn.preprocessing import StandardScaler, MinMaxScaler

from sklearn.model_selection import GridSearchCV

from sklearn.metrics import precision_score, recall_score, roc_auc_score, f1_score, \
                            accuracy_score, auc, precision_recall_curve, r2_score

In [3]:
import matplotlib.pylab as plt
import seaborn as sns

In [4]:
# Number of days loaded after the first one (n_days + 1 daily labels in total).
n_days = 20

# First calendar day of the sample.
start_date = datetime.datetime(2023, 4, 3)

# One 'YYYY_MM_DD' label per consecutive day, starting at start_date.
start_dates = [
    (start_date + relativedelta(days = offset)).date().strftime('%Y_%m_%d')
    for offset in range(n_days + 1)
]

In [49]:
pair = 'BTCUSDT'

freq = 1

tc = 'received_time_r'

# Read one LAST_TRADE file per day and stack them with a single concat.
# DataFrame.append (used previously) was removed in pandas 2.0 and copied the
# whole accumulated frame on every iteration.
daily_frames = []

for date in start_dates:
    
    lt_df = pd.read_csv(f'~/LAST_TRADE_{pair}_{date}_{freq}.csv').rename(columns = {'event_time_r': 'received_time_r'})

    # The first column becomes the timestamp column; all others get the '_LT' suffix.
    lt_df.columns = [tc] + [i + '_LT' for i in lt_df.columns[1:]]

    daily_frames.append(lt_df)

# ignore_index gives every row a unique label instead of repeating each
# file's own 0..N-1 index once per day.
df = pd.concat(daily_frames, ignore_index = True)

del daily_frames

# Columns that contain +inf or -inf.
# NOTE(review): the [1:-1] slice skips the timestamp column and also the LAST
# column of the frame - confirm that leaving the last column unchecked is intended.
extremes = dict()

for c in df.columns[1:-1]:
    if df[c].max() == np.inf or df[c].min() == -np.inf:
        extremes[c] = 1
    
# Replace +inf by the 0.9 quantile of the values below +inf, then -inf by the
# 0.1 quantile of the values above -inf (computed after the +inf replacement).
for c in extremes.keys():
    upper_fill = df[c][df[c] < np.inf].quantile(0.9)
    df[c] = np.where(df[c] >= np.inf, upper_fill, df[c])
    
    lower_fill = df[c][df[c] > -np.inf].quantile(0.1)
    df[c] = np.where(df[c] <= -np.inf, lower_fill, df[c])
    

# Columns whose NaN-ignoring standard deviation is exactly zero are dropped.
no_variation = dict()

for c in df.columns[1:-1]:
    if np.nanstd(df[c]) == 0:
        no_variation[c] = 1

df = df.drop(columns = list(no_variation.keys()))

# Minute / second dummies are cut out of the raw timestamp string
# ('%Y-%m-%d %H:%M:%S'): characters [-5:-3] are the minute, [-2:] the second.
# drop_first removes one level of each to avoid perfectly collinear dummies.
minutes_df = pd.get_dummies(df[tc].str[-5:-3], prefix = 'minute', drop_first = True).astype(int)
seconds_df = pd.get_dummies(df[tc].str[-2:], prefix = 'second', drop_first = True).astype(int)

df = pd.concat([df, minutes_df, seconds_df], axis = 1)

del minutes_df
del seconds_df

# Parse the timestamp strings in one vectorised call; a value that does not
# match the format raises, as the per-row strptime did.
df[tc] = pd.to_datetime(df[tc], format = '%Y-%m-%d %H:%M:%S')

top_features = pd.read_excel(f'top_features_sw_{freq}.xlsx')


In [50]:
def mae(y_true, y_pred):
    """Mean absolute difference between ``y_true`` and ``y_pred``, skipping NaN entries."""
    abs_errors = np.abs(y_true - y_pred)
    return np.nanmean(abs_errors)

def rmae(y_true, y_pred):
    """``mae(y_true, y_pred)`` divided by the NaN-skipping mean of ``y_true``."""
    error = mae(y_true, y_pred)
    scale = np.nanmean(y_true)
    return error / scale

def _tstat(loss_diff):
    """Return mean / std * sqrt(n - 1) for a 1-D array of loss differences.

    NaN entries are removed first and the standard deviation uses ddof = 1.
    Returns NaN when fewer than two non-NaN observations remain.
    """
    loss_diff = np.asarray(loss_diff, dtype = float)
    loss_diff = loss_diff[~np.isnan(loss_diff)]
    
    if loss_diff.shape[0] < 2:
        return np.nan
    
    return loss_diff.mean() / loss_diff.std(ddof = 1) * np.sqrt(loss_diff.shape[0] - 1)


def _score_fold(y_train, y_test, y_pred, calib_share):
    """Compute the metrics of one fold from its train target, test target and predictions.

    All inputs are converted to plain float arrays so that every slice and
    subtraction below is positional. Subtracting two shifted pandas Series
    instead aligns on index labels and yields zeros, not lagged differences.
    """
    y_train = np.asarray(y_train, dtype = float)
    y_test = np.asarray(y_test, dtype = float)
    y_pred = np.asarray(y_pred, dtype = float)
    
    # The first calib_share of the TEST fold calibrates the zero/non-zero
    # threshold; classification metrics are computed on the remainder.
    n_calib = int(y_test.shape[0] * calib_share)
    
    # Threshold = quantile of the calibration predictions at the share of
    # exact zeros observed in the training target.
    threshold = np.nanquantile(y_pred[:n_calib], (y_train == 0).mean())

    y_pred_bin = np.where(y_pred[n_calib:] < threshold, 1, 0)
    y_test_bin = np.where(y_test[n_calib:] == 0, 1, 0)

    recall = recall_score(y_test_bin, y_pred_bin)
    precision = precision_score(y_test_bin, y_pred_bin)
    f1 = f1_score(y_test_bin, y_pred_bin)
    r2 = r2_score(y_test, y_pred)
    # Random-walk benchmark: the previous observation is the forecast of the
    # current one, so truth is y_test[1:] and prediction is y_test[:-1].
    r2_rw = r2_score(y_test[1:], y_test[:-1])

    # Absolute errors on observations 1..n-1, where all three forecasts exist.
    mae_model = np.abs(y_pred - y_test)[1:]
    mae_rw = np.abs(y_test[1:] - y_test[:-1])
    mae_avg = np.abs(y_test - np.nanmean(y_train))[1:]

    # Positive statistics mean the model's absolute error is smaller than the benchmark's.
    tstat_rw = _tstat(mae_rw - mae_model)
    tstat_avg = _tstat(mae_avg - mae_model)
    
    return {'mae': mae(y_test, y_pred), 'rmae': rmae(y_test, y_pred),
            'recall': recall, 'precision': precision, 'f1': f1,
            'r2': r2, 'r2_rw': r2_rw, 'tstat_rw': tstat_rw, 'tstat_avg': tstat_avg,
            'avg_zero_test': y_test_bin.mean(), 'avg_zero_pred': y_pred_bin.mean()}


def _rolling_origin_cv(data, target, min_sample, timestamp_col, window, algorythm, algo_params,
                       top_features, scaling, calib_share, expanding, progress):
    """Shared driver of sliding_window_cv and expanding_window_cv.

    For step i the training span ends (exclusive) at first_day + i + min_sample
    days and the test span covers the following `window` days. With
    expanding = False the training span also starts i days later, so its length
    stays at min_sample days; with expanding = True it always starts at first_day.
    """
    date_col = 'date_col'
    
    feature_cols = list(top_features['feature'].unique())
    
    # Same failure mode as dropping these columns before selecting the features.
    forbidden = [c for c in feature_cols if c in (target, timestamp_col, date_col)]
    if forbidden:
        raise KeyError(f'{forbidden} cannot be used as features')
    
    # Calendar day of every row, kept outside `data` so the caller's frame is not modified.
    dates = pd.to_datetime(data[timestamp_col]).dt.date
    first_day, last_day = dates.min(), dates.max()
    
    n_iter = (last_day - first_day).days - window - min_sample + 1
    
    steps = range(n_iter + 1)
    if progress:
        steps = tqdm.tqdm_notebook(steps)
    
    results = dict()
    
    for i in steps:
        start = first_day if expanding else first_day + relativedelta(days = +i)
        end = first_day + relativedelta(days = +i) + relativedelta(days = +min_sample)
        
        in_train = ((dates >= start) & (dates < end)).to_numpy()
        in_test = ((dates >= end) & (dates < end + relativedelta(days = +window))).to_numpy()
        
        print(dates[in_train].min(), dates[in_train].max(), dates[in_test].unique())
        
        x_train, y_train = data.loc[in_train, feature_cols], data.loc[in_train, target]
        x_test, y_test = data.loc[in_test, feature_cols], data.loc[in_test, target]
        
        if scaling:
            # The scaler is fitted on the training span only and reused on the test span.
            sc = StandardScaler()
            x_train = sc.fit_transform(x_train)
            x_test = sc.transform(x_test)
            
        # NOTE(review): the inner search uses GridSearchCV's default splitter on
        # time-ordered rows - confirm this is acceptable for the study.
        reg = GridSearchCV(algorythm(), algo_params, n_jobs = 10, verbose = 4, scoring = 'neg_mean_absolute_error')
        reg.fit(x_train, y_train)
        
        # Predict with the refitted best estimator only. The template instance
        # handed to GridSearchCV is never fitted itself.
        y_pred = reg.best_estimator_.predict(x_test)
        
        fold = _score_fold(y_train, y_test, y_pred, calib_share)
        fold['params'] = reg.best_params_
        
        results[end] = fold
        
    return results


def sliding_window_cv(data, target, min_sample, timestamp_col, window, algorythm, algo_params, freq, 
                      pair, top_features, scaling = False, calib_share = 0.1):
    """Walk-forward evaluation with a fixed-length training window.

    Each step trains on `min_sample` consecutive days and tests on the next
    `window` days; the window then moves forward by one day. A tqdm notebook
    progress bar is shown.

    Parameters
    ----------
    data : DataFrame holding `timestamp_col`, `target` and the feature columns. It is not modified.
    target : name of the target column.
    min_sample : number of days in each training window.
    timestamp_col : name of the timestamp column used to assign rows to days.
    window : number of days in each test span.
    algorythm : estimator class, instantiated without arguments for the grid search.
    algo_params : parameter grid passed to GridSearchCV.
    freq, pair : not used by the computation; kept so existing calls keep working.
    top_features : frame whose 'feature' column lists the feature columns to use.
    scaling : if True, standardise the features with statistics of the training window.
    calib_share : share of each test span (from its start) used to calibrate
        the zero / non-zero threshold; the rest is used for recall / precision / f1.

    Returns
    -------
    dict mapping the first test date of each step to a dict with keys 'mae', 'rmae',
    'recall', 'precision', 'f1', 'r2', 'r2_rw', 'tstat_rw', 'tstat_avg',
    'avg_zero_test', 'avg_zero_pred' and 'params'.

    Raises
    ------
    KeyError if `top_features` lists the target, the timestamp column or 'date_col'.
    """
    return _rolling_origin_cv(data, target, min_sample, timestamp_col, window, algorythm, algo_params,
                              top_features, scaling, calib_share, expanding = False, progress = True)


def expanding_window_cv(data, target, min_sample, timestamp_col, window, algorythm, algo_params, freq,
                        pair, top_features, scaling = False, calib_share = 0.1):
    """Walk-forward evaluation with a growing training window.

    Same procedure, parameters and return value as the sliding variant, except
    that every training window starts at the first day of `data`: the first step
    trains on `min_sample` days and each later step adds one more day. No
    progress bar is shown.

    `freq` and `pair` are not used by the computation; they are kept so existing
    calls keep working. `data` is not modified.
    """
    return _rolling_origin_cv(data, target, min_sample, timestamp_col, window, algorythm, algo_params,
                              top_features, scaling, calib_share, expanding = True, progress = False)
    

In [51]:
# Hyper-parameter grid handed to the CV routines as `algo_params`
# (3 x 2 x 2 x 1 x 2 = 24 candidate combinations).
params = dict(
    max_depth = [6, 8, 12],
    learning_rate = [0.05, 0.1],
    reg_alpha = [10, 100],
    random_state = [81],
    n_estimators = [500, 1000],
)

In [52]:
# Sliding-window run: 7 training days, 1 test day, standardised features.
res_sliding = sliding_window_cv(
    data = df,
    target = 'rv_LT',
    min_sample = 7,
    timestamp_col = 'received_time_r',
    window = 1,
    algorythm = XGBRegressor,
    algo_params = params,
    freq = freq,
    pair = pair,
    top_features = top_features,
    scaling = True,
)

  0%|          | 0/14 [00:00<?, ?it/s]

2023-04-03 2023-04-09 [datetime.date(2023, 4, 10)]
Fitting 5 folds for each of 24 candidates, totalling 120 fits
2023-04-04 2023-04-10 [datetime.date(2023, 4, 11)]
Fitting 5 folds for each of 24 candidates, totalling 120 fits
2023-04-05 2023-04-11 [datetime.date(2023, 4, 12)]
Fitting 5 folds for each of 24 candidates, totalling 120 fits
2023-04-06 2023-04-12 [datetime.date(2023, 4, 13)]
Fitting 5 folds for each of 24 candidates, totalling 120 fits
2023-04-07 2023-04-13 [datetime.date(2023, 4, 14)]
Fitting 5 folds for each of 24 candidates, totalling 120 fits
2023-04-08 2023-04-14 [datetime.date(2023, 4, 15)]
Fitting 5 folds for each of 24 candidates, totalling 120 fits
2023-04-09 2023-04-15 [datetime.date(2023, 4, 16)]
Fitting 5 folds for each of 24 candidates, totalling 120 fits
2023-04-10 2023-04-16 [datetime.date(2023, 4, 17)]
Fitting 5 folds for each of 24 candidates, totalling 120 fits
2023-04-11 2023-04-17 [datetime.date(2023, 4, 18)]
Fitting 5 folds for each of 24 candidates, to

[CV 3/5] END learning_rate=0.05, max_depth=6, n_estimators=500, random_state=81, reg_alpha=10;, score=-0.002 total time= 5.6min
[CV 3/5] END learning_rate=0.05, max_depth=6, n_estimators=1000, random_state=81, reg_alpha=100;, score=-0.002 total time= 6.9min
[CV 5/5] END learning_rate=0.05, max_depth=8, n_estimators=500, random_state=81, reg_alpha=10;, score=-0.001 total time= 7.1min
[CV 5/5] END learning_rate=0.05, max_depth=8, n_estimators=1000, random_state=81, reg_alpha=100;, score=-0.001 total time= 8.2min
[CV 4/5] END learning_rate=0.05, max_depth=12, n_estimators=500, random_state=81, reg_alpha=100;, score=-0.001 total time= 8.5min
[CV 2/5] END learning_rate=0.05, max_depth=12, n_estimators=1000, random_state=81, reg_alpha=100;, score=-0.003 total time= 9.5min
[CV 2/5] END learning_rate=0.1, max_depth=6, n_estimators=500, random_state=81, reg_alpha=10;, score=-0.003 total time= 3.5min
[CV 2/5] END learning_rate=0.1, max_depth=6, n_estimators=1000, random_state=81, reg_alpha=10;, 

[CV 2/5] END learning_rate=0.05, max_depth=6, n_estimators=500, random_state=81, reg_alpha=100;, score=-0.003 total time= 5.0min
[CV 2/5] END learning_rate=0.05, max_depth=6, n_estimators=1000, random_state=81, reg_alpha=10;, score=-0.003 total time= 7.1min
[CV 4/5] END learning_rate=0.05, max_depth=8, n_estimators=500, random_state=81, reg_alpha=10;, score=-0.001 total time= 7.1min
[CV 2/5] END learning_rate=0.05, max_depth=8, n_estimators=1000, random_state=81, reg_alpha=100;, score=-0.003 total time= 7.8min
[CV 2/5] END learning_rate=0.05, max_depth=12, n_estimators=500, random_state=81, reg_alpha=10;, score=-0.003 total time= 9.0min
[CV 5/5] END learning_rate=0.05, max_depth=12, n_estimators=1000, random_state=81, reg_alpha=10;, score=-0.001 total time=11.1min
[CV 5/5] END learning_rate=0.1, max_depth=6, n_estimators=500, random_state=81, reg_alpha=100;, score=-0.001 total time= 3.5min
[CV 5/5] END learning_rate=0.1, max_depth=6, n_estimators=1000, random_state=81, reg_alpha=100;, 

[CV 1/5] END learning_rate=0.05, max_depth=6, n_estimators=500, random_state=81, reg_alpha=10;, score=-0.003 total time= 5.4min
[CV 1/5] END learning_rate=0.05, max_depth=6, n_estimators=1000, random_state=81, reg_alpha=100;, score=-0.003 total time= 6.5min
[CV 2/5] END learning_rate=0.05, max_depth=8, n_estimators=500, random_state=81, reg_alpha=10;, score=-0.003 total time= 6.9min
[CV 3/5] END learning_rate=0.05, max_depth=8, n_estimators=1000, random_state=81, reg_alpha=10;, score=-0.002 total time= 8.6min
[CV 1/5] END learning_rate=0.05, max_depth=12, n_estimators=500, random_state=81, reg_alpha=100;, score=-0.003 total time= 7.7min
[CV 1/5] END learning_rate=0.05, max_depth=12, n_estimators=1000, random_state=81, reg_alpha=10;, score=-0.003 total time=10.7min
[CV 3/5] END learning_rate=0.1, max_depth=6, n_estimators=500, random_state=81, reg_alpha=10;, score=-0.002 total time= 3.6min
[CV 3/5] END learning_rate=0.1, max_depth=6, n_estimators=1000, random_state=81, reg_alpha=10;, sc

[CV 4/5] END learning_rate=0.05, max_depth=6, n_estimators=500, random_state=81, reg_alpha=10;, score=-0.001 total time= 5.7min
[CV 4/5] END learning_rate=0.05, max_depth=6, n_estimators=1000, random_state=81, reg_alpha=100;, score=-0.001 total time= 7.1min
[CV 3/5] END learning_rate=0.05, max_depth=8, n_estimators=500, random_state=81, reg_alpha=100;, score=-0.002 total time= 6.4min
[CV 1/5] END learning_rate=0.05, max_depth=8, n_estimators=1000, random_state=81, reg_alpha=100;, score=-0.003 total time= 7.7min
[CV 1/5] END learning_rate=0.05, max_depth=12, n_estimators=500, random_state=81, reg_alpha=10;, score=-0.003 total time= 9.1min
[CV 4/5] END learning_rate=0.05, max_depth=12, n_estimators=1000, random_state=81, reg_alpha=10;, score=-0.001 total time=11.2min
[CV 4/5] END learning_rate=0.1, max_depth=6, n_estimators=500, random_state=81, reg_alpha=100;, score=-0.001 total time= 3.5min
[CV 4/5] END learning_rate=0.1, max_depth=6, n_estimators=1000, random_state=81, reg_alpha=100;,

[CV 4/5] END learning_rate=0.05, max_depth=6, n_estimators=500, random_state=81, reg_alpha=100;, score=-0.001 total time= 5.3min
[CV 4/5] END learning_rate=0.05, max_depth=6, n_estimators=1000, random_state=81, reg_alpha=10;, score=-0.001 total time= 7.4min
[CV 2/5] END learning_rate=0.05, max_depth=8, n_estimators=500, random_state=81, reg_alpha=100;, score=-0.003 total time= 6.2min
[CV 4/5] END learning_rate=0.05, max_depth=8, n_estimators=1000, random_state=81, reg_alpha=10;, score=-0.001 total time= 8.9min
[CV 3/5] END learning_rate=0.05, max_depth=12, n_estimators=500, random_state=81, reg_alpha=100;, score=-0.002 total time= 8.1min
[CV 3/5] END learning_rate=0.05, max_depth=12, n_estimators=1000, random_state=81, reg_alpha=10;, score=-0.002 total time=11.2min
[CV 3/5] END learning_rate=0.1, max_depth=6, n_estimators=500, random_state=81, reg_alpha=100;, score=-0.002 total time= 3.4min
[CV 3/5] END learning_rate=0.1, max_depth=6, n_estimators=1000, random_state=81, reg_alpha=100;,

[CV 1/5] END learning_rate=0.05, max_depth=6, n_estimators=500, random_state=81, reg_alpha=100;, score=-0.003 total time= 4.9min
[CV 1/5] END learning_rate=0.05, max_depth=6, n_estimators=1000, random_state=81, reg_alpha=10;, score=-0.003 total time= 7.0min
[CV 1/5] END learning_rate=0.05, max_depth=8, n_estimators=500, random_state=81, reg_alpha=10;, score=-0.003 total time= 6.8min
[CV 2/5] END learning_rate=0.05, max_depth=8, n_estimators=1000, random_state=81, reg_alpha=10;, score=-0.003 total time= 8.5min
[CV 4/5] END learning_rate=0.05, max_depth=12, n_estimators=500, random_state=81, reg_alpha=10;, score=-0.001 total time= 9.5min
[CV 4/5] END learning_rate=0.05, max_depth=12, n_estimators=1000, random_state=81, reg_alpha=100;, score=-0.001 total time=10.2min
[CV 2/5] END learning_rate=0.1, max_depth=6, n_estimators=500, random_state=81, reg_alpha=100;, score=-0.003 total time= 3.3min
[CV 1/5] END learning_rate=0.1, max_depth=6, n_estimators=1000, random_state=81, reg_alpha=100;, 

[CV 3/5] END learning_rate=0.05, max_depth=6, n_estimators=500, random_state=81, reg_alpha=100;, score=-0.002 total time= 5.3min
[CV 3/5] END learning_rate=0.05, max_depth=6, n_estimators=1000, random_state=81, reg_alpha=10;, score=-0.002 total time= 7.3min
[CV 1/5] END learning_rate=0.05, max_depth=8, n_estimators=500, random_state=81, reg_alpha=100;, score=-0.003 total time= 6.1min
[CV 1/5] END learning_rate=0.05, max_depth=8, n_estimators=1000, random_state=81, reg_alpha=10;, score=-0.003 total time= 8.5min
[CV 3/5] END learning_rate=0.05, max_depth=12, n_estimators=500, random_state=81, reg_alpha=10;, score=-0.002 total time= 9.5min
[CV 3/5] END learning_rate=0.05, max_depth=12, n_estimators=1000, random_state=81, reg_alpha=100;, score=-0.002 total time= 9.8min
[CV 5/5] END learning_rate=0.1, max_depth=6, n_estimators=500, random_state=81, reg_alpha=10;, score=-0.001 total time= 3.7min
[CV 2/5] END learning_rate=0.1, max_depth=6, n_estimators=1000, random_state=81, reg_alpha=100;, 

[CV 5/5] END learning_rate=0.05, max_depth=6, n_estimators=500, random_state=81, reg_alpha=100;, score=-0.001 total time= 5.3min
[CV 5/5] END learning_rate=0.05, max_depth=6, n_estimators=1000, random_state=81, reg_alpha=10;, score=-0.001 total time= 7.4min
[CV 4/5] END learning_rate=0.05, max_depth=8, n_estimators=500, random_state=81, reg_alpha=100;, score=-0.001 total time= 6.6min
[CV 3/5] END learning_rate=0.05, max_depth=8, n_estimators=1000, random_state=81, reg_alpha=100;, score=-0.002 total time= 8.0min
[CV 5/5] END learning_rate=0.05, max_depth=12, n_estimators=500, random_state=81, reg_alpha=10;, score=-0.001 total time= 9.4min
[CV 5/5] END learning_rate=0.05, max_depth=12, n_estimators=1000, random_state=81, reg_alpha=100;, score=-0.001 total time=10.0min
[CV 1/5] END learning_rate=0.1, max_depth=6, n_estimators=500, random_state=81, reg_alpha=100;, score=-0.003 total time= 3.2min
[CV 4/5] END learning_rate=0.1, max_depth=6, n_estimators=1000, random_state=81, reg_alpha=10;,

[CV 2/5] END learning_rate=0.05, max_depth=6, n_estimators=500, random_state=81, reg_alpha=10;, score=-0.003 total time= 5.5min
[CV 2/5] END learning_rate=0.05, max_depth=6, n_estimators=1000, random_state=81, reg_alpha=100;, score=-0.003 total time= 6.7min
[CV 3/5] END learning_rate=0.05, max_depth=8, n_estimators=500, random_state=81, reg_alpha=10;, score=-0.002 total time= 6.9min
[CV 5/5] END learning_rate=0.05, max_depth=8, n_estimators=1000, random_state=81, reg_alpha=10;, score=-0.001 total time= 8.8min
[CV 5/5] END learning_rate=0.05, max_depth=12, n_estimators=500, random_state=81, reg_alpha=100;, score=-0.001 total time= 8.3min
[CV 1/5] END learning_rate=0.05, max_depth=12, n_estimators=1000, random_state=81, reg_alpha=100;, score=-0.003 total time= 9.3min
[CV 1/5] END learning_rate=0.1, max_depth=6, n_estimators=500, random_state=81, reg_alpha=10;, score=-0.003 total time= 3.5min
[CV 1/5] END learning_rate=0.1, max_depth=6, n_estimators=1000, random_state=81, reg_alpha=10;, s

In [53]:
# One row per fold with the hyper-parameters stored under 'params', plus the fold's key as 'date'.
params_res = pd.DataFrame([fold['params'] for fold in res_sliding.values()])
params_res['date'] = list(res_sliding.keys())

params_res.to_excel(f'SW_{pair}_PARAMS_FREQ{freq}_BEST_BOOSTING.xlsx', index = None)

In [54]:
# Flatten the per-fold result dicts into one table: a 'date' column with the
# fold keys followed by one column per metric, in the order listed below.
sliding_results = pd.DataFrame({
    'date': list(res_sliding.keys()),
    **{
        metric: [fold[metric] for fold in res_sliding.values()]
        for metric in ('mae', 'rmae', 'recall', 'precision', 'f1', 'r2', 'r2_rw',
                       'tstat_rw', 'tstat_avg', 'avg_zero_test', 'avg_zero_pred')
    },
})

sliding_results.to_excel(f'SW_{pair}_RESULTS_FREQ{freq}_BEST_BOOSTING_SELECTION.xlsx', index = None)


In [546]:
# Expanding-window run: 7 initial training days, 1 test day, standardised features.
# top_features is a required positional argument of expanding_window_cv; it is
# passed here exactly as in the sliding-window run.
res_expanding = expanding_window_cv(df, 'rv_LT', 7, 'received_time_r', 1, XGBRegressor, params, freq, pair,
                                    top_features, scaling = True)

2023-04-03 2023-04-09 [datetime.date(2023, 4, 10)]
2023-04-03 2023-04-10 [datetime.date(2023, 4, 11)]
2023-04-03 2023-04-11 [datetime.date(2023, 4, 12)]
2023-04-03 2023-04-12 [datetime.date(2023, 4, 13)]
2023-04-03 2023-04-13 [datetime.date(2023, 4, 14)]
2023-04-03 2023-04-14 [datetime.date(2023, 4, 15)]
2023-04-03 2023-04-15 [datetime.date(2023, 4, 16)]
2023-04-03 2023-04-16 [datetime.date(2023, 4, 17)]
2023-04-03 2023-04-17 [datetime.date(2023, 4, 18)]
2023-04-03 2023-04-18 [datetime.date(2023, 4, 19)]
2023-04-03 2023-04-19 [datetime.date(2023, 4, 20)]
2023-04-03 2023-04-20 [datetime.date(2023, 4, 21)]
2023-04-03 2023-04-21 [datetime.date(2023, 4, 22)]
2023-04-03 2023-04-22 [datetime.date(2023, 4, 23)]


In [244]:
# Flatten the per-fold result dicts into one table: a 'date' column with the
# fold keys followed by one column per metric, in the order listed below.
expanding_results = pd.DataFrame({
    'date': list(res_expanding.keys()),
    **{
        metric: [fold[metric] for fold in res_expanding.values()]
        for metric in ('mae', 'rmae', 'recall', 'precision', 'f1', 'r2', 'r2_rw',
                       'tstat_rw', 'tstat_avg', 'avg_zero_test', 'avg_zero_pred')
    },
})

expanding_results.to_excel(f'EW_{pair}_RESULTS_FREQ{freq}_BEST_BOOSTING_SELECTION.xlsx', index = None)
