In [2]:
import sys
import os
import numpy as np
import pandas as pd
from IPython.utils import io
# Make the project package importable from this notebook.
# Both locations below are hard-coded: adjust them to your own paths.
_forecasts_dir = r"C:\Users\JNoot\Documents\University\Bachelor Thesis\New Code\forecasting\forecasts"
_notebook_file = r"C:\Users\JNoot\Documents\University\Bachelor Thesis\New Code\forecasting\forecasts\perform_forecasts.ipynb"

# `current_dir` is the dirname of the forecasts location; its parent goes to
# the FRONT of sys.path so it takes precedence over other entries.
current_dir = os.path.dirname(_forecasts_dir)
parent_dir = os.path.abspath(os.path.join(current_dir, os.pardir))
sys.path.insert(0, parent_dir)

# The directory above the notebook's own directory is appended as a fallback
# (lowest precedence). The '/..' suffix is left un-normalised on purpose.
_notebook_dir = os.path.dirname(os.path.abspath(_notebook_file))
sys.path.append(_notebook_dir + '/..')


from forecasting.data_preprocessing.obtain_data import obtainData
from forecasting.forecasts.in_sample_forecasts import in_sample_forecast
from forecasting.forecasts.out_of_sample_forecasts import forecast
from forecasting.utils.squared_errors import get_errors
from forecasting.utils.qlike import get_qlike
from forecasting.utils.model_confidence_set import update_mcs_count
from forecasting.utils.market_cycles import define_market_phases, cycle_errors, segment_data_by_phases, perform_kruskal, perform_dunn_test
from forecasting.utils.large_mid_cap import get_grouped_errors, initialize_errors, collect_errors, compute_statistics
from forecasting.utils.utility_benefits import expected_utility
from scipy.stats import skew
from tqdm import notebook


# Names of the forecasting models; every per-model container below is keyed by these.
_MODEL_NAMES = ('LLF', 'RF', 'GARCH', 'GJR', 'HAR-RV')

# (suffix used in the results columns, name of the LaTeX table), in output order.
_METRIC_TABLES = (('rmse', 'RMSE'), ('mae', 'MAE'), ('qlike', 'QLIKE'), ('utility', 'Utility'))


def _report_market_phases(coin, close_prices, predictions, Y_test):
    """Print per-market-phase error statistics and significance tests for one coin.

    Args:
        coin: Name of the coin, used in the printed headers.
        close_prices: The 'Close' column of the coin's feature frame, passed to
            ``define_market_phases``.
        predictions: Per-model predictions as returned by the forecast routine.
        Y_test: Realised targets matching ``predictions``.
    """
    market_phases = define_market_phases(close_prices)
    total_errors = cycle_errors(predictions, Y_test)
    errors_by_phase, phase_counts = segment_data_by_phases(total_errors, market_phases)

    print("Number of observations in each phase:")
    for phase, count in phase_counts.items():
        print(f"{phase}: {count}")

    # Tether is excluded from the Kruskal-Wallis / Dunn tests and the per-phase
    # statistics; only its phase counts are reported.
    # NOTE(review): the reason is not visible in this file - confirm and document.
    if coin == 'Tether':
        return

    kruskal_results = perform_kruskal(errors_by_phase)
    dunn_results = perform_dunn_test(errors_by_phase)

    for model, phase_data in errors_by_phase.items():
        print(f"Statistics for model {model} on coin {coin}:")
        for phase, errors in phase_data.items():
            errors_array = np.array(errors)
            print(f"{phase}:")
            print(f"  Mean: {np.mean(errors_array)}")
            print(f"  Standard Deviation: {np.std(errors_array)}")
            print(f"  Skewness: {skew(errors_array)}")

        kruskal_result = kruskal_results[model]
        print(f"\nKruskal-Wallis results for model {model} on coin {coin}:")
        print(f"Statistic = {kruskal_result['stat']}, p-value = {kruskal_result['p_value']}")

        print(f"\nDunn's test results for model {model} on coin {coin} (p-values):")
        print(dunn_results[model])


def _save_latex_tables(latex_tables, in_sample):
    """Write each LaTeX table to ``<cwd>/../Results/tables/<name>_<suffix>.tex``."""
    tables_dir = os.path.join(os.getcwd(), "..", "Results", "tables")
    # Create the target directory on first use instead of failing in open().
    os.makedirs(tables_dir, exist_ok=True)

    # NOTE: the mixed "out-of_sample" spelling is kept as-is so that existing
    # references to the generated files keep resolving.
    suffix = "in_sample" if in_sample else "out-of_sample"
    for name, table in latex_tables.items():
        save_path = os.path.join(tables_dir, f"{name.lower()}_{suffix}.tex")
        with open(save_path, 'w', encoding='utf-8') as f:
            f.write(table)


def perform_models(coins, in_sample, training_size=0.7):
    """Run all forecasting models on every coin and summarise their errors.

    For each coin the data is loaded, forecasts are produced (in-sample or
    out-of-sample), RMSE / MAE / QLIKE / utility are collected per model, the
    per-metric MCS counters are updated and large/mid-cap errors are gathered.
    For out-of-sample runs a market-phase analysis is printed as well.
    Progress and intermediate results are printed, and the final tables are
    written to ``<cwd>/../Results/tables``.

    Args:
        coins: Sized collection of coin names to evaluate; must not be empty.
        in_sample: If true, use ``in_sample_forecast``; otherwise ``forecast``
            with ``training_size``.
        training_size: Passed to ``forecast`` for out-of-sample runs; unused
            for in-sample runs.

    Returns:
        dict mapping 'RMSE', 'MAE', 'QLIKE', 'Utility' and 'MCSR' to the
        corresponding LaTeX table source (str).

    Raises:
        ValueError: If ``coins`` is empty (the MCSR is a ratio over the number
            of coins and would be undefined).
    """
    n_coins = len(coins)
    if n_coins == 0:
        raise ValueError("coins must contain at least one coin")

    # One result column per (metric, model), plus the coin name; insertion
    # order here fixes the column order of the results frame.
    results = {'coin': []}
    for metric, _ in _METRIC_TABLES:
        for model in _MODEL_NAMES:
            results[f'{model}_{metric}'] = []

    # Per-metric counters maintained by update_mcs_count, one entry per model.
    mcs_counts_rmse = dict.fromkeys(_MODEL_NAMES, 0)
    mcs_counts_qlike = dict.fromkeys(_MODEL_NAMES, 0)
    mcs_counts_utility = dict.fromkeys(_MODEL_NAMES, 0)
    mcs_counts_mae = dict.fromkeys(_MODEL_NAMES, 0)

    # Large vs. Mid cap.
    large_cap_coins = ["Bitcoin", "Ethereum", "Tether", "Binance Coin"]
    mid_cap_coins = ["Bitcoin Cash", "Litecoin", "Internet Computer", "Polygon"]
    large_cap_errors = initialize_errors()
    mid_cap_errors = initialize_errors()

    for coin in notebook.tqdm(coins):
        print(coin)
        # Silence whatever obtainData prints; the captured output is discarded.
        with io.capture_output():
            X, Y, X_ridge = obtainData(coin)
        if in_sample:
            predictions, Y_test = in_sample_forecast(X, Y, X_ridge)
        else:
            predictions, Y_test = forecast(coin, X, Y, X_ridge, training_size)

        mse, mae, rmse = get_errors(predictions, Y_test)
        qlike = get_qlike(predictions, Y_test)
        utility = expected_utility(predictions, Y_test)
        per_metric = {'rmse': rmse, 'mae': mae, 'qlike': qlike, 'utility': utility}

        results['coin'].append(coin)
        for metric, _ in _METRIC_TABLES:
            for model in _MODEL_NAMES:
                results[f'{model}_{metric}'].append(per_metric[metric][model])

        mcs_counts_rmse, mcs_counts_mae, mcs_counts_qlike, mcs_counts_utility = update_mcs_count(
            predictions,
            Y_test,
            mcs_counts_rmse=mcs_counts_rmse,
            mcs_counts_mae=mcs_counts_mae,
            mcs_counts_qlike=mcs_counts_qlike,
            mcs_counts_utility=mcs_counts_utility,
        )

        large_cap_errors, mid_cap_errors = collect_errors(
            predictions, Y_test, coin, large_cap_coins, mid_cap_coins, large_cap_errors, mid_cap_errors
        )

        print('rmse', rmse)
        print('qlike', qlike)
        print('counts', mcs_counts_rmse, mcs_counts_mae, mcs_counts_qlike, mcs_counts_utility)

        if not in_sample:
            _report_market_phases(coin, X['Close'], predictions, Y_test)

    # MCSR: each model's count divided by the number of coins. Iterate the fixed
    # model list rather than the `predictions` of the last coin, so the result
    # does not depend on a variable leaked from the loop above.
    def _rate(counts):
        return {model: counts[model] / n_coins for model in _MODEL_NAMES}

    mcsr_frame = pd.DataFrame(
        [_rate(mcs_counts_rmse), _rate(mcs_counts_mae), _rate(mcs_counts_qlike), _rate(mcs_counts_utility)],
        index=[table_name for _, table_name in _METRIC_TABLES],
    )

    results_df = pd.DataFrame(results)

    large_cap_statistics = compute_statistics(large_cap_errors)
    mid_cap_statistics = compute_statistics(mid_cap_errors)

    print("Large Cap Errors:")
    print(large_cap_statistics)
    print("\nMid Cap Errors:")
    print(mid_cap_statistics)

    # Generate the LaTeX tables: one per metric (coin + one column per model),
    # followed by the MCSR summary.
    latex_tables = {}
    for metric, table_name in _METRIC_TABLES:
        columns = ['coin'] + [f'{model}_{metric}' for model in _MODEL_NAMES]
        latex_tables[table_name] = results_df[columns].to_latex(index=False, float_format="%.3f")
    latex_tables['MCSR'] = mcsr_frame.to_latex(float_format="%.3f")

    # Save tables to the 'tables' directory within 'Results'.
    _save_latex_tables(latex_tables, in_sample)

    return latex_tables

# Coins evaluated in both runs below.
coins = [
    "Bitcoin",
    "Ethereum",
    "Tether",
    "Binance Coin",
    "Bitcoin Cash",
    "Litecoin",
    "Internet Computer",
    "Polygon",
]

# In-sample forecasts: run every model, then echo each LaTeX table.
latex_tables_in_sample = perform_models(coins, in_sample=True)
for name, table in latex_tables_in_sample.items():
    print(f"{name} Table:", table, sep="\n")

# Out-of-sample forecasts with training_size=0.70.
latex_tables_out_sample = perform_models(coins, in_sample=False, training_size=0.70)
for name, table in latex_tables_out_sample.items():
    print(f"{name} Table:", table, sep="\n")

  0%|          | 0/8 [00:00<?, ?it/s]

Bitcoin



  0%|          | 0/50 [00:00<?, ?it/s]

LocalLinearForest {'n_estimators': 500, 'max_depth': 9, 'min_samples_split': 8, 'min_samples_leaf': 2, 'lam': 0.1}



  0%|          | 0/50 [00:00<?, ?it/s]

RandomForest {'n_estimators': 100, 'max_depth': 10, 'min_samples_split': 5, 'min_samples_leaf': 3}
GARCH estimates: mu          0.103153
omega       1.094889
alpha[1]    0.126730
beta[1]     0.748364
Name: params, dtype: float64
GJR estimates: mu          0.073986
omega       0.969059
alpha[1]    0.061690
gamma[1]    0.098674
beta[1]     0.776676
Name: params, dtype: float64
rmse {'LLF': 0.2906529584400876, 'RF': 0.36178258533084734, 'GARCH': 1.5687686427836331, 'GJR': 1.566283444242782, 'HAR-RV': 1.7092171112536745}
qlike {'LLF': 0.03467027932260908, 'RF': 0.006711548102553796, 'GARCH': 0.20963702985313204, 'GJR': 0.20999203199254798, 'HAR-RV': 0.28617590273957305}
counts {'LLF': 1, 'RF': 0, 'GARCH': 0, 'GJR': 0, 'HAR-RV': 0} {'LLF': 1, 'RF': 1, 'GARCH': 0, 'GJR': 0, 'HAR-RV': 0} {'LLF': 0, 'RF': 1, 'GARCH': 0, 'GJR': 0, 'HAR-RV': 0} {'LLF': 0, 'RF': 1, 'GARCH': 0, 'GJR': 0, 'HAR-RV': 0}
Ethereum



  0%|          | 0/50 [00:00<?, ?it/s]

LocalLinearForest {'n_estimators': 200, 'max_depth': 9, 'min_samples_split': 8, 'min_samples_leaf': 4, 'lam': 0.1}



  0%|          | 0/50 [00:00<?, ?it/s]

RandomForest {'n_estimators': 100, 'max_depth': 10, 'min_samples_split': 7, 'min_samples_leaf': 5}
GARCH estimates: mu          0.120234
omega       0.419219
alpha[1]    0.083433
beta[1]     0.886303
Name: params, dtype: float64
GJR estimates: mu          0.115785
omega       0.709015
alpha[1]    0.069080
gamma[1]    0.063051
beta[1]     0.845340
Name: params, dtype: float64
rmse {'LLF': 0.3133823423270757, 'RF': 0.505960725507872, 'GARCH': 2.1116656527792452, 'GJR': 2.107570919318288, 'HAR-RV': 2.1548991281055176}
qlike {'LLF': 0.008005485691617745, 'RF': 0.008077135742726154, 'GARCH': 0.2591446000139814, 'GJR': 0.2596579160074702, 'HAR-RV': 0.27293158110349985}
counts {'LLF': 2, 'RF': 0, 'GARCH': 0, 'GJR': 0, 'HAR-RV': 0} {'LLF': 2, 'RF': 1, 'GARCH': 0, 'GJR': 0, 'HAR-RV': 0} {'LLF': 1, 'RF': 2, 'GARCH': 0, 'GJR': 0, 'HAR-RV': 0} {'LLF': 1, 'RF': 2, 'GARCH': 0, 'GJR': 0, 'HAR-RV': 0}
Tether



  0%|          | 0/50 [00:00<?, ?it/s]

LocalLinearForest {'n_estimators': 400, 'max_depth': 7, 'min_samples_split': 5, 'min_samples_leaf': 3, 'lam': 0.05}



  0%|          | 0/50 [00:00<?, ?it/s]

RandomForest {'n_estimators': 100, 'max_depth': 7, 'min_samples_split': 9, 'min_samples_leaf': 3}
GARCH estimates: mu         -0.009473
omega       0.019960
alpha[1]    0.066390
beta[1]     0.840968
Name: params, dtype: float64
GJR estimates: mu         -0.008262
omega       0.021255
alpha[1]    0.074656
gamma[1]   -0.011952
beta[1]     0.832614
Name: params, dtype: float64
rmse {'LLF': 0.1558703380669229, 'RF': 0.14823983699018478, 'GARCH': 0.39573197769992563, 'GJR': 0.39574901647516486, 'HAR-RV': 0.40867756255548854}
qlike {'LLF': 0.09025894557787212, 'RF': 0.017849018909674722, 'GARCH': 0.25562901518668996, 'GJR': 0.25561517283302615, 'HAR-RV': 0.44998066193532854}
counts {'LLF': 3, 'RF': 1, 'GARCH': 0, 'GJR': 0, 'HAR-RV': 0} {'LLF': 2, 'RF': 2, 'GARCH': 0, 'GJR': 0, 'HAR-RV': 0} {'LLF': 1, 'RF': 3, 'GARCH': 0, 'GJR': 0, 'HAR-RV': 0} {'LLF': 1, 'RF': 3, 'GARCH': 0, 'GJR': 0, 'HAR-RV': 0}
Binance Coin



  0%|          | 0/50 [00:00<?, ?it/s]

LocalLinearForest {'n_estimators': 100, 'max_depth': 6, 'min_samples_split': 9, 'min_samples_leaf': 3, 'lam': 0.05}



  0%|          | 0/50 [00:00<?, ?it/s]

RandomForest {'n_estimators': 200, 'max_depth': 9, 'min_samples_split': 9, 'min_samples_leaf': 3}
GARCH estimates: mu          0.087400
omega       0.411812
alpha[1]    0.157216
beta[1]     0.822858
Name: params, dtype: float64
GJR estimates: mu          0.076187
omega       0.462729
alpha[1]    0.144596
gamma[1]    0.039700
beta[1]     0.811745
Name: params, dtype: float64
rmse {'LLF': 0.29674514873123203, 'RF': 0.4938681739947354, 'GARCH': 2.0861387435596055, 'GJR': 2.084952719065336, 'HAR-RV': 2.448547450905265}
qlike {'LLF': 0.008827541930606512, 'RF': 0.008249175338073352, 'GARCH': 0.2574702414257507, 'GJR': 0.25812202415098245, 'HAR-RV': 0.5457753133024263}
counts {'LLF': 4, 'RF': 1, 'GARCH': 0, 'GJR': 0, 'HAR-RV': 0} {'LLF': 3, 'RF': 2, 'GARCH': 0, 'GJR': 0, 'HAR-RV': 0} {'LLF': 2, 'RF': 4, 'GARCH': 0, 'GJR': 0, 'HAR-RV': 0} {'LLF': 2, 'RF': 4, 'GARCH': 0, 'GJR': 0, 'HAR-RV': 0}
Bitcoin Cash



  0%|          | 0/50 [00:00<?, ?it/s]

LocalLinearForest {'n_estimators': 500, 'max_depth': 10, 'min_samples_split': 4, 'min_samples_leaf': 2, 'lam': 0.05}



  0%|          | 0/50 [00:00<?, ?it/s]

RandomForest {'n_estimators': 400, 'max_depth': 9, 'min_samples_split': 5, 'min_samples_leaf': 3}
GARCH estimates: mu         -0.170957
omega       5.757143
alpha[1]    0.447733
beta[1]     0.366845
Name: params, dtype: float64
GJR estimates: mu         -0.070567
omega       5.952978
alpha[1]    0.584179
gamma[1]   -0.312458
beta[1]     0.365147
Name: params, dtype: float64
rmse {'LLF': 0.27176065635843016, 'RF': 0.63874131133069, 'GARCH': 2.8238124010134724, 'GJR': 2.8312938782614547, 'HAR-RV': 2.765666413278824}
qlike {'LLF': 0.004904082565986802, 'RF': 0.011126003085547426, 'GARCH': 0.3400276829918904, 'GJR': 0.3438380938297809, 'HAR-RV': 0.2787467937682345}
counts {'LLF': 5, 'RF': 1, 'GARCH': 0, 'GJR': 0, 'HAR-RV': 0} {'LLF': 4, 'RF': 2, 'GARCH': 0, 'GJR': 0, 'HAR-RV': 0} {'LLF': 3, 'RF': 4, 'GARCH': 0, 'GJR': 0, 'HAR-RV': 0} {'LLF': 3, 'RF': 4, 'GARCH': 0, 'GJR': 0, 'HAR-RV': 0}
Litecoin



  0%|          | 0/50 [00:00<?, ?it/s]

LocalLinearForest {'n_estimators': 400, 'max_depth': 10, 'min_samples_split': 5, 'min_samples_leaf': 3, 'lam': 0.1}



  0%|          | 0/50 [00:00<?, ?it/s]

RandomForest {'n_estimators': 200, 'max_depth': 8, 'min_samples_split': 6, 'min_samples_leaf': 3}
GARCH estimates: mu         -0.025684
omega       0.950534
alpha[1]    0.115055
beta[1]     0.835366
Name: params, dtype: float64
GJR estimates: mu         -0.002165
omega       0.902088
alpha[1]    0.136316
gamma[1]   -0.039828
beta[1]     0.838694
Name: params, dtype: float64
rmse {'LLF': 0.3412214161354659, 'RF': 0.720801305305359, 'GARCH': 2.742462307869789, 'GJR': 2.7424635325924887, 'HAR-RV': 2.8901907913730724}
qlike {'LLF': 0.0071941401578613385, 'RF': 0.008298154985583497, 'GARCH': 0.3374623332779434, 'GJR': 0.3372250475401307, 'HAR-RV': 0.42404092450229247}
counts {'LLF': 6, 'RF': 1, 'GARCH': 0, 'GJR': 0, 'HAR-RV': 0} {'LLF': 5, 'RF': 2, 'GARCH': 0, 'GJR': 0, 'HAR-RV': 0} {'LLF': 4, 'RF': 4, 'GARCH': 0, 'GJR': 0, 'HAR-RV': 0} {'LLF': 4, 'RF': 4, 'GARCH': 0, 'GJR': 0, 'HAR-RV': 0}
Internet Computer



  0%|          | 0/50 [00:00<?, ?it/s]

LocalLinearForest {'n_estimators': 200, 'max_depth': 9, 'min_samples_split': 4, 'min_samples_leaf': 5, 'lam': 0.01}



  0%|          | 0/50 [00:00<?, ?it/s]

RandomForest {'n_estimators': 400, 'max_depth': 10, 'min_samples_split': 6, 'min_samples_leaf': 5}
GARCH estimates: mu         -0.130478
omega       0.802847
alpha[1]    0.119408
beta[1]     0.858346
Name: params, dtype: float64
GJR estimates: mu         -0.085877
omega       0.849500
alpha[1]    0.143828
gamma[1]   -0.041087
beta[1]     0.853303
Name: params, dtype: float64
rmse {'LLF': 0.10596767606638481, 'RF': 1.067258658880269, 'GARCH': 4.166364671365065, 'GJR': 4.163997256791119, 'HAR-RV': 3.7247971850811714}
qlike {'LLF': 0.0006932141850227527, 'RF': 0.010937914685072018, 'GARCH': 0.5832592130640931, 'GJR': 0.5825595647181323, 'HAR-RV': 0.3126449045904578}
counts {'LLF': 7, 'RF': 1, 'GARCH': 0, 'GJR': 0, 'HAR-RV': 0} {'LLF': 6, 'RF': 2, 'GARCH': 0, 'GJR': 0, 'HAR-RV': 0} {'LLF': 5, 'RF': 4, 'GARCH': 0, 'GJR': 0, 'HAR-RV': 0} {'LLF': 5, 'RF': 4, 'GARCH': 0, 'GJR': 0, 'HAR-RV': 0}
Polygon



  0%|          | 0/50 [00:00<?, ?it/s]

LocalLinearForest {'n_estimators': 200, 'max_depth': 8, 'min_samples_split': 7, 'min_samples_leaf': 4, 'lam': 0.1}



  0%|          | 0/50 [00:00<?, ?it/s]

RandomForest {'n_estimators': 100, 'max_depth': 9, 'min_samples_split': 9, 'min_samples_leaf': 2}
GARCH estimates: mu         -0.008827
omega       0.852973
alpha[1]    0.132202
beta[1]     0.842927
Name: params, dtype: float64
GJR estimates: mu         -0.005100
omega       0.847115
alpha[1]    0.134224
gamma[1]   -0.004554
beta[1]     0.843385
Name: params, dtype: float64
rmse {'LLF': 0.38005488704928053, 'RF': 0.6558616723212058, 'GARCH': 3.4141703779394272, 'GJR': 3.4141229383044362, 'HAR-RV': 3.30327957881331}
qlike {'LLF': 0.007297256188757079, 'RF': 0.0073806709485041665, 'GARCH': 0.408734020016309, 'GJR': 0.408677398998186, 'HAR-RV': 0.3255837071643262}
counts {'LLF': 8, 'RF': 1, 'GARCH': 0, 'GJR': 0, 'HAR-RV': 0} {'LLF': 7, 'RF': 2, 'GARCH': 0, 'GJR': 0, 'HAR-RV': 0} {'LLF': 6, 'RF': 5, 'GARCH': 0, 'GJR': 0, 'HAR-RV': 0} {'LLF': 6, 'RF': 5, 'GARCH': 0, 'GJR': 0, 'HAR-RV': 0}
Large Cap Errors:
{'LLF': {'RMSLE': 0.2793840789234562, 'std': 0.35253902182056246, 'skew': 12.47841402