In [2]:
import sys
import os
import numpy as np
import pandas as pd
from IPython.utils import io
# Extend sys.path so the `forecasting` imports below can be resolved from this notebook.
# NOTE(review): both locations are hard-coded absolute Windows paths for a single
# machine; on any other machine or OS they will not point at the project.
current_dir = os.path.dirname(r"C:\Users\JNoot\Documents\University\Bachelor Thesis\New Code\forecasting\forecasts")
# One directory above current_dir, normalised to an absolute path.
parent_dir = os.path.abspath(os.path.join(current_dir, os.pardir))
# Inserted at index 0 so it is searched before every other sys.path entry.
sys.path.insert(0, parent_dir)
# Additionally append "<directory containing the notebook>/.." (left un-normalised) at the end of sys.path.
sys.path.append(os.path.dirname(os.path.abspath(r"C:\Users\JNoot\Documents\University\Bachelor Thesis\New Code\forecasting\forecasts\perform_forecasts.ipynb")) + '/..')

from forecasting.data_preprocessing.obtain_data import obtainData
from forecasting.forecasts.in_sample_forecasts import in_sample_forecast
from forecasting.forecasts.out_of_sample_forecasts import forecast
from forecasting.utils.squared_errors import get_errors
from forecasting.utils.qlike import get_qlike
from forecasting.utils.model_confidence_set import update_mcs_count
from forecasting.utils.market_cycles import define_market_phases, cycle_errors, segment_data_by_phases, perform_kruskal, perform_dunn_test
from scipy.stats import skew
from tqdm import notebook


# Models compared in every table; the order fixes the column order of the per-metric tables.
_MODELS = ('LLF', 'RF', 'GARCH', 'GJR', 'HAR-RV')
# (table title, metric suffix) pairs for the per-coin error tables, in output order.
_METRIC_TABLES = (('RMSE', 'rmse'), ('MAE', 'mae'), ('QLIKE', 'qlike'))
# Coins for which the Kruskal-Wallis / Dunn tests are skipped.
# NOTE(review): presumably excluded because it is a stablecoin and the tests are not
# meaningful on its errors -- confirm the original motivation.
_PHASE_TEST_EXCLUDED = frozenset({'Tether'})


def _report_market_phases(coin, X, predictions, Y_test):
    """Print per-market-phase error statistics and significance tests for one coin."""
    market_phases = define_market_phases(X['Close'])
    total_errors = cycle_errors(predictions, Y_test)
    errors_by_phase, phase_counts = segment_data_by_phases(total_errors, market_phases)

    print("Number of observations in each phase:")
    for phase, count in phase_counts.items():
        print(f"{phase}: {count}")

    if coin in _PHASE_TEST_EXCLUDED:
        return

    kruskal_results = perform_kruskal(errors_by_phase)
    dunn_results = perform_dunn_test(errors_by_phase)

    for model, phase_data in errors_by_phase.items():
        # Descriptive statistics of the errors within each phase.
        print(f"Statistics for model {model} on coin {coin}:")
        for phase, errors in phase_data.items():
            errors_array = np.array(errors)
            print(f"{phase}:")
            print(f"  Mean: {np.mean(errors_array)}")
            print(f"  Standard Deviation: {np.std(errors_array)}")
            print(f"  Skewness: {skew(errors_array)}")

        kruskal_result = kruskal_results[model]
        print(f"\nKruskal-Wallis results for model {model} on coin {coin}:")
        print(f"Statistic = {kruskal_result['stat']}, p-value = {kruskal_result['p_value']}")

        dunn_result = dunn_results[model]
        print(f"\nDunn's test results for model {model} on coin {coin} (p-values):")
        print(dunn_result)


def perform_models(coins, in_sample, training_size=0.7):
    """Run all forecasting models on every coin and return the results as LaTeX tables.

    For each coin the data is loaded with ``obtainData``, forecasts are produced
    (``in_sample_forecast`` when ``in_sample`` is truthy, otherwise ``forecast``
    with ``training_size``), RMSE / MAE / QLIKE are collected, and the model
    confidence set tallies are updated through ``update_mcs_count``. Progress
    and intermediate results are printed. For out-of-sample runs the per-market-
    phase error analysis is printed as well.

    Args:
        coins: Iterable of coin names; each name is passed to ``obtainData``.
        in_sample: Whether to produce in-sample instead of out-of-sample forecasts.
        training_size: Forwarded to ``forecast``; unused for in-sample runs.

    Returns:
        dict mapping ``'RMSE'``, ``'MAE'``, ``'QLIKE'`` and ``'MCS Counts'`` to
        LaTeX table strings. The ``'MCS Counts'`` table holds each tally divided
        by the number of coins.

    Raises:
        ValueError: If ``coins`` is empty (the rates would be undefined).
    """
    # Materialise once so any iterable works and the length is known up front.
    coins = list(coins)
    if not coins:
        raise ValueError("perform_models needs at least one coin to evaluate")

    results = {'coin': []}
    for _, metric in _METRIC_TABLES:
        for model in _MODELS:
            results[f'{model}_{metric}'] = []

    mcs_counts_rmse = dict.fromkeys(_MODELS, 0)
    mcs_counts_qlike = dict.fromkeys(_MODELS, 0)
    mcs_counts_utility = dict.fromkeys(_MODELS, 0)

    for coin in notebook.tqdm(coins):
        print(coin)
        # Silence whatever obtainData prints; the captured output is not needed.
        with io.capture_output():
            X, Y, X_ridge = obtainData(coin)
        if in_sample:
            predictions, Y_test = in_sample_forecast(X, Y, X_ridge)
        else:
            predictions, Y_test = forecast(X, Y, X_ridge, training_size)

        # get_errors also returns the MSE, which is not reported.
        _mse, mae, rmse = get_errors(predictions, Y_test)
        qlike = get_qlike(predictions, Y_test)

        results['coin'].append(coin)
        for metric, res_dict in zip(['rmse', 'mae', 'qlike'], [rmse, mae, qlike]):
            for model in _MODELS:
                results[f'{model}_{metric}'].append(res_dict[model])

        mcs_counts_rmse, mcs_counts_qlike, mcs_counts_utility = update_mcs_count(
            predictions, Y_test,
            mcs_counts_rmse=mcs_counts_rmse,
            mcs_counts_qlike=mcs_counts_qlike,
            mcs_counts_utility=mcs_counts_utility,
        )

        print('rmse', rmse)
        print('qlike', qlike)
        print('counts', mcs_counts_rmse, mcs_counts_qlike, mcs_counts_utility)

        if not in_sample:
            _report_market_phases(coin, X, predictions, Y_test)

    # Turn the tallies into rates. Built from the tallies themselves rather than
    # from the last coin's `predictions`, so this does not depend on a variable
    # leaked out of the loop above.
    n_coins = len(coins)
    mcsr_rmse = {model: count / n_coins for model, count in mcs_counts_rmse.items()}
    mcsr_qlike = {model: count / n_coins for model, count in mcs_counts_qlike.items()}
    mcsr_utility = {model: count / n_coins for model, count in mcs_counts_utility.items()}

    results_df = pd.DataFrame(results)

    latex_tables = {}
    for title, metric in _METRIC_TABLES:
        columns = ['coin'] + [f'{model}_{metric}' for model in _MODELS]
        latex_tables[title] = results_df[columns].to_latex(index=False, float_format="%.3f")
    latex_tables['MCS Counts'] = pd.DataFrame(
        [mcsr_rmse, mcsr_qlike, mcsr_utility], index=['RMSE', 'QLIKE', 'Utility']
    ).to_latex(float_format="%.3f")

    return latex_tables

# Coins to evaluate; each name is handed to perform_models as-is.
coins = [
    "Bitcoin",
    "Ethereum",
    "Tether",
    "Binance Coin",
    "Bitcoin Cash",
    "Litecoin",
    "Internet Computer",
    "Polygon",
]

# In-sample run: build the LaTeX tables, then print each one under its title.
latex_tables_in_sample = perform_models(coins, in_sample=True)
for name in latex_tables_in_sample:
    table = latex_tables_in_sample[name]
    print(f"{name} Table:")
    print(table)

# Out-of-sample run with a training_size of 0.70, printed the same way.
latex_tables_out_sample = perform_models(coins, in_sample=False, training_size=0.70)
for name in latex_tables_out_sample:
    table = latex_tables_out_sample[name]
    print(f"{name} Table:")
    print(table)

  0%|          | 0/8 [00:00<?, ?it/s]

Bitcoin


KeyboardInterrupt: 