In [4]:
import pandas as pd
import random 
import json


from functions import join_stocks_crypto, generate_rand_portfolios
from functions_post_clustering import simulate_evaluate_portfolio_subset

In [5]:
# --- Load price data -------------------------------------------------------
# Both CSVs are read with their 'Date' column as the index.
cryptos_df = pd.read_csv('cryptos_data.csv', index_col='Date')
df_all_stocks = pd.read_csv('stocks_data_filled.csv', index_col='Date')

# mode='stocks_left': per the helper's options, either left-join on cryptos
# (filling NA for stocks) or left-join on stocks (dropping some crypto dates);
# the stocks-left variant is used here.
joined_df = join_stocks_crypto(cryptos_df, df_all_stocks, mode='stocks_left')
joined_df.index = pd.to_datetime(joined_df.index)

# Period-over-period percentage change of every column.
returns_all = joined_df.pct_change()

# --- Random portfolios -----------------------------------------------------
# Only the stock columns are used as the ticker universe.
tickers = [*df_all_stocks.columns]

# Seed the stdlib RNG right before generation
# (presumably generate_rand_portfolios draws from `random` -- TODO confirm).
random.seed(42)
random_portfolios = generate_rand_portfolios(n_reps=1000, n_stocks=15, tickers=tickers)

# --- Min-variance portfolios -----------------------------------------------
# Merge the per-run JSON dictionaries my_dict1.json ... my_dict999.json into one.
# NOTE(review): range(1, 1000) stops at 999 while 1000 random portfolios are
# generated above -- confirm whether a my_dict1000.json (or my_dict0.json) exists.
min_var_portfolios = {}
for i in range(1, 1000):
    with open(f'min_variance_portfolio_jsons/my_dict{i}.json') as f:
        port = json.load(f)
    min_var_portfolios.update(port)

# Persist the merged dictionary as a single file.
with open("full_optimized_min_variance.json", "w") as f:
    json.dump(min_var_portfolios, f, indent=4)

SIMULATE AND EVALUATE

In [6]:
# Simulate and evaluate both portfolio sets with identical settings
# (100 simulations, t=100, multivariate-normal model).
# The third return value is not used here and is discarded.
(
    simulations_results_dict_rand,
    subset_statistics_df_rand,
    _,
) = simulate_evaluate_portfolio_subset(
    random_portfolios,
    returns_all,
    n_sims=100,
    t=100,
    distribution_model='multivar_norm',
)

(
    simulations_results_dict_minvar,
    subset_statistics_df_minvar,
    _,
) = simulate_evaluate_portfolio_subset(
    min_var_portfolios,
    returns_all,
    n_sims=100,
    t=100,
    distribution_model='multivar_norm',
)

                           statistic        p_value  normal
mean_cumulative_return    618.729744  4.411062e-135   False
mean_daily_return         618.729744  4.411062e-135   False
std_cumulative_return     787.286530  1.103830e-171   False
std_daily_return          775.100921  4.886225e-169   False
sharpe_daily                5.107170   7.780225e-02    True
sharpe_cumulative           6.063570   4.822946e-02   False
sharpe_annual               5.107170   7.780225e-02    True
sharpe_cumulative_annual    6.063570   4.822946e-02   False
VaR                       568.763064  3.123704e-124   False
CVaR                      459.647092  1.544906e-100   False
                           statistic        p_value  normal
mean_cumulative_return    430.098636   4.029656e-94   False
mean_daily_return         430.098636   4.029656e-94   False
std_cumulative_return     564.590080  2.516644e-123   False
std_daily_return          555.698375  2.146007e-121   False
sharpe_daily               89.141050   4

In [None]:
from scipy.stats import f_oneway
from scipy.stats import kruskal


def kruskal_anova_test(subset_stats_dfs:list, metrics=['sharpe_annual'], test='anova'):
    tests_results = dict()
    for metric in metrics:
        groups = [subset_df[metric] for subset_df in subset_stats_dfs]
    
        if test == 'anova':
            test_stat, test_p = f_oneway(*groups)
        elif test == 'kruskal':
            test_stat, test_p = kruskal(*groups)

        tests_results[metric] = {'test_stat': round(float(test_stat), 4), 'test_p': round(float(test_p), 4)}
    
    return pd.DataFrame(tests_results)

In [None]:
# Kruskal-Wallis comparison of the random vs. min-variance subsets
# on both Sharpe metrics; the resulting table is the cell output.
kruskal_anova_test(
    [subset_statistics_df_rand, subset_statistics_df_minvar],
    metrics=['sharpe_annual', 'sharpe_cumulative_annual'],
    test='kruskal',
)

Unnamed: 0,sharpe_annual,sharpe_cumulative_annual
test_stat,233.7857,239.2611
test_p,0.0,0.0


In [23]:
#Dunn-Bonferroni Test
import scikit_posthocs as sp


# Take the 'sharpe_annual' column from each subset's statistics table.
group1, group2 = (
    stats_df['sharpe_annual']
    for stats_df in (subset_statistics_df_rand, subset_statistics_df_minvar)
)

# Stack both samples into one Series with a fresh 0..n-1 index.
data = pd.concat([group1, group2], ignore_index=True)

# One label per observation, in the same order as the stacked values.
groups = [
    label
    for label, sample in (('Group 1', group1), ('Group 2', group2))
    for _ in range(len(sample))
]

# Long-format frame (value + group label) as input for the post-hoc test.
df = pd.DataFrame({'value': data, 'group': groups})

# Dunn's test with Bonferroni adjustment; the result is the cell output.
sp.posthoc_dunn(
    df,
    val_col='value',
    group_col='group',
    p_adjust='bonferroni',
)

Unnamed: 0,Group 1,Group 2
Group 1,1.0,8.90791e-53
Group 2,8.90791e-53,1.0
