In [None]:
import pandas as pd
import random 
import json


from functions import join_stocks_crypto, generate_rand_portfolios
from functions_post_clustering import simulate_evaluate_portfolio_subset, reoptimize_weights, kruskal_anova_test, dunn_bonferroni

Install h5py to use hdf5 features: http://docs.h5py.org/
  warn(h5py_msg)


In [None]:
# --- Load price data ---------------------------------------------------------
# Both CSV files are read with their 'Date' column as the index.
df_all_stocks = pd.read_csv('stocks_data_filled.csv', index_col='Date')
cryptos_df = pd.read_csv('cryptos_data.csv', index_col='Date')

# Join mode (per the original author's note): either left-join on cryptos and
# fill NA for stocks, or left-join on stocks and leave out some crypto dates.
# 'stocks_left' selects the latter.
joined_df = join_stocks_crypto(cryptos_df, df_all_stocks, mode='stocks_left')
joined_df.index = pd.to_datetime(joined_df.index)

# Row-over-row percentage change of every column of the joined frame.
returns_all = joined_df.pct_change()


# --- Random portfolios -------------------------------------------------------
# Only the stock columns are offered as candidate tickers.
tickers = list(df_all_stocks.columns)

# Seed the stdlib RNG before generating the portfolios.
# presumably generate_rand_portfolios draws from `random` — TODO confirm.
random.seed(42)
random_portfolios = generate_rand_portfolios(n_reps=1000, n_stocks=15, tickers=tickers)


# --- Reassemble the min-variance portfolio JSONs -----------------------------
# Each my_dict{idx}.json is loaded and merged into one dictionary.
# NOTE(review): range(1, 1000) reads my_dict1.json .. my_dict999.json (999 files)
# while 1000 random portfolios are generated above — confirm no file is skipped.
min_var_portfolios = {}
for idx in range(1, 1000):
    with open(f'min_variance_portfolio_jsons/my_dict{idx}.json') as part_file:
        min_var_portfolios.update(json.load(part_file))

# Persist the merged dictionary as a single JSON file.
with open("full_optimized_min_variance.json", "w") as merged_file:
    json.dump(min_var_portfolios, merged_file, indent=4)


# --- Re-optimise the weights of both portfolio families ----------------------
# Naming legend:
#   random_portfolios          - normal random
#   min_var_portfolios         - normal minvar
#   rand_ports_maxsharpe       - maxsharpe random
#   rand_ports_equal_weights   - equal random
#   minvar_ports_maxsharpe     - maxsharpe minvar
#   minvar_ports_equal_weights - equal minvar
# The dict literal evaluates top to bottom, so the four calls run in the same
# order as before.
portfolio_sets = {
    'rand_ports_maxsharpe': reoptimize_weights(
        joined_df, random_portfolios, how='max_sharpe', min_weight=0.01),
    'rand_ports_equal_weights': reoptimize_weights(
        joined_df, random_portfolios, how='equal_weights', min_weight=0.01),
    'minvar_ports_maxsharpe': reoptimize_weights(
        joined_df, min_var_portfolios, how='max_sharpe', min_weight=0.01),
    'minvar_ports_equal_weights': reoptimize_weights(
        joined_df, min_var_portfolios, how='equal_weights', min_weight=0.01),
}

# Keep the individual names available for any cell that refers to them directly.
rand_ports_maxsharpe = portfolio_sets['rand_ports_maxsharpe']
rand_ports_equal_weights = portfolio_sets['rand_ports_equal_weights']
minvar_ports_maxsharpe = portfolio_sets['minvar_ports_maxsharpe']
minvar_ports_equal_weights = portfolio_sets['minvar_ports_equal_weights']

SIMULATE AND EVALUATE

In [None]:
# Simulate every re-optimised portfolio set over each horizon `t`.
# presumably the horizons are in trading days (252 ~ one year) — TODO confirm.
HORIZONS = [63, 126, 189, 252]
# Horizon whose statistics feed the cross-set comparison. This is the last
# horizon, i.e. the one the previous key-only assignment happened to keep.
COMPARISON_HORIZON = HORIZONS[-1]

# (set name, horizon) -> statistics frame. Previously each horizon overwrote the
# entry for its set name, so three of the four simulation runs were discarded.
subset_statistics_by_horizon = dict()
# set name -> statistics frame at COMPARISON_HORIZON (one frame per set).
subset_statistics_results_dfs = dict()
for key, portfolio_set in portfolio_sets.items():
    for time_period in HORIZONS:
        simulations_results_dict, subset_statistics_df, _ = simulate_evaluate_portfolio_subset(portfolio_set, returns_all, n_sims=500, t=time_period, distribution_model='multivar_norm')
        subset_statistics_by_horizon[(key, time_period)] = subset_statistics_df
        if time_period == COMPARISON_HORIZON:
            subset_statistics_results_dfs[key] = subset_statistics_df


# simulations_results_dict_rand, subset_statistics_df_rand, _ = simulate_evaluate_portfolio_subset(random_portfolios, returns_all, n_sims=100, t=100, distribution_model='multivar_norm')

# simulations_results_dict_minvar, subset_statistics_df_minvar, _ = simulate_evaluate_portfolio_subset(min_var_portfolios, returns_all, n_sims=100, t=100, distribution_model='multivar_norm')

Normality Test results: 

                           statistic        p_value  normal
mean_cumulative_return    547.725185  1.156078e-119   False
mean_daily_return         547.725185  1.156078e-119   False
std_cumulative_return     693.947247  2.047717e-151   False
std_daily_return          679.814968  2.399138e-148   False
sharpe_daily                9.278289   9.665962e-03   False
sharpe_cumulative           9.387455   9.152504e-03   False
sharpe_annual               9.278289   9.665962e-03   False
sharpe_cumulative_annual    9.387455   9.152504e-03   False
VaR                       532.683712  2.134036e-116   False
CVaR                      435.670847   2.484721e-95   False
sortino                    13.497312   1.172454e-03   False
sortino_annual             13.497312   1.172454e-03   False
Normality Test results: 

                           statistic        p_value  normal
mean_cumulative_return    452.994496   4.300263e-99   False
mean_daily_return         452.994496   4.300263e

In [None]:
# Groups to compare: one statistics frame per simulated portfolio set.
subset_stats_dfs = subset_statistics_results_dfs

# Run the test on all metrics; test='kruskal' presumably selects the
# Kruskal-Wallis H-test — confirm against functions_post_clustering.
# The returned table is the cell's displayed output.
kruskal_anova_test(subset_stats_dfs, test='kruskal', metrics='all')

Unnamed: 0,test_stat,test_p
mean_cumulative_return,180.3194,0.0
mean_daily_return,180.3194,0.0
std_cumulative_return,1210.8222,0.0
std_daily_return,1208.9945,0.0
sharpe_daily,392.0214,0.0
sharpe_cumulative,397.0952,0.0
sharpe_annual,392.0214,0.0
sharpe_cumulative_annual,397.0952,0.0
VaR,1366.6781,0.0
CVaR,1334.6594,0.0


In [None]:
# Pairwise post-hoc comparison of the portfolio sets on every metric.
# Judging by the helper's name this is Dunn's test with a Bonferroni correction
# — confirm against functions_post_clustering.
#
# The groups are the per-set statistics frames from the simulation loop, the
# same input the Kruskal cell uses. The former inputs `subset_statistics_df_rand`
# and `subset_statistics_df_minvar` are only assigned in commented-out code, so
# referencing them raised NameError on a fresh kernel.
subset_stats_dfs = subset_statistics_results_dfs

x = dunn_bonferroni(subset_stats_dfs, metrics='all')
x

{'mean_cumulative_return':                    MinVar Portfolios  Random Portfolios
 MinVar Portfolios                1.0                0.0
 Random Portfolios                0.0                1.0,
 'mean_daily_return':                    MinVar Portfolios  Random Portfolios
 MinVar Portfolios                1.0                0.0
 Random Portfolios                0.0                1.0,
 'std_cumulative_return':                    MinVar Portfolios  Random Portfolios
 MinVar Portfolios                1.0                0.0
 Random Portfolios                0.0                1.0,
 'std_daily_return':                    MinVar Portfolios  Random Portfolios
 MinVar Portfolios                1.0                0.0
 Random Portfolios                0.0                1.0,
 'sharpe_daily':                    MinVar Portfolios  Random Portfolios
 MinVar Portfolios                1.0                0.0
 Random Portfolios                0.0                1.0,
 'sharpe_cumulative':           