In [1]:
import pandas as pd
import random 
import json
import numpy as np

from functions import join_stocks_crypto, generate_rand_portfolios
from functions_post_clustering import simulate_evaluate_portfolio_subset, reoptimize_weights, kruskal_anova_test, dunn_bonferroni, run_simulation

Install h5py to use hdf5 features: http://docs.h5py.org/
  warn(h5py_msg)


In [3]:
#GET THE DATA IN
def _load_price_tables(stocks_csv, cryptos_csv):
    """Read the stock and crypto CSVs, join them, and derive percentage changes.

    Parameters:
    - stocks_csv: path of the stocks CSV (indexed by its 'Date' column)
    - cryptos_csv: path of the crypto CSV (indexed by its 'timestamp' column)

    Returns:
    - (stocks, cryptos, joined, returns): the two raw frames, the result of
      join_stocks_crypto(..., mode='stocks_left') with its index passed through
      pd.to_datetime, and joined.pct_change().
    """
    stocks = pd.read_csv(stocks_csv, index_col='Date')
    cryptos = pd.read_csv(cryptos_csv, index_col='timestamp')
    joined = join_stocks_crypto(cryptos, stocks, mode = 'stocks_left')
    joined.index = pd.to_datetime(joined.index)
    return stocks, cryptos, joined, joined.pct_change()


df_all_stocks, cryptos_df, joined_df, returns_all = _load_price_tables(
    'stocks_data_filled.csv', 'cryptos_data_new.csv')

# NOTE(review): the *_24 frames are built from exactly the same files and
# arguments as the frames above, so they are expected to hold the same data.
# Confirm whether different input files were intended for the "_24" variant.
df_stocks_24, cryptos_df_24, joined_df_24, returns_all_24 = _load_price_tables(
    'stocks_data_filled.csv', 'cryptos_data_new.csv')


# Ticker universe for the random portfolios: the column labels of df_all_stocks
# (the stocks CSV), not of the joined stocks+crypto frame.
tickers = list(df_all_stocks.columns)

# Seed Python's built-in `random` module before generating the portfolios.
# NOTE(review): this only makes the draw reproducible if generate_rand_portfolios
# samples through the stdlib `random` module (and not e.g. numpy) — confirm.
random.seed(42)
# Presumably 1000 portfolios of 15 tickers each, judging by the keyword names — verify in `functions`.
random_portfolios = generate_rand_portfolios(n_reps=1000, n_stocks=15, tickers=tickers)


# Rebuild the min-variance portfolio dictionary from the individual JSON files.
# range(1, 1000) covers my_dict1.json through my_dict999.json, i.e. 999 files.
# NOTE(review): the random portfolios are generated with n_reps=1000 — confirm
# that stopping at file 999 (and starting at 1) matches the files on disk.
min_var_portfolios = {}
for file_no in range(1, 1000):
    json_path = f'min_variance_portfolio_jsons/my_dict{file_no}.json'
    with open(json_path) as json_file:
        loaded_portfolios = json.load(json_file)
    # Merge this file's entries; a key seen in an earlier file is overwritten.
    min_var_portfolios.update(loaded_portfolios)


# Three variants of the random portfolios: the untouched originals plus two
# re-weighted versions produced by reoptimize_weights ('max_sharpe' and 'equal_weights').
# NOTE(review): the *_standard copies are not used in the visible cells (their
# entries in portfolio_sets are commented out).
# NOTE(review): if random_portfolios is a plain dict, .copy() is shallow, so the inner
# objects are shared; if reoptimize_weights mutates its input in place the "standard"
# copy would change too — confirm it returns new objects.
rand_ports_standard = random_portfolios.copy()
rand_ports_maxsharpe = reoptimize_weights(joined_df, random_portfolios, how='max_sharpe', min_weight=0.01)
rand_ports_equal_weights = reoptimize_weights(joined_df, random_portfolios, how='equal_weights', min_weight=0.01)

# Same three variants for the min-variance portfolios loaded from JSON.
# min_var_portfolios is a dict, so this .copy() is a shallow copy (values are shared).
minvar_port_standard = min_var_portfolios.copy()
minvar_ports_maxsharpe = reoptimize_weights(joined_df, min_var_portfolios, how='max_sharpe', min_weight=0.01)
minvar_ports_equal_weights = reoptimize_weights(joined_df, min_var_portfolios, how='equal_weights', min_weight=0.01)


# Load previously saved portfolio sets from disk; they are merged with the sets
# built in this notebook further down.
# NOTE(review): presumably a dict of {set_name: portfolios} holding the
# crypto-supplemented variants — the file's schema is not visible here, confirm.
with open('all_optimized_sets_for_simulation.json') as f:
    crypto_supplemented_sets = json.load(f)


# Sets built in this notebook, keyed by name. The two "standard" (un-reoptimized)
# variants are intentionally left disabled.
portfolio_sets = {
    #'rand_ports_standard': rand_ports_standard,
    'rand_ports_maxsharpe': rand_ports_maxsharpe,
    'rand_ports_equalw': rand_ports_equal_weights,
    #'minvar_port_standard': minvar_port_standard,
    'minvar_ports_maxsharpe': minvar_ports_maxsharpe,
    'minvar_ports_equalw': minvar_ports_equal_weights,
}

# Combine with the sets loaded from JSON; on a duplicate name the loaded set wins.
all_sets = {**portfolio_sets, **crypto_supplemented_sets}

# Split by naming convention: names containing 'equalw' form the equal-weight group,
# and every remaining set is placed in the max-Sharpe group.
# NOTE(review): any loaded set whose name lacks 'equalw' lands in maxsharpe_sets,
# whatever its weighting scheme — confirm the names in the JSON follow this convention.
equalw_names = [set_name for set_name in all_sets if 'equalw' in set_name]
equalw_sets = {set_name: all_sets[set_name] for set_name in equalw_names}
maxsharpe_sets = {
    set_name: set_portfolios
    for set_name, set_portfolios in all_sets.items()
    if set_name not in equalw_sets
}

In [4]:
from functions_post_clustering import simulate_evaluate_portfolio_subset

In [None]:
# TODO: finish this call before running the cell. It was left as
#     simulate_evaluate_portfolio_subset(portfolios_subset=)
# which is a SyntaxError (a keyword argument must be followed by a value), so the
# cell could never execute and would break a Restart & Run All.
# Supply the portfolios to evaluate (presumably one of the sets assembled above —
# confirm the structure the function expects) plus any other required arguments.

In [None]:
# def evaluate_portfolios_out_of_sample(portfolios_subset, out_of_sample_returns, n_sims=1000, 
#                                      winsorize=True, winsorize_limits=(0.01, 0.01)):
#     """
#     Evaluate a set of portfolios using out-of-sample return data
    
#     Parameters:
#     - portfolios_subset: Dictionary of portfolios {portfolio_id: {ticker: weight}}
#     - out_of_sample_returns: DataFrame with out-of-sample return data
#     - n_sims: Number of simulations to run
#     - winsorize: Whether to winsorize returns
#     - winsorize_limits: Limits for winsorizing
    
#     Returns:
#     - DataFrame with performance metrics for each portfolio
#     """
#     results = {}
    
#     for portfolio_id, portfolio_dict in portfolios_subset.items():
#         # Identify tickers in this portfolio
#         portfolio_tickers = list(portfolio_dict.keys())
        
#         # Check if all tickers are in the out-of-sample data
#         missing_tickers = set(portfolio_tickers) - set(out_of_sample_returns.columns)
#         if missing_tickers:
#             print(f"Portfolio {portfolio_id}: Missing tickers {missing_tickers}. Skipping.")
#             continue
        
#         # Run simulation with out-of-sample data
#         t = len(out_of_sample_returns)  # Use full length of out-of-sample data
#         portfolio_sims = run_simulation(
#             portfolio_dict, 
#             out_of_sample_returns, 
#             n_sims=n_sims, 
#             t=t,
#             distribution_model='bootstrap',  # Bootstrap from out-of-sample data
#             winsorize=winsorize,
#             winsorize_limits=winsorize_limits
#         )
        
#         # Calculate performance metrics
#         # 1. Total Return
#         initial_value = 100
#         final_values = portfolio_sims[-1, :]
#         total_returns = (final_values / initial_value) - 1
        
#         # 2. Properly calculated annualized returns
#         years = t / 252
#         annualized_returns = (final_values / initial_value) ** (1 / years) - 1
        
#         # 3. Daily returns for risk calculations
#         daily_returns = (portfolio_sims[1:, :] - portfolio_sims[:-1, :]) / portfolio_sims[:-1, :]
        
#         # 4. Volatility (annualized)
#         volatility = np.std(daily_returns, axis=0) * np.sqrt(252)
        
#         # 5. Sharpe Ratio (assuming risk-free rate of 0.02 annual, or about 0.00008 daily)
#         rf_annual = 0.02
#         rf_daily = rf_annual / 252
#         sharpe_ratio = (np.mean(daily_returns, axis=0) - rf_daily) / np.std(daily_returns, axis=0) * np.sqrt(252)
        
#         # 6. Maximum Drawdown
#         max_drawdowns = np.zeros(n_sims)
#         for sim in range(n_sims):
#             peak = np.maximum.accumulate(portfolio_sims[:, sim])
#             drawdown = (portfolio_sims[:, sim] - peak) / peak
#             max_drawdowns[sim] = np.min(drawdown)
        
#         # 7. Sortino Ratio
#         downside_returns = np.minimum(daily_returns - rf_daily, 0)
#         downside_deviation = np.sqrt(np.mean(np.square(downside_returns), axis=0))
#         sortino_ratio = (np.mean(daily_returns, axis=0) - rf_daily) / downside_deviation * np.sqrt(252)
        
#         # 8. Value at Risk (95%)
#         var_95 = np.percentile(daily_returns, 5, axis=0) * initial_value
        
#         # Store results for this portfolio
#         results[portfolio_id] = {
#             'mean_total_return': np.mean(total_returns),
#             'mean_annualized_return': np.mean(annualized_returns),
#             'mean_volatility': np.mean(volatility),
#             'mean_sharpe_ratio': np.mean(sharpe_ratio),
#             'mean_sortino_ratio': np.mean(sortino_ratio),
#             'mean_max_drawdown': np.mean(max_drawdowns),
#             'var_95': np.mean(var_95),
#             # Include various percentiles to understand distribution
#             'return_10th_percentile': np.percentile(annualized_returns, 10),
#             'return_median': np.median(annualized_returns),
#             'return_90th_percentile': np.percentile(annualized_returns, 90)
#         }
    
#     # Convert results to DataFrame
#     results_df = pd.DataFrame(results).T
    
#     # Sort by Sharpe ratio (risk-adjusted performance)
#     results_df = results_df.sort_values('mean_sharpe_ratio', ascending=False)
    
#     return results_df