In [1]:
import pandas as pd
import random 
import json
import numpy as np

from functions import join_stocks_crypto, generate_rand_portfolios
from functions_post_clustering import simulate_evaluate_portfolio_subset, reoptimize_weights, kruskal_anova_test, dunn_bonferroni, run_simulation

Install h5py to use hdf5 features: http://docs.h5py.org/
  warn(h5py_msg)


In [3]:
# Load the stock and crypto CSVs; each uses its own date-like column as the index.
df_all_stocks = pd.read_csv('stocks_data_filled.csv',index_col='Date')
cryptos_df = pd.read_csv('cryptos_data_new.csv', index_col='timestamp')

# Combine both frames through the project helper, then parse the index as datetimes.
# NOTE(review): mode='stocks_left' presumably keeps the stock dates as the base of the join — confirm in functions.py.
joined_df = join_stocks_crypto(cryptos_df, df_all_stocks, mode = 'stocks_left')
joined_df.index = pd.to_datetime(joined_df.index)

# Row-over-row percentage change of every column (the first row is NaN by construction).
returns_all = joined_df.pct_change()


# Ticker universe for the random portfolios: the stock columns only.
tickers = list(df_all_stocks.columns)

# Seed the stdlib RNG before generating the random portfolios.
# NOTE(review): this only makes generation reproducible if generate_rand_portfolios draws from
# `random` rather than numpy's RNG — confirm.
random.seed(42)
random_portfolios = generate_rand_portfolios(n_reps=1000, n_stocks=15, tickers=tickers)


# Reassemble the min-variance portfolios from the per-file JSON dumps into one dict.
# dict.update merges each file's top-level keys; a repeated key is overwritten by the later file.
# NOTE(review): range(1, 1000) reads my_dict1.json .. my_dict999.json (999 files) while n_reps=1000
# above — confirm whether my_dict1000.json exists and should be included.
min_var_portfolios = dict()
for i in range(1,1000):
    with open(f'min_variance_portfolio_jsons/my_dict{i}.json') as f:
        port = json.load(f)
        min_var_portfolios.update(port)

# Disabled: one-off dump of the merged min-variance portfolios to a single JSON file.
# with open(f"full_optimized_min_variance.json", "w") as f:
#     json.dump(min_var_portfolios, f, indent=4)

# Random portfolios: keep the original weights and derive two re-weighted variants.
# NOTE(review): .copy() is shallow, so nested objects are shared with random_portfolios; if
# reoptimize_weights mutates its input in place, the "standard" copy changes too — confirm.
rand_ports_standard = random_portfolios.copy()
rand_ports_maxsharpe = reoptimize_weights(joined_df, random_portfolios, how='max_sharpe', min_weight=0.01)
rand_ports_equal_weights = reoptimize_weights(joined_df, random_portfolios, how='equal_weights', min_weight=0.01)

# Same three variants for the min-variance portfolios (same shallow-copy caveat applies).
minvar_port_standard = min_var_portfolios.copy()
minvar_ports_maxsharpe = reoptimize_weights(joined_df, min_var_portfolios, how='max_sharpe', min_weight=0.01)
minvar_ports_equal_weights = reoptimize_weights(joined_df, min_var_portfolios, how='equal_weights', min_weight=0.01)


# Load portfolio sets from a JSON file; the variable name suggests these are the
# crypto-supplemented sets — confirm against whatever produced the file.
with open('all_optimized_sets_for_simulation.json') as f:
    crypto_supplemented_sets = json.load(f)


# Stock-only sets keyed by name. The two "standard" sets are deliberately commented out,
# so only the max-Sharpe and equal-weight variants take part in the comparison.
portfolio_sets = {#'rand_ports_standard': rand_ports_standard,
                  'rand_ports_maxsharpe': rand_ports_maxsharpe, 
                  'rand_ports_equalw': rand_ports_equal_weights,
                  #'minvar_port_standard': minvar_port_standard,  
                  'minvar_ports_maxsharpe': minvar_ports_maxsharpe, 
                  'minvar_ports_equalw': minvar_ports_equal_weights}

# Dict union (Python 3.9+): on a duplicate key the value from crypto_supplemented_sets wins.
all_sets = portfolio_sets | crypto_supplemented_sets

# Split by weighting scheme using the key name. Any key without 'equalw' in it lands in
# maxsharpe_sets, whatever its actual weighting scheme.
equalw_sets = {k: v for k, v in all_sets.items() if 'equalw' in k}
maxsharpe_sets = {k: v for k, v in all_sets.items() if 'equalw' not in k}

In [None]:
# Portfolio set to evaluate out of sample, selected by key from the equal-weight sets.
test_subset = equalw_sets['minvar_w_cryptos_random_equalw']

# Bind the frame to `new_returns_df`, the name the evaluation call further down reads; it was
# previously bound only to `new_return_df`, which made that call fail with NameError on a
# fresh kernel.
# NOTE(review): unlike the in-sample loads, no index_col is set here, so any date column stays
# a regular data column. Also confirm this file holds returns (not prices) and contains the
# crypto tickers used by the portfolios in `test_subset`.
new_returns_df = pd.read_csv('out_sample_stocks_2024.csv')
new_return_df = new_returns_df  # original name kept as an alias for any code still using it

{'portfolio_1': {'PEP': 0.05555555555555555,
  'TLK': 0.05555555555555555,
  'ULVR.L': 0.05555555555555555,
  'AD.AS': 0.05555555555555555,
  'KPN.AS': 0.05555555555555555,
  '600519.SS': 0.05555555555555555,
  '600941.SS': 0.05555555555555555,
  '601398.SS': 0.05555555555555555,
  '601857.SS': 0.05555555555555555,
  '2914.T': 0.05555555555555555,
  '8001.T': 0.05555555555555555,
  '9432.T': 0.05555555555555555,
  '9434.T': 0.05555555555555555,
  '0883.HK': 0.05555555555555555,
  '6288.HK': 0.05555555555555555,
  'AXSUSDT': 0.05555555555555555,
  'RNDRUSDT': 0.05555555555555555,
  'ROSEUSDT': 0.05555555555555555},
 'portfolio_2': {'PEP': 0.05555555555555555,
  'TLK': 0.05555555555555555,
  'AXSM': 0.05555555555555555,
  'SMMT': 0.05555555555555555,
  'TTE.PA': 0.05555555555555555,
  'SKFA.F': 0.05555555555555555,
  'ULVR.L': 0.05555555555555555,
  'AD.AS': 0.05555555555555555,
  'KPN.AS': 0.05555555555555555,
  '601398.SS': 0.05555555555555555,
  '2914.T': 0.05555555555555555,
  '9432.

In [None]:
def test_portfolios_out_of_sample(portfolios_subset, new_returns_df, n_sims=100):
    """
    Test existing portfolios with new returns data
    """
    out_of_sample_results = {}
    
    for portfolio_id, portfolio_dict in portfolios_subset.items():
        # Use only the tickers present in the portfolio
        portfolio_tickers = list(portfolio_dict.keys())
        
        # Check if all tickers are in the new data
        if not all(ticker in new_returns_df.columns for ticker in portfolio_tickers):
            print(f"Portfolio {portfolio_id}: Missing some tickers in new data, skipping")
            continue
            
        # Run simulation using new returns data
        portfolio_sims = run_simulation(
            portfolio_dict,
            new_returns_df,  # Use new returns data
            n_sims=n_sims, 
            t=len(new_returns_df),  # Use full length of new data
            distribution_model='bootstrap',  # Bootstrap from new data
            plot=False
        )
        
        # Calculate performance metrics
        final_values = portfolio_sims[-1, :]
        initial_value = 100
        holding_period_years = len(new_returns_df) / 252
        
        # Calculate annualized returns properly
        annualized_returns = (final_values / initial_value) ** (1 / holding_period_years) - 1
        mean_annualized_return = np.mean(annualized_returns)
        
        # Calculate risk metrics
        daily_returns = (portfolio_sims[1:, :] - portfolio_sims[:-1, :]) / portfolio_sims[:-1, :]
        volatility = np.std(daily_returns) * np.sqrt(252)
        sharpe = mean_annualized_return / volatility
        
        # Store results
        out_of_sample_results[portfolio_id] = {
            'annualized_return': mean_annualized_return,
            'volatility': volatility,
            'sharpe_ratio': sharpe
        }
    
    return pd.DataFrame(out_of_sample_results).T

In [None]:
# Evaluate every portfolio in `test_subset` on the out-of-sample frame with n_sims=100;
# the returned DataFrame is this cell's displayed output.
# NOTE(review): `new_returns_df` must already be bound by an earlier cell — double-check the
# spelling against the cell that loads the out-of-sample CSV before a Restart & Run All.
test_portfolios_out_of_sample(test_subset, new_returns_df, n_sims=100)