In [1]:
# from pypfopt import EfficientFrontier
# from pypfopt import risk_models
# from pypfopt import expected_returns
import numpy as np

import pandas as pd
from functions import join_stocks_crypto, generate_rand_portfolios, sharpe_ratio_calculation, select_top_five, run_clustering_model, run_min_variance
from functions_post_clustering import reoptimize_weights, select_complementary_cryptos, supplement_set_with_cryptos

import cvxpy as cp
import random
import json
import warnings

Install h5py to use hdf5 features: http://docs.h5py.org/
  warn(h5py_msg)


In [2]:
# Load the two price tables: stocks are indexed by 'Date', cryptos by 'timestamp'.
df_all_stocks = pd.read_csv(
    'stocks_data_filtered_volatility.csv',
    index_col='Date',
)
cryptos_df = pd.read_csv(
    'cryptos_data_new.csv',
    index_col='timestamp',
)

# Combine both tables into a single frame (join mode 'stocks_left').
joined_df = join_stocks_crypto(cryptos_df, df_all_stocks, mode='stocks_left')

# Column names of the crypto table, used later as the crypto universe.
cryptos_list = [*cryptos_df.columns]

In [3]:
# Stock universe = every column of the stock price table.
tickers = [*df_all_stocks.columns]

# Generate 1000 random 15-stock portfolios; the seed is fixed so the draw is repeatable.
random.seed(42)
random_portfolios = generate_rand_portfolios(
    tickers=tickers,
    n_stocks=15,
    n_reps=1000,
)

# Sharpe-ratio metric (2% annual risk-free rate), then the selection based on it.
# NOTE(review): select_top_five presumably keeps the best portfolios by `metric` - confirm in functions.py.
sharpe_ratio = sharpe_ratio_calculation(df_all_stocks, rf_rate_annual=0.02)
top_five_dict = select_top_five(random_portfolios, metric=sharpe_ratio)

In [4]:
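#Optimize Traditional Portfolios (DISABLED: the loop below is commented out; it writes one JSON per portfolio
# to min_variance_portfolio_jsons/, which the next cell reads back. NOTE: re-enabling it requires
# `import itertools`, which is not among the imports in the first cell.)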
# random.seed(30)
# for i in range(1, 1001):

#     print('Doing', i)
#     top_five_sets = dict(itertools.islice(top_five_dict.items(), i, i+1))
#     results = run_min_variance(df_all_stocks, top_five_sets, min_weight_for_top_five=0.05, clusters=None)  #TRY DIFFERENT WEIGHTS FOR top_five
#     with open(f"min_variance_portfolio_jsons/my_dict{i}.json", "w") as f:
#         json.dump(results, f, indent=4)

In [5]:
# Reassemble the results of the optimization: merge the 1000 per-portfolio JSON files
# (my_dict1.json ... my_dict1000.json) into one dictionary and save it as a single file.
# A literal `{}` is used instead of `dict()` so this cell still works when it is re-run
# after a later cell has rebound the name `dict` in the kernel.
min_var_portfolios = {}
for i in range(1, 1001):
    with open(f'min_variance_portfolio_jsons/my_dict{i}.json') as f:
        port = json.load(f)
    # Later files overwrite earlier ones if they share a portfolio key.
    min_var_portfolios.update(port)

with open("full_optimized_min_variance.json", "w") as f:
    json.dump(min_var_portfolios, f, indent=4)

In [4]:
# Actually cluster the assets with three models (AHC, k-means, k-shape) and save the labels.

# Suppress warnings for cleanliness. NOTE: these filters stay active for the rest of the kernel session.
warnings.simplefilter(action='ignore', category=FutureWarning)
warnings.simplefilter(action='ignore', category=RuntimeWarning)

# Resampling needs a DatetimeIndex; the conversion is idempotent, so re-running the cell is safe.
joined_df.index = pd.to_datetime(joined_df.index)
joined_df_weekly = joined_df.resample('W').last()  # last observation of each week
joined_df_3days = joined_df.resample('3D').last()  # last observation of each 3-day bin


random.seed(32)

# AHC: 3-day data smoothed with a centered 60-period rolling mean, 12 clusters.
df = joined_df_3days.rolling(window=60, center=True).mean()
labels, tickers_with_labels_ahc, _, _ = run_clustering_model(df, n_clus=12, model_name='ahc', linkage='complete', return_mode='geometric', n_init=3)

# KMEANS: weekly data smoothed with a centered 30-period rolling mean, 4 clusters.
df = joined_df_weekly.rolling(window=30, center=True).mean()
labels, tickers_with_labels_kmeans, _, _ = run_clustering_model(df, n_clus=4, model_name='kmeans', linkage=None, return_mode='geometric', n_init=3)

# KSHAPE: original-frequency data smoothed with a centered 3-period rolling mean, 7 clusters.
df = joined_df.rolling(window=3, center=True).mean()
labels, tickers_with_labels_kshape, _, _ = run_clustering_model(df, n_clus=7, model_name='kshape', linkage=None, return_mode='geometric', n_init=3)


# Persist the ticker -> label mapping of each model.
# The loop variable must NOT be called `dict`: at module scope that rebinds the builtin
# for the whole kernel and breaks every later `dict(...)` call (e.g. when re-running earlier cells).
for method, method_labels in {'ahc': tickers_with_labels_ahc, 'kmeans': tickers_with_labels_kmeans, 'kshape': tickers_with_labels_kshape}.items():
    with open(f'tickers_labels_{method}.json', 'w') as fp:
        json.dump(method_labels, fp)


In [5]:
# Random portfolios supplemented with 3 cryptos each, using five selection strategies:
# three clustering-based (k-means, k-shape, AHC), one random and one correlation-based.
# The call order below is kept fixed because the seeded RNG state is shared between calls.
random.seed(32)

# Keyword arguments common to every supplementation call.
_supplement_kwargs = {
    'portfolio_set': random_portfolios,
    'cryptos_list': cryptos_list,
    'df_prices': joined_df,
    'n_cryptos': 3,
    'selection_metric': 'sharpe',
}

random_w_cryptos_kmeans = supplement_set_with_cryptos(
    tickers_with_labels=tickers_with_labels_kmeans, selection_method='clustering', **_supplement_kwargs)
random_w_cryptos_kshape = supplement_set_with_cryptos(
    tickers_with_labels=tickers_with_labels_kshape, selection_method='clustering', **_supplement_kwargs)
random_w_cryptos_ahc = supplement_set_with_cryptos(
    tickers_with_labels=tickers_with_labels_ahc, selection_method='clustering', **_supplement_kwargs)

random_w_cryptos_random = supplement_set_with_cryptos(
    tickers_with_labels=None, selection_method='random', **_supplement_kwargs)

random_w_cryptos_correlation_based = supplement_set_with_cryptos(
    tickers_with_labels=None, selection_method='correlation', **_supplement_kwargs)


#MinVar Portfolios supplemented with cryptos
# minvar_w_cryptos_kmeans = supplement_set_with_cryptos(portfolio_set=min_var_portfolios, cryptos_list=cryptos_list, tickers_with_labels=tickers_with_labels_kmeans, df_prices=joined_df, n_cryptos=3, selection_metric='sharpe', selection_method='clustering')
# minvar_w_cryptos_kshape = supplement_set_with_cryptos(portfolio_set=min_var_portfolios, cryptos_list=cryptos_list, tickers_with_labels=tickers_with_labels_kshape, df_prices=joined_df, n_cryptos=3, selection_metric='sharpe', selection_method='clustering')
# minvar_w_cryptos_ahc = supplement_set_with_cryptos(portfolio_set=min_var_portfolios, cryptos_list=cryptos_list, tickers_with_labels=tickers_with_labels_ahc, df_prices=joined_df, n_cryptos=3, selection_metric='sharpe', selection_method='clustering')

# minvar_w_cryptos_random = supplement_set_with_cryptos(portfolio_set=min_var_portfolios, cryptos_list=cryptos_list, tickers_with_labels=None, df_prices=joined_df, n_cryptos=3, selection_metric='sharpe', selection_method='random')

# minvar_w_cryptos_correlation_based = supplement_set_with_cryptos(portfolio_set=min_var_portfolios, cryptos_list=cryptos_list, tickers_with_labels=None, df_prices=joined_df, n_cryptos=3, selection_metric='sharpe', selection_method='correlation')

# Experiment name -> portfolio set: the stock-only baseline plus one set per crypto selection strategy.
portfolio_sets = {
    'stock_only': random_portfolios,
    'random_crypto': random_w_cryptos_random,
    'correlation_crypto': random_w_cryptos_correlation_based,
    'ahc_crypto': random_w_cryptos_ahc,
    'kmeans_crypto': random_w_cryptos_kmeans,
    'kshape_crypto': random_w_cryptos_kshape,
}


In [39]:
# Names treated as crypto assets when a portfolio is split into asset classes (columns of the crypto table).
CRYPTO_ASSETS = [*cryptos_df.columns]


def reoptimize_portfolio_strategic_allocation(asset_list, df_prices, target_crypto_allocation=None):
    """
    Strategic asset class allocation with within-class min variance optimization.

    Assets listed in the module-level CRYPTO_ASSETS are treated as cryptos,
    everything else as stocks.

    For stock-only portfolios: 100% stocks, min variance within stocks.
    Any crypto present in `asset_list` is ignored and receives no weight.
    For crypto portfolios: strategic allocation (e.g., 80/20), min variance
    within each class. If one of the two classes has no assets, its share is
    given to the other class so the returned weights still sum to 1
    (previously they summed to only the non-empty class's share).

    Parameters:
    -----------
    asset_list : list
        List of assets in portfolio (column names of `df_prices`)
    df_prices : pd.DataFrame
        Price data, one column per asset
    target_crypto_allocation : float or None
        If None, assumes stock-only portfolio
        If float (e.g., 0.2), share of the portfolio assigned to cryptos;
        stocks receive 1 - target_crypto_allocation

    Returns:
    --------
    dict
        Optimal portfolio weights, asset -> weight. Empty if there is
        nothing to allocate to.
    """

    # Separate stocks and cryptos
    stocks = [asset for asset in asset_list if asset not in CRYPTO_ASSETS]
    cryptos = [asset for asset in asset_list if asset in CRYPTO_ASSETS]

    # Returns over the whole portfolio: a row is dropped if ANY asset has a
    # missing return, so both classes are optimized on the same dates.
    # NOTE(review): pct_change() forward-fills gaps by default and that default is
    # deprecated in recent pandas - confirm the intended fill behaviour.
    returns = df_prices[asset_list].pct_change().dropna()

    if target_crypto_allocation is None:
        # Stock-only portfolio: simple min variance
        return min_variance_optimization(stocks, returns)

    # Mixed portfolio: strategic allocation approach
    crypto_allocation = target_crypto_allocation
    stock_allocation = 1 - target_crypto_allocation

    # An empty class cannot absorb its budget; hand it to the other class so
    # the portfolio stays fully invested.
    if not cryptos:
        stock_allocation, crypto_allocation = 1.0, 0.0
    elif not stocks:
        stock_allocation, crypto_allocation = 0.0, 1.0

    # Optimize within each asset class
    stock_weights = min_variance_within_class(stocks, returns, stock_allocation)
    crypto_weights = min_variance_within_class(cryptos, returns, crypto_allocation)

    # Combine weights (the two classes are disjoint, so no key collides)
    return {**stock_weights, **crypto_weights}


def min_variance_within_class(assets, returns, total_allocation):
    """
    Min variance optimization within an asset class.

    Uses the closed-form minimum-variance solution w ~ pinv(Cov) @ 1 on the
    annualized covariance of `returns[assets]`, clips every weight to a small
    positive floor (long-only), renormalizes within the class and scales the
    result to `total_allocation`.

    Falls back to equal weights (total_allocation / n per asset) when the
    closed-form solution is unusable: the pseudo-inverse fails, or the result
    is not finite / has a non-positive normalizer (e.g. zero-variance or NaN
    covariance). A warning is emitted in that case so the fallback is visible.

    Parameters:
    -----------
    assets : list
        Column names of `returns` belonging to this class
    returns : pd.DataFrame
        Periodic returns, one column per asset
    total_allocation : float
        Share of the whole portfolio assigned to this class

    Returns:
    --------
    dict
        asset -> weight, summing to `total_allocation`; {} if `assets` is empty
    """
    n_assets = len(assets)
    if n_assets == 0:
        return {}

    def _equal_weights(reason):
        # Best-effort fallback: split the class budget evenly and say why.
        warnings.warn(
            f"min_variance_within_class: falling back to equal weights ({reason})",
            stacklevel=3,
        )
        equal_weight = total_allocation / n_assets
        return {asset: equal_weight for asset in assets}

    asset_returns = returns[assets]
    cov_matrix = asset_returns.cov() * 252  # Annualized with a factor of 252

    try:
        # Analytical min variance solution
        inv_cov = np.linalg.pinv(cov_matrix.values)
        ones = np.ones((n_assets, 1))
        numerator = inv_cov @ ones
        denominator = (ones.T @ inv_cov @ ones).item()
    except Exception as exc:  # not a bare except: let KeyboardInterrupt/SystemExit through
        return _equal_weights(f"{type(exc).__name__}: {exc}")

    # A zero/NaN normalizer would silently produce NaN weights downstream.
    if not (np.isfinite(denominator) and denominator > 0 and np.all(np.isfinite(numerator))):
        return _equal_weights("degenerate covariance matrix")

    weights = (numerator / denominator).flatten()

    # Long-only clip: every weight is raised to at least 0.01 / n_assets BEFORE
    # renormalization, so the final per-asset minimum can be slightly lower.
    weights = np.maximum(weights, 0.01 / n_assets)
    weights = weights / weights.sum()  # Normalize within class
    weights = weights * total_allocation  # Scale to class allocation

    return {asset: float(weight) for asset, weight in zip(assets, weights)}


def min_variance_optimization(assets, returns):
    """
    Min variance weights for a stock-only portfolio.

    Thin wrapper: the given assets form a single class that receives the
    whole portfolio (allocation 1.0).
    """
    full_allocation = 1.0
    return min_variance_within_class(assets, returns, full_allocation)

In [40]:
# Random portfolios supplemented with cryptos, re-optimized.
# NOTE(review): no uncommented statement in this cell visibly draws random numbers; the seed is
# presumably a leftover from the commented-out reoptimize_weights calls below - confirm before removing.
random.seed(32)

# random_w_cryptos_kmeans_equalw = reoptimize_weights(df_prices=joined_df, portfolio_set=random_w_cryptos_kmeans, how='equal_weights', min_weight=0.02, rf_rate=0.02)
# random_w_cryptos_kmeans_maxsharpe = reoptimize_weights(df_prices=joined_df, portfolio_set=random_w_cryptos_kmeans, how='max_sharpe', min_weight=0.02)

# random_w_cryptos_kshape_equalw  = reoptimize_weights(df_prices=joined_df, portfolio_set=random_w_cryptos_kshape, how='equal_weights', min_weight=0.02)
# random_w_cryptos_kshape_maxsharpe = reoptimize_weights(df_prices=joined_df, portfolio_set=random_w_cryptos_kshape, how='max_sharpe', min_weight=0.02)

# random_w_cryptos_ahc_equalw  = reoptimize_weights(df_prices=joined_df, portfolio_set=random_w_cryptos_ahc, how='equal_weights', min_weight=0.02)
# random_w_cryptos_ahc_maxsharpe = reoptimize_weights(df_prices=joined_df, portfolio_set=random_w_cryptos_ahc, how='max_sharpe', min_weight=0.02)

# random_w_cryptos_random_equalw  = reoptimize_weights(df_prices=joined_df, portfolio_set=random_w_cryptos_random, how='equal_weights', min_weight=0.02)
# random_w_cryptos_random_maxsharpe = reoptimize_weights(df_prices=joined_df, portfolio_set=random_w_cryptos_random, how='max_sharpe', min_weight=0.02)

#MinVar Portfolios supplemented with cryptos reoptimized
#minvar_w_cryptos_kmeans_equalw = reoptimize_weights(df_prices=joined_df, portfolio_set=minvar_w_cryptos_kmeans, how='equal_weights', min_weight=0.02, rf_rate=0.02)
# minvar_w_cryptos_kmeans_maxsharpe = reoptimize_weights(df_prices=joined_df, portfolio_set=minvar_w_cryptos_kmeans, how='max_sharpe', min_weight=0.02)

# #minvar_w_cryptos_kshape_equalw  = reoptimize_weights(df_prices=joined_df, portfolio_set=minvar_w_cryptos_kshape, how='equal_weights', min_weight=0.02)
# minvar_w_cryptos_kshape_maxsharpe = reoptimize_weights(df_prices=joined_df, portfolio_set=minvar_w_cryptos_kshape, how='max_sharpe', min_weight=0.02)

# #minvar_w_cryptos_ahc_equalw  = reoptimize_weights(df_prices=joined_df, portfolio_set=minvar_w_cryptos_ahc, how='equal_weights', min_weight=0.02)
# minvar_w_cryptos_ahc_maxsharpe = reoptimize_weights(df_prices=joined_df, portfolio_set=minvar_w_cryptos_ahc, how='max_sharpe', min_weight=0.02)

# #minvar_w_cryptos_random_equalw  = reoptimize_weights(df_prices=joined_df, portfolio_set=minvar_w_cryptos_random, how='equal_weights', min_weight=0.02)
# minvar_w_cryptos_random_maxsharpe = reoptimize_weights(df_prices=joined_df, portfolio_set=minvar_w_cryptos_random, how='max_sharpe', min_weight=0.02)

# minvar_w_cryptos_corr_maxsharpe = reoptimize_weights(df_prices=joined_df, portfolio_set=minvar_w_cryptos_correlation_based, how='max_sharpe', min_weight=0.02)


# Crypto share applied to every crypto-supplemented set (80/20 stocks/crypto).
_CRYPTO_SHARE = 0.20

# Description of each crypto-supplemented set, in the order they are processed.
_CRYPTO_SET_DESCRIPTIONS = {
    'random_crypto': 'Random crypto selection',
    'correlation_crypto': 'Correlation-based crypto selection',
    'ahc_crypto': 'AHC clustering crypto selection',
    'kmeans_crypto': 'K-means clustering crypto selection',
    'kshape_crypto': 'K-shape clustering crypto selection',
}

# Per-set optimisation settings; the baseline comes first and holds 100% stocks.
portfolio_configs = {
    'stock_only': {
        'target_crypto_allocation': None,
        'description': 'Baseline stock-only portfolios',
    },
}
portfolio_configs.update(
    (set_label, {'target_crypto_allocation': _CRYPTO_SHARE, 'description': text})
    for set_label, text in _CRYPTO_SET_DESCRIPTIONS.items()
)

# Re-optimize every portfolio of every configured set; results are grouped per set name.
reoptimized_sets = {}

for set_name, config in portfolio_configs.items():
    print(set_name)
    reoptimized_portfolios = {}

    for portfolio_id, assets in portfolio_sets[set_name].items():
        # A portfolio is either a plain list of assets or a mapping keyed by asset.
        asset_list = assets[:] if isinstance(assets, list) else list(assets.keys())

        # Flag crypto-supplemented portfolios that hold fewer than 18 assets.
        if len(asset_list) < 18 and set_name != 'stock_only':
            print('problem')

        optimal_weights = reoptimize_portfolio_strategic_allocation(
            asset_list=asset_list,
            df_prices=joined_df,
            target_crypto_allocation=config['target_crypto_allocation'],
        )
        reoptimized_portfolios[portfolio_id] = optimal_weights

    reoptimized_sets[set_name] = reoptimized_portfolios

stock_only
random_crypto
correlation_crypto
ahc_crypto
kmeans_crypto
kshape_crypto


In [48]:
# all_sets = {#'random_w_cryptos_kmeans_equalw': random_w_cryptos_kmeans_equalw,
#             'random_w_cryptos_kmeans_maxsharpe': random_w_cryptos_kmeans_maxsharpe,
#             #'random_w_cryptos_kshape_equalw': random_w_cryptos_kshape_equalw,
#             'random_w_cryptos_kshape_maxsharpe': random_w_cryptos_kshape_maxsharpe,
#             #'random_w_cryptos_ahc_equalw': random_w_cryptos_ahc_equalw,
#             'random_w_cryptos_ahc_maxsharpe': random_w_cryptos_ahc_maxsharpe,
#             #'random_w_cryptos_random_equalw': random_w_cryptos_random_equalw,
#             'random_w_cryptos_random_maxsharpe': random_w_cryptos_random_maxsharpe,}

# Write every re-optimized set to a single JSON file.
with open("all_optimized_sets_for_simulation_NEW_NEW_NEW.json", "w") as out_file:
    json.dump(reoptimized_sets, out_file, indent=4)