In [1]:
# from pypfopt import EfficientFrontier
# from pypfopt import risk_models
# from pypfopt import expected_returns
import numpy as np
from scipy.optimize import minimize
import pandas as pd
from functions import join_stocks_crypto, generate_rand_portfolios, sharpe_ratio_calculation, select_top_five, run_clustering_model, run_min_variance
from functions_post_clustering import reoptimize_weights, supplement_set_with_cryptos

import cvxpy as cp
import random
import json
import warnings

Install h5py to use hdf5 features: http://docs.h5py.org/
  warn(h5py_msg)


In [2]:
# Load the stock prices and turn the date index into plain 'YYYY-MM-DD' strings.
# NOTE(review): presumably done so the index matches the crypto 'timestamp' format
# expected by join_stocks_crypto — confirm against functions.py.
df_all_stocks = pd.read_csv('stocks_data_FINAL.csv', index_col='Date')
df_all_stocks.index = pd.to_datetime(df_all_stocks.index).strftime('%Y-%m-%d')

# Load the crypto prices and combine both universes into a single price frame.
cryptos_df = pd.read_csv('cryptos_data_new.csv', index_col='timestamp')
joined_df = join_stocks_crypto(cryptos_df, df_all_stocks, mode='stocks_left')

# Names of all crypto columns, used later to pick crypto supplements.
cryptos_list = cryptos_df.columns.tolist()

In [3]:
tickers = df_all_stocks.columns.tolist()

# Seed Python's RNG before drawing the portfolios so repeated runs start from the same state.
random.seed(42)
random_portfolios = generate_rand_portfolios(
    n_reps=1000,
    n_stocks=15,
    tickers=tickers,
    weight_calc='equal',
)

# Compute the Sharpe-ratio metric (2% annual risk-free rate) and use it to
# select the top portfolios out of the random set.
sharpe_ratio = sharpe_ratio_calculation(df_all_stocks, rf_rate_annual=0.02)
top_five_dict = select_top_five(random_portfolios, metric=sharpe_ratio)

In [4]:
# Prepare the inputs for clustering the assets.

# Suppress Future/Runtime warnings so the cell output stays clean.
warnings.simplefilter('ignore', FutureWarning)
warnings.simplefilter('ignore', RuntimeWarning)

# resample() requires a datetime-like index, so convert the joined frame's index first.
joined_df.index = pd.to_datetime(joined_df.index)

# Coarser views of the price data: last observation per week / per 3-day bin.
joined_df_weekly = joined_df.resample('W').last()
joined_df_3days = joined_df.resample('3D').last()

random.seed(32)

#AHC
# df = joined_df_3days.rolling(window=60, center=False).mean()
# labels, tickers_with_labels_ahc, _, _ = run_clustering_model(df, n_clus=12, model_name='ahc', linkage='complete', return_mode='geometric', n_init=3)

#KMEANS
# df = joined_df_weekly.rolling(window=30, center=False).mean()
# labels, tickers_with_labels_kmeans, _, _ = run_clustering_model(df, n_clus=4, model_name='kmeans', linkage=None, return_mode='geometric', n_init=3)

#KSHAPE
# df = joined_df.rolling(window=10, center=False).mean()
# labels, tickers_with_labels_kshape, _, _ = run_clustering_model(df, n_clus=15, model_name='kshape', linkage=None, return_mode='geometric', n_init=3)


# for method, dict in {'ahc': tickers_with_labels_ahc, 'kmeans': tickers_with_labels_kmeans, 'kshape': tickers_with_labels_kshape}.items():
#     with open(f'tickers_labels_{method}.json', 'w') as fp:
#         json.dump(dict, fp)

# with open('crypto_subclusters_3clusters.json', 'w') as fp:
#     json.dump(tickers_with_labels_kshape, fp)

In [5]:
# Random portfolios supplemented with 3 cryptos each, selected in four different ways;
# together with the untouched stock-only baseline this gives five portfolio sets.
with open('crypto_subclusters_3clusters.json', 'r') as r:
    cryptos_with_labels = json.load(r)

# Re-seed before the selections; the calls below must stay in this order so that
# any random draws consume the RNG stream in the same sequence.
random.seed(42)

# Selection over the whole crypto universe (no cluster labels supplied).
random_w_cryptos_random = supplement_set_with_cryptos(
    portfolio_set=random_portfolios,
    cryptos_list=cryptos_list,
    tickers_with_labels=None,
    df_prices=joined_df,
    n_cryptos=3,
    selection_method='random',
)
random_w_cryptos_corr = supplement_set_with_cryptos(
    portfolio_set=random_portfolios,
    cryptos_list=cryptos_list,
    tickers_with_labels=None,
    df_prices=joined_df,
    n_cryptos=3,
    selection_method='correlation',
)

# Selection driven by the crypto sub-cluster labels loaded from JSON above.
random_w_cryptos_subclusters_random = supplement_set_with_cryptos(
    portfolio_set=random_portfolios,
    cryptos_list=cryptos_list,
    tickers_with_labels=cryptos_with_labels,
    df_prices=joined_df,
    n_cryptos=3,
    selection_method_clusters='random',
    selection_method='clustering',
)
random_w_cryptos_subclusters_corr = supplement_set_with_cryptos(
    portfolio_set=random_portfolios,
    cryptos_list=cryptos_list,
    tickers_with_labels=cryptos_with_labels,
    df_prices=joined_df,
    n_cryptos=3,
    selection_method_clusters='correlation',
    selection_method='clustering',
)

# All five variants keyed by a descriptive set name.
portfolio_sets = {
    'stock_only': random_portfolios,
    'random_crypto': random_w_cryptos_random,
    'correlation_crypto': random_w_cryptos_corr,
    'random_subclusters_crypto': random_w_cryptos_subclusters_random,
    'correlation_subclusters_crypto': random_w_cryptos_subclusters_corr,
}


In [6]:
# Persist the equal-weight portfolio sets as JSON.
# .copy() is shallow: the per-set objects are shared with portfolio_sets.
equalw_sets = portfolio_sets.copy()
# Plain string literal: the original f-string had no placeholders.
with open("equalw_sets.json", "w") as f:
    json.dump(equalw_sets, f, indent=4)

In [None]:
# CRYPTO_ASSETS = list(cryptos_df.columns)


# def reoptimize_portfolio_strategic_allocation(asset_list, df_prices, target_crypto_allocation=None):
#     """
#     Strategic asset class allocation with within-class min variance optimization.
    
#     For stock-only portfolios: 100% stocks, min variance within stocks
#     For crypto portfolios: Strategic allocation (e.g., 80/20), min variance within each class
    
#     Parameters:
#     -----------
#     asset_list : list
#         List of assets in portfolio
#     df_prices : pd.DataFrame
#         Price data
#     target_crypto_allocation : float or None
#         If None, assumes stock-only portfolio
#         If float (e.g., 0.2), uses strategic allocation
    
#     Returns:
#     --------
#     dict
#         Optimal portfolio weights
#     """
    
#     # Separate stocks and cryptos
#     stocks = [asset for asset in asset_list if asset not in CRYPTO_ASSETS]
#     cryptos = [asset for asset in asset_list if asset in CRYPTO_ASSETS]

#     # Calculate returns and covariance
#     returns = df_prices[asset_list].pct_change().dropna()
    
#     if target_crypto_allocation is None:
#         # Stock-only portfolio: simple min variance
#         output = min_variance_optimization(stocks, returns)
#         return output
    
#     else:
#         # Mixed portfolio: strategic allocation approach
#         stock_allocation = 1 - target_crypto_allocation
#         crypto_allocation = target_crypto_allocation
        
#         # Optimize within each asset class
#         stock_weights = min_variance_within_class(stocks, returns, stock_allocation)
#         crypto_weights = min_variance_within_class(cryptos, returns, crypto_allocation)
        
#         # Combine weights
#         final_weights = {**stock_weights, **crypto_weights}
#         return final_weights


# def min_variance_within_class(assets, returns, total_allocation):
#     """
#     Min variance optimization within an asset class.
#     """
#     if len(assets) == 0:
#         #print('HERE')
#         return {}
    
#     asset_returns = returns[assets]
#     cov_matrix = asset_returns.cov() * 252  # Annualized
    
#     try:
#         # Analytical min variance solution
#         inv_cov = np.linalg.pinv(cov_matrix.values)
#         ones = np.ones((len(assets), 1))
        
#         weights = inv_cov @ ones / (ones.T @ inv_cov @ ones)
#         weights = weights.flatten()
        
#         # Handle negative weights and numerical issues
#         weights = np.maximum(weights, 0.02 / len(assets))  # Floor each weight at 0.02 / len(assets) (before renormalizing)
#         weights = weights / weights.sum()  # Normalize within class
#         weights = weights * total_allocation  # Scale to class allocation
        
#         result = {}
#         for i, asset in enumerate(assets):
#             result[asset] = float(weights[i])
#         return result
        
#     except:
#         # Fallback to equal weights within class
#         equal_weight = total_allocation / len(assets)
#         result = {}
#         for asset in assets:
#             result[asset] = equal_weight
#         return result


# def min_variance_optimization(assets, returns):
#     """
#     Simple min variance for stock-only portfolios.
#     """
#     return min_variance_within_class(assets, returns, 1.0)

In [None]:
# #Random Portfolios supplemented with cryptos reoptimized
# random.seed(32)

# portfolio_configs = {
#     'stock_only': {
#         'target_crypto_allocation': None,  # 100% stocks
#     },
#     'random_crypto': {
#         'target_crypto_allocation': 0.20,  # 80/20 allocation
#     },
#     'correlation_crypto': {
#         'target_crypto_allocation': 0.20,  # 80/20 allocation  
#     },
#     'random_subclusters_crypto': {
#         'target_crypto_allocation': 0.20,  # 80/20 allocation
#     },
#     'correlation_subclusters_crypto': {
#         'target_crypto_allocation': 0.20,  # 80/20 allocation
#     }
# }

# minvar_reoptimized_sets = {}

# for set_name, config in portfolio_configs.items():
#     reoptimized_portfolios = {}
#     print(set_name)
#     for portfolio_id, assets in portfolio_sets[set_name].items():
#         if not isinstance(assets, list):
#             asset_list = [i for i in assets.keys()]
#         else:
#             asset_list = assets[:]
#         if set_name != 'stock_only' and len(asset_list) < 18:
#             print('problem')
            
#         optimal_weights = reoptimize_portfolio_strategic_allocation(
#             asset_list=asset_list,
#             df_prices=joined_df,
#             target_crypto_allocation=config['target_crypto_allocation']
#         )
#         reoptimized_portfolios[portfolio_id] = optimal_weights
    
#     minvar_reoptimized_sets[set_name] = reoptimized_portfolios


# with open(f"minvar_reoptimized_sets.json", "w") as f:
#     json.dump(minvar_reoptimized_sets, f, indent=4)

stock_only
random_crypto
correlation_crypto
random_subclusters_crypto
correlation_subclusters_crypto


In [19]:
from scipy.optimize import minimize
import numpy as np

def maximize_diversification_ratio(asset_list, df_prices, bounds=(0.01, 0.20)):
    """
    Optimize portfolio weights to maximize the diversification ratio.

    Diversification Ratio = (w'σ) / √(w'Σw)
    where w = weights, σ = individual volatilities, Σ = covariance matrix

    The problem is solved with SLSQP, starting from equal weights, subject to
    the weights summing to 1 and every weight lying inside `bounds`.

    Equal weights (1 / n per asset) are returned as a fallback when:
      * fewer than 50 complete return observations are available,
      * the bounds cannot be satisfied together with sum(w) == 1,
      * the optimizer does not converge or raises.
    Note that the fallback weights are NOT forced into `bounds`.

    Parameters:
    -----------
    asset_list : list
        List of assets in portfolio (column names of `df_prices`)
    df_prices : pd.DataFrame
        Price data
    bounds : tuple
        (min_weight, max_weight) for each asset; either entry may be None
        for "unbounded", as accepted by scipy.optimize.minimize

    Returns:
    --------
    dict
        Optimal portfolio weights keyed by asset; empty dict for an empty
        `asset_list`
    """
    asset_list = list(asset_list)
    n_assets = len(asset_list)

    # Nothing to allocate; previously this hit a ZeroDivisionError in the fallback.
    if n_assets == 0:
        return {}

    def _equal_weights():
        """Fallback allocation: 1 / n for every asset."""
        equal_weight = 1.0 / n_assets
        return {asset: equal_weight for asset in asset_list}

    # Simple returns; rows with any missing value are discarded.
    returns = df_prices[asset_list].pct_change().dropna()

    if len(returns) < 50:  # Minimum data requirement
        return _equal_weights()

    # With identical per-asset bounds, sum(w) == 1 is only reachable when
    # n * min_weight <= 1 <= n * max_weight. Skip the solver otherwise.
    lower, upper = bounds
    min_total = n_assets * (-np.inf if lower is None else lower)
    max_total = n_assets * (np.inf if upper is None else upper)
    if min_total > 1.0 + 1e-9 or max_total < 1.0 - 1e-9:
        return _equal_weights()

    # Volatilities and covariance scaled by 252 periods. The ratio itself is
    # unaffected by this scaling; it only changes the units of the parts.
    # Extracted as arrays once so the objective does no pandas work.
    volatilities = returns.std().to_numpy() * np.sqrt(252)
    cov_matrix = returns.cov().to_numpy() * 252

    def negative_diversification_ratio(weights):
        """Objective to minimize: the negative of the diversification ratio."""
        weights = np.asarray(weights)

        # Weighted average of individual volatilities (numerator)
        weighted_vol = weights @ volatilities

        # Portfolio volatility (denominator)
        portfolio_vol = np.sqrt(weights @ cov_matrix @ weights)

        # Penalize degenerate points: near-zero variance, or NaN from a
        # slightly negative quadratic form caused by round-off.
        if not np.isfinite(portfolio_vol) or portfolio_vol < 1e-10:
            return 1e10

        return -(weighted_vol / portfolio_vol)

    # Constraints and bounds
    constraints = {'type': 'eq', 'fun': lambda w: np.sum(w) - 1.0}  # Weights sum to 1
    weight_bounds = [bounds for _ in range(n_assets)]

    # Initial guess (equal weights)
    initial_weights = np.ones(n_assets) / n_assets

    try:
        result = minimize(
            negative_diversification_ratio,
            initial_weights,
            method='SLSQP',
            bounds=weight_bounds,
            constraints=constraints,
            options={'maxiter': 1000, 'ftol': 1e-9}
        )
    except Exception as e:
        print(f"Optimization failed: {e}")
        return _equal_weights()

    if not result.success:
        # Solver did not converge: fall back rather than return a bad point.
        return _equal_weights()

    return {asset: float(weight) for asset, weight in zip(asset_list, result.x)}


def reoptimize_portfolio_sets_mdr(portfolio_sets, df_prices, bounds=(0.01, 0.20)):
    """
    Re-weight every portfolio of every set with the Maximum Diversification
    Ratio optimizer.

    Parameters:
    -----------
    portfolio_sets : dict
        Maps a set name to a dict of portfolios ({portfolio_id: portfolio}).
        A portfolio is either a dict keyed by asset (only the keys are used)
        or a sequence of assets.
    df_prices : pd.DataFrame
        Price data handed to the optimizer
    bounds : tuple
        (min_weight, max_weight) per asset, handed to the optimizer

    Returns:
    --------
    dict
        Same two-level structure as the input, with each portfolio replaced
        by the weight dict returned from maximize_diversification_ratio
    """
    results = {}

    for set_name, portfolios in portfolio_sets.items():
        # Progress line, one per set.
        print(f"Optimizing {set_name}...")

        results[set_name] = {
            portfolio_id: maximize_diversification_ratio(
                # Dict portfolios contribute their keys; sequences are sliced into a copy.
                asset_list=list(members.keys()) if isinstance(members, dict) else members[:],
                df_prices=df_prices,
                bounds=bounds,
            )
            for portfolio_id, members in portfolios.items()
        }

    return results

In [20]:
# Rebuild the name -> portfolio-set mapping used as optimizer input.
portfolio_sets = dict(
    stock_only=random_portfolios,
    random_crypto=random_w_cryptos_random,
    correlation_crypto=random_w_cryptos_corr,
    random_subclusters_crypto=random_w_cryptos_subclusters_random,
    correlation_subclusters_crypto=random_w_cryptos_subclusters_corr,
)

# Silence all warnings for the (long) optimization run.
import warnings
warnings.filterwarnings('ignore')

# Reoptimize all sets; each asset weight is limited to 1% min, 20% max.
mdr_reoptimized_sets = reoptimize_portfolio_sets_mdr(
    portfolio_sets,
    joined_df,
    bounds=(0.01, 0.20),
)

Optimizing stock_only...
Optimizing random_crypto...
Optimizing correlation_crypto...
Optimizing random_subclusters_crypto...
Optimizing correlation_subclusters_crypto...


In [21]:
# Persist the MDR-optimized weights as JSON.
# Plain string literal: the original f-string had no placeholders.
with open("mdr_reoptimized_sets.json", "w") as f:
    json.dump(mdr_reoptimized_sets, f, indent=4)

In [22]:
# Inspect the optimized weights of the 'correlation_crypto' set; as the cell's
# last expression, the whole {portfolio_id: {asset: weight}} dict is displayed.
mdr_reoptimized_sets['correlation_crypto']

{'portfolio_0': {'1088.HK': 0.12731037292600322,
  'LPL': 0.010000000000000064,
  'COST': 0.04727775362795643,
  'SU.PA': 0.010000000000000099,
  'DG.PA': 0.07669518340315948,
  'AI.PA': 0.06768314234731335,
  'TLK': 0.15766060240842208,
  'KEP': 0.07573673260366541,
  '4519.T': 0.13966120357578535,
  'HDB': 0.04157285703154056,
  '8058.T': 0.11494891857516396,
  'INPST.AS': 0.013450728595627454,
  'CSCO': 0.04170628797311116,
  'CRESY': 0.02533710475753599,
  'HMC': 0.01,
  'INJUSDT': 0.013539255483566617,
  'STXUSDT': 0.013468518856079482,
  'RNDRUSDT': 0.013951337835069424},
 'portfolio_1': {'TTD': 0.01,
  'BN.PA': 0.16314068879298127,
  '601398.SS': 0.19727836601520202,
  '9434.T': 0.17362207526225137,
  'COST': 0.04556593069758711,
  '6857.T': 0.04595556143535319,
  'PRCT': 0.031047200552795702,
  '1398.HK': 0.07013690542104316,
  '4519.T': 0.05392805003880878,
  'INGA.AS': 0.01,
  'VERX': 0.026726921924796605,
  'SHELL.AS': 0.10049596730443587,
  '8035.T': 0.010000000000000073,
 