In [1]:
from pypfopt import EfficientFrontier
from pypfopt import risk_models
from pypfopt import expected_returns

import pandas as pd
from functions import join_stocks_crypto, generate_rand_portfolios, sharpe_ratio_calculation, select_top_five, run_clustering_model

import cvxpy as cp
import random

Install h5py to use hdf5 features: http://docs.h5py.org/
  warn(h5py_msg)


In [2]:
# Read the stock and crypto CSV files; in both, the 'Date' column becomes the index.
df_all_stocks = pd.read_csv(
    'stocks_data_filled.csv',
    index_col='Date',
)
cryptos_df = pd.read_csv(
    'cryptos_data.csv',
    index_col='Date',
)

# Combine both tables into one frame. The exact join semantics are defined by the
# helper's 'stocks_left' mode (presumably a left join keyed on the stock dates —
# TODO confirm in functions.py).
joined_df = join_stocks_crypto(
    cryptos_df,
    df_all_stocks,
    mode='stocks_left',
)

In [3]:
# Candidate universe: every column of the stock table is treated as a ticker.
tickers = df_all_stocks.columns.tolist()

# Seed Python's `random` module before sampling so the draw is repeatable
# (assumes generate_rand_portfolios samples via `random` — TODO confirm).
random.seed(42)
random_portfolios = generate_rand_portfolios(
    n_reps=1000,
    n_stocks=15,
    tickers=tickers,
)

# Compute the Sharpe-ratio metric with a 0.02 annual risk-free rate, then let the
# helper pick the top five entries of the random portfolios according to it.
sharpe_ratio = sharpe_ratio_calculation(
    df_all_stocks,
    rf_rate_annual=0.02,
)
top_five_dict = select_top_five(
    random_portfolios,
    metric=sharpe_ratio,
)

In [4]:
# Take the entry stored under 'portfolio_0' and echo it as the cell output.
# The displayed value is a mapping of five tickers to floats
# (presumably their Sharpe-based scores — TODO confirm against select_top_five).
top_five = top_five_dict['portfolio_0']
top_five

{'CRESY': 0.9624262180841339,
 '601857.SS': 0.8426712750398243,
 'PKX': 0.7110552210687626,
 '0386.HK': 0.6714594474023661,
 'VERX': 0.6467235554934667}

In [5]:
# Smooth every column with a centred 30-row rolling mean before clustering.
df = df_all_stocks.rolling(center=True, window=30).mean()

# Run the clustering helper with 4 clusters, model 'ahc' and 'complete' linkage.
# Only the first two of its four return values are kept.
labels, tickers_with_labels, _, _ = run_clustering_model(
    df,
    n_clus=4,
    model_name='ahc',
    linkage='complete',
    return_mode='geometric',
    n_init=3,
)

  result = func(self.values, **kwargs)
  result = func(self.values, **kwargs)


In [25]:
# Optimiser inputs estimated from the combined stock + crypto frame:
# mean historical returns and the sample covariance matrix.
mu = expected_returns.mean_historical_return(joined_df)
S = risk_models.sample_cov(joined_df)

# Take the top-five selection from the computed result instead of a pasted copy of
# its printed output, so it cannot go stale when the data, seed or scoring changes.
top_five = top_five_dict['portfolio_0']




def optimize_portfolio(mu, S, top_five:dict, clusters:dict=None, min_weight_for_top_five=0.01, verbose=False, min_stocks_per_cluster=2, n_stocks=15):
    """
    Find a minimum-volatility portfolio under cardinality and cluster constraints.

    The problem is solved as a mixed-integer program: one boolean indicator per
    ticker is coupled to its weight (``w <= indicator``), so a ticker can only
    receive weight when its indicator is switched on.

    Constraints:
    - Every ticker in ``top_five`` gets at least ``min_weight_for_top_five``.
    - Exactly ``n_stocks`` indicators are switched on.
    - For every cluster that has tickers in the universe, at least
      ``min_stocks_per_cluster`` indicators of that cluster are switched on.

    NOTE: an indicator may be on while its weight is (numerically) zero, so the
    indicator constraints are upper bounds on the number of *held* stocks. The
    returned dict can therefore contain fewer than ``n_stocks`` tickers, and a
    cluster can end up with fewer than ``min_stocks_per_cluster`` non-zero weights.

    Parameters:
    -----------
    mu : pd.Series
        Expected returns for each ticker
    S : pd.DataFrame
        Covariance matrix of returns
    top_five : dict
        Dictionary of top five tickers and their scores. Only the keys are used.
    clusters : dict, optional
        Dictionary mapping tickers to their cluster labels. Tickers of the
        universe that are absent from this mapping are not subject to any
        cluster constraint. When empty or None no cluster constraint is added.
    min_weight_for_top_five : float, optional
        Minimum weight for each of the top five stocks
    verbose : bool, optional
        When True, print the per-cluster universe size and a summary of the
        selected stocks.
    min_stocks_per_cluster : int, optional
        Minimum number of switched-on indicators required per cluster.
    n_stocks : int, optional
        Number of indicators switched on, i.e. the maximum number of tickers
        that can receive a non-zero weight. Defaults to 15.

    Returns:
    --------
    dict
        Tickers whose optimized weight exceeds 1e-5, mapped to that weight,
        in the order of the optimizer's ticker list.

    Raises:
    -------
    ValueError
        If a ticker of ``top_five`` is not part of the optimizer's universe.
    """
    # Weights below this threshold are treated as "not held" in the result.
    weight_tolerance = 1e-5

    # NOTE(review): cp.CPLEX presumably requires a local CPLEX installation — confirm.
    ef = EfficientFrontier(mu, S, solver=cp.CPLEX, weight_bounds=(0,1))

    # Resolve each ticker's position once instead of scanning ef.tickers on every
    # lookup. setdefault keeps the first position, matching list.index semantics.
    position_of = {}
    for position, ticker in enumerate(ef.tickers):
        position_of.setdefault(ticker, position)

    # Fail early with a readable message; the original lookup raised a bare
    # "x is not in list" ValueError from inside the constraint lambda.
    missing = [ticker for ticker in top_five if ticker not in position_of]
    if missing:
        raise ValueError(f"top_five tickers not found in the optimization universe: {missing}")

    # Floor on the weight of every top-five ticker. The index is bound as a default
    # argument so each lambda keeps its own ticker.
    for ticker in top_five:
        ef.add_constraint(lambda w, i=position_of[ticker]: w[i] >= min_weight_for_top_five)

    # One on/off indicator per ticker; weights are bounded by 1, so w <= indicator
    # forces the weight to zero whenever the indicator is off.
    booleans = cp.Variable(len(ef.tickers), boolean=True)
    ef.add_constraint(lambda w: w <= booleans)
    ef.add_constraint(lambda w: cp.sum(booleans) == n_stocks)

    unique_clusters = set(clusters.values()) if clusters else set()

    if clusters:
        # Group the universe's positions by cluster label in a single pass.
        positions_by_cluster = {}
        for ticker in ef.tickers:
            if ticker in clusters:
                positions_by_cluster.setdefault(clusters[ticker], []).append(position_of[ticker])

        for cluster_label in unique_clusters:
            cluster_indices = positions_by_cluster.get(cluster_label, [])

            if verbose:
                print(f"Cluster {cluster_label}: found {len(cluster_indices)} stocks")

            # Skip labels with no ticker in the universe: a constraint on an empty
            # sum could never be satisfied.
            if cluster_indices:
                cluster_sum = cp.sum([booleans[i] for i in cluster_indices])
                ef.add_constraint(lambda w, cs=cluster_sum: cs >= min_stocks_per_cluster)

    # Find the minimum volatility portfolio
    weights = ef.min_volatility()

    # Keep only tickers that actually received weight (computed once and reused
    # for both the verbose report and the return value).
    selected = {ticker: weights[ticker] for ticker in ef.tickers if weights[ticker] > weight_tolerance}

    if verbose:
        print(f"Selected {len(selected)} stocks in total")

        # Print cluster representation
        for cluster_label in unique_clusters:
            cluster_stocks = [t for t in selected if t in clusters and clusters[t] == cluster_label]
            print(f"Cluster {cluster_label}: {len(cluster_stocks)} stocks selected - {cluster_stocks}")

    return selected


# Optimize with a 5% floor on each top-five ticker and the cluster labels from the
# clustering step as diversification constraints.
opt_port = optimize_portfolio(
    mu,
    S,
    top_five,
    clusters=tickers_with_labels,
    min_weight_for_top_five=0.05,
    verbose=False,
)

# Show the cluster label of every selected ticker. .get() is used because mu/S are
# built from joined_df while the labels come from the stock table, so a selected
# ticker may have no label; it then shows as None instead of raising KeyError.
print({ticker: tickers_with_labels.get(ticker) for ticker in opt_port})
opt_port

{'CRESY': 3, 'PKX': 1, 'VERX': 3, 'PQ9.F': 0, 'QF9.F': 0, 'ULVR.L': 1, 'CCEP.AS': 2, 'KPN.AS': 3, '601398.SS': 3, '601857.SS': 1, '2914.T': 1, '9432.T': 1, '9434.T': 3, '0386.HK': 1, '6288.HK': 2}


{'CRESY': 0.05,
 'PKX': 0.05,
 'VERX': 0.05,
 'PQ9.F': 0.0039855865214221,
 'QF9.F': 0.0038098836028477,
 'ULVR.L': 0.083460732661682,
 'CCEP.AS': 0.0358732665403442,
 'KPN.AS': 0.1989628909554588,
 '601398.SS': 0.1020023098308874,
 '601857.SS': 0.05,
 '2914.T': 0.0656621100814813,
 '9432.T': 0.098729893762018,
 '9434.T': 0.132602022629776,
 '0386.HK': 0.05,
 '6288.HK': 0.0249113034140826}