In [42]:
from pypfopt import EfficientFrontier
from pypfopt import risk_models
from pypfopt import expected_returns

import pandas as pd
from functions import join_stocks_crypto, generate_rand_portfolios, sharpe_ratio_calculation, select_top_five, run_clustering_model, sharpe_ratio_calculation

import cvxpy as cp
import random
import json

In [28]:
# Load the stock and crypto tables, each indexed by its 'Date' column.
df_all_stocks = pd.read_csv('stocks_data_filled.csv', index_col='Date')
cryptos_df = pd.read_csv('cryptos_data.csv', index_col='Date')

# Combine both tables with the project helper, using its 'stocks_left' mode.
joined_df = join_stocks_crypto(
    cryptos_df,
    df_all_stocks,
    mode='stocks_left',
)

# Crypto tickers are the column labels of the crypto table.
cryptos_list = cryptos_df.columns.tolist()

In [3]:
# Stock universe: the column labels of the stock table.
tickers = df_all_stocks.columns.tolist()

# Seed Python's built-in RNG before sampling the random portfolios.
# NOTE(review): this only makes the sampling reproducible if
# generate_rand_portfolios draws from the `random` module -- confirm.
random.seed(42)
random_portfolios = generate_rand_portfolios(
    n_reps=1000,
    n_stocks=15,
    tickers=tickers,
)

# Compute the Sharpe-ratio metric on the stock table (2% annual risk-free
# rate) and hand it to select_top_five as the ranking metric.
# Presumably this keeps the five best tickers of each random portfolio --
# verify against select_top_five.
sharpe_ratio = sharpe_ratio_calculation(df_all_stocks, rf_rate_annual=0.02)
top_five_dict = select_top_five(random_portfolios, metric=sharpe_ratio)

In [4]:
# Pull the entry stored under 'portfolio_0' and display it as the cell output.
top_five = top_five_dict['portfolio_0']
top_five

{'CRESY': 0.9624262180841339,
 '601857.SS': 0.8426712750398243,
 'PKX': 0.7110552210687626,
 '0386.HK': 0.6714594474023661,
 'VERX': 0.6467235554934667}

In [59]:
# Smooth every column with a centred 30-row rolling mean before clustering.
df = joined_df.rolling(window=30, center=True).mean()

# Run the project clustering helper on the smoothed table; only the first two
# of its four return values (labels and the ticker -> label mapping) are kept.
labels, tickers_with_labels, _, _ = run_clustering_model(
    df,
    n_clus=4,
    model_name='ahc',
    linkage='complete',
    return_mode='geometric',
    n_init=3,
)

  result = func(self.values, **kwargs)
  result = func(self.values, **kwargs)


In [38]:
# Optimiser inputs computed on the joined stock + crypto table:
# expected returns (mu) and the sample covariance matrix (S).
mu = expected_returns.mean_historical_return(joined_df)
S = risk_models.sample_cov(joined_df)

# Hard-coded five-ticker selection. The values match the 'portfolio_0' output
# shown earlier in the notebook, so this cell does not need the random
# portfolio search to be re-run.
top_five = {
    'CRESY': 0.9624262180841339,
    '601857.SS': 0.8426712750398243,
    'PKX': 0.7110552210687626,
    '0386.HK': 0.6714594474023661,
    'VERX': 0.6467235554934667,
}


In [None]:
# Claude's algorithm for selecting complementary cryptos.
# After this step we want to re-optimize the whole portfolio for max Sharpe or min variance.

def select_complementary_cryptos(existing_stocks, crypto_candidates, cluster_assignments, df_prices, n_cryptos=3, verbose=False):
    """
    Select cryptocurrencies to complement an existing stock portfolio based on
    cluster diversification.

    Selection order:
    1. If every candidate sits in the same cluster, return the candidates with
       the highest Sharpe ratios.
    2. Otherwise take the best candidate (by Sharpe ratio) from each cluster
       that none of the existing stocks belongs to, visiting those clusters
       from best to worst.
    3. If slots remain, fill them with the highest-Sharpe candidates that have
       not been picked yet, regardless of cluster.

    Parameters:
    - existing_stocks: List of stock tickers in the current portfolio
    - crypto_candidates: List of potential crypto assets to choose from
      (duplicates are ignored, first occurrence wins)
    - cluster_assignments: Dictionary mapping each asset to its cluster ID;
      must contain every existing stock and every candidate
    - df_prices: Data passed to sharpe_ratio_calculation (with
      rf_rate_annual=0.02); the result must be convertible with dict() into a
      ticker -> Sharpe ratio mapping that covers the candidates being ranked
    - n_cryptos: Number of cryptocurrencies to select (default: 3); values
      <= 0 yield an empty list
    - verbose: If True, print how many existing stocks fall in each cluster

    Returns:
    - List of selected crypto assets (at most n_cryptos, without duplicates)

    Raises:
    - KeyError: if a stock or candidate is missing from cluster_assignments,
      or a ranked candidate is missing from the Sharpe ratio mapping
    """
    if verbose:
        clusters_dict = {stock: cluster_assignments[stock] for stock in existing_stocks}
        print('Cluster Distribution in the original portfolio: \n')
        print(pd.DataFrame(columns=['ticker', 'cluster'], data=list(clusters_dict.items())).groupby('cluster').count())

    # A negative count would otherwise turn the slices below into
    # "everything but the last k", so treat any non-positive request as empty.
    if n_cryptos <= 0:
        return []

    sharpe_by_ticker = dict(sharpe_ratio_calculation(df_prices, rf_rate_annual=0.02))

    def score(ticker):
        # NaN compares False against everything, which makes sorted()/max()
        # order-dependent; rank assets without a usable ratio last instead.
        value = sharpe_by_ticker[ticker]
        return float('-inf') if pd.isna(value) else value

    # Step 1: Identify clusters already represented in the portfolio
    stock_clusters = {cluster_assignments[stock] for stock in existing_stocks}

    # Step 2: Group the (de-duplicated, order-preserving) candidates by cluster
    candidates = list(dict.fromkeys(crypto_candidates))
    crypto_by_cluster = {}
    for crypto in candidates:
        crypto_by_cluster.setdefault(cluster_assignments[crypto], []).append(crypto)

    # Special case: all cryptos are in the same cluster, so cluster
    # diversification cannot discriminate -- rank purely by Sharpe ratio.
    if len(crypto_by_cluster) == 1:
        return sorted(candidates, key=score, reverse=True)[:n_cryptos]

    # Step 3: Best candidate of every cluster the portfolio does not cover yet.
    # Iterating the dict (not a set) keeps tie-breaking deterministic.
    best_per_new_cluster = {
        cluster: max(members, key=score)
        for cluster, members in crypto_by_cluster.items()
        if cluster not in stock_clusters
    }

    # Step 4: Visit unrepresented clusters from best to worst representative
    ranked_clusters = sorted(
        best_per_new_cluster,
        key=lambda cluster: score(best_per_new_cluster[cluster]),
        reverse=True,
    )
    selected_cryptos = [best_per_new_cluster[cluster] for cluster in ranked_clusters[:n_cryptos]]

    # Step 5: If we still need more cryptos, fill up by Sharpe ratio alone
    if len(selected_cryptos) < n_cryptos:
        already_selected = set(selected_cryptos)
        sorted_remaining = sorted(
            (crypto for crypto in candidates if crypto not in already_selected),
            key=score,
            reverse=True,
        )
        needed = n_cryptos - len(selected_cryptos)
        selected_cryptos.extend(sorted_remaining[:needed])

    return selected_cryptos

In [85]:
# Read the saved portfolio file and take the tickers stored under 'portfolio_3'.
with open('min_variance_portfolio_jsons/my_dict3.json') as f:
    portfolio = json.load(f)
existing_stocks = list(portfolio['portfolio_3'])

# Pick three cryptos to complement those stocks, using the cluster labels from
# the clustering cell; the returned list is displayed as the cell output.
select_complementary_cryptos(
    existing_stocks=existing_stocks,
    crypto_candidates=cryptos_list,
    cluster_assignments=tickers_with_labels,
    df_prices=joined_df,
    n_cryptos=3,
    verbose=True,
)

Cluster Distribution in the original portfolio: 

         ticker
cluster        
0            10
1             5


['UNI-USD', 'INJ-USD', 'APE-USD']