In [41]:
'''from google.colab import drive
drive.mount('/content/drive')'''

"from google.colab import drive\ndrive.mount('/content/drive')"

In [42]:
'''cd drive/MyDrive/RoboA/'''

'cd drive/MyDrive/RoboA/'

# Markowitz Efficient Frontier

## 1. Imports

In [43]:
import pandas as pd
import numpy as np
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import statistics
from tqdm import tqdm
from collections import defaultdict, Counter
import math
from itertools import combinations
from sklearn.preprocessing import MinMaxScaler
import tensorflow as tf
from tensorflow.keras.models import Sequential # type: ignore
from tensorflow.keras.layers import LSTM, Dense, Dropout # type: ignore

from fetchData import fetch_raw_data_yf, getSNP500, fetch_raw_data_yf_all, getNasdaq_comp
from MonteCarloRBA import MonteCarloRBA
from PortfolioFunction import maximize_sharpe, create_correlation_matrix, get_sharpe_ratio, get_matrices, maximize_sharpe_SLSQP
from LearningRBA import find_best_asset_to_remove, find_asset_to_add


## 2. Fetch Data

### Get all Nasdaq Stocks

In [44]:
assets= [
    "AAPL",  # Apple Inc.
    "MSFT",  # Microsoft Corporation
    "AMZN",  # Amazon.com Inc.
    "GOOGL", # Alphabet Inc. (Google) Class A
    "GOOG",  # Alphabet Inc. (Google) Class C
    "META",    # Meta Platforms Inc (formerly Facebook)
    "TSLA",  # Tesla Inc
    "UA", # Berkshire Hathaway Inc. Class B
    "JPM",   # JPMorgan Chase & Co.
    "V",     # Visa Inc.
    "JNJ",   # Johnson & Johnson
    "WMT",   # Walmart Inc.
    "PG",    # Procter & Gamble Co.
    "UNH",   # UnitedHealth Group Inc.
    "MA",    # Mastercard Inc.
    "NVDA",  # NVIDIA Corporation
    "HD",    # Home Depot Inc.
    "BAC",   # Bank of America Corp
    "DIS",   # Walt Disney Co
    "PYPL",  # PayPal Holdings
    "VZ",    # Verizon Communications Inc.
    "ADBE",  # Adobe Inc.
    "CMCSA", # Comcast Corporation
    "NFLX",  # Netflix Inc.
    "KO",    # Coca-Cola Co
    "NKE",   # NIKE Inc.
    "PFE",   # Pfizer Inc.
    "MRK",   # Merck & Co., Inc.
    "PEP",   # PepsiCo, Inc.
    "T",     # AT&T Inc.
    "ABT",   # Abbott Laboratories
    "CRM",   # Salesforce.com Inc.
    "ORCL",  # Oracle Corporation
    "ABBV",  # AbbVie Inc.
    "CSCO",  # Cisco Systems, Inc.
    "INTC",  # Intel Corporation
    "TMO",   # Thermo Fisher Scientific Inc.
    "XOM",   # Exxon Mobil Corporation
    "ACN",   # Accenture plc
    "LLY",   # Eli Lilly and Company
    "COST",  # Costco Wholesale Corporation
    "MCD",   # McDonald's Corp
    "DHR",   # Danaher Corporation
    "MDT",   # Medtronic plc
    "NEE",   # NextEra Energy, Inc.
    "BMY",   # Bristol-Myers Squibb Company
    "QCOM",  # Qualcomm Inc
    "CVX",   # Chevron Corporation
    "WFC",   # Wells Fargo & Co
    "LMT",    # Lockheed Martin Corporation
    "GS",   # Goldman Sachs Group, Inc.
    "MS",   # Morgan Stanley
    "IBM",  # International Business Machines Corporation
    "GE",   # General Electric Company
    "F",    # Ford Motor Company
    "GM",   # General Motors Company
    "UBER", # Uber Technologies, Inc.
    "LYFT", # Lyft, Inc.
    "SNAP", # Snap Inc.
    "TWTR", # Twitter, Inc.
    "SPOT", # Spotify Technology S.A.
    "AMD",  # Advanced Micro Devices, Inc.
    "TXN",  # Texas Instruments Incorporated
    "BABA", # Alibaba Group Holding Limited
    "SAP",  # SAP SE
    "HON",  # Honeywell International Inc.
    "BA",   # Boeing Company
    "RTX",  # Raytheon Technologies Corporation
    "CAT",  # Caterpillar Inc.
    "DE",   # Deere & Company
    "MMM",  # 3M Company
    "DUK",  # Duke Energy Corporation
    "SO",   # Southern Company
    "EXC",  # Exelon Corporation
    "NEE",  # NextEra Energy, Inc.
    "AEP",  # American Electric Power Company, Inc.
    "SRE",  # Sempra Energy
    "ETN",  # Eaton Corporation plc
    "EMR",  # Emerson Electric Co.
    "SYY",  # Sysco Corporation
    "KR",   # Kroger Co.
    "GIS",  # General Mills, Inc.
    "K",    # Kellogg Company
    "CPB",  # Campbell Soup Company
    "MO",   # Altria Group, Inc.
    "PM",   # Philip Morris International Inc.
    "BTI",  # British American Tobacco plc
    "RDY",  # Dr. Reddy's Laboratories Ltd.
    "GILD", # Gilead Sciences, Inc.
    "BIIB", # Biogen Inc.
    "CELG", # Celgene Corporation
    "AMGN", # Amgen Inc.
    "SYK",  # Stryker Corporation
    "BSX",  # Boston Scientific Corporation
    "ISRG", # Intuitive Surgical, Inc.
    "ZBH",  # Zimmer Biomet Holdings, Inc.
    "EW",   # Edwards Lifesciences Corporation
    "RMD",  # ResMed Inc.
    "VRTX", # Vertex Pharmaceuticals Incorporated
    "REGN",  # Regeneron Pharmaceuticals, Inc.
]

assets = getSNP500()

In [45]:
start_date = "2024-01-01"
end_date = "2025-01-01"
raw_data, asset_errors, max_combination= fetch_raw_data_yf_all(assets, start_date, end_date)

[*********************100%***********************]  503 of 503 completed

2 Failed downloads:
['BF.B']: YFPricesMissingError('possibly delisted; no price data found  (1d 2024-01-01 -> 2025-01-01)')
['BRK.B']: YFTzMissingError('possibly delisted; no timezone found')


Omitted assets ( 2 ):  ['BF.B', 'BRK.B']
Time to fetch data: 15.14 seconds


### Split into test and train

In [46]:
split = len(raw_data.index) // 2

raw_data_train = raw_data.iloc[:split]
raw_data_test = raw_data.iloc[split:]

## 3. Mean, Volatility and Covariance

In [47]:
names, annualized_returns, returns_matrix, cov, correlation_matrix = get_matrices(raw_data_train)

In [48]:
volatility = np.sqrt(np.diag(cov))

risk_free_rate=0
sharpe_ratios = (annualized_returns - risk_free_rate) / volatility

In [None]:
hover_texts = [
    f"<br>Symbol: {ticker} <br>Volatility: {vol:.3f} <br>Returns: {ret:.3%} <br>Sharpe Ratio: {sr:.3f}"
    for ticker, vol, ret, sr in zip(names, volatility, annualized_returns, sharpe_ratios)
]

fig = go.Figure(data=go.Scatter(
    x=volatility,
    y=annualized_returns,
    mode='markers',
    hoverinfo='text',
    hovertext=hover_texts,
    marker=dict(color=sharpe_ratios, colorscale = 'RdBu', size=6, line=dict(width=1), colorbar=dict(title="Sharpe<br>Ratio")
    )
))

fig.update_layout(
    title='Annual Performance of Individual Assets',
    xaxis_title='Volatility (Standard Deviation)',
    yaxis_title='Annualized Returns',
    width = 1920,
    height = 1080,
    font=dict(
        family="Cambria",
        size=18,
    )
)

fig.show()
#fig.write_html("PerformanceofIndividualAssets.html")
#fig.write_image("PerformanceofIndividualAssets.png")

### Demonstrating SLSQP

In [50]:
rand_assets = np.random.choice(list(names), 5, replace=False)

selected_returns = annualized_returns.loc[rand_assets].values
selected_covariances = cov.loc[rand_assets, rand_assets].values

optimized_weights, weights_history, sharpe_history = maximize_sharpe_SLSQP(selected_returns, selected_covariances)

In [None]:
fig = make_subplots(rows=1, cols=2, subplot_titles=("Portfolio Weights Over Iterations", "Sharpe Ratio Over Iterations"))

for i in range(len(selected_returns)):
    fig.add_trace(go.Scatter(x=list(range(len(weights_history))), 
                             y=[h[i] for h in weights_history], 
                             mode='lines+markers', 
                             name=f'Asset {i+1} Weight'), 
                  row=1, col=1)

fig.add_trace(go.Scatter(x=list(range(len(sharpe_history))), 
                         y=sharpe_history, 
                         mode='lines+markers', 
                         name='Sharpe Ratio'), 
              row=1, col=2)

fig.update_layout(title_text='Portfolio Optimization Analysis',
                  xaxis_title='Iteration',
                  yaxis_title='Weight',
                  legend_title='Assets',
                    font=dict(
                        family="Cambria",
                        size=18,
                    )
)

# Update xaxis and yaxis properties for Sharpe Ratio subplot
fig.update_xaxes(title_text="Iteration", row=1, col=2)
fig.update_yaxes(title_text="Sharpe Ratio", row=1, col=2)

fig.show()

#fig.write_html("SLSQPDemo.html")
#fig.write_image("SLSQPDemo.png")

## 4.0 Monte Carlo Method

In [52]:
all_portfolios, dominant_portfolios = MonteCarloRBA(names, cov, annualized_returns, 10000, min_assets=8, max_assets=8)

100%|██████████| 10000/10000 [00:47<00:00, 210.58it/s]


In [None]:
iterations = [portfolio['iteration'] for portfolio in dominant_portfolios]
counts = list(range(1, len(dominant_portfolios) + 1))

# Create the Plotly graph
fig = go.Figure()

# Add a scatter plot to the figure with no markers and a smooth line
fig.add_trace(go.Scatter(x=iterations, y=counts,
                        mode='lines',  # Only lines, no markers
                        name='Dominant Portfolios',
                        line=dict(shape='spline')  # Setting line shape to spline for smoothness
))  

# Update the layout of the figure
fig.update_layout(
    title='Growth of Dominant Portfolios Over Iterations',
    xaxis_title='Portfolios Generated',
    yaxis_title='Number of Dominant Portfolios Found',
    height=1080,
    width=1920,
    font=dict(
        family="Cambria",
        size=18,
    )
)

# Show the figure
fig.show()
#fig.write_html("FrequencyOfDom.html")
#fig.write_image("FrequencyOfDom.png")

In [None]:
fig1 = go.Figure()

fig1.add_trace(go.Scatter(
    x=[np.sqrt(p["variance"]) for p in all_portfolios],
    y=[p["return"] for p in all_portfolios],
    mode='markers',
    marker=dict(
        color=[p["sharpe"] for p in all_portfolios],
        showscale=True,
        size=7,
        line=dict(width=1),
        colorscale="RdBu",
        colorbar=dict(title="Sharpe<br>Ratio")
    ),
    hoverinfo='text',
    text=[
        f"Return: {p['return']:.3%}<br>Volatility: {np.sqrt(p['variance']):.3f}<br>" +
        f"Sharpe Ratio: {p['return'] / (np.sqrt(p['variance'])):.3f}<br>" +
        "<br>".join([f"{p['tickers'][i]}: Weight={p['weights'][i]:.3f}" for i in range(len(p['tickers']))])
        for p in all_portfolios
    ]
))

fig1.update_layout(
    xaxis=dict(title='Volatility (Standard Deviation)'),
    yaxis=dict(title='Annualised Returns'),
    title='Monte Carlo Randomly Generated Portfolios',
    height=1080,
    width=1920,
    font=dict(
        family="Cambria",
        size=18,
    )
)
fig1.show()
#fig1.write_html("MonteCarlo.html")
#fig.write_image("MonteCarlo.png")

In [None]:
fig2 = go.Figure()

fig2.add_trace(go.Scatter(
    x=[np.sqrt(p["variance"]) for p in dominant_portfolios],  # Convert variance to volatility
    y=[p["return"] for p in dominant_portfolios],
    mode='markers',
    marker=dict(
        size=7,
        line=dict(width=1),
        #showscale=True,
        #color=[p["return"] / (np.sqrt(p["variance"])) for p in dominant_portfolios],  # Sharpe Ratio
        #colorscale="RdBu",
        #colorbar=dict(title="Sharpe<br>Ratio")
    ),
    hoverinfo='text',
    text=[
        f"Return: {p['return']:.3%}<br>Volatility: {np.sqrt(p['variance']):.3f}<br>" +
        f"Sharpe Ratio: {p['sharpe']:.3f}<br>" +
        "<br>".join([f"{p['tickers'][i]}: Weight={p['weights'][i]:.3f}" for i in range(len(p['tickers']))])
        for p in dominant_portfolios
    ],
    name="Monte Carlo Portfolios"
))

fig2.add_trace(go.Scatter(
    x=volatility,
    y=annualized_returns,
    mode='markers',
    hoverinfo='text',
    hovertext=[
        f"{name} <br>Volatility: {vol:.3f} <br>Returns: {ret:.3%} <br>Sharpe Ratio: {sr:.3f}"
        for name, vol, ret, sr in zip(names, volatility, annualized_returns, sharpe_ratios)
    ],
    marker=dict(
        color='green',
        size=5,
        line=dict(width=1)
    ),
    name="Individual Assets"
))

fig2.update_layout(
    title='Monte Carlo Portfolios with Individual Assets',
    xaxis_title='Volatility (Standard Deviation)',
    yaxis_title='Annualized Return',
    legend=dict(y=5),
    height=1080,
    width=1920,
    font=dict(
        family="Cambria",
        size=18,
    )
)

fig2.show()
#fig2.write_html("MCMarkowitzBullet.html")
#fig.write_image("MCMarkowitzBullet.png")

## 5.0 Machine Learning Method

### 5.1 Optimization Function Only

In [142]:
def MLRBA_V1(ticker, covariances, returns, num_iterations=None, risk_free_rate = 0, 
             return_power = 1, std_power = 1, return_weight=1/3, corr_weight=1/3, vol_weight= 1/3, num_assets = 8, base_portfolio = None):
    
    if num_iterations is None:
        num_iterations = min(math.comb(len(ticker), num_assets), 100000)
    
    if base_portfolio is None:
        base_portfolio = np.random.choice(list(ticker), num_assets, replace=False)
        #base_portfolio = list(ticker)[:num_assets]
    
    highest_weighted_sharpe = -np.inf

    all_portfolios = []

    tested_assets = set()
    best_iteration = 0

    def _get_portfolio_stats (portfolio_assets, risk_free_rate = 0):
        p_asset_ret = returns.loc[portfolio_assets].values
        p_asset_var = covariances.loc[portfolio_assets, portfolio_assets].values
        best_p_weights = maximize_sharpe(p_asset_ret, p_asset_var)
        p_ret = np.dot(best_p_weights,p_asset_ret)
        p_var = np.dot(best_p_weights, p_asset_var @ best_p_weights)
        sharpe = get_sharpe_ratio(p_ret, p_var, risk_free_rate, return_power, std_power)

        return p_asset_ret, p_asset_var, sharpe, p_ret, p_var, best_p_weights

    def _update_portfolios_array(portfolios, assets, weights, p_ret, p_var):
        portfolios.append({
            "tickers": assets,
            "weights": weights,
            "return": p_ret,
            "variance": p_var,
            "sharpe": (p_ret-risk_free_rate)/np.sqrt(p_var),
        })

    curr_ret, curr_var, curr_weighted_sharpe, curr_p_return, curr_p_variance, curr_p_weights = _get_portfolio_stats(base_portfolio, risk_free_rate)
    _update_portfolios_array(all_portfolios, base_portfolio, curr_p_weights, curr_p_return, curr_p_variance)

    good_portfolios = all_portfolios.copy()
    best_portfolio = base_portfolio.copy()

    highest_weighted_sharpe = curr_weighted_sharpe
    for i in tqdm(range(num_iterations)):
        asset_to_remove = find_best_asset_to_remove(best_portfolio, curr_var, curr_ret)     #most_correlated_asset, _, _ = find_correlation_matrix(portfolio, curr_variances)
        new_portfolio = [str(asset) for asset in best_portfolio if asset != asset_to_remove]

        ranked_assets = find_asset_to_add(new_portfolio, ticker, covariances, returns, return_weight, corr_weight, vol_weight)         # Find the next best asset to add to the portfolio
        asset_to_add = ranked_assets.index[0]

        for asset in ranked_assets.index:
            if asset not in tested_assets:
                asset_to_add = asset
                break

        new_portfolio.append(asset_to_add)
        tested_assets.add(asset_to_add)

        if len(tested_assets) >= len(ticker) - num_assets:
            print("All assets have been tested")
            break

        # Substitute in and measure portfolio performance based on sharpe ratio
        new_returns, new_var, new_weighted_sharpe, new_p_return, new_p_variance, new_p_weights = _get_portfolio_stats(new_portfolio, risk_free_rate)

        _update_portfolios_array(all_portfolios, new_portfolio, new_p_weights, new_p_return, new_p_variance)

        if new_weighted_sharpe > highest_weighted_sharpe:
            highest_weighted_sharpe = new_weighted_sharpe
            best_portfolio = new_portfolio
            curr_ret, curr_var = new_returns, new_var
            best_iteration = i  # Update the best iteration

            _update_portfolios_array(good_portfolios, new_portfolio, new_p_weights, new_p_return, new_p_variance)

            tested_assets.clear()

        # If Sharpe ratio was worse, then move on to the next least correlated asset
        # If Sharpe ratio is better, set as new base portfolio, and repeat the process for num_iterations times
        # Adjust the sharpe ratio, maybe more emphasis on returns/volatility
        # Update weights to value return or corr
        # See how many iterations it takes to get here, whats a good threshold/stopping point
        # Backtesting
        # Train a model to maybe predict the sharpe ratio of a portfolio

    base_details = good_portfolios[0]
    best_details = good_portfolios[-1]

    return base_details, best_details, good_portfolios, all_portfolios, best_iteration  

base_portfolio, best_portfolio, good_portfolios, total_portfolios, best_iteration = MLRBA_V1(names, cov, annualized_returns)
base_portfolio, best_portfolio, len(total_portfolios), best_iteration

  0%|          | 0/100000 [00:00<?, ?it/s]

  8%|▊         | 8277/100000 [01:36<17:50, 85.71it/s]

All assets have been tested





({'tickers': array(['NEM', 'BRO', 'O', 'AXP', 'MOS', 'ERIE', 'FICO', 'WDAY'],
        dtype='<U5'),
  'weights': array([0.11649217, 0.03358685, 0.56275508, 0.07971409, 0.        ,
         0.11737041, 0.09008141, 0.        ]),
  'return': np.float64(1.1606945930788528),
  'variance': np.float64(0.014142571887262327),
  'sharpe': np.float64(9.760088684404959)},
 {'tickers': ['DHI', 'LMT', 'CBOE', 'BK', 'WELL', 'K', 'ATO', 'AAPL'],
  'weights': array([0.09669204, 0.15714454, 0.13284975, 0.10595771, 0.12567591,
         0.11950564, 0.13495148, 0.12722294]),
  'return': np.float64(1.2641524139215543),
  'variance': np.float64(0.004506359331029446),
  'sharpe': np.float64(18.83156999767294)},
 8278,
 7784)

In [150]:
fig = go.Figure()

fig.add_trace(go.Scatter(
    x=[p["variance"]**0.5 for p in good_portfolios],  # Convert variance to volatility
    y=[p["return"] for p in good_portfolios],
    mode='markers',
    marker=dict(
        color=[p["sharpe"] for p in good_portfolios],  # Sharpe Ratio
        showscale=True,
        size=7,
        line=dict(width=1),
        colorscale="RdBu",
        colorbar=dict(title="Sharpe<br>Ratio")
    ),
    hoverinfo='text',
    text=[
        f"Return: {p['return']:.3%}<br>Volatility: {p['variance']**0.5:.3f}<br>" +
        f"Sharpe Ratio: {p['return'] / (p['variance']**0.5):.3f}<br>" +
        "<br>".join([f"{p['tickers'][i]}: Weight={p['weights'][i]:.3f}" for i in range(len(p['tickers']))])
        for p in good_portfolios
    ],
    name="Portfolios"
))

fig.update_layout(
    title='Convergence Strategy Generarted Portfolios',
    xaxis_title='Volatility (Standard Deviation)',
    yaxis_title='Annualized Return',
    legend=dict(y=5),
    width=1920,
    height=1080,
    font=dict(
        family="Cambria",
        size=18,
    )
)

fig.show()
fig.write_html("ConvergenceRBA.html")
#fig.write_image("ConvergenceRBA.png")

fig.add_trace(go.Scatter(
    x=volatility,
    y=annualized_returns,
    mode='markers',
    hoverinfo='text',
    hovertext=[
        f"{name} <br>Volatility: {vol:.3f} <br>Returns: {ret:.3%} <br>Sharpe Ratio: {sr:.3f}"
        for name, vol, ret, sr in zip(names, volatility, annualized_returns, sharpe_ratios)
    ],
    marker=dict(
        color='green',
        size=5,
        line=dict(width=1)
    ),
    name="Individual Assets"
))

fig.update_layout(
    title='Convergence Strategy Generarted Portfolios with Individual Assets',
    xaxis_title='Volatility (Standard Deviation)',
    yaxis_title='Annualized Return',
    legend=dict(y=5),
    width=1920,
    height=1080,
    font=dict(
        family="Cambria",
        size=18,
    )
)


fig.show()

#fig.write_html("ConvergenceRBA+Asset.html")
#fig.write_image("ConvergenceRBA+Asset.png")


In [144]:
sharpe_ratios = [portfolio['sharpe'] for portfolio in total_portfolios]

fig = go.Figure(data=go.Scatter(x=list(range(len(sharpe_ratios))), y=sharpe_ratios, mode='lines+markers'))
fig.update_layout(title='Sharpe Ratio Over Iterations',
                xaxis_title='Iteration',
                yaxis_title='Sharpe Ratio',
                width=1920,
                height=1080,
                font=dict(
                    family="Cambria",
                    size=18,
                )
)
fig.show()

#### Comparing MLRBA_V1 with Monte Carlo

In [148]:
figC = go.Figure(fig2)
figC.add_trace(go.Scatter(
    x=[p["variance"]**0.5 for p in good_portfolios],  # Convert variance to volatility
    y=[p["return"] for p in good_portfolios],
    mode='markers',
    marker=dict(
        size=7,
        line=dict(width=1),
        color="Red",
    ),
    hoverinfo='text',
    text=[
        f"Return: {p['return']:.3%}<br>Volatility: {p['variance']**0.5:.3f}<br>" +
        f"Sharpe Ratio: {p['return'] / (p['variance']**0.5):.3f}<br>" +
        "<br>".join([f"{p['tickers'][i]}: Weight={p['weights'][i]:.3f}" for i in range(len(p['tickers']))])
        for p in good_portfolios
    ],
    name="Convergence Portfolios"
))

figC.update_layout(
    title='Convergence vs Monte Carlo vs Individual Assets',
    legend=dict(x=0.85, y=0.95),
    width=1920,
    height=1080,
    font=dict(
        family="Cambria",
        size=18,
    )
)

figC.show()
figC.write_html("ComparisonOfConvergence+MC.html")
#fig.write_image("ComparisonOfConvergence+MC.png")

### 5.2 Reinforcement Weight Training

In [153]:
def MLRBA_V2(ticker, covariances, returns, num_iterations=None, risk_free_rate = 0, 
             return_power = 1, std_power = 1, return_weight=1/3, corr_weight=1/3, vol_weight= 1/3, num_assets = 8, base_portfolio = None):
    
    if num_iterations is None:
        num_iterations = min(math.comb(len(ticker), num_assets), 100000)

    if base_portfolio is None:
        base_portfolio = np.random.choice(list(ticker), num_assets, replace=False)
        #base_portfolio = list(ticker)[:num_assets]

    highest_weighted_sharpe = -np.inf
    all_portfolios = []
    tested_assets = set()
    best_iteration = 0

    learning_rate = 0.03

    def _get_portfolio_stats(portfolio_assets, risk_free_rate=0):
        p_asset_ret = returns.loc[portfolio_assets].values
        p_asset_var = covariances.loc[portfolio_assets, portfolio_assets].values
        best_p_weights = maximize_sharpe(p_asset_ret, p_asset_var)
        p_ret = np.dot(best_p_weights, p_asset_ret)
        p_var = np.dot(best_p_weights, p_asset_var @ best_p_weights)
        sharpe = get_sharpe_ratio(p_ret, p_var, risk_free_rate, return_power, std_power)
        return p_asset_ret, p_asset_var, sharpe, p_ret, p_var, best_p_weights

    def _update_portfolios_array(portfolios, assets, weights, p_ret, p_var):
        portfolios.append({
            "tickers": assets,
            "weights": weights,
            "return": p_ret,
            "variance": p_var,
            "sharpe": (p_ret - risk_free_rate) / np.sqrt(p_var),
        })

    curr_ret, curr_var, curr_weighted_sharpe, curr_p_return, curr_p_variance, curr_p_weights = _get_portfolio_stats(base_portfolio, risk_free_rate)
    _update_portfolios_array(all_portfolios, base_portfolio, curr_p_weights, curr_p_return, curr_p_variance)

    good_portfolios = all_portfolios.copy()
    best_portfolio = base_portfolio.copy()
    highest_weighted_sharpe = curr_weighted_sharpe

    improvement_threshold = 0.001

    for i in tqdm(range(num_iterations)):
        asset_to_remove = find_best_asset_to_remove(best_portfolio, curr_var, curr_ret)
        new_portfolio = [str(asset) for asset in best_portfolio if asset != asset_to_remove]

        ranked_assets = find_asset_to_add(new_portfolio, ticker, covariances, returns, return_weight, corr_weight, vol_weight)
        asset_to_add = ranked_assets.index[0]

        for asset in ranked_assets.index:
            if asset not in tested_assets:
                asset_to_add = asset
                break

        new_portfolio.append(asset_to_add)
        tested_assets.add(asset_to_add)

        if len(tested_assets) >= len(ticker) - num_assets:
            print("All assets have been tested")
            break

        new_returns, new_var, new_weighted_sharpe, new_p_return, new_p_variance, new_p_weights = _get_portfolio_stats(new_portfolio, risk_free_rate)
        _update_portfolios_array(all_portfolios, new_portfolio, new_p_weights, new_p_return, new_p_variance)

        if new_weighted_sharpe > highest_weighted_sharpe:
            improvement = new_weighted_sharpe - highest_weighted_sharpe
            highest_weighted_sharpe = new_weighted_sharpe
            best_portfolio = new_portfolio
            curr_ret, curr_var = new_returns, new_var
            best_iteration = i

            asset_return = returns.loc[asset_to_add]
            asset_vol = np.sqrt(covariances.loc[asset_to_add, asset_to_add])
            avg_return = returns.mean()
            avg_vol = np.sqrt(np.diag(covariances)).mean()

            corr_with_portfolio = correlation_matrix.loc[new_portfolio, asset_to_add].drop(asset_to_add).mean()
            avg_corr_in_portfolio = correlation_matrix.loc[new_portfolio].drop(asset_to_add, axis=1).mean().mean()
            
            # Update weights using the current learning rate
            return_weight += learning_rate * (asset_return - avg_return) / avg_return
            vol_weight    += learning_rate * (avg_vol - asset_vol) / avg_vol
            corr_weight   += learning_rate * (avg_corr_in_portfolio - corr_with_portfolio) / avg_corr_in_portfolio

            total = return_weight + corr_weight + vol_weight
            return_weight /= total
            corr_weight /= total
            vol_weight /= total

            if improvement < improvement_threshold:
                learning_rate *= 0.95
            else:
                learning_rate *= 1.01

            _update_portfolios_array(good_portfolios, new_portfolio, new_p_weights, new_p_return, new_p_variance)
            tested_assets.clear()

    base_details = good_portfolios[0]
    best_details = good_portfolios[-1]

    return base_details, best_details, good_portfolios, all_portfolios, best_iteration


base_portfolio, best_portfolio, good_portfolios, total_portfolios, best_iteration = MLRBA_V2(names, cov, annualized_returns)
best_portfolio, best_portfolio['sharpe'], best_iteration

  2%|▏         | 1731/100000 [00:20<19:49, 82.64it/s]

All assets have been tested





({'tickers': ['DHI', 'CBRE', 'K', 'CBOE', 'ATO', 'LMT', 'GEHC', 'FTNT'],
  'weights': array([0.09440362, 0.08090139, 0.11753189, 0.13202273, 0.17753579,
         0.19417911, 0.15373074, 0.04969473]),
  'return': np.float64(1.4942358392488537),
  'variance': np.float64(0.006402866706875918),
  'sharpe': np.float64(18.673766254603894)},
 np.float64(18.673766254603894),
 1238)

In [None]:
fig = go.Figure()

fig.add_trace(go.Scatter(
    x=[p["variance"]**0.5 for p in good_portfolios],  # Convert variance to volatility
    y=[p["return"] for p in good_portfolios],
    mode='markers',
    marker=dict(
        color=[p["sharpe"] for p in good_portfolios],  # Sharpe Ratio
        showscale=True,
        size=7,
        line=dict(width=1),
        colorscale="RdBu",
        colorbar=dict(title="Sharpe<br>Ratio")
    ),
    hoverinfo='text',
    text=[
        f"Return: {p['return']:.3%}<br>Volatility: {p['variance']**0.5:.3f}<br>" +
        f"Sharpe Ratio: {p['return'] / (p['variance']**0.5):.3f}<br>" +
        "<br>".join([f"{p['tickers'][i]}: Weight={p['weights'][i]:.3f}" for i in range(len(p['tickers']))])
        for p in good_portfolios
    ],
    name="Portfolios"
))

fig.update_layout(
    title='Learning Convergence Portfolios',
    xaxis_title='Volatility (Standard Deviation)',
    yaxis_title='Annualized Return',
    legend=dict(y=5),
    width=1920,
    height=1080,
    font=dict(
        family="Cambria",
        size=18,
    )
)

fig.show()
#fig.write_html("LearningConvergenceRBA.html")
#fig.write_image("LearningConvergenceRBA.png")

fig.add_trace(go.Scatter(
    x=volatility,
    y=annualized_returns,
    mode='markers',
    hoverinfo='text',
    hovertext=[
        f"{name} <br>Volatility: {vol:.3f} <br>Returns: {ret:.3%} <br>Sharpe Ratio: {sr:.3f}"
        for name, vol, ret, sr in zip(names, volatility, annualized_returns, sharpe_ratios)
    ],
    marker=dict(
        color='green',
        size=5,
        line=dict(width=1)
    ),
    name="Individual Assets"
))

fig.update_layout(
    title='Learning Convergence Portfolios with Individual Assets',
    xaxis_title='Volatility (Standard Deviation)',
    yaxis_title='Annualized Return',
    legend=dict(y=5),
    width=1920,
    height=1080,
    font=dict(
        family="Cambria",
        size=18,
    )
)


fig.show()
#fig.write_html("LearningConvergenceRBA+Asset.html")
#fig.write_image("LearningConvergenceRBA+Asset.png")


In [23]:
sharpe_ratios = [portfolio['sharpe'] for portfolio in total_portfolios]

fig = go.Figure(data=go.Scatter(x=list(range(len(sharpe_ratios))), y=sharpe_ratios, mode='lines+markers'))
fig.update_layout(title='Sharpe Ratio Over Iterations',
                  xaxis_title='Iteration',
                  yaxis_title='Sharpe Ratio',
                  )
fig.show()

In [None]:
figC = go.Figure(fig2)
figC.add_trace(go.Scatter(
    x=[p["variance"]**0.5 for p in good_portfolios],  # Convert variance to volatility
    y=[p["return"] for p in good_portfolios],
    mode='markers',
    marker=dict(
        size=7,
        line=dict(width=1),
        color="Red",
    ),
    hoverinfo='text',
    text=[
        f"Return: {p['return']:.3%}<br>Volatility: {p['variance']**0.5:.3f}<br>" +
        f"Sharpe Ratio: {p['return'] / (p['variance']**0.5):.3f}<br>" +
        "<br>".join([f"{p['tickers'][i]}: Weight={p['weights'][i]:.3f}" for i in range(len(p['tickers']))])
        for p in good_portfolios
    ],
    name="Learning Convergence Portfolios"
))

figC.update_layout(
    title='Learning Convergence vs Monte Carlo vs Individual Assets',
    legend=dict(x=0.8, y=0.95),
    width=1920,
    height=1080,
    font=dict(
        family="Cambria",
        size=18,
    )
)

figC.show()
#figC.write_html("ComparisonOfLearningConvergence+MC.html")
#fig.write_image("ComparisonOfLearningConvergence+MC.png")

### Comparing V1 and V2

In [24]:
def run_MLRBA_multiple_times(names, cov, annualized_returns, num_runs, num_assets=8):

    def generate_rand_port(tickers, num_assets, num_runs):
        rand_port = []
        for _ in range (num_runs):
            base_portfolio = np.random.choice(list(tickers), num_assets, replace=False)
            rand_port.append(base_portfolio)

        return rand_port
    
    random_portfolios = generate_rand_port(names, num_assets=num_assets, num_runs=num_runs)

    total_good_portfolios_length_v1 = 0
    best_portfolios_v1 = []
    best_iterations_v1 = []
    
    total_good_portfolios_length_v2 = 0
    best_portfolios_v2 = []
    best_iterations_v2 = []
    
    for portfolio in random_portfolios:
        base_portfolio_v1, best_portfolio_v1, good_portfolios_v1, _, best_iteration_v1 = MLRBA_V1(names, cov, annualized_returns, base_portfolio=portfolio)
        total_good_portfolios_length_v1 += len(good_portfolios_v1)
        best_portfolios_v1.append(best_portfolio_v1)
        best_iterations_v1.append(best_iteration_v1)
        
        base_portfolio_v2, best_portfolio_v2, good_portfolios_v2, _, best_iteration_v2 = MLRBA_V2(names, cov, annualized_returns, base_portfolio=portfolio)
        total_good_portfolios_length_v2 += len(good_portfolios_v2)
        best_portfolios_v2.append(best_portfolio_v2)
        best_iterations_v2.append(best_iteration_v2)

        print(base_portfolio_v1['tickers'] == base_portfolio_v2['tickers'])

    average_length_v1 = total_good_portfolios_length_v1 / num_runs
    average_iteration_v1 = statistics.fmean(best_iterations_v1)
    std_dev_iteration_v1 = statistics.stdev(best_iterations_v1) if num_runs > 1 else 0

    average_length_v2 = total_good_portfolios_length_v2 / num_runs
    average_iteration_v2 = statistics.fmean(best_iterations_v2)
    std_dev_iteration_v2 = statistics.stdev(best_iterations_v2) if num_runs > 1 else 0

    
    results = {
        'v1': (base_portfolio_v1, average_length_v1, best_portfolios_v1, average_iteration_v1, std_dev_iteration_v1, best_iterations_v1),
        'v2': (base_portfolio_v2, average_length_v2, best_portfolios_v2, average_iteration_v2, std_dev_iteration_v2, best_iterations_v2)
    }
    
    return results

num_runs = 10 
results = run_MLRBA_multiple_times(names, cov, annualized_returns, num_runs)

_, _, _, average_iteration_v1, std_dev_v1, best_iterations_v1 = results['v1']
_, _, _, average_iteration_v2, std_dev_v2, best_iterations_v2 = results['v2']

  8%|▊         | 7668/100000 [01:20<16:06, 95.57it/s] 


All assets have been tested


  2%|▏         | 1527/100000 [00:16<17:48, 92.17it/s]


All assets have been tested
[ True  True  True  True  True  True  True  True]


  8%|▊         | 8021/100000 [01:23<15:52, 96.55it/s] 


All assets have been tested


  1%|▏         | 1497/100000 [00:16<17:35, 93.35it/s]


All assets have been tested
[ True  True  True  True  True  True  True  True]


  8%|▊         | 8364/100000 [01:27<15:53, 96.06it/s] 


All assets have been tested


  2%|▏         | 1535/100000 [00:16<17:34, 93.37it/s]


All assets have been tested
[ True  True  True  True  True  True  True  True]


  9%|▉         | 9323/100000 [01:37<15:44, 96.04it/s] 


All assets have been tested


  2%|▏         | 1646/100000 [00:17<17:32, 93.43it/s]


All assets have been tested
[ True  True  True  True  True  True  True  True]


  8%|▊         | 7833/100000 [01:21<15:53, 96.69it/s] 


All assets have been tested


  2%|▏         | 1839/100000 [00:19<17:44, 92.22it/s]


All assets have been tested
[ True  True  True  True  True  True  True  True]


  8%|▊         | 8449/100000 [01:29<16:13, 94.01it/s] 


All assets have been tested


  2%|▏         | 1814/100000 [00:19<17:52, 91.52it/s]


All assets have been tested
[ True  True  True  True  True  True  True  True]


  6%|▋         | 6258/100000 [01:07<16:51, 92.68it/s] 


All assets have been tested


  2%|▏         | 1717/100000 [00:18<18:06, 90.50it/s]


All assets have been tested
[ True  True  True  True  True  True  True  True]


  8%|▊         | 7621/100000 [01:22<16:37, 92.57it/s]


All assets have been tested


  2%|▏         | 1595/100000 [00:17<18:07, 90.51it/s]


All assets have been tested
[ True  True  True  True  True  True  True  True]


  8%|▊         | 8373/100000 [01:28<16:08, 94.65it/s] 


All assets have been tested


  2%|▏         | 1526/100000 [00:16<18:14, 89.98it/s]


All assets have been tested
[ True  True  True  True  True  True  True  True]


  9%|▊         | 8508/100000 [01:32<16:31, 92.31it/s]


All assets have been tested


  2%|▏         | 1534/100000 [00:17<18:19, 89.52it/s]

All assets have been tested
[ True  True  True  True  True  True  True  True]





In [25]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots

fig = make_subplots(
    rows=1, cols=2,
    subplot_titles=[
        "Iterations to Reach Best Portfolio (Line Plot)",
        "Iterations to Reach Best Portfolio (Bar Plot)"
    ],)

fig.add_trace(
    go.Scatter(x=list(range(1, num_runs + 1)), y=best_iterations_v1, mode='lines', name='MLRBA V1'),
    row=1, col=1
)
fig.add_trace(
    go.Scatter(x=list(range(1, num_runs + 1)), y=best_iterations_v2, mode='lines', name='MLRBA V2'),
    row=1, col=1
)
fig.update_xaxes(title_text='Run Number', row=1, col=1)
fig.update_yaxes(title_text='Average Number of Iterations to Best Portfolio', row=1, col=1)

fig.add_trace(
    go.Bar(x=['MLRBA V1'], y=[average_iteration_v1], name='MLRBA V1', width=0.4,
           error_y=dict(type='data', array=[std_dev_v1], visible=True)),
    row=1, col=2
)
fig.add_trace(
    go.Bar(x=['MLRBA V2'], y=[average_iteration_v2], name='MLRBA V2', width=0.4,
           error_y=dict(type='data', array=[std_dev_v2], visible=True)),
    row=1, col=2
)
fig.update_xaxes(title_text='Run Number', row=1, col=2)
fig.update_yaxes(title_text='Average Number of Iterations to Best Portfolio', row=1, col=2)

fig.show()

## 6.0 Portfolio Prediction using LSTM

In [26]:
class PortfolioPredictor:
    def __init__(self, raw_data_train, raw_data_test, best_portfolio, n_steps=1, epochs=50, batch_size=32):
        self.raw_data_train = raw_data_train
        self.raw_data_test = raw_data_test
        self.best_portfolio = best_portfolio
        self.n_steps = n_steps
        self.epochs = epochs
        self.batch_size = batch_size
        self.model = None
        self.history = None

    def preprocess_data(self):
        best_portfolio_data_train = self.raw_data_train[self.best_portfolio['tickers']]
        best_portfolio_data_test = self.raw_data_test[self.best_portfolio['tickers']]
        weights = np.array(self.best_portfolio['weights'])

        # Use a scaler fitted on a broader dataset so that training/test normalization is consistent
        self.scaler = MinMaxScaler(feature_range=(0, 1))
        # Fit on the entire raw_data (or on a fixed training period) for consistency
        full_data = pd.concat([best_portfolio_data_train, best_portfolio_data_test])
        self.scaler.fit(full_data)
        
        normalized_train_data = self.scaler.transform(best_portfolio_data_train)
        normalized_test_data = self.scaler.transform(best_portfolio_data_test)

        self.weighted_returns_train = np.dot(normalized_train_data, weights)
        self.weighted_returns_test = np.dot(normalized_test_data, weights)

    def create_datasets(self, data):
        X, y = [], []
        for i in range(len(data) - self.n_steps):
            v = data[i:(i + self.n_steps), :]
            X.append(v)
            y.append(data[i + self.n_steps, :])
        return np.array(X), np.array(y)

    def build_model(self):
        self.model = Sequential([
            LSTM(250, activation='relu', return_sequences=True),
            Dropout(0.2),
            LSTM(50, activation='relu', return_sequences=False),
            Dropout(0.2),
            Dense(1),
        ])

        def tf_weighted_mse(y_true, y_pred, power=3):
            n = tf.shape(y_true)[0]
            normalized_index = tf.cond(
                tf.equal(n, 1),
                lambda: tf.ones([n], dtype=tf.float32),
                lambda: tf.cast(tf.range(n), tf.float32) / tf.cast(n - 1, tf.float32)
            )
            weights = tf.pow(normalized_index, power)
            weights += 1e-6
            weights /= tf.reduce_sum(weights)
            
            squared_errors = tf.square(y_true - y_pred)
            weighted_squared_errors = weights * squared_errors
            return tf.reduce_mean(weighted_squared_errors)

        self.model.compile(optimizer='adam', loss=tf_weighted_mse)

    def train_model(self):
        self.X_train_weighted, self.y_train_weighted = self.create_datasets(self.weighted_returns_train.reshape(-1, 1))
        self.history = self.model.fit(self.X_train_weighted, self.y_train_weighted, epochs=self.epochs, batch_size=self.batch_size, validation_split=0.001, shuffle=False, verbose=0)

    def predict(self):
        X_test_weighted, y_test_weighted = self.create_datasets(self.weighted_returns_test.reshape(-1, 1))
        
        self.predictions = self.model.predict(X_test_weighted)
        self.y_test_weighted = y_test_weighted
        
        return self.predictions

    def normalize_cumulative_returns(self, data):
        data_series = pd.Series(data.flatten())
        pct_change = data_series.pct_change().fillna(0)
        cum_returns = (1 + pct_change).cumprod()
        normalized_returns = cum_returns * 100
        return normalized_returns

    def normalize_cumulative_returns_with_baseline(self, data, baseline):
        data_series = pd.Series(data.flatten())
        pct_change = data_series.pct_change().fillna(0)
        cum_returns = (1 + pct_change).cumprod()
        normalized_returns = cum_returns * baseline
        return normalized_returns

    def plot_loss(self):
        fig = go.Figure()
        fig.add_trace(go.Scatter(x=np.arange(1, len(self.history.history['loss'])+1), y=self.history.history['loss'], mode='lines', name='Training Loss'))
        fig.add_trace(go.Scatter(x=np.arange(1, len(self.history.history['val_loss'])+1), y=self.history.history['val_loss'], mode='lines', name='Validation Loss'))
        fig.update_layout(title='Training and Validation Loss Over Epochs',
                          xaxis_title='Epoch',
                          yaxis_title='Loss',
                          legend_title='Type of Loss')
        fig.show()
        
    def plot_predictions(self):
        normalized_train = self.normalize_cumulative_returns(self.y_train_weighted)
        training_end_value = normalized_train.iloc[-1]
        normalized_test = self.normalize_cumulative_returns_with_baseline(self.y_test_weighted, training_end_value)
        normalized_predicted = self.normalize_cumulative_returns_with_baseline(self.predictions, training_end_value)

        fig = go.Figure()
        # Plot the training portfolio (January to June)
        fig.add_trace(go.Scatter(
            x=self.raw_data_train.index,
            y=normalized_train,
            mode='lines',
            name='Actual Training Returns'
        ))
        # Plot the actual test portfolio (June to December)
        fig.add_trace(go.Scatter(
            x=self.raw_data_test.index[-len(normalized_test):],
            y=normalized_test,
            mode='lines',
            name='Actual Test Returns'
        ))
        # Plot the predicted portfolio (June to December)
        fig.add_trace(go.Scatter(
            x=self.raw_data_test.index[-len(normalized_predicted):],
            y=normalized_predicted,
            mode='lines',
            name='Predicted Test Returns'
        ))
        fig.update_layout(
            title='Actual vs Predicted Weighted Portfolio Returns',
            xaxis_title='Date',
            yaxis_title='Normalized Returns',
            legend_title='Portfolio'
        )
        fig.show()


In [27]:
portfolio_predictor = PortfolioPredictor(raw_data_train, raw_data_test, best_portfolio, n_steps=3, epochs=30)

portfolio_predictor.preprocess_data()
portfolio_predictor.build_model()
portfolio_predictor.train_model()
prediction = portfolio_predictor.predict()   
portfolio_predictor.plot_loss()
portfolio_predictor.plot_predictions()

[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 153ms/step


In [28]:
def evaluate_portfolios_over_time(raw_data, window_size=5, num_windows=None, threshold=0.05, epochs=30):
    split = len(raw_data.index) // 2
    all_good_portfolios = []
    if num_windows is None:
        num_windows = split // window_size
    
    previous_best_portfolio = None 

    for i in range(num_windows):
        curr_split = i * window_size

        loop_raw_data_train = raw_data.iloc[:split + curr_split]
        loop_raw_data_test = raw_data.iloc[split + curr_split:]
        loop_names, loop_annualized_returns, _, loop_cov, _ = get_matrices(loop_raw_data_train)
        
        _, loop_best_portfolio, loop_good_portfolios, _, _ = MLRBA_V2(loop_names, loop_cov, loop_annualized_returns)
        best_sharpe = loop_best_portfolio['sharpe']
        
        close_to_best = []
        if previous_best_portfolio is not None:
            close_to_best.append(previous_best_portfolio)
        close_to_best.append(loop_best_portfolio)
        
        for j in range(len(loop_best_portfolio)):
            difference = abs((best_sharpe - loop_good_portfolios[j]['sharpe']) / best_sharpe)
            if difference < threshold:
                close_to_best.append(loop_good_portfolios[j])

        print(f'Length of close to best is: {len(close_to_best)}')

        sharpe_list = [portfolio['sharpe'] for portfolio in close_to_best]
        print("Sharpe ratios (first is best_sharpe):", sharpe_list)
        
        portfolio_results = {}
        for id, portfolio in enumerate(close_to_best):
            portfolio_predictor = PortfolioPredictor(loop_raw_data_train, loop_raw_data_test, portfolio, n_steps=window_size, epochs=epochs)
            portfolio_predictor.preprocess_data()
            portfolio_predictor.build_model()
            portfolio_predictor.train_model()
            prediction = portfolio_predictor.predict() 

            if len(prediction) >= window_size:
                end_pred = prediction[window_size-1]
            else:
                end_pred = prediction[-1]
            
            percentage_diff = (end_pred - prediction[0]) / prediction[0]
            print(prediction[:min(window_size, len(prediction))], prediction[0], percentage_diff * 100)          
            
            portfolio_results[id] = percentage_diff

        # Check if all predictions (percentage_diff) are negative
        if max(portfolio_results.values()) < 0:
            print("All percentage differences are negative. Choosing an empty portfolio (not holding anything).")
            predicted_best_portfolio = {}
        else:
            best_id = max(portfolio_results, key=portfolio_results.get)
            predicted_best_portfolio = close_to_best[best_id]
            previous_best_portfolio = predicted_best_portfolio
        
        start_date = loop_raw_data_test.index[0]
        end_date = loop_raw_data_test.index[window_size-1]
        
        all_good_portfolios.append({
            "portfolio": predicted_best_portfolio,
            "start_date": start_date,
            "end_date": end_date
        })
        print(f'Current iteration: {i}, the best portfolio found was portfolio: {best_id}')
    
    return all_good_portfolios

In [29]:
all_good_portfolios = evaluate_portfolios_over_time(raw_data, window_size=5, num_windows=8, threshold=0.5, epochs=20)

  1%|▏         | 1380/100000 [00:15<18:11, 90.33it/s]


All assets have been tested
Length of close to best is: 6
Sharpe ratios (first is best_sharpe): [np.float64(18.424470409657594), np.float64(9.667158719085453), np.float64(9.861909760782494), np.float64(9.89227437207338), np.float64(9.892274372080507), np.float64(10.630556792099869)]
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 148ms/step
[[0.60583234]
 [0.6015339 ]
 [0.598468  ]
 [0.6025143 ]
 [0.6064424 ]] [0.60583234] [0.10069676]
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 162ms/step
[[0.49322668]
 [0.49677962]
 [0.4986439 ]
 [0.50565445]
 [0.51630515]] [0.49322668] [4.67908]
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 154ms/step
[[0.53250086]
 [0.5355282 ]
 [0.5377588 ]
 [0.5453141 ]
 [0.55668014]] [0.53250086] [4.5407023]
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 161ms/step
[[0.511101  ]
 [0.513554  ]
 [0.51518685]
 [0.5213562 ]
 [0.5311547 ]] [0.511101] [3.9236248]
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[

  1%|          | 1008/100000 [00:10<17:37, 93.63it/s]


All assets have been tested
Length of close to best is: 5
Sharpe ratios (first is best_sharpe): [np.float64(9.667158719085453), np.float64(16.86591708563275), np.float64(8.606588104361276), np.float64(9.632617093869706), np.float64(9.632617222990758)]
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 161ms/step
[[0.52017355]
 [0.5430499 ]
 [0.5596491 ]
 [0.57170486]
 [0.58226806]] [0.52017355] [11.937268]
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 152ms/step
[[0.6834261 ]
 [0.6934681 ]
 [0.70179945]
 [0.7052202 ]
 [0.7063643 ]] [0.6834261] [3.3563533]
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 151ms/step
[[0.6177358]
 [0.6204458]
 [0.6230661]
 [0.6172299]
 [0.6053761]] [0.6177358] [-2.0008032]
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 155ms/step
[[0.6159034 ]
 [0.6216457 ]
 [0.62843674]
 [0.6264155 ]
 [0.61810637]] [0.6159034] [0.357684]
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 156ms/step
[[0.610

  1%|          | 888/100000 [00:09<17:35, 93.87it/s] 


All assets have been tested
Length of close to best is: 5
Sharpe ratios (first is best_sharpe): [np.float64(9.667158719085453), np.float64(16.142907524656657), np.float64(8.201067223019665), np.float64(9.024451769910382), np.float64(9.024451769912067)]
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 151ms/step
[[0.9351643 ]
 [0.939383  ]
 [0.9295033 ]
 [0.94137627]
 [0.9542102 ]] [0.9351643] [2.036642]
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 156ms/step
[[0.95613706]
 [0.9422217 ]
 [0.91755843]
 [0.8928196 ]
 [0.883317  ]] [0.95613706] [-7.61607]
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 151ms/step
[[0.93325824]
 [0.90756965]
 [0.8672828 ]
 [0.83540684]
 [0.80770695]] [0.93325824] [-13.453005]
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 160ms/step
[[1.0033664 ]
 [0.97690094]
 [0.9269221 ]
 [0.8875911 ]
 [0.85519266]] [1.0033664] [-14.767655]
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 149ms/step


  1%|          | 1155/100000 [00:12<17:48, 92.49it/s]


All assets have been tested
Length of close to best is: 7
Sharpe ratios (first is best_sharpe): [np.float64(9.667158719085453), np.float64(14.996156060220617), np.float64(8.78824281067013), np.float64(9.061137894855534), np.float64(9.304009449745907), np.float64(10.030448845267415), np.float64(10.03291150347281)]
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 151ms/step
[[0.8630904 ]
 [0.86199725]
 [0.865516  ]
 [0.86013204]
 [0.843963  ]] [0.8630904] [-2.2161489]
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 148ms/step
[[0.76701283]
 [0.75684315]
 [0.75288206]
 [0.74820113]
 [0.7541752 ]] [0.76701283] [-1.6737202]
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 151ms/step
[[0.6481711 ]
 [0.64189893]
 [0.64747506]
 [0.64744514]
 [0.6640108 ]] [0.6481711] [2.4437523]
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 153ms/step
[[0.6985364 ]
 [0.683729  ]
 [0.68743324]
 [0.6845791 ]
 [0.6986831 ]] [0.6985364] [0.0209992]
[1m2/2[0

  1%|          | 861/100000 [00:09<18:19, 90.14it/s]


All assets have been tested
Length of close to best is: 2
Sharpe ratios (first is best_sharpe): [np.float64(8.78824281067013), np.float64(13.943386706277138)]
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 159ms/step
[[0.6006149 ]
 [0.6060647 ]
 [0.6264049 ]
 [0.6519109 ]
 [0.67110276]] [0.6006149] [11.735949]
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 161ms/step
[[0.7092428]
 [0.7096621]
 [0.7181375]
 [0.7329786]
 [0.7447405]] [0.7092428] [5.0050087]
Current iteration: 4, the best portfolio found was portfolio: 0


  1%|          | 748/100000 [00:08<17:51, 92.63it/s]


All assets have been tested
Length of close to best is: 7
Sharpe ratios (first is best_sharpe): [np.float64(8.78824281067013), np.float64(13.949848534128542), np.float64(9.870729114864464), np.float64(9.872005112733474), np.float64(9.985185850563907), np.float64(9.985185866317327), np.float64(9.985185866317616)]
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 156ms/step
[[0.6878848 ]
 [0.7047906 ]
 [0.70599866]
 [0.7001432 ]
 [0.7010615 ]] [0.6878848] [1.9155357]
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 165ms/step
[[0.7857579 ]
 [0.7932614 ]
 [0.79569036]
 [0.7904566 ]
 [0.7863842 ]] [0.7857579] [0.07970975]
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 156ms/step
[[0.7672074 ]
 [0.76573056]
 [0.76334596]
 [0.7535589 ]
 [0.74455595]] [0.7672074] [-2.9524527]
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 165ms/step
[[0.73996544]
 [0.73849094]
 [0.7358092 ]
 [0.72666943]
 [0.7184602 ]] [0.73996544] [-2.9062488]
[1m1/1[0

  1%|          | 980/100000 [00:10<17:40, 93.36it/s]


All assets have been tested
Length of close to best is: 2
Sharpe ratios (first is best_sharpe): [np.float64(8.78824281067013), np.float64(13.271843548813417)]
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 155ms/step
[[0.7346114 ]
 [0.7487903 ]
 [0.75875074]
 [0.7726484 ]
 [0.7925733 ]] [0.7346114] [7.890142]
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 155ms/step
[[0.7600251 ]
 [0.7577293 ]
 [0.7647611 ]
 [0.76835245]
 [0.77967775]] [0.7600251] [2.5857916]
Current iteration: 6, the best portfolio found was portfolio: 0


  1%|          | 903/100000 [00:09<17:57, 91.99it/s]


All assets have been tested
Length of close to best is: 7
Sharpe ratios (first is best_sharpe): [np.float64(8.78824281067013), np.float64(13.296164616270707), np.float64(8.149104476630143), np.float64(8.268452573465987), np.float64(8.270985800181936), np.float64(8.271483196820341), np.float64(8.271483233221367)]
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 151ms/step
[[0.7864418 ]
 [0.79393077]
 [0.8017476 ]
 [0.79819787]
 [0.7867136 ]] [0.7864418] [0.03456037]
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 154ms/step
[[0.84496135]
 [0.8586028 ]
 [0.8627971 ]
 [0.8613964 ]
 [0.85398304]] [0.84496135] [1.0677055]
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 157ms/step
[[0.77776   ]
 [0.78369075]
 [0.7861286 ]
 [0.78690565]
 [0.77882504]] [0.77776] [0.13693373]
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 153ms/step
[[0.7135139 ]
 [0.7197837 ]
 [0.7226949 ]
 [0.72186416]
 [0.7138723 ]] [0.7135139] [0.05023066]
[1m1/1[0m 

In [30]:
def extract_asset_returns(raw_data, assets, start_date, end_date):
    if not isinstance(raw_data.index, pd.DatetimeIndex):
        raw_data.index = pd.to_datetime(raw_data.index)

    filtered_data = raw_data.loc[start_date:end_date, assets]

    return filtered_data

def chain_portfolio_performance(weekly_series_list, starting_value=100):
    continuous_series = pd.Series()
    current_value = starting_value

    for week_series in weekly_series_list:
        # Normalize the week so that it starts at 1 (or current_value)
        week_normalized = week_series / week_series.iloc[0]
        # Scale the normalized week to start at current_value
        week_scaled = week_normalized * current_value
        # Update the current_value to the last value of this week
        current_value = week_scaled.iloc[-1]
        # Append the week_series to the continuous_series
        continuous_series = pd.concat([continuous_series, week_scaled])
    
    return continuous_series

ML_portfolio = []
initial_value = 100

for i in range(len(all_good_portfolios)):
    curr_best_portfolio = all_good_portfolios[i]['portfolio']
    start_date = all_good_portfolios[i]['start_date']
    end_date = all_good_portfolios[i]['end_date']
    
    if not curr_best_portfolio:
        if i == 0:
            previous_value = initial_value
        else:
            previous_value = ML_portfolio[-1].iloc[-1]
        # Create a series with the same index as in raw_data for the window duration, all at previous_value.
        window_index = raw_data.loc[start_date:end_date].index
        portfolio_daily_returns = pd.Series(previous_value, index=window_index)
    else:
        best_curr_port_assets = curr_best_portfolio['tickers']
        best_curr_port_assets_test_data = extract_asset_returns(raw_data, best_curr_port_assets, start_date, end_date)
        curr_best_portfolio_weights = curr_best_portfolio['weights']
        weighted_returns = best_curr_port_assets_test_data.mul(curr_best_portfolio_weights, axis='columns')
        portfolio_daily_returns = weighted_returns.sum(axis=1)
    
    ML_portfolio.append(portfolio_daily_returns)

ML_portfolio_streamed = chain_portfolio_performance(ML_portfolio, starting_value=initial_value)
ML_portfolio_streamed


The behavior of array concatenation with empty entries is deprecated. In a future version, this will no longer exclude empty items when determining the result dtype. To retain the old behavior, exclude the empty entries before the concat operation.



2024-10-03    100.000000
2024-10-04    102.110079
2024-10-07    101.624737
2024-10-08    101.720434
2024-10-09    102.632614
2024-10-10    102.632614
2024-10-11    105.446875
2024-10-14    106.093349
2024-10-15    106.462814
2024-10-16    107.989919
2024-10-17    107.989919
2024-10-18    108.211466
2024-10-21    106.250251
2024-10-22    106.529839
2024-10-23    107.089148
2024-10-24    107.089148
2024-10-25    105.872684
2024-10-28    106.350510
2024-10-29    105.615510
2024-10-30    106.952206
2024-10-31    106.952206
2024-11-01    107.073910
2024-11-04    106.529659
2024-11-05    108.266362
2024-11-06    111.583198
2024-11-07    111.583198
2024-11-08    113.462892
2024-11-11    114.924717
2024-11-12    113.455528
2024-11-13    112.629057
2024-11-14    112.629057
2024-11-15    112.763832
2024-11-18    112.215615
2024-11-19    110.711150
2024-11-20    113.456942
2024-11-21    113.456942
2024-11-22    121.490888
2024-11-25    117.950595
2024-11-26    116.662440
2024-11-27    119.256233


In [31]:
ML_daily_returns = ML_portfolio_streamed.pct_change()
ML_cumulative_returns = (1 + ML_daily_returns).cumprod()

ML_cumulative_returns.iloc[0] = 1
ML_portfolio_normalized = (ML_cumulative_returns / ML_cumulative_returns.iloc[0]) * 100

Nasdaq_comp = getNasdaq_comp(ML_portfolio_streamed.index[0], ML_portfolio_streamed.index[-1])

fig = go.Figure()

fig.add_trace(go.Scatter(
    x=Nasdaq_comp.index,
    y=Nasdaq_comp['Normalized'],
    mode='lines',
    name='Nasdaq Composite'
))

fig.add_trace(go.Scatter(
    x=ML_cumulative_returns.index,
    y=ML_portfolio_normalized,
    mode='lines',
    name='Portfolio Growth'
))

fig.update_layout(
    title='Comparison of Portfolio vs. Nasdaq Composite Growth',
    xaxis_title='Date',
    yaxis_title='Normalized Value (Base 100%)',
    xaxis=dict(
        type='date',
        tickformat='%b %Y',
        tickmode='auto'
    )
)

[*********************100%***********************]  1 of 1 completed

Omitted assets: []
Time to fetch data: 0.09 seconds
Max combination of assets with complete data: 1





## 7.0 Testing Against Others

In [32]:
best_port_assets = best_portfolio['tickers']
best_port_assets_test_data = raw_data_test.loc[:, best_port_assets]

Nasdaq_comp = getNasdaq_comp(best_port_assets_test_data.index[0], best_port_assets_test_data.index[-1])

best_portfolio_weights = best_portfolio['weights']
normalized_prices = best_port_assets_test_data.div(best_port_assets_test_data.iloc[0])
daily_returns = normalized_prices.pct_change()
weighted_returns = daily_returns.mul(best_portfolio_weights, axis='columns')
portfolio_daily_returns = weighted_returns.sum(axis=1)
portfolio_cumulative_returns = (1 + portfolio_daily_returns).cumprod()

portfolio_start = portfolio_cumulative_returns.iloc[0]
portfolio_normalized = (portfolio_cumulative_returns / portfolio_start) * 100

fig = go.Figure()

fig.add_trace(go.Scatter(
    x=Nasdaq_comp.index,
    y=Nasdaq_comp['Normalized'],
    mode='lines',
    name='Nasdaq Composite'
))

fig.add_trace(go.Scatter(
    x=portfolio_cumulative_returns.index,
    y=portfolio_normalized,
    mode='lines',
    name='Portfolio Growth'
))

fig.add_trace(go.Scatter(
    x=ML_cumulative_returns.index,
    y=ML_portfolio_normalized,
    mode='lines',
    name='Portfolio Growth'
))

fig.update_layout(
    title='Comparison of Portfolio vs. Nasdaq Composite Growth',
    xaxis_title='Date',
    yaxis_title='Normalized Value (Base 100)',
    xaxis=dict(
        type='date',
        tickformat='%b %Y',
        tickmode='auto'
    )
)

[*********************100%***********************]  1 of 1 completed

Omitted assets: []
Time to fetch data: 0.05 seconds
Max combination of assets with complete data: 1





## 8.0 Find Optimal Portfolio Size

In [33]:
all_portfolios, dominant_portfolios = MonteCarloRBA(names, cov, annualized_returns, 10000, 'sharpe', 3, 50)

100%|██████████| 10000/10000 [02:37<00:00, 63.57it/s]


In [34]:
rd_portfolio_sizes = [len(portfolio['tickers']) for portfolio in all_portfolios]
rd_volatility = [np.sqrt(portfolio['variance']) for portfolio in all_portfolios]
rd_returns = [portfolio['return'] for portfolio in all_portfolios]

volatility_by_size = defaultdict(list)
for size, vol, ret in zip(rd_portfolio_sizes, rd_volatility, rd_returns):
    volatility_by_size[size].append((vol, ret))

average_volatility = {size: np.mean([v[0] for v in vols]) for size, vols in volatility_by_size.items()}
average_returns = {size: np.mean([v[1] for v in vols]) for size, vols in volatility_by_size.items()}

sorted_sizes = sorted(average_volatility.keys())
sorted_average_vols = [average_volatility[size] for size in sorted_sizes]
sorted_average_rets = [average_returns[size] for size in sorted_sizes]

fig = go.Figure()
fig.add_trace(go.Scatter(
    x=sorted_sizes,
    y=sorted_average_vols,
    mode='lines',
    name='Average Volatility'
))

fig.update_layout(
    title='Average Volatility and Returns by Portfolio Size',
    xaxis_title='Number of Assets in Portfolio',
    yaxis_title='Average Value',
    xaxis=dict(type='category'),
)

fig.show()

In [35]:
all_portfolios, dominant_portfolios = MonteCarloRBA(names, cov, annualized_returns, 10000, 'vol', 50, 50)


100%|██████████| 10000/10000 [05:17<00:00, 31.46it/s]


In [27]:
rd_portfolio_sizes = [sum(weight > 0 for weight in portfolio['weights']) for portfolio in all_portfolios]

portfolio_size_counts = Counter(rd_portfolio_sizes)

sizes = sorted(portfolio_size_counts.keys())
counts = [portfolio_size_counts[size] for size in sizes]

fig = go.Figure(data=[go.Bar(x=sizes, y=counts)])

fig.update_layout(
    title='Distribution of Portfolio Sizes After Optimization',
    xaxis_title='Number of assets in portfolio after optimizing',
    yaxis_title='Number of Portfolios',
    xaxis=dict(type='category'),
    yaxis=dict(type='linear')
)

fig.show()

In [None]:
#fig.write_html("PortfolioSize.html")