In [53]:
'''from google.colab import drive
drive.mount('/content/drive')'''

"from google.colab import drive\ndrive.mount('/content/drive')"

In [54]:
'''cd drive/MyDrive/RoboA/'''

'cd drive/MyDrive/RoboA/'

# Markowitz Efficient Frontier

## 1. Imports

In [55]:
import pandas as pd
import numpy as np
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import statistics
from tqdm import tqdm
from collections import defaultdict, Counter
import math
from itertools import combinations
from sklearn.preprocessing import MinMaxScaler
import tensorflow as tf
from tensorflow.keras.models import Sequential # type: ignore
from tensorflow.keras.layers import LSTM, Dense, Dropout # type: ignore

from fetchData import fetch_raw_data_yf, getSNP500, fetch_raw_data_yf_all, getNasdaq_comp
from MonteCarloRBA import MonteCarloRBA
from PortfolioFunction import maximize_sharpe, create_correlation_matrix, get_sharpe_ratio, get_matrices, maximize_sharpe_SLSQP
from LearningRBA import find_best_asset_to_remove, find_asset_to_add


## 2. Fetch Data

### Get all Nasdaq Stocks

In [56]:
assets= [
    "AAPL",  # Apple Inc.
    "MSFT",  # Microsoft Corporation
    "AMZN",  # Amazon.com Inc.
    "GOOGL", # Alphabet Inc. (Google) Class A
    "GOOG",  # Alphabet Inc. (Google) Class C
    "META",    # Meta Platforms Inc (formerly Facebook)
    "TSLA",  # Tesla Inc
    "UA", # Berkshire Hathaway Inc. Class B
    "JPM",   # JPMorgan Chase & Co.
    "V",     # Visa Inc.
    "JNJ",   # Johnson & Johnson
    "WMT",   # Walmart Inc.
    "PG",    # Procter & Gamble Co.
    "UNH",   # UnitedHealth Group Inc.
    "MA",    # Mastercard Inc.
    "NVDA",  # NVIDIA Corporation
    "HD",    # Home Depot Inc.
    "BAC",   # Bank of America Corp
    "DIS",   # Walt Disney Co
    "PYPL",  # PayPal Holdings
    "VZ",    # Verizon Communications Inc.
    "ADBE",  # Adobe Inc.
    "CMCSA", # Comcast Corporation
    "NFLX",  # Netflix Inc.
    "KO",    # Coca-Cola Co
    "NKE",   # NIKE Inc.
    "PFE",   # Pfizer Inc.
    "MRK",   # Merck & Co., Inc.
    "PEP",   # PepsiCo, Inc.
    "T",     # AT&T Inc.
    "ABT",   # Abbott Laboratories
    "CRM",   # Salesforce.com Inc.
    "ORCL",  # Oracle Corporation
    "ABBV",  # AbbVie Inc.
    "CSCO",  # Cisco Systems, Inc.
    "INTC",  # Intel Corporation
    "TMO",   # Thermo Fisher Scientific Inc.
    "XOM",   # Exxon Mobil Corporation
    "ACN",   # Accenture plc
    "LLY",   # Eli Lilly and Company
    "COST",  # Costco Wholesale Corporation
    "MCD",   # McDonald's Corp
    "DHR",   # Danaher Corporation
    "MDT",   # Medtronic plc
    "NEE",   # NextEra Energy, Inc.
    "BMY",   # Bristol-Myers Squibb Company
    "QCOM",  # Qualcomm Inc
    "CVX",   # Chevron Corporation
    "WFC",   # Wells Fargo & Co
    "LMT",    # Lockheed Martin Corporation
    "GS",   # Goldman Sachs Group, Inc.
    "MS",   # Morgan Stanley
    "IBM",  # International Business Machines Corporation
    "GE",   # General Electric Company
    "F",    # Ford Motor Company
    "GM",   # General Motors Company
    "UBER", # Uber Technologies, Inc.
    "LYFT", # Lyft, Inc.
    "SNAP", # Snap Inc.
    "TWTR", # Twitter, Inc.
    "SPOT", # Spotify Technology S.A.
    "AMD",  # Advanced Micro Devices, Inc.
    "TXN",  # Texas Instruments Incorporated
    "BABA", # Alibaba Group Holding Limited
    "SAP",  # SAP SE
    "HON",  # Honeywell International Inc.
    "BA",   # Boeing Company
    "RTX",  # Raytheon Technologies Corporation
    "CAT",  # Caterpillar Inc.
    "DE",   # Deere & Company
    "MMM",  # 3M Company
    "DUK",  # Duke Energy Corporation
    "SO",   # Southern Company
    "EXC",  # Exelon Corporation
    "NEE",  # NextEra Energy, Inc.
    "AEP",  # American Electric Power Company, Inc.
    "SRE",  # Sempra Energy
    "ETN",  # Eaton Corporation plc
    "EMR",  # Emerson Electric Co.
    "SYY",  # Sysco Corporation
    "KR",   # Kroger Co.
    "GIS",  # General Mills, Inc.
    "K",    # Kellogg Company
    "CPB",  # Campbell Soup Company
    "MO",   # Altria Group, Inc.
    "PM",   # Philip Morris International Inc.
    "BTI",  # British American Tobacco plc
    "RDY",  # Dr. Reddy's Laboratories Ltd.
    "GILD", # Gilead Sciences, Inc.
    "BIIB", # Biogen Inc.
    "CELG", # Celgene Corporation
    "AMGN", # Amgen Inc.
    "SYK",  # Stryker Corporation
    "BSX",  # Boston Scientific Corporation
    "ISRG", # Intuitive Surgical, Inc.
    "ZBH",  # Zimmer Biomet Holdings, Inc.
    "EW",   # Edwards Lifesciences Corporation
    "RMD",  # ResMed Inc.
    "VRTX", # Vertex Pharmaceuticals Incorporated
    "REGN",  # Regeneron Pharmaceuticals, Inc.
]

assets = getSNP500()

In [57]:
start_date = "2024-01-01"
end_date = "2025-01-01"
raw_data, asset_errors, max_combination= fetch_raw_data_yf_all(assets, start_date, end_date)

[*********************100%***********************]  503 of 503 completed

3 Failed downloads:
['BRK.B']: YFTzMissingError('possibly delisted; no timezone found')
['BEN', 'BF.B']: YFPricesMissingError('possibly delisted; no price data found  (1d 2024-01-01 -> 2025-01-01)')


Omitted assets ( 3 ):  ['BRK.B', 'BEN', 'BF.B']
Time to fetch data: 15.85 seconds


### Split into test and train

In [58]:
split = len(raw_data.index) // 2

raw_data_train = raw_data.iloc[:split]
raw_data_test = raw_data.iloc[split:]

## 3. Mean, Volatility and Covariance

In [None]:
names, annualized_returns, weighted_returns_matrix, normal_returns_matrix, cov, correlation_matrix = get_matrices(raw_data_train)

In [60]:
volatility = np.sqrt(np.diag(cov))

risk_free_rate=0
sharpe_ratios = (annualized_returns - risk_free_rate) / volatility

In [61]:
hover_texts = [
    f"<br>Symbol: {ticker} <br>Volatility: {vol:.3f} <br>Returns: {ret:.3%} <br>Sharpe Ratio: {sr:.3f}"
    for ticker, vol, ret, sr in zip(names, volatility, annualized_returns, sharpe_ratios)
]

fig = go.Figure(data=go.Scatter(
    x=volatility,
    y=annualized_returns,
    mode='markers',
    hoverinfo='text',
    hovertext=hover_texts,
    marker=dict(color=sharpe_ratios, colorscale = 'RdBu', size=6, line=dict(width=1), colorbar=dict(title="Sharpe<br>Ratio")
    )
))

fig.update_layout(
    title='Annual Performance of Individual Assets',
    xaxis_title='Volatility (Standard Deviation)',
    yaxis_title='Annualized Returns',
    width = 1920,
    height = 1080,
    font=dict(
        family="Cambria",
        size=18,
    )
)

fig.show()
#fig.write_html("PerformanceofIndividualAssets.html")
#fig.write_image("PerformanceofIndividualAssets.png", format='png', width=1920, height=1080)

### Demonstrating SLSQP

In [62]:
rand_assets = np.random.choice(list(names), 5, replace=False)

selected_returns = annualized_returns.loc[rand_assets].values
selected_covariances = cov.loc[rand_assets, rand_assets].values

optimized_weights, weights_history, sharpe_history = maximize_sharpe_SLSQP(selected_returns, selected_covariances)

In [63]:
fig = make_subplots(rows=1, cols=2, subplot_titles=("Portfolio Weights Over Iterations", "Sharpe Ratio Over Iterations"))

for i in range(len(selected_returns)):
    fig.add_trace(go.Scatter(x=list(range(len(weights_history))), 
                             y=[h[i] for h in weights_history], 
                             mode='lines+markers', 
                             name=f'Asset {i+1} Weight'), 
                  row=1, col=1)

fig.add_trace(go.Scatter(x=list(range(len(sharpe_history))), 
                         y=sharpe_history, 
                         mode='lines+markers', 
                         name='Sharpe Ratio'), 
              row=1, col=2)

fig.update_layout(title_text='Portfolio Optimization Analysis',
                  xaxis_title='Iteration',
                  yaxis_title='Weight',
                  legend_title='Assets',
                    font=dict(
                        family="Cambria",
                        size=18,
                    )
)

# Update xaxis and yaxis properties for Sharpe Ratio subplot
fig.update_xaxes(title_text="Iteration", row=1, col=2)
fig.update_yaxes(title_text="Sharpe Ratio", row=1, col=2)

fig.show()

#fig.write_html("SLSQPDemo.html")
#fig.write_image("SLSQPDemo.png", format='png', width=1920, height=1080)

## 4.0 Monte Carlo Method

In [64]:
all_portfolios, dominant_portfolios = MonteCarloRBA(names, cov, annualized_returns, 10000, min_assets=8, max_assets=8)

100%|██████████| 10000/10000 [00:47<00:00, 209.32it/s]


In [65]:
iterations = [portfolio['iteration'] for portfolio in dominant_portfolios]
counts = list(range(1, len(dominant_portfolios) + 1))

# Create the Plotly graph
fig = go.Figure()

# Add a scatter plot to the figure with no markers and a smooth line
fig.add_trace(go.Scatter(x=iterations, y=counts,
                        mode='lines',  # Only lines, no markers
                        name='Dominant Portfolios',
                        line=dict(shape='spline')  # Setting line shape to spline for smoothness
))  

# Update the layout of the figure
fig.update_layout(
    title='Growth of Dominant Portfolios Over Iterations',
    xaxis_title='Portfolios Generated',
    yaxis_title='Number of Dominant Portfolios Found',
    height=1080,
    width=1920,
    font=dict(
        family="Cambria",
        size=18,
    )
)

# Show the figure
fig.show()
#fig.write_html("FrequencyOfDom.html")
#fig.write_image("FrequencyOfDom.png", format='png', width=1920, height=1080)

In [66]:
fig1 = go.Figure()

fig1.add_trace(go.Scatter(
    x=[np.sqrt(p["variance"]) for p in all_portfolios],
    y=[p["return"] for p in all_portfolios],
    mode='markers',
    marker=dict(
        color=[p["sharpe"] for p in all_portfolios],
        showscale=True,
        size=7,
        line=dict(width=1),
        colorscale="RdBu",
        colorbar=dict(title="Sharpe<br>Ratio")
    ),
    hoverinfo='text',
    text=[
        f"Return: {p['return']:.3%}<br>Volatility: {np.sqrt(p['variance']):.3f}<br>" +
        f"Sharpe Ratio: {p['return'] / (np.sqrt(p['variance'])):.3f}<br>" +
        "<br>".join([f"{p['tickers'][i]}: Weight={p['weights'][i]:.3f}" for i in range(len(p['tickers']))])
        for p in all_portfolios
    ]
))

fig1.update_layout(
    xaxis=dict(title='Volatility (Standard Deviation)'),
    yaxis=dict(title='Annualised Returns'),
    title='Monte Carlo Randomly Generated Portfolios',
    height=1080,
    width=1920,
    font=dict(
        family="Cambria",
        size=18,
    )
)
fig1.show()
#fig1.write_html("MonteCarlo.html")
#fig1.write_image("MonteCarlo.png", format='png', width=1920, height=1080)

In [67]:
fig2 = go.Figure()

fig2.add_trace(go.Scatter(
    x=[np.sqrt(p["variance"]) for p in dominant_portfolios],  # Convert variance to volatility
    y=[p["return"] for p in dominant_portfolios],
    mode='markers',
    marker=dict(
        size=7,
        line=dict(width=1),
        #showscale=True,
        #color=[p["return"] / (np.sqrt(p["variance"])) for p in dominant_portfolios],  # Sharpe Ratio
        #colorscale="RdBu",
        #colorbar=dict(title="Sharpe<br>Ratio")
    ),
    hoverinfo='text',
    text=[
        f"Return: {p['return']:.3%}<br>Volatility: {np.sqrt(p['variance']):.3f}<br>" +
        f"Sharpe Ratio: {p['sharpe']:.3f}<br>" +
        "<br>".join([f"{p['tickers'][i]}: Weight={p['weights'][i]:.3f}" for i in range(len(p['tickers']))])
        for p in dominant_portfolios
    ],
    name="Monte Carlo Portfolios"
))

fig2.add_trace(go.Scatter(
    x=volatility,
    y=annualized_returns,
    mode='markers',
    hoverinfo='text',
    hovertext=[
        f"{name} <br>Volatility: {vol:.3f} <br>Returns: {ret:.3%} <br>Sharpe Ratio: {sr:.3f}"
        for name, vol, ret, sr in zip(names, volatility, annualized_returns, sharpe_ratios)
    ],
    marker=dict(
        color='green',
        size=5,
        line=dict(width=1)
    ),
    name="Individual Assets"
))

fig2.update_layout(
    title='Monte Carlo Portfolios with Individual Assets',
    xaxis_title='Volatility (Standard Deviation)',
    yaxis_title='Annualized Return',
    legend=dict(y=5),
    height=1080,
    width=1920,
    font=dict(
        family="Cambria",
        size=18,
    )
)

fig2.show()
#fig2.write_html("MCMarkowitzBullet.html")
#fig2.write_image("MCMarkowitzBullet.png", format='png', width=1920, height=1080)

## 5.0 Machine Learning Method

### 5.1 Optimization Function Only

In [69]:
def MLRBA_V1(ticker, covariances, returns, num_iterations=None, risk_free_rate = 0, 
             return_power = 1, std_power = 1, return_weight=1/3, corr_weight=1/3, vol_weight= 1/3, num_assets = 8, base_portfolio = None):
    
    if num_iterations is None:
        num_iterations = min(math.comb(len(ticker), num_assets), 100000)
    
    if base_portfolio is None:
        base_portfolio = np.random.choice(list(ticker), num_assets, replace=False)
        #base_portfolio = list(ticker)[:num_assets]
    
    def _get_portfolio_stats (portfolio_assets, risk_free_rate = 0):
        p_asset_ret = returns.loc[portfolio_assets].values
        p_asset_var = covariances.loc[portfolio_assets, portfolio_assets].values
        best_p_weights = maximize_sharpe(p_asset_ret, p_asset_var)
        p_ret = np.dot(best_p_weights,p_asset_ret)
        p_var = np.dot(best_p_weights, p_asset_var @ best_p_weights)
        sharpe = get_sharpe_ratio(p_ret, p_var, risk_free_rate, return_power, std_power)

        return p_asset_ret, p_asset_var, sharpe, p_ret, p_var, best_p_weights

    def _update_portfolios_array(portfolios, assets, weights, p_ret, p_var):
        portfolios.append({
            "tickers": assets,
            "weights": weights,
            "return": p_ret,
            "variance": p_var,
            "sharpe": (p_ret-risk_free_rate)/np.sqrt(p_var),
        })

    all_portfolios = []
    
    curr_ret, curr_var, curr_weighted_sharpe, curr_p_return, curr_p_variance, curr_p_weights = _get_portfolio_stats(base_portfolio, risk_free_rate)
    _update_portfolios_array(all_portfolios, base_portfolio, curr_p_weights, curr_p_return, curr_p_variance)

    good_portfolios = all_portfolios.copy()
    best_portfolio = base_portfolio.copy()

    highest_weighted_sharpe = -np.inf
    highest_weighted_sharpe = curr_weighted_sharpe
    
    portfolios_tested = 0
    best_iteration = 0

    progress_bar = tqdm(total=num_iterations, desc="Portfolios Tested")
    for _ in range(num_iterations):
        asset_to_remove = find_best_asset_to_remove(best_portfolio, curr_var, curr_ret)
        new_portfolio = [str(asset) for asset in best_portfolio if asset != asset_to_remove]

        ranked_assets = find_asset_to_add(new_portfolio, ticker, covariances, returns,
                                          return_weight, corr_weight, vol_weight)

        asset_added = False

        for asset in ranked_assets.index:
            if asset in new_portfolio:
                continue

            test_portfolio = new_portfolio + [asset]
            portfolios_tested += 1
            progress_bar.update(1)

            new_returns, new_var, new_weighted_sharpe, new_p_return, new_p_variance, new_p_weights = _get_portfolio_stats(test_portfolio, risk_free_rate)
            _update_portfolios_array(all_portfolios, test_portfolio, new_p_weights, new_p_return, new_p_variance)

            if new_weighted_sharpe > highest_weighted_sharpe:
                best_iteration = portfolios_tested
                best_portfolio = test_portfolio
                curr_ret, curr_var = new_returns, new_var
                highest_weighted_sharpe = new_weighted_sharpe

                _update_portfolios_array(good_portfolios, test_portfolio, new_p_weights, new_p_return, new_p_variance)

                asset_added = True
                break  # Accept first asset that improves Sharpe

        if not asset_added:
            print("All assets have been tested or no improvement found.")
            break

    progress_bar.close()

    base_details = good_portfolios[0]
    best_details = good_portfolios[-1]

    return base_details, best_details, good_portfolios, all_portfolios, best_iteration  

base_portfolio, best_portfolio, good_portfolios, total_portfolios, best_iteration = MLRBA_V1(names, cov, annualized_returns)
base_portfolio, best_portfolio, len(total_portfolios), best_iteration

Portfolios Tested:   9%|▊         | 8559/100000 [00:56<10:00, 152.25it/s]

All assets have been tested or no improvement found.





({'tickers': array(['ISRG', 'KIM', 'RJF', 'LYB', 'NWS', 'PAYX', 'EA', 'MCHP'],
        dtype='<U5'),
  'weights': array([2.93934101e-02, 6.52048532e-01, 0.00000000e+00, 0.00000000e+00,
         0.00000000e+00, 3.18558058e-01, 0.00000000e+00, 3.88259303e-14]),
  'return': np.float64(1.2625840080457067),
  'variance': np.float64(0.028694998572856706),
  'sharpe': np.float64(7.453447220685816)},
 {'tickers': ['DHI', 'LMT', 'CBOE', 'BLK', 'K', 'CBRE', 'NI', 'DASH'],
  'weights': array([0.0654639 , 0.18578425, 0.10930454, 0.09492213, 0.10766445,
         0.05759625, 0.29186063, 0.08740386]),
  'return': np.float64(1.3821575555302261),
  'variance': np.float64(0.005216366026141533),
  'sharpe': np.float64(19.136985301189945)},
 8560,
 8066)

In [70]:
fig = go.Figure()

fig.add_trace(go.Scatter(
    x=[p["variance"]**0.5 for p in good_portfolios],  # Convert variance to volatility
    y=[p["return"] for p in good_portfolios],
    mode='markers',
    marker=dict(
        color=[p["sharpe"] for p in good_portfolios],  # Sharpe Ratio
        showscale=True,
        size=7,
        line=dict(width=1),
        colorscale="RdBu",
        colorbar=dict(title="Sharpe<br>Ratio")
    ),
    hoverinfo='text',
    text=[
        f"Return: {p['return']:.3%}<br>Volatility: {p['variance']**0.5:.3f}<br>" +
        f"Sharpe Ratio: {p['return'] / (p['variance']**0.5):.3f}<br>" +
        "<br>".join([f"{p['tickers'][i]}: Weight={p['weights'][i]:.3f}" for i in range(len(p['tickers']))])
        for p in good_portfolios
    ],
    name="Portfolios"
))

fig.update_layout(
    title='Convergence Strategy Generarted Portfolios',
    xaxis_title='Volatility (Standard Deviation)',
    yaxis_title='Annualized Return',
    legend=dict(y=5),
    width=1920,
    height=1080,
    font=dict(
        family="Cambria",
        size=18,
    )
)

fig.show()
#fig.write_html("ConvergenceRBA.html")
#fig.write_image("ConvergenceRBA.png", format='png', width=1920, height=1080)

fig.add_trace(go.Scatter(
    x=volatility,
    y=annualized_returns,
    mode='markers',
    hoverinfo='text',
    hovertext=[
        f"{name} <br>Volatility: {vol:.3f} <br>Returns: {ret:.3%} <br>Sharpe Ratio: {sr:.3f}"
        for name, vol, ret, sr in zip(names, volatility, annualized_returns, sharpe_ratios)
    ],
    marker=dict(
        color='green',
        size=5,
        line=dict(width=1)
    ),
    name="Individual Assets"
))

fig.update_layout(
    title='Convergence Strategy Generarted Portfolios with Individual Assets',
    xaxis_title='Volatility (Standard Deviation)',
    yaxis_title='Annualized Return',
    legend=dict(y=5),
    width=1920,
    height=1080,
    font=dict(
        family="Cambria",
        size=18,
    )
)


fig.show()

#fig.write_html("ConvergenceRBA+Asset.html")
#fig.write_image("ConvergenceRBA+Asset.png", format='png', width=1920, height=1080)


In [71]:
sharpe_ratios = [portfolio['sharpe'] for portfolio in total_portfolios]

fig = go.Figure(data=go.Scatter(x=list(range(len(sharpe_ratios))), y=sharpe_ratios, mode='lines+markers'))
fig.update_layout(title='Sharpe Ratio Over Iterations',
                xaxis_title='Iteration',
                yaxis_title='Sharpe Ratio',
                width=1920,
                height=1080,
                font=dict(
                    family="Cambria",
                    size=18,
                )
)
fig.show()

#### Comparing MLRBA_V1 with Monte Carlo

In [72]:
figC = go.Figure(fig2)
figC.add_trace(go.Scatter(
    x=[p["variance"]**0.5 for p in good_portfolios],  # Convert variance to volatility
    y=[p["return"] for p in good_portfolios],
    mode='markers',
    marker=dict(
        size=7,
        line=dict(width=1),
        color="Red",
    ),
    hoverinfo='text',
    text=[
        f"Return: {p['return']:.3%}<br>Volatility: {p['variance']**0.5:.3f}<br>" +
        f"Sharpe Ratio: {p['return'] / (p['variance']**0.5):.3f}<br>" +
        "<br>".join([f"{p['tickers'][i]}: Weight={p['weights'][i]:.3f}" for i in range(len(p['tickers']))])
        for p in good_portfolios
    ],
    name="Convergence Portfolios"
))

figC.update_layout(
    title='Convergence vs Monte Carlo vs Individual Assets',
    legend=dict(x=0.85, y=0.95),
    width=1920,
    height=1080,
    font=dict(
        family="Cambria",
        size=18,
    )
)

figC.show()
#figC.write_html("ComparisonOfConvergence+MC.html")
#figC.write_image("ComparisonOfConvergence+MC.png", format='png', width=1920, height=1080)

Portfolios Tested:   0%|          | 0/100000 [07:03<?, ?it/s]


### 5.2 Reinforcement Weight Training

In [73]:
def MLRBA_V2(ticker, covariances, returns, num_iterations=None, risk_free_rate = 0, 
             return_power = 1, std_power = 1, return_weight=1/3, corr_weight=1/3, vol_weight= 1/3, num_assets = 8, base_portfolio = None):
    
    if num_iterations is None:
        num_iterations = min(math.comb(len(ticker), num_assets), 100000)

    if base_portfolio is None:
        base_portfolio = np.random.choice(list(ticker), num_assets, replace=False)
        #base_portfolio = list(ticker)[:num_assets]

    def _get_portfolio_stats(portfolio_assets, risk_free_rate=0):
        p_asset_ret = returns.loc[portfolio_assets].values
        p_asset_var = covariances.loc[portfolio_assets, portfolio_assets].values
        best_p_weights = maximize_sharpe(p_asset_ret, p_asset_var)
        p_ret = np.dot(best_p_weights, p_asset_ret)
        p_var = np.dot(best_p_weights, p_asset_var @ best_p_weights)
        sharpe = get_sharpe_ratio(p_ret, p_var, risk_free_rate, return_power, std_power)
        return p_asset_ret, p_asset_var, sharpe, p_ret, p_var, best_p_weights

    def _update_portfolios_array(portfolios, assets, weights, p_ret, p_var):
        portfolios.append({
            "tickers": assets,
            "weights": weights,
            "return": p_ret,
            "variance": p_var,
            "sharpe": (p_ret - risk_free_rate) / np.sqrt(p_var),
        })

    all_portfolios = []

    curr_ret, curr_var, curr_weighted_sharpe, curr_p_return, curr_p_variance, curr_p_weights = _get_portfolio_stats(base_portfolio, risk_free_rate)
    _update_portfolios_array(all_portfolios, base_portfolio, curr_p_weights, curr_p_return, curr_p_variance)

    good_portfolios = all_portfolios.copy()
    best_portfolio = base_portfolio.copy()

    highest_weighted_sharpe = -np.inf
    highest_weighted_sharpe = curr_weighted_sharpe

    best_iteration = 0
    portfolios_tested = 0

    learning_rate = 0.03
    improvement_threshold = 0.001

    progress_bar = tqdm(total=num_iterations, desc="Portfolios Tested")
    for i in range(num_iterations):
        asset_to_remove = find_best_asset_to_remove(best_portfolio, curr_var, curr_ret)
        new_portfolio = [str(asset) for asset in best_portfolio if asset != asset_to_remove]

        ranked_assets = find_asset_to_add(new_portfolio, ticker, covariances, returns, return_weight, corr_weight, vol_weight)

        asset_added = False

        for asset in ranked_assets.index:
            portfolios_tested += 1
            progress_bar.update(1)
            
            copy_new_portfolio = new_portfolio.copy()
            copy_new_portfolio.append(asset)

            new_returns, new_var, new_weighted_sharpe, new_p_return, new_p_variance, new_p_weights = _get_portfolio_stats(copy_new_portfolio, risk_free_rate)
            _update_portfolios_array(all_portfolios, copy_new_portfolio, new_p_weights, new_p_return, new_p_variance)

            if new_weighted_sharpe > highest_weighted_sharpe:
                best_iteration = portfolios_tested
                improvement = new_weighted_sharpe - highest_weighted_sharpe
                highest_weighted_sharpe = new_weighted_sharpe
                best_portfolio = copy_new_portfolio
                curr_ret, curr_var = new_returns, new_var

                asset_added = True

                asset_return = returns.loc[asset]
                asset_vol = np.sqrt(covariances.loc[asset, asset])
                avg_return = returns.mean()
                avg_vol = np.sqrt(np.diag(covariances)).mean()

                corr_with_portfolio = correlation_matrix.loc[copy_new_portfolio, asset].drop(asset).mean()
                avg_corr_in_portfolio = correlation_matrix.loc[copy_new_portfolio].drop(asset, axis=1).mean().mean()

                # Update weights using the current learning rate
                return_weight += learning_rate * (asset_return - avg_return) / avg_return
                vol_weight    += learning_rate * (avg_vol - asset_vol) / avg_vol
                corr_weight   += learning_rate * (avg_corr_in_portfolio - corr_with_portfolio) / avg_corr_in_portfolio

                total = return_weight + corr_weight + vol_weight
                return_weight /= total
                corr_weight /= total
                vol_weight /= total

                if improvement < improvement_threshold:
                    learning_rate *= 0.95
                else:
                    learning_rate *= 1.01

                _update_portfolios_array(good_portfolios, copy_new_portfolio, new_p_weights, new_p_return, new_p_variance)
                break  # stop at first valid improving asset

        if not asset_added:
            print("All assets have been tested or no improvement possible.")
            break

    base_details = good_portfolios[0]
    best_details = good_portfolios[-1]

    return base_details, best_details, good_portfolios, all_portfolios, best_iteration


base_portfolio, best_portfolio, good_portfolios, total_portfolios, best_iteration = MLRBA_V2(names, cov, annualized_returns)
best_portfolio, best_portfolio['sharpe'], best_iteration

Portfolios Tested:   2%|▏         | 1675/100000 [00:11<11:11, 146.52it/s]

All assets have been tested or no improvement possible.





({'tickers': ['DHI', 'CBRE', 'K', 'CBOE', 'ATO', 'LMT', 'GEHC', 'FTNT'],
  'weights': array([0.09440343, 0.08090165, 0.11753208, 0.13202246, 0.17753632,
         0.19417899, 0.1537305 , 0.04969457]),
  'return': np.float64(1.4942363330998354),
  'variance': np.float64(0.00640286861989156),
  'sharpe': np.float64(18.673769636732786)},
 np.float64(18.673769636732786),
 1182)

In [74]:
fig = go.Figure()

fig.add_trace(go.Scatter(
    x=[p["variance"]**0.5 for p in good_portfolios],  # Convert variance to volatility
    y=[p["return"] for p in good_portfolios],
    mode='markers',
    marker=dict(
        color=[p["sharpe"] for p in good_portfolios],  # Sharpe Ratio
        showscale=True,
        size=7,
        line=dict(width=1),
        colorscale="RdBu",
        colorbar=dict(title="Sharpe<br>Ratio")
    ),
    hoverinfo='text',
    text=[
        f"Return: {p['return']:.3%}<br>Volatility: {p['variance']**0.5:.3f}<br>" +
        f"Sharpe Ratio: {p['return'] / (p['variance']**0.5):.3f}<br>" +
        "<br>".join([f"{p['tickers'][i]}: Weight={p['weights'][i]:.3f}" for i in range(len(p['tickers']))])
        for p in good_portfolios
    ],
    name="Portfolios"
))

fig.update_layout(
    title='Learning Convergence Portfolios',
    xaxis_title='Volatility (Standard Deviation)',
    yaxis_title='Annualized Return',
    legend=dict(y=5),
    width=1920,
    height=1080,
    font=dict(
        family="Cambria",
        size=18,
    )
)

fig.show()
#fig.write_html("LearningConvergenceRBA.html")
#fig.write_image("LearningConvergenceRBA.png", format='png', width=1920, height=1080)

fig.add_trace(go.Scatter(
    x=volatility,
    y=annualized_returns,
    mode='markers',
    hoverinfo='text',
    hovertext=[
        f"{name} <br>Volatility: {vol:.3f} <br>Returns: {ret:.3%} <br>Sharpe Ratio: {sr:.3f}"
        for name, vol, ret, sr in zip(names, volatility, annualized_returns, sharpe_ratios)
    ],
    marker=dict(
        color='green',
        size=5,
        line=dict(width=1)
    ),
    name="Individual Assets"
))

fig.update_layout(
    title='Learning Convergence Portfolios with Individual Assets',
    xaxis_title='Volatility (Standard Deviation)',
    yaxis_title='Annualized Return',
    legend=dict(y=5),
    width=1920,
    height=1080,
    font=dict(
        family="Cambria",
        size=18,
    )
)


fig.show()
#fig.write_html("LearningConvergenceRBA+Asset.html")
#fig.write_image("LearningConvergenceRBA+Asset.png", format='png', width=1920, height=1080)


In [75]:
sharpe_ratios = [portfolio['sharpe'] for portfolio in total_portfolios]

fig = go.Figure(data=go.Scatter(x=list(range(len(sharpe_ratios))), y=sharpe_ratios, mode='lines+markers'))
fig.update_layout(title='Sharpe Ratio Over Iterations',
                  xaxis_title='Iteration',
                  yaxis_title='Sharpe Ratio',
                  )
fig.show()

In [76]:
figC = go.Figure(fig2)
figC.add_trace(go.Scatter(
    x=[p["variance"]**0.5 for p in good_portfolios],  # Convert variance to volatility
    y=[p["return"] for p in good_portfolios],
    mode='markers',
    marker=dict(
        size=7,
        line=dict(width=1),
        color="Red",
    ),
    hoverinfo='text',
    text=[
        f"Return: {p['return']:.3%}<br>Volatility: {p['variance']**0.5:.3f}<br>" +
        f"Sharpe Ratio: {p['return'] / (p['variance']**0.5):.3f}<br>" +
        "<br>".join([f"{p['tickers'][i]}: Weight={p['weights'][i]:.3f}" for i in range(len(p['tickers']))])
        for p in good_portfolios
    ],
    name="Learning Convergence Portfolios"
))

figC.update_layout(
    title='Learning Convergence vs Monte Carlo vs Individual Assets',
    legend=dict(x=0.8, y=0.95),
    width=1920,
    height=1080,
    font=dict(
        family="Cambria",
        size=18,
    )
)

figC.show()
#figC.write_html("ComparisonOfLearningConvergence+MC.html")
#figC.write_image("ComparisonOfLearningConvergence+MC.png", format='png', width=1920, height=1080)

### Comparing V1 and V2

In [77]:
def run_MLRBA_multiple_times(names, cov, annualized_returns, num_runs, num_assets=8):

    def generate_rand_port(tickers, num_assets, num_runs):
        rand_port = []
        for _ in range (num_runs):
            base_portfolio = np.random.choice(list(tickers), num_assets, replace=False)
            rand_port.append(base_portfolio)

        return rand_port
    
    random_portfolios = generate_rand_port(names, num_assets=num_assets, num_runs=num_runs)

    total_good_portfolios_length_v1 = 0
    best_portfolios_v1 = []
    best_iterations_v1 = []
    
    total_good_portfolios_length_v2 = 0
    best_portfolios_v2 = []
    best_iterations_v2 = []
    
    for portfolio in random_portfolios:
        base_portfolio_v1, best_portfolio_v1, good_portfolios_v1, _, best_iteration_v1 = MLRBA_V1(names, cov, annualized_returns, base_portfolio=portfolio)
        total_good_portfolios_length_v1 += len(good_portfolios_v1)
        best_portfolios_v1.append(best_portfolio_v1)
        best_iterations_v1.append(best_iteration_v1)
        
        base_portfolio_v2, best_portfolio_v2, good_portfolios_v2, _, best_iteration_v2 = MLRBA_V2(names, cov, annualized_returns, base_portfolio=portfolio)
        total_good_portfolios_length_v2 += len(good_portfolios_v2)
        best_portfolios_v2.append(best_portfolio_v2)
        best_iterations_v2.append(best_iteration_v2)

        print(base_portfolio_v1['tickers'] == base_portfolio_v2['tickers'])

    average_length_v1 = total_good_portfolios_length_v1 / num_runs
    average_iteration_v1 = statistics.fmean(best_iterations_v1)
    std_dev_iteration_v1 = statistics.stdev(best_iterations_v1) if num_runs > 1 else 0

    average_length_v2 = total_good_portfolios_length_v2 / num_runs
    average_iteration_v2 = statistics.fmean(best_iterations_v2)
    std_dev_iteration_v2 = statistics.stdev(best_iterations_v2) if num_runs > 1 else 0

    
    results = {
        'v1': (base_portfolio_v1, average_length_v1, best_portfolios_v1, average_iteration_v1, std_dev_iteration_v1, best_iterations_v1),
        'v2': (base_portfolio_v2, average_length_v2, best_portfolios_v2, average_iteration_v2, std_dev_iteration_v2, best_iterations_v2)
    }
    
    return results

num_runs = 20
results = run_MLRBA_multiple_times(names, cov, annualized_returns, num_runs)

_, _, best_portfolios_v1, average_iteration_v1, std_dev_v1, best_iterations_v1 = results['v1']
_, _, best_portfolios_v2, average_iteration_v2, std_dev_v2, best_iterations_v2 = results['v2']

Portfolios Tested:   8%|▊         | 8422/100000 [00:57<10:23, 146.93it/s]


All assets have been tested or no improvement found.


Portfolios Tested:   2%|▏         | 1513/100000 [00:12<14:01, 116.97it/s]


All assets have been tested or no improvement possible.
[ True  True  True  True  True  True  True  True]


Portfolios Tested:   7%|▋         | 7445/100000 [00:47<09:45, 158.08it/s]


All assets have been tested or no improvement found.


Portfolios Tested:   1%|▏         | 1425/100000 [00:09<11:10, 146.94it/s]


All assets have been tested or no improvement possible.
[ True  True  True  True  True  True  True  True]


Portfolios Tested:   8%|▊         | 8194/100000 [00:54<10:06, 151.26it/s]


All assets have been tested or no improvement found.


Portfolios Tested:   2%|▏         | 1581/100000 [00:10<11:12, 146.35it/s]


All assets have been tested or no improvement possible.
[ True  True  True  True  True  True  True  True]


Portfolios Tested:   6%|▌         | 5988/100000 [00:38<10:01, 156.38it/s]


All assets have been tested or no improvement found.


Portfolios Tested:   1%|          | 725/100000 [00:04<11:09, 148.23it/s]


All assets have been tested or no improvement possible.
[ True  True  True  True  True  True  True  True]


Portfolios Tested:   8%|▊         | 7772/100000 [00:49<09:46, 157.30it/s]


All assets have been tested or no improvement found.


Portfolios Tested:   1%|          | 1181/100000 [00:07<10:58, 150.04it/s]


All assets have been tested or no improvement possible.
[ True  True  True  True  True  True  True  True]


Portfolios Tested:   5%|▍         | 4898/100000 [00:30<09:53, 160.13it/s]


All assets have been tested or no improvement found.


Portfolios Tested:   2%|▏         | 1559/100000 [00:10<11:04, 148.10it/s]


All assets have been tested or no improvement possible.
[ True  True  True  True  True  True  True  True]


Portfolios Tested:   8%|▊         | 7770/100000 [00:49<09:50, 156.19it/s]


All assets have been tested or no improvement found.


Portfolios Tested:   2%|▏         | 1606/100000 [00:10<11:08, 147.29it/s]


All assets have been tested or no improvement possible.
[ True  True  True  True  True  True  True  True]


Portfolios Tested:   6%|▌         | 6036/100000 [00:39<10:12, 153.50it/s]


All assets have been tested or no improvement found.


Portfolios Tested:   2%|▏         | 1584/100000 [00:11<11:30, 142.49it/s]


All assets have been tested or no improvement possible.
[ True  True  True  True  True  True  True  True]


Portfolios Tested:   7%|▋         | 7045/100000 [00:51<11:18, 136.90it/s]


All assets have been tested or no improvement found.


Portfolios Tested:   2%|▏         | 1584/100000 [00:12<12:42, 129.12it/s]


All assets have been tested or no improvement possible.
[ True  True  True  True  True  True  True  True]


Portfolios Tested:   7%|▋         | 6503/100000 [00:48<11:42, 133.10it/s]


All assets have been tested or no improvement found.


Portfolios Tested:   1%|▏         | 1463/100000 [00:11<13:08, 125.03it/s]


All assets have been tested or no improvement possible.
[ True  True  True  True  True  True  True  True]


Portfolios Tested:   8%|▊         | 7670/100000 [00:57<11:29, 133.82it/s]


All assets have been tested or no improvement found.


Portfolios Tested:   2%|▏         | 1695/100000 [00:14<13:52, 118.02it/s]


All assets have been tested or no improvement possible.
[ True  True  True  True  True  True  True  True]


Portfolios Tested:   8%|▊         | 8231/100000 [01:01<11:29, 133.10it/s]


All assets have been tested or no improvement found.


Portfolios Tested:   2%|▏         | 1579/100000 [00:12<13:00, 126.03it/s]


All assets have been tested or no improvement possible.
[ True  True  True  True  True  True  True  True]


Portfolios Tested:   9%|▉         | 8762/100000 [01:05<11:24, 133.20it/s]


All assets have been tested or no improvement found.


Portfolios Tested:   1%|▏         | 1459/100000 [00:11<13:09, 124.88it/s]


All assets have been tested or no improvement possible.
[ True  True  True  True  True  True  True  True]


Portfolios Tested:   8%|▊         | 7507/100000 [00:54<11:12, 137.58it/s]


All assets have been tested or no improvement found.


Portfolios Tested:   2%|▏         | 1647/100000 [00:13<13:07, 124.89it/s]


All assets have been tested or no improvement possible.
[ True  True  True  True  True  True  True  True]


Portfolios Tested:   5%|▌         | 5331/100000 [00:40<12:00, 131.41it/s]


All assets have been tested or no improvement found.


Portfolios Tested:   2%|▏         | 1519/100000 [00:12<13:19, 123.20it/s]


All assets have been tested or no improvement possible.
[ True  True  True  True  True  True  True  True]


Portfolios Tested:   7%|▋         | 7145/100000 [00:53<11:34, 133.73it/s]


All assets have been tested or no improvement found.


Portfolios Tested:   2%|▏         | 1708/100000 [00:14<13:33, 120.77it/s]


All assets have been tested or no improvement possible.
[ True  True  True  True  True  True  True  True]


Portfolios Tested:   8%|▊         | 8085/100000 [00:59<11:14, 136.37it/s]


All assets have been tested or no improvement found.


Portfolios Tested:   2%|▏         | 1506/100000 [00:11<12:46, 128.58it/s]


All assets have been tested or no improvement possible.
[ True  True  True  True  True  True  True  True]


Portfolios Tested:   7%|▋         | 7177/100000 [00:52<11:23, 135.90it/s]


All assets have been tested or no improvement found.


Portfolios Tested:   2%|▏         | 1693/100000 [00:13<12:39, 129.52it/s]


All assets have been tested or no improvement possible.
[ True  True  True  True  True  True  True  True]


Portfolios Tested:   8%|▊         | 8473/100000 [01:04<11:31, 132.34it/s]


All assets have been tested or no improvement found.


Portfolios Tested:   2%|▏         | 1840/100000 [00:14<12:57, 126.20it/s]


All assets have been tested or no improvement possible.
[ True  True  True  True  True  True  True  True]


Portfolios Tested:   7%|▋         | 6934/100000 [00:53<11:53, 130.48it/s]


All assets have been tested or no improvement found.


Portfolios Tested:   2%|▏         | 1580/100000 [00:12<13:28, 121.80it/s]

All assets have been tested or no improvement possible.
[ True  True  True  True  True  True  True  True]





In [None]:
from plotly.subplots import make_subplots

avg_sharpe_v1 = np.mean([portfolio['sharpe'] for portfolio in best_portfolios_v1])
avg_sharpe_v2 = np.mean([portfolio['sharpe'] for portfolio in best_portfolios_v2])

fig = make_subplots(
    rows=1, cols=2,
    subplot_titles=[
        "Average Iterations to Find Best Portfolio",
        "Average Highest Sharpe Ratio"
    ]
)

fig.add_trace(go.Bar(
    x=['Standard Convergence', 'Learning Convergergence'],
    y=[average_iteration_v1, average_iteration_v2],
    name='Iterations',
    error_y=dict(type='data', array=[std_dev_v1, std_dev_v2], visible=True),
    width=0.4
), row=1, col=1)

fig.add_trace(go.Bar(
    x=['Standard Convergence', 'Learning Convergergence'],
    y=[avg_sharpe_v1, avg_sharpe_v2],
    name='Sharpe Ratio',
    width=0.4
), row=1, col=2)

# Axis titles
fig.update_xaxes(title_text='Method Version', row=1, col=1)
fig.update_yaxes(title_text='Average Iterations', row=1, col=1)

fig.update_xaxes(title_text='Method Version', row=1, col=2)
fig.update_yaxes(title_text='Average Sharpe Ratio', row=1, col=2)

# Layout and display
fig.update_layout(
    title_text='Standard Convergence vs Learning Convergence',
    showlegend=False,
    font=dict(
        family="Cambria",
        size=18,
    )
)

fig.show()
#fig.write_html("StandardvsLearning.html")
#fig.write_image("StandardvsLearning.png", format='png')

## 6.0 Portfolio Prediction using LSTM

In [50]:
class PortfolioPredictor:
    def __init__(self, raw_data_train, raw_data_test, best_portfolio, n_steps=1, epochs=50, batch_size=32):
        self.raw_data_train = raw_data_train
        self.raw_data_test = raw_data_test
        self.best_portfolio = best_portfolio
        self.n_steps = n_steps
        self.epochs = epochs
        self.batch_size = batch_size
        self.model = None
        self.history = None

    def preprocess_data(self):
        best_portfolio_data_train = self.raw_data_train[self.best_portfolio['tickers']]
        best_portfolio_data_test = self.raw_data_test[self.best_portfolio['tickers']]
        weights = np.array(self.best_portfolio['weights'])

        # Use a scaler fitted on a broader dataset so that training/test normalization is consistent
        self.scaler = MinMaxScaler(feature_range=(0, 1))
        # Fit on the entire raw_data (or on a fixed training period) for consistency
        full_data = pd.concat([best_portfolio_data_train, best_portfolio_data_test])
        self.scaler.fit(full_data)
        
        normalized_train_data = self.scaler.transform(best_portfolio_data_train)
        normalized_test_data = self.scaler.transform(best_portfolio_data_test)

        self.weighted_returns_train = np.dot(normalized_train_data, weights)
        self.weighted_returns_test = np.dot(normalized_test_data, weights)

    def create_datasets(self, data):
        X, y = [], []
        for i in range(len(data) - self.n_steps):
            v = data[i:(i + self.n_steps), :]
            X.append(v)
            y.append(data[i + self.n_steps, :])
        return np.array(X), np.array(y)

    def build_model(self):
        self.model = Sequential([
            LSTM(250, activation='relu', return_sequences=True),
            Dropout(0.2),
            LSTM(50, activation='relu', return_sequences=False),
            Dropout(0.2),
            Dense(1),
        ])

        def tf_weighted_mse(y_true, y_pred, power=3):
            n = tf.shape(y_true)[0]
            normalized_index = tf.cond(
                tf.equal(n, 1),
                lambda: tf.ones([n], dtype=tf.float32),
                lambda: tf.cast(tf.range(n), tf.float32) / tf.cast(n - 1, tf.float32)
            )
            weights = tf.pow(normalized_index, power)
            weights += 1e-6
            weights /= tf.reduce_sum(weights)
            
            squared_errors = tf.square(y_true - y_pred)
            weighted_squared_errors = weights * squared_errors
            return tf.reduce_mean(weighted_squared_errors)

        self.model.compile(optimizer='adam', loss=tf_weighted_mse)

    def train_model(self):
        self.X_train_weighted, self.y_train_weighted = self.create_datasets(self.weighted_returns_train.reshape(-1, 1))
        self.history = self.model.fit(self.X_train_weighted, self.y_train_weighted, epochs=self.epochs, batch_size=self.batch_size, validation_split=0.001, shuffle=False, verbose=0)

    def predict(self):
        X_test_weighted, y_test_weighted = self.create_datasets(self.weighted_returns_test.reshape(-1, 1))
        
        self.predictions = self.model.predict(X_test_weighted)
        self.y_test_weighted = y_test_weighted
        
        return self.predictions

    def normalize_cumulative_returns(self, data):
        data_series = pd.Series(data.flatten())
        pct_change = data_series.pct_change().fillna(0)
        cum_returns = (1 + pct_change).cumprod()
        normalized_returns = cum_returns * 100
        return normalized_returns

    def normalize_cumulative_returns_with_baseline(self, data, baseline):
        data_series = pd.Series(data.flatten())
        pct_change = data_series.pct_change().fillna(0)
        cum_returns = (1 + pct_change).cumprod()
        normalized_returns = cum_returns * baseline
        return normalized_returns

    def plot_loss(self):
        fig = go.Figure()
        fig.add_trace(go.Scatter(x=np.arange(1, len(self.history.history['loss'])+1), y=self.history.history['loss'], mode='lines', name='Training Loss'))
        fig.add_trace(go.Scatter(x=np.arange(1, len(self.history.history['val_loss'])+1), y=self.history.history['val_loss'], mode='lines', name='Validation Loss'))
        fig.update_layout(title='Training and Validation Loss Over Epochs',
                          xaxis_title='Epoch',
                          yaxis_title='Loss',
                          legend_title='Type of Loss')
        fig.show()
        
    def plot_predictions(self):
        normalized_train = self.normalize_cumulative_returns(self.y_train_weighted)
        training_end_value = normalized_train.iloc[-1]
        normalized_test = self.normalize_cumulative_returns_with_baseline(self.y_test_weighted, training_end_value)
        normalized_predicted = self.normalize_cumulative_returns_with_baseline(self.predictions, training_end_value)

        fig = go.Figure()
        fig.add_trace(go.Scatter(
            x=self.raw_data_train.index,
            y=normalized_train,
            mode='lines',
            name='Actual Training Returns'
        ))
        fig.add_trace(go.Scatter(
            x=self.raw_data_test.index[self.n_steps:],
            y=normalized_test,
            mode='lines',
            name='Actual Test Returns'
        ))
        fig.add_trace(go.Scatter(
            x=self.raw_data_test.index[self.n_steps:],
            y=normalized_predicted,
            mode='lines',
            name='Predicted Test Returns'
        ))
        fig.update_layout(
            title='Actual vs Predicted Weighted Portfolio Returns',
            xaxis_title='Date',
            yaxis_title='Normalized Returns',
            legend_title='Portfolio'
        )
        fig.show()

In [51]:
portfolio_predictor = PortfolioPredictor(raw_data_train, raw_data_test, best_portfolio, n_steps=3, epochs=30)

portfolio_predictor.preprocess_data()
portfolio_predictor.build_model()
portfolio_predictor.train_model()
prediction = portfolio_predictor.predict()   
portfolio_predictor.plot_loss()
portfolio_predictor.plot_predictions()

[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 166ms/step


In [None]:
def evaluate_portfolios_over_time(raw_data, window_size=5, num_windows=None, threshold=0.05, epochs=30):
    split = len(raw_data.index) // 2
    all_good_portfolios = []
    if num_windows is None:
        num_windows = split // window_size
    
    previous_best_portfolio = None 

    for i in range(num_windows):
        curr_split = i * window_size

        loop_raw_data_train = raw_data.iloc[:split + curr_split]
        loop_raw_data_test = raw_data.iloc[split + curr_split:]
        loop_names, loop_annualized_returns, _, _, loop_cov, _ = get_matrices(loop_raw_data_train)
        
        _, loop_best_portfolio, loop_good_portfolios, _, _ = MLRBA_V2(loop_names, loop_cov, loop_annualized_returns)
        best_sharpe = loop_best_portfolio['sharpe']
        
        close_to_best = []
        if previous_best_portfolio is not None:
            close_to_best.append(previous_best_portfolio)
        close_to_best.append(loop_best_portfolio)
        
        for j in range(len(loop_best_portfolio)):
            difference = abs((best_sharpe - loop_good_portfolios[j]['sharpe']) / best_sharpe)
            if difference < threshold:
                close_to_best.append(loop_good_portfolios[j])

        print(f'Length of close to best is: {len(close_to_best)}')

        sharpe_list = [portfolio['sharpe'] for portfolio in close_to_best]
        print("Sharpe ratios (first is best_sharpe):", sharpe_list)
        
        portfolio_results = {}
        for id, portfolio in enumerate(close_to_best):
            portfolio_predictor = PortfolioPredictor(loop_raw_data_train, loop_raw_data_test, portfolio, n_steps=window_size, epochs=epochs)
            portfolio_predictor.preprocess_data()
            portfolio_predictor.build_model()
            portfolio_predictor.train_model()
            prediction = portfolio_predictor.predict() 

            if len(prediction) >= window_size:
                end_pred = prediction[window_size-1]
            else:
                end_pred = prediction[-1]
            
            percentage_diff = (end_pred - prediction[0]) / prediction[0]
            print(prediction[:min(window_size, len(prediction))], prediction[0], percentage_diff * 100)          
            
            portfolio_results[id] = percentage_diff

        best_id = None

        # Check if all predictions (percentage_diff) are negative
        if max(portfolio_results.values()) < 0:
            print("All percentage differences are negative. Choosing an empty portfolio (not holding anything).")
            predicted_best_portfolio = {}
        else:
            best_id = max(portfolio_results, key=portfolio_results.get)
            predicted_best_portfolio = close_to_best[best_id]
            previous_best_portfolio = predicted_best_portfolio
        
        start_date = loop_raw_data_test.index[0]
        end_date = loop_raw_data_test.index[window_size-1]
        
        all_good_portfolios.append({
            "portfolio": predicted_best_portfolio,
            "start_date": start_date,
            "end_date": end_date
        })
        if best_id is not None:
            print(f'Current iteration: {i}, the best portfolio found was portfolio: {best_id}')
        else:
            print(f'Current iteration: {i}, no portfolio selected (empty portfolio chosen).')
    
    return all_good_portfolios

In [43]:
all_good_portfolios = evaluate_portfolios_over_time(raw_data, window_size=5, num_windows=None, threshold=0.5, epochs=20)

  2%|▏         | 1671/100000 [00:19<19:05, 85.86it/s]


All assets have been tested
Length of close to best is: 2
Sharpe ratios (first is best_sharpe): [np.float64(18.673769731181135), np.float64(9.906805283906442)]
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 187ms/step
[[0.66894776]
 [0.6657029 ]
 [0.6613922 ]
 [0.6646785 ]
 [0.6668718 ]] [0.66894776] [-0.31033367]
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 197ms/step
[[0.65660584]
 [0.63287395]
 [0.61923367]
 [0.62154275]
 [0.6246599 ]] [0.65660584] [-4.865315]
All percentage differences are negative. Choosing an empty portfolio (not holding anything).
Current iteration: 0, no portfolio selected (empty portfolio chosen).


  1%|          | 1166/100000 [00:14<21:03, 78.25it/s]


All assets have been tested
Length of close to best is: 4
Sharpe ratios (first is best_sharpe): [np.float64(17.359482693467516), np.float64(9.575271008423258), np.float64(9.587716166854129), np.float64(10.178072398433619)]
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 176ms/step
[[0.6575648]
 [0.6691079]
 [0.680183 ]
 [0.6855105]
 [0.6856771]] [0.6575648] [4.2752123]
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 176ms/step
[[0.5719873 ]
 [0.5909376 ]
 [0.60871404]
 [0.6165797 ]
 [0.61994153]] [0.5719873] [8.383799]
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 183ms/step
[[0.5790519 ]
 [0.59781194]
 [0.6154951 ]
 [0.6231932 ]
 [0.62622154]] [0.5790519] [8.146009]
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 172ms/step
[[0.62283933]
 [0.6429971 ]
 [0.6609589 ]
 [0.67011017]
 [0.674227  ]] [0.62283933] [8.250549]
Current iteration: 1, the best portfolio found was portfolio: 1


  1%|          | 914/100000 [00:11<20:07, 82.04it/s]


All assets have been tested
Length of close to best is: 4
Sharpe ratios (first is best_sharpe): [np.float64(9.575271008423258), np.float64(16.14291538760696), np.float64(8.152351549258851), np.float64(8.152351782603224)]
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 204ms/step
[[0.7572981 ]
 [0.7460396 ]
 [0.72374606]
 [0.7121548 ]
 [0.69351983]] [0.7572981] [-8.42182]
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 186ms/step
[[0.88572013]
 [0.87247735]
 [0.85020566]
 [0.82822543]
 [0.81907296]] [0.88572013] [-7.524631]
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 204ms/step
[[0.7418238 ]
 [0.7370124 ]
 [0.71662736]
 [0.7125131 ]
 [0.7057029 ]] [0.7418238] [-4.869201]
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 228ms/step
[[0.62198484]
 [0.6180254 ]
 [0.60003936]
 [0.5966015 ]
 [0.59170955]] [0.62198484] [-4.867528]
All percentage differences are negative. Choosing an empty portfolio (not holding anything).
Current itera

  1%|▏         | 1432/100000 [00:17<19:57, 82.32it/s]


All assets have been tested
Length of close to best is: 5
Sharpe ratios (first is best_sharpe): [np.float64(9.575271008423258), np.float64(15.566055246123423), np.float64(8.048466168160493), np.float64(8.048466168160862), np.float64(9.302629354428772)]
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 172ms/step
[[0.6722947]
 [0.6534792]
 [0.6449042]
 [0.6291509]
 [0.6409565]] [0.6722947] [-4.6613717]
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 174ms/step
[[0.81503725]
 [0.8022502 ]
 [0.7963143 ]
 [0.78612477]
 [0.7829711 ]] [0.81503725] [-3.934319]
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 172ms/step
[[0.75236315]
 [0.7295801 ]
 [0.72111565]
 [0.7084163 ]
 [0.7128258 ]] [0.75236315] [-5.255091]
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 178ms/step
[[0.7302181 ]
 [0.7066036 ]
 [0.6983827 ]
 [0.68470067]
 [0.6888679 ]] [0.7302181] [-5.662717]
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 176ms/step
[[0.

  1%|          | 785/100000 [00:09<20:07, 82.18it/s]


All assets have been tested
Length of close to best is: 7
Sharpe ratios (first is best_sharpe): [np.float64(9.575271008423258), np.float64(13.464535054932691), np.float64(8.64446968036656), np.float64(8.658620566938374), np.float64(9.356430691769736), np.float64(9.35734237213332), np.float64(9.640866946074375)]
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 173ms/step
[[0.6169921 ]
 [0.60747343]
 [0.6063976 ]
 [0.6085514 ]
 [0.59881413]] [0.6169921] [-2.9462266]
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 176ms/step
[[0.726279  ]
 [0.73131377]
 [0.7465265 ]
 [0.7722354 ]
 [0.79873884]] [0.726279] [9.976856]
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 173ms/step
[[0.6416303 ]
 [0.6479424 ]
 [0.6611632 ]
 [0.68487775]
 [0.7131703 ]] [0.6416303] [11.149722]
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 172ms/step
[[0.6862418 ]
 [0.69319886]
 [0.7073025 ]
 [0.73277414]
 [0.76332504]] [0.6862418] [11.232663]
[1m2/2[0m [32

  1%|          | 900/100000 [00:11<20:23, 80.98it/s]


All assets have been tested
Length of close to best is: 3
Sharpe ratios (first is best_sharpe): [np.float64(8.658620566938374), np.float64(13.734541545593213), np.float64(6.900749244091781)]
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 172ms/step
[[0.7315057 ]
 [0.7499661 ]
 [0.75400174]
 [0.7525303 ]
 [0.7500503 ]] [0.7315057] [2.535129]
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 188ms/step
[[0.80687016]
 [0.82024735]
 [0.821648  ]
 [0.814284  ]
 [0.8074236 ]] [0.80687016] [0.06858961]
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 173ms/step
[[0.63520193]
 [0.6282304 ]
 [0.60856444]
 [0.58178264]
 [0.5607401 ]] [0.63520193] [-11.722543]
Current iteration: 5, the best portfolio found was portfolio: 0


  1%|          | 776/100000 [00:09<20:05, 82.32it/s]


All assets have been tested
Length of close to best is: 7
Sharpe ratios (first is best_sharpe): [np.float64(8.658620566938374), np.float64(13.121011548538686), np.float64(6.561193345178461), np.float64(6.61551159331628), np.float64(7.146670833443626), np.float64(7.195443352896724), np.float64(7.835044766637889)]
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 175ms/step
[[0.7198302 ]
 [0.7349782 ]
 [0.7525947 ]
 [0.76557213]
 [0.7835664 ]] [0.7198302] [8.854338]
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 199ms/step
[[0.75978726]
 [0.7678089 ]
 [0.78115207]
 [0.7904964 ]
 [0.80288905]] [0.75978726] [5.672876]
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 173ms/step
[[0.81118876]
 [0.8130011 ]
 [0.8210128 ]
 [0.8267587 ]
 [0.8305569 ]] [0.81118876] [2.387628]
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 174ms/step
[[0.82957876]
 [0.83071274]
 [0.8375952 ]
 [0.8430071 ]
 [0.8467119 ]] [0.82957876] [2.0652792]
[1m1/1[0m [

  1%|          | 658/100000 [00:08<20:21, 81.30it/s]


All assets have been tested
Length of close to best is: 2
Sharpe ratios (first is best_sharpe): [np.float64(8.658620566938374), np.float64(12.467443913163756)]
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 178ms/step
[[0.83279353]
 [0.8406595 ]
 [0.8452982 ]
 [0.8386738 ]
 [0.8245707 ]] [0.83279353] [-0.98737776]
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 172ms/step
[[0.81077266]
 [0.82303774]
 [0.8251683 ]
 [0.8222128 ]
 [0.8126054 ]] [0.81077266] [0.22604655]
Current iteration: 7, the best portfolio found was portfolio: 1


  1%|          | 882/100000 [00:10<20:14, 81.58it/s]


All assets have been tested
Length of close to best is: 7
Sharpe ratios (first is best_sharpe): [np.float64(12.467443913163756), np.float64(12.5702085870888), np.float64(6.670641065923017), np.float64(6.715424163167398), np.float64(6.726252084249847), np.float64(6.831193680917119), np.float64(6.980184314677132)]
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 171ms/step
[[0.832372  ]
 [0.80672604]
 [0.78728956]
 [0.7690453 ]
 [0.75916153]] [0.832372] [-8.7954035]
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 189ms/step
[[0.7987605 ]
 [0.77548623]
 [0.75733024]
 [0.7405451 ]
 [0.72912586]] [0.7987605] [-8.717834]
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 172ms/step
[[0.78404105]
 [0.7625647 ]
 [0.7538399 ]
 [0.7474392 ]
 [0.74153024]] [0.78404105] [-5.422013]
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 171ms/step
[[0.8010729 ]
 [0.77679086]
 [0.76663715]
 [0.7597011 ]
 [0.7541719 ]] [0.8010729] [-5.8547716]
[1m1/1[0m 

  1%|          | 688/100000 [00:08<20:16, 81.62it/s]


All assets have been tested
Length of close to best is: 7
Sharpe ratios (first is best_sharpe): [np.float64(12.467443913163756), np.float64(11.341623124790171), np.float64(6.285733334603867), np.float64(6.303654700663462), np.float64(6.306978621726838), np.float64(6.497621236753117), np.float64(6.621878728208433)]
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 172ms/step
[[0.7300866 ]
 [0.7184477 ]
 [0.71179795]
 [0.70610857]
 [0.7008271 ]] [0.7300866] [-4.0076756]
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 172ms/step
[[0.871402  ]
 [0.8624242 ]
 [0.85543346]
 [0.8495865 ]
 [0.84198105]] [0.871402] [-3.3762798]
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 178ms/step
[[0.80837065]
 [0.79173106]
 [0.79313004]
 [0.7823495 ]
 [0.76672137]] [0.80837065] [-5.1522503]
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 172ms/step
[[0.76044565]
 [0.7460654 ]
 [0.7472332 ]
 [0.73762125]
 [0.72338074]] [0.76044565] [-4.874104]
[1m1/1

  1%|          | 780/100000 [00:09<20:04, 82.40it/s]


All assets have been tested
Length of close to best is: 7
Sharpe ratios (first is best_sharpe): [np.float64(12.467443913163756), np.float64(10.685175821477333), np.float64(6.077807260585055), np.float64(6.275409515757428), np.float64(6.349656777803999), np.float64(6.3549702642224855), np.float64(6.3549707471147405)]
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 173ms/step
[[0.6955875 ]
 [0.68404585]
 [0.67107975]
 [0.6603377 ]
 [0.6703099 ]] [0.6955875] [-3.633995]
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 173ms/step
[[0.83103645]
 [0.81983614]
 [0.8091262 ]
 [0.804644  ]
 [0.8205869 ]] [0.83103645] [-1.2574091]
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 174ms/step
[[0.6706505 ]
 [0.6455418 ]
 [0.62220955]
 [0.6056668 ]
 [0.6140784 ]] [0.6706505] [-8.435404]
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 175ms/step
[[0.6406176 ]
 [0.61630464]
 [0.59506595]
 [0.5795834 ]
 [0.58850694]] [0.6406176] [-8.134442]
[1m1/1

  1%|          | 834/100000 [00:10<20:02, 82.49it/s]


All assets have been tested
Length of close to best is: 4
Sharpe ratios (first is best_sharpe): [np.float64(12.467443913163756), np.float64(10.382791047688563), np.float64(5.248275691398198), np.float64(5.346529523739702)]
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 170ms/step
[[0.7169023]
 [0.7179092]] [0.7169023] [0.14045167]
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 171ms/step
[[0.8543504]
 [0.861031 ]] [0.8543504] [0.7819517]
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 172ms/step
[[0.6970389]
 [0.7024393]] [0.6970389] [0.77476585]
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 169ms/step
[[0.6801947]
 [0.6846576]] [0.6801947] [0.65612066]
Current iteration: 11, the best portfolio found was portfolio: 1


In [44]:
def extract_asset_returns(raw_data, assets, start_date, end_date):
    if not isinstance(raw_data.index, pd.DatetimeIndex):
        raw_data.index = pd.to_datetime(raw_data.index)

    filtered_data = raw_data.loc[start_date:end_date, assets]

    return filtered_data

def chain_portfolio_performance(weekly_series_list, starting_value=100):
    continuous_series = pd.Series()
    current_value = starting_value

    for week_series in weekly_series_list:
        # Normalize the week so that it starts at 1 (or current_value)
        week_normalized = week_series / week_series.iloc[0]
        # Scale the normalized week to start at current_value
        week_scaled = week_normalized * current_value
        # Update the current_value to the last value of this week
        current_value = week_scaled.iloc[-1]
        # Append the week_series to the continuous_series
        continuous_series = pd.concat([continuous_series, week_scaled])
    
    return continuous_series

ML_portfolio = []
initial_value = 100

for i in range(len(all_good_portfolios)):
    curr_best_portfolio = all_good_portfolios[i]['portfolio']
    start_date = all_good_portfolios[i]['start_date']
    end_date = all_good_portfolios[i]['end_date']
    
    if not curr_best_portfolio:
        if i == 0:
            previous_value = initial_value
        else:
            previous_value = ML_portfolio[-1].iloc[-1]
        # Create a series with the same index as in raw_data for the window duration, all at previous_value.
        window_index = raw_data.loc[start_date:end_date].index
        portfolio_daily_returns = pd.Series(previous_value, index=window_index)
    else:
        best_curr_port_assets = curr_best_portfolio['tickers']
        best_curr_port_assets_test_data = extract_asset_returns(raw_data, best_curr_port_assets, start_date, end_date)
        curr_best_portfolio_weights = curr_best_portfolio['weights']
        weighted_returns = best_curr_port_assets_test_data.mul(curr_best_portfolio_weights, axis='columns')
        portfolio_daily_returns = weighted_returns.sum(axis=1)
    
    ML_portfolio.append(portfolio_daily_returns)

ML_portfolio_streamed = chain_portfolio_performance(ML_portfolio, starting_value=initial_value)
ML_portfolio_streamed


The behavior of array concatenation with empty entries is deprecated. In a future version, this will no longer exclude empty items when determining the result dtype. To retain the old behavior, exclude the empty entries before the concat operation.



2024-10-03    100.000000
2024-10-04    100.000000
2024-10-07    100.000000
2024-10-08    100.000000
2024-10-09    100.000000
2024-10-10    100.000000
2024-10-11    101.866348
2024-10-14    103.726296
2024-10-15    103.608703
2024-10-16    103.633839
2024-10-17    103.633839
2024-10-18    103.633839
2024-10-21    103.633839
2024-10-22    103.633839
2024-10-23    103.633839
2024-10-24    103.633839
2024-10-25    103.633839
2024-10-28    103.633839
2024-10-29    103.633839
2024-10-30    103.633839
2024-10-31    103.633839
2024-11-01    103.775933
2024-11-04    103.427588
2024-11-05    103.753076
2024-11-06    104.342912
2024-11-07    104.342912
2024-11-08    106.205112
2024-11-11    107.221093
2024-11-12    107.498000
2024-11-13    107.443135
2024-11-14    107.443135
2024-11-15    107.877294
2024-11-18    109.190543
2024-11-19    109.031125
2024-11-20    109.348232
2024-11-21    109.348232
2024-11-22    118.416789
2024-11-25    114.081422
2024-11-26    112.375205
2024-11-27    115.185120


In [45]:
ML_daily_returns = ML_portfolio_streamed.pct_change()
ML_cumulative_returns = (1 + ML_daily_returns).cumprod()

ML_cumulative_returns.iloc[0] = 1
ML_portfolio_normalized = (ML_cumulative_returns / ML_cumulative_returns.iloc[0]) * 100

Nasdaq_comp = getNasdaq_comp(ML_portfolio_streamed.index[0], ML_portfolio_streamed.index[-1])

fig = go.Figure()

fig.add_trace(go.Scatter(
    x=Nasdaq_comp.index,
    y=Nasdaq_comp['Normalized'],
    mode='lines',
    name='Nasdaq Composite'
))

fig.add_trace(go.Scatter(
    x=ML_cumulative_returns.index,
    y=ML_portfolio_normalized,
    mode='lines',
    name='Portfolio Growth'
))

fig.update_layout(
    title='Comparison of Portfolio vs. Nasdaq Composite Growth',
    xaxis_title='Date',
    yaxis_title='Normalized Value (Base 100%)',
    xaxis=dict(
        type='date',
        tickformat='%b %Y',
        tickmode='auto'
    )
)

[*********************100%***********************]  1 of 1 completed

Omitted assets: []
Time to fetch data: 0.02 seconds
Max combination of assets with complete data: 1





## 7.0 Testing Against Others

In [46]:
best_port_assets = best_portfolio['tickers']
best_port_assets_test_data = raw_data_test.loc[:, best_port_assets]

Nasdaq_comp = getNasdaq_comp(best_port_assets_test_data.index[0], best_port_assets_test_data.index[-1])

best_portfolio_weights = best_portfolio['weights']
normalized_prices = best_port_assets_test_data.div(best_port_assets_test_data.iloc[0])
daily_returns = normalized_prices.pct_change()
weighted_returns = daily_returns.mul(best_portfolio_weights, axis='columns')
portfolio_daily_returns = weighted_returns.sum(axis=1)
portfolio_cumulative_returns = (1 + portfolio_daily_returns).cumprod()

portfolio_start = portfolio_cumulative_returns.iloc[0]
portfolio_normalized = (portfolio_cumulative_returns / portfolio_start) * 100

fig = go.Figure()

fig.add_trace(go.Scatter(
    x=Nasdaq_comp.index,
    y=Nasdaq_comp['Normalized'],
    mode='lines',
    name='Nasdaq Composite'
))

fig.add_trace(go.Scatter(
    x=portfolio_cumulative_returns.index,
    y=portfolio_normalized,
    mode='lines',
    name='Portfolio Growth'
))

fig.add_trace(go.Scatter(
    x=ML_cumulative_returns.index,
    y=ML_portfolio_normalized,
    mode='lines',
    name='Portfolio Growth'
))

fig.update_layout(
    title='Comparison of Portfolio vs. Nasdaq Composite Growth',
    xaxis_title='Date',
    yaxis_title='Normalized Value (Base 100)',
    xaxis=dict(
        type='date',
        tickformat='%b %Y',
        tickmode='auto'
    )
)

[*********************100%***********************]  1 of 1 completed

Omitted assets: []
Time to fetch data: 0.02 seconds
Max combination of assets with complete data: 1





## 8.0 Find Optimal Portfolio Size

In [47]:
all_portfolios, dominant_portfolios = MonteCarloRBA(names, cov, annualized_returns, 10000, 'sharpe', 3, 50)

100%|██████████| 10000/10000 [03:02<00:00, 54.89it/s]


In [48]:
rd_portfolio_sizes = [len(portfolio['tickers']) for portfolio in all_portfolios]
rd_volatility = [np.sqrt(portfolio['variance']) for portfolio in all_portfolios]
rd_returns = [portfolio['return'] for portfolio in all_portfolios]

volatility_by_size = defaultdict(list)
for size, vol, ret in zip(rd_portfolio_sizes, rd_volatility, rd_returns):
    volatility_by_size[size].append((vol, ret))

average_volatility = {size: np.mean([v[0] for v in vols]) for size, vols in volatility_by_size.items()}
average_returns = {size: np.mean([v[1] for v in vols]) for size, vols in volatility_by_size.items()}

sorted_sizes = sorted(average_volatility.keys())
sorted_average_vols = [average_volatility[size] for size in sorted_sizes]
sorted_average_rets = [average_returns[size] for size in sorted_sizes]

fig = go.Figure()
fig.add_trace(go.Scatter(
    x=sorted_sizes,
    y=sorted_average_vols,
    mode='lines',
    name='Average Volatility'
))

fig.update_layout(
    title='Average Volatility and Returns by Portfolio Size',
    xaxis_title='Number of Assets in Portfolio',
    yaxis_title='Average Value',
    xaxis=dict(type='category'),
)

fig.show()

In [None]:
all_portfolios, dominant_portfolios = MonteCarloRBA(names, cov, annualized_returns, 10000, 'vol', 50, 50)

100%|██████████| 10000/10000 [06:11<00:00, 26.94it/s]


In [54]:
rd_portfolio_sizes = [sum(weight > 0 for weight in portfolio['weights']) for portfolio in all_portfolios]

portfolio_size_counts = Counter(rd_portfolio_sizes)

sizes = sorted(portfolio_size_counts.keys())
counts = [portfolio_size_counts[size] for size in sizes]

fig = go.Figure(data=[go.Bar(x=sizes, y=counts)])

fig.update_layout(
    title='Distribution of Portfolio Sizes After Optimization',
    xaxis_title='Number of assets in portfolio after optimizing',
    yaxis_title='Number of Portfolios',
    xaxis=dict(type='category'),
    yaxis=dict(type='linear')
)

fig.show()

In [51]:
#fig.write_html("PortfolioSize.html")