In [6]:
'''from google.colab import drive
drive.mount('/content/drive')'''

"from google.colab import drive\ndrive.mount('/content/drive')"

In [7]:
'''cd drive/MyDrive/RoboA/'''

'cd drive/MyDrive/RoboA/'

# Markowitz Efficient Frontier

## 1. Imports

In [8]:
import pandas as pd
import numpy as np
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import statistics
from tqdm import tqdm
from collections import defaultdict, Counter
import math
from itertools import combinations
from sklearn.preprocessing import MinMaxScaler
import tensorflow as tf
from tensorflow.keras.models import Sequential # type: ignore
from tensorflow.keras.layers import LSTM, Dense, Dropout # type: ignore

from fetchData import fetch_raw_data_yf, getSNP500, fetch_raw_data_yf_all, getNasdaq_comp, add_days_to_date
from MonteCarloRBA import MonteCarloRBA
from PortfolioFunction import maximize_sharpe, create_correlation_matrix, get_sharpe_ratio, get_matrices, maximize_sharpe_SLSQP
from LearningRBA import find_best_asset_to_remove, find_asset_to_add


## 2. Fetch Data

### Get all Nasdaq Stocks

In [9]:
assets= [
    "AAPL",  # Apple Inc.
    "MSFT",  # Microsoft Corporation
    "AMZN",  # Amazon.com Inc.
    "GOOGL", # Alphabet Inc. (Google) Class A
    "GOOG",  # Alphabet Inc. (Google) Class C
    "META",    # Meta Platforms Inc (formerly Facebook)
    "TSLA",  # Tesla Inc
    "UA", # Berkshire Hathaway Inc. Class B
    "JPM",   # JPMorgan Chase & Co.
    "V",     # Visa Inc.
    "JNJ",   # Johnson & Johnson
    "WMT",   # Walmart Inc.
    "PG",    # Procter & Gamble Co.
    "UNH",   # UnitedHealth Group Inc.
    "MA",    # Mastercard Inc.
    "NVDA",  # NVIDIA Corporation
    "HD",    # Home Depot Inc.
    "BAC",   # Bank of America Corp
    "DIS",   # Walt Disney Co
    "PYPL",  # PayPal Holdings
    "VZ",    # Verizon Communications Inc.
    "ADBE",  # Adobe Inc.
    "CMCSA", # Comcast Corporation
    "NFLX",  # Netflix Inc.
    "KO",    # Coca-Cola Co
    "NKE",   # NIKE Inc.
    "PFE",   # Pfizer Inc.
    "MRK",   # Merck & Co., Inc.
    "PEP",   # PepsiCo, Inc.
    "T",     # AT&T Inc.
    "ABT",   # Abbott Laboratories
    "CRM",   # Salesforce.com Inc.
    "ORCL",  # Oracle Corporation
    "ABBV",  # AbbVie Inc.
    "CSCO",  # Cisco Systems, Inc.
    "INTC",  # Intel Corporation
    "TMO",   # Thermo Fisher Scientific Inc.
    "XOM",   # Exxon Mobil Corporation
    "ACN",   # Accenture plc
    "LLY",   # Eli Lilly and Company
    "COST",  # Costco Wholesale Corporation
    "MCD",   # McDonald's Corp
    "DHR",   # Danaher Corporation
    "MDT",   # Medtronic plc
    "NEE",   # NextEra Energy, Inc.
    "BMY",   # Bristol-Myers Squibb Company
    "QCOM",  # Qualcomm Inc
    "CVX",   # Chevron Corporation
    "WFC",   # Wells Fargo & Co
    "LMT",    # Lockheed Martin Corporation
    "GS",   # Goldman Sachs Group, Inc.
    "MS",   # Morgan Stanley
    "IBM",  # International Business Machines Corporation
    "GE",   # General Electric Company
    "F",    # Ford Motor Company
    "GM",   # General Motors Company
    "UBER", # Uber Technologies, Inc.
    "LYFT", # Lyft, Inc.
    "SNAP", # Snap Inc.
    "TWTR", # Twitter, Inc.
    "SPOT", # Spotify Technology S.A.
    "AMD",  # Advanced Micro Devices, Inc.
    "TXN",  # Texas Instruments Incorporated
    "BABA", # Alibaba Group Holding Limited
    "SAP",  # SAP SE
    "HON",  # Honeywell International Inc.
    "BA",   # Boeing Company
    "RTX",  # Raytheon Technologies Corporation
    "CAT",  # Caterpillar Inc.
    "DE",   # Deere & Company
    "MMM",  # 3M Company
    "DUK",  # Duke Energy Corporation
    "SO",   # Southern Company
    "EXC",  # Exelon Corporation
    "NEE",  # NextEra Energy, Inc.
    "AEP",  # American Electric Power Company, Inc.
    "SRE",  # Sempra Energy
    "ETN",  # Eaton Corporation plc
    "EMR",  # Emerson Electric Co.
    "SYY",  # Sysco Corporation
    "KR",   # Kroger Co.
    "GIS",  # General Mills, Inc.
    "K",    # Kellogg Company
    "CPB",  # Campbell Soup Company
    "MO",   # Altria Group, Inc.
    "PM",   # Philip Morris International Inc.
    "BTI",  # British American Tobacco plc
    "RDY",  # Dr. Reddy's Laboratories Ltd.
    "GILD", # Gilead Sciences, Inc.
    "BIIB", # Biogen Inc.
    "CELG", # Celgene Corporation
    "AMGN", # Amgen Inc.
    "SYK",  # Stryker Corporation
    "BSX",  # Boston Scientific Corporation
    "ISRG", # Intuitive Surgical, Inc.
    "ZBH",  # Zimmer Biomet Holdings, Inc.
    "EW",   # Edwards Lifesciences Corporation
    "RMD",  # ResMed Inc.
    "VRTX", # Vertex Pharmaceuticals Incorporated
    "REGN",  # Regeneron Pharmaceuticals, Inc.
]

assets = getSNP500()

In [76]:
start_date = "2024-01-01"
end_date = "2025-01-01"

In [None]:
raw_data, asset_errors, max_combination= fetch_raw_data_yf(assets, start_date, end_date)

## 3. Mean, Volatility and Covariance

In [11]:
names, annualized_returns, unweighted_annaulized_returns, weighted_returns_matrix, normal_returns_matrix, cov, correlation_matrix = get_matrices(raw_data)

volatility = np.sqrt(np.diag(cov))
risk_free_rate=0
sharpe_ratios = (annualized_returns - risk_free_rate) / volatility

In [12]:
hover_texts = [
    f"<br>Symbol: {ticker} <br>Volatility: {vol:.3f} <br>Returns: {ret:.3%} <br>Sharpe Ratio: {sr:.3f}"
    for ticker, vol, ret, sr in zip(names, volatility, annualized_returns, sharpe_ratios)
]

fig = go.Figure(data=go.Scatter(
    x=volatility,
    y=annualized_returns,
    mode='markers',
    hoverinfo='text',
    hovertext=hover_texts,
    marker=dict(color=sharpe_ratios, colorscale = 'RdBu', size=6, line=dict(width=1), colorbar=dict(title="Sharpe<br>Ratio")
    )
))

fig.update_layout(
    title='Annual Performance of Individual Assets',
    xaxis_title='Volatility (Standard Deviation)',
    yaxis_title='Annualized Returns',
    width = 1920,
    height = 1080,
    font=dict(
        family="Cambria",
        size=18,
    )
)

fig.show()
#fig.write_html("PerformanceofIndividualAssets.html")
#fig.write_image("PerformanceofIndividualAssets.png", format='png', width=1920, height=1080)

### Demonstrating SLSQP

In [13]:
rand_assets = np.random.choice(list(names), 5, replace=False)

selected_returns = annualized_returns.loc[rand_assets].values
selected_covariances = cov.loc[rand_assets, rand_assets].values

optimized_weights, weights_history, sharpe_history = maximize_sharpe_SLSQP(selected_returns, selected_covariances)

In [14]:
fig = make_subplots(rows=1, cols=2, subplot_titles=("Portfolio Weights Over Iterations", "Sharpe Ratio Over Iterations"))

for i in range(len(selected_returns)):
    fig.add_trace(go.Scatter(x=list(range(len(weights_history))), 
                             y=[h[i] for h in weights_history], 
                             mode='lines+markers', 
                             name=f'Asset {i+1} Weight'), 
                  row=1, col=1)

fig.add_trace(go.Scatter(x=list(range(len(sharpe_history))), 
                         y=sharpe_history, 
                         mode='lines+markers', 
                         name='Sharpe Ratio'), 
              row=1, col=2)

fig.update_layout(title_text='Portfolio Optimization Analysis',
                  xaxis_title='Iteration',
                  yaxis_title='Weight',
                  legend_title='Assets',
                    font=dict(
                        family="Cambria",
                        size=18,
                    )
)

# Update xaxis and yaxis properties for Sharpe Ratio subplot
fig.update_xaxes(title_text="Iteration", row=1, col=2)
fig.update_yaxes(title_text="Sharpe Ratio", row=1, col=2)

fig.show()

#fig.write_html("SLSQPDemo.html")
#fig.write_image("SLSQPDemo.png", format='png', width=1920, height=1080)

## 4.0 Monte Carlo Method

In [15]:
all_portfolios, dominant_portfolios = MonteCarloRBA(names, cov, annualized_returns, 10000, min_assets=8, max_assets=8)

100%|██████████| 10000/10000 [00:43<00:00, 227.65it/s]


In [16]:
iterations = [portfolio['iteration'] for portfolio in dominant_portfolios]
counts = list(range(1, len(dominant_portfolios) + 1))

fig = go.Figure()

fig.add_trace(go.Scatter(x=iterations, y=counts,
                        mode='lines',
                        name='Dominant Portfolios',
                        line=dict(shape='spline')
))  

fig.update_layout(
    title='Growth of Dominant Portfolios Over Iterations',
    xaxis_title='Portfolios Generated',
    yaxis_title='Number of Dominant Portfolios Found',
    height=1080,
    width=1920,
    font=dict(
        family="Cambria",
        size=18,
    )
)

fig.show()
#fig.write_html("FrequencyOfDom.html")
#fig.write_image("FrequencyOfDom.png", format='png', width=1920, height=1080)

In [17]:
fig1 = go.Figure()

fig1.add_trace(go.Scatter(
    x=[np.sqrt(p["variance"]) for p in all_portfolios],
    y=[p["return"] for p in all_portfolios],
    mode='markers',
    marker=dict(
        color=[p["sharpe"] for p in all_portfolios],
        showscale=True,
        size=7,
        line=dict(width=1),
        colorscale="RdBu",
        colorbar=dict(title="Sharpe<br>Ratio")
    ),
    hoverinfo='text',
    text=[
        f"Return: {p['return']:.3%}<br>Volatility: {np.sqrt(p['variance']):.3f}<br>" +
        f"Sharpe Ratio: {p['return'] / (np.sqrt(p['variance'])):.3f}<br>" +
        "<br>".join([f"{p['tickers'][i]}: Weight={p['weights'][i]:.3f}" for i in range(len(p['tickers']))])
        for p in all_portfolios
    ]
))

fig1.update_layout(
    xaxis=dict(title='Volatility (Standard Deviation)'),
    yaxis=dict(title='Annualised Returns'),
    title='Monte Carlo Randomly Generated Portfolios',
    height=1080,
    width=1920,
    font=dict(
        family="Cambria",
        size=18,
    )
)
fig1.show()
#fig1.write_html("MonteCarlo.html")
#fig1.write_image("MonteCarlo.png", format='png', width=1920, height=1080)

In [18]:
fig2 = go.Figure()

fig2.add_trace(go.Scatter(
    x=[np.sqrt(p["variance"]) for p in dominant_portfolios],  # Convert variance to volatility
    y=[p["return"] for p in dominant_portfolios],
    mode='markers',
    marker=dict(
        size=7,
        line=dict(width=1),
        #showscale=True,
        #color=[p["return"] / (np.sqrt(p["variance"])) for p in dominant_portfolios],  # Sharpe Ratio
        #colorscale="RdBu",
        #colorbar=dict(title="Sharpe<br>Ratio")
    ),
    hoverinfo='text',
    text=[
        f"Return: {p['return']:.3%}<br>Volatility: {np.sqrt(p['variance']):.3f}<br>" +
        f"Sharpe Ratio: {p['sharpe']:.3f}<br>" +
        "<br>".join([f"{p['tickers'][i]}: Weight={p['weights'][i]:.3f}" for i in range(len(p['tickers']))])
        for p in dominant_portfolios
    ],
    name="Monte Carlo Portfolios"
))

fig2.add_trace(go.Scatter(
    x=volatility,
    y=annualized_returns,
    mode='markers',
    hoverinfo='text',
    hovertext=[
        f"{name} <br>Volatility: {vol:.3f} <br>Returns: {ret:.3%} <br>Sharpe Ratio: {sr:.3f}"
        for name, vol, ret, sr in zip(names, volatility, annualized_returns, sharpe_ratios)
    ],
    marker=dict(
        color='green',
        size=5,
        line=dict(width=1)
    ),
    name="Individual Assets"
))

fig2.update_layout(
    title='Monte Carlo Portfolios with Individual Assets',
    xaxis_title='Volatility (Standard Deviation)',
    yaxis_title='Annualized Return',
    legend=dict(y=5),
    height=1080,
    width=1920,
    font=dict(
        family="Cambria",
        size=18,
    )
)

fig2.show()
#fig2.write_html("MCMarkowitzBullet.html")
#fig2.write_image("MCMarkowitzBullet.png", format='png', width=1920, height=1080)

## 5.0 Machine Learning Method

### 5.1 Optimization Function Only

In [19]:
def MLRBA_V1(ticker, covariances, returns, num_iterations=None, risk_free_rate = 0, 
             return_power = 1, std_power = 1, return_weight=1/3, corr_weight=1/3, vol_weight= 1/3, num_assets = 8, base_portfolio = None):
    
    if num_iterations is None:
        num_iterations = min(math.comb(len(ticker), num_assets), 100000)
    
    if base_portfolio is None:
        base_portfolio = np.random.choice(list(ticker), num_assets, replace=False)
        #base_portfolio = list(ticker)[:num_assets]
    
    def _get_portfolio_stats (portfolio_assets, risk_free_rate = 0):
        p_asset_ret = returns.loc[portfolio_assets].values
        p_asset_var = covariances.loc[portfolio_assets, portfolio_assets].values
        best_p_weights = maximize_sharpe(p_asset_ret, p_asset_var)
        p_ret = np.dot(best_p_weights,p_asset_ret)
        p_var = np.dot(best_p_weights, p_asset_var @ best_p_weights)
        sharpe = get_sharpe_ratio(p_ret, p_var, risk_free_rate, return_power, std_power)

        return p_asset_ret, p_asset_var, sharpe, p_ret, p_var, best_p_weights

    def _update_portfolios_array(portfolios, assets, weights, p_ret, p_var):
        portfolios.append({
            "tickers": assets,
            "weights": weights,
            "return": p_ret,
            "variance": p_var,
            "sharpe": (p_ret-risk_free_rate)/np.sqrt(p_var),
        })

    all_portfolios = []
    
    curr_ret, curr_var, curr_weighted_sharpe, curr_p_return, curr_p_variance, curr_p_weights = _get_portfolio_stats(base_portfolio, risk_free_rate)
    _update_portfolios_array(all_portfolios, base_portfolio, curr_p_weights, curr_p_return, curr_p_variance)

    good_portfolios = all_portfolios.copy()
    best_portfolio = base_portfolio.copy()

    highest_weighted_sharpe = -np.inf
    highest_weighted_sharpe = curr_weighted_sharpe
    
    portfolios_tested = 0
    best_iteration = 0

    progress_bar = tqdm(total=num_iterations, desc="Portfolios Tested")
    for _ in range(num_iterations):
        asset_to_remove = find_best_asset_to_remove(best_portfolio, curr_var, curr_ret)
        new_portfolio = [str(asset) for asset in best_portfolio if asset != asset_to_remove]

        ranked_assets = find_asset_to_add(new_portfolio, ticker, covariances, returns,
                                          return_weight, corr_weight, vol_weight)

        asset_added = False

        for asset in ranked_assets.index:
            if asset in new_portfolio:
                continue

            test_portfolio = new_portfolio + [asset]
            portfolios_tested += 1
            progress_bar.update(1)

            new_returns, new_var, new_weighted_sharpe, new_p_return, new_p_variance, new_p_weights = _get_portfolio_stats(test_portfolio, risk_free_rate)
            _update_portfolios_array(all_portfolios, test_portfolio, new_p_weights, new_p_return, new_p_variance)

            if new_weighted_sharpe > highest_weighted_sharpe:
                best_iteration = portfolios_tested
                best_portfolio = test_portfolio
                curr_ret, curr_var = new_returns, new_var
                highest_weighted_sharpe = new_weighted_sharpe

                _update_portfolios_array(good_portfolios, test_portfolio, new_p_weights, new_p_return, new_p_variance)

                asset_added = True
                break  # Accept first asset that improves Sharpe

        if not asset_added:
            print("All assets have been tested or no improvement found.")
            break

    progress_bar.close()

    base_details = good_portfolios[0]
    best_details = good_portfolios[-1]

    return base_details, best_details, good_portfolios, all_portfolios, best_iteration  

base_portfolio, best_portfolio, good_portfolios, total_portfolios, best_iteration = MLRBA_V1(names, cov, annualized_returns)
base_portfolio, best_portfolio, len(good_portfolios), len(total_portfolios), best_iteration

Portfolios Tested:   5%|▌         | 5150/100000 [00:31<09:33, 165.25it/s]

All assets have been tested or no improvement found.





({'tickers': array(['MCK', 'MS', 'META', 'HII', 'FIS', 'BA', 'CVX', 'FRT'], dtype='<U5'),
  'weights': array([2.09131770e-01, 3.36779705e-03, 3.17548388e-01, 1.75271446e-15,
         4.69952046e-01, 2.35148543e-15, 0.00000000e+00, 0.00000000e+00]),
  'return': np.float64(0.46414493816368757),
  'variance': np.float64(0.027052615776532712),
  'sharpe': np.float64(2.8219478673728426)},
 {'tickers': ['NVDA', 'PGR', 'FOX', 'JNPR', 'K', 'BSX', 'T', 'WMT'],
  'weights': array([0.08665345, 0.12596287, 0.11552138, 0.16406503, 0.09869546,
         0.14564759, 0.10080821, 0.16264601]),
  'return': np.float64(0.5678112370836165),
  'variance': np.float64(0.00813240516051581),
  'sharpe': np.float64(6.296431448485793)},
 56,
 5151,
 4659)

In [20]:
fig = go.Figure()

fig.add_trace(go.Scatter(
    x=[p["variance"]**0.5 for p in good_portfolios],  # Convert variance to volatility
    y=[p["return"] for p in good_portfolios],
    mode='markers',
    marker=dict(
        color=[p["sharpe"] for p in good_portfolios],  # Sharpe Ratio
        showscale=True,
        size=7,
        line=dict(width=1),
        colorscale="RdBu",
        colorbar=dict(title="Sharpe<br>Ratio")
    ),
    hoverinfo='text',
    text=[
        f"Return: {p['return']:.3%}<br>Volatility: {p['variance']**0.5:.3f}<br>" +
        f"Sharpe Ratio: {p['return'] / (p['variance']**0.5):.3f}<br>" +
        "<br>".join([f"{p['tickers'][i]}: Weight={p['weights'][i]:.3f}" for i in range(len(p['tickers']))])
        for p in good_portfolios
    ],
    name="Portfolios"
))

fig.update_layout(
    title='Convergence Strategy Generarted Portfolios',
    xaxis_title='Volatility (Standard Deviation)',
    yaxis_title='Annualized Return',
    legend=dict(y=5),
    width=1920,
    height=1080,
    font=dict(
        family="Cambria",
        size=18,
    )
)

fig.show()
#fig.write_html("ConvergenceRBA.html")
#fig.write_image("ConvergenceRBA.png", format='png', width=1920, height=1080)

fig.add_trace(go.Scatter(
    x=volatility,
    y=annualized_returns,
    mode='markers',
    hoverinfo='text',
    hovertext=[
        f"{name} <br>Volatility: {vol:.3f} <br>Returns: {ret:.3%} <br>Sharpe Ratio: {sr:.3f}"
        for name, vol, ret, sr in zip(names, volatility, annualized_returns, sharpe_ratios)
    ],
    marker=dict(
        color='green',
        size=5,
        line=dict(width=1)
    ),
    name="Individual Assets"
))

fig.update_layout(
    title='Convergence Strategy Generarted Portfolios with Individual Assets',
    xaxis_title='Volatility (Standard Deviation)',
    yaxis_title='Annualized Return',
    legend=dict(y=5),
    width=1920,
    height=1080,
    font=dict(
        family="Cambria",
        size=18,
    )
)


fig.show()

#fig.write_html("ConvergenceRBA+Asset.html")
#fig.write_image("ConvergenceRBA+Asset.png", format='png', width=1920, height=1080)


In [21]:
sharpe_ratios = [portfolio['sharpe'] for portfolio in total_portfolios]

fig = go.Figure(data=go.Scatter(x=list(range(len(sharpe_ratios))), y=sharpe_ratios, mode='lines+markers'))
fig.update_layout(title='Sharpe Ratio Over Iterations',
                xaxis_title='Iteration',
                yaxis_title='Sharpe Ratio',
                width=1920,
                height=1080,
                font=dict(
                    family="Cambria",
                    size=18,
                )
)
fig.show()

#### Comparing MLRBA_V1 with Monte Carlo

In [22]:
figC = go.Figure(fig2)
figC.add_trace(go.Scatter(
    x=[p["variance"]**0.5 for p in good_portfolios],  # Convert variance to volatility
    y=[p["return"] for p in good_portfolios],
    mode='markers',
    marker=dict(
        size=7,
        line=dict(width=1),
        color="Red",
    ),
    hoverinfo='text',
    text=[
        f"Return: {p['return']:.3%}<br>Volatility: {p['variance']**0.5:.3f}<br>" +
        f"Sharpe Ratio: {p['return'] / (p['variance']**0.5):.3f}<br>" +
        "<br>".join([f"{p['tickers'][i]}: Weight={p['weights'][i]:.3f}" for i in range(len(p['tickers']))])
        for p in good_portfolios
    ],
    name="Convergence Portfolios"
))

figC.update_layout(
    title='Convergence vs Monte Carlo vs Individual Assets',
    legend=dict(x=0.85, y=0.95),
    width=1920,
    height=1080,
    font=dict(
        family="Cambria",
        size=18,
    )
)

figC.show()
#figC.write_html("ComparisonOfConvergence+MC.html")
#figC.write_image("ComparisonOfConvergence+MC.png", format='png', width=1920, height=1080)

### 5.2 Reinforcement Weight Training

In [23]:
def MLRBA_V2(ticker, covariances, returns, num_iterations=None, risk_free_rate = 0, 
             return_power = 1, std_power = 1, return_weight=1/3, corr_weight=1/3, vol_weight= 1/3, num_assets = 8, base_portfolio = None):
    
    if num_iterations is None:
        num_iterations = min(math.comb(len(ticker), num_assets), 100000)

    if base_portfolio is None:
        base_portfolio = np.random.choice(list(ticker), num_assets, replace=False)
        #base_portfolio = list(ticker)[:num_assets]

    def _get_portfolio_stats(portfolio_assets, risk_free_rate=0):
        p_asset_ret = returns.loc[portfolio_assets].values
        p_asset_var = covariances.loc[portfolio_assets, portfolio_assets].values
        best_p_weights = maximize_sharpe(p_asset_ret, p_asset_var)
        p_ret = np.dot(best_p_weights, p_asset_ret)
        p_var = np.dot(best_p_weights, p_asset_var @ best_p_weights)
        sharpe = get_sharpe_ratio(p_ret, p_var, risk_free_rate, return_power, std_power)
        return p_asset_ret, p_asset_var, sharpe, p_ret, p_var, best_p_weights

    def _update_portfolios_array(portfolios, assets, weights, p_ret, p_var):
        portfolios.append({
            "tickers": assets,
            "weights": weights,
            "return": p_ret,
            "variance": p_var,
            "sharpe": (p_ret - risk_free_rate) / np.sqrt(p_var),
        })

    all_portfolios = []

    curr_ret, curr_var, curr_weighted_sharpe, curr_p_return, curr_p_variance, curr_p_weights = _get_portfolio_stats(base_portfolio, risk_free_rate)
    _update_portfolios_array(all_portfolios, base_portfolio, curr_p_weights, curr_p_return, curr_p_variance)

    good_portfolios = all_portfolios.copy()
    best_portfolio = base_portfolio.copy()

    highest_weighted_sharpe = -np.inf
    highest_weighted_sharpe = curr_weighted_sharpe

    best_iteration = 0
    portfolios_tested = 0

    learning_rate = 0.03
    improvement_threshold = 0.001

    progress_bar = tqdm(total=num_iterations, desc="Portfolios Tested")
    for i in range(num_iterations):
        asset_to_remove = find_best_asset_to_remove(best_portfolio, curr_var, curr_ret)
        new_portfolio = [str(asset) for asset in best_portfolio if asset != asset_to_remove]

        ranked_assets = find_asset_to_add(new_portfolio, ticker, covariances, returns, return_weight, corr_weight, vol_weight)

        asset_added = False

        for asset in ranked_assets.index:
            portfolios_tested += 1
            progress_bar.update(1)
            
            copy_new_portfolio = new_portfolio.copy()
            copy_new_portfolio.append(asset)

            new_returns, new_var, new_weighted_sharpe, new_p_return, new_p_variance, new_p_weights = _get_portfolio_stats(copy_new_portfolio, risk_free_rate)
            _update_portfolios_array(all_portfolios, copy_new_portfolio, new_p_weights, new_p_return, new_p_variance)

            if new_weighted_sharpe > highest_weighted_sharpe:
                best_iteration = portfolios_tested
                improvement = new_weighted_sharpe - highest_weighted_sharpe
                highest_weighted_sharpe = new_weighted_sharpe
                best_portfolio = copy_new_portfolio
                curr_ret, curr_var = new_returns, new_var

                asset_added = True

                asset_return = returns.loc[asset]
                asset_vol = np.sqrt(covariances.loc[asset, asset])
                avg_return = returns.mean()
                avg_vol = np.sqrt(np.diag(covariances)).mean()

                corr_with_portfolio = correlation_matrix.loc[copy_new_portfolio, asset].drop(asset).mean()
                avg_corr_in_portfolio = correlation_matrix.loc[copy_new_portfolio].drop(asset, axis=1).mean().mean()

                # Update weights using the current learning rate
                return_weight += learning_rate * (asset_return - avg_return) / avg_return
                vol_weight    += learning_rate * (avg_vol - asset_vol) / avg_vol
                corr_weight   += learning_rate * (avg_corr_in_portfolio - corr_with_portfolio) / avg_corr_in_portfolio

                total = return_weight + corr_weight + vol_weight
                return_weight /= total
                corr_weight /= total
                vol_weight /= total

                if improvement < improvement_threshold:
                    learning_rate *= 0.95
                else:
                    learning_rate *= 1.01

                _update_portfolios_array(good_portfolios, copy_new_portfolio, new_p_weights, new_p_return, new_p_variance)
                break  # stop at first valid improving asset

        if not asset_added:
            print("All assets have been tested or no improvement possible.")
            break

    base_details = good_portfolios[0]
    best_details = good_portfolios[-1]

    return base_details, best_details, good_portfolios, all_portfolios, best_iteration


base_portfolio, best_portfolio, good_portfolios, total_portfolios, best_iteration = MLRBA_V2(names, cov, annualized_returns)
best_portfolio, best_portfolio['sharpe'], best_iteration

Portfolios Tested:   1%|          | 936/100000 [00:05<10:24, 158.69it/s]

All assets have been tested or no improvement possible.





({'tickers': ['JNPR', 'NVDA', 'PGR', 'WMT', 'FOX', 'BSX', 'K', 'T'],
  'weights': array([0.16406503, 0.08665345, 0.12596287, 0.16264601, 0.11552137,
         0.14564759, 0.09869546, 0.10080821]),
  'return': np.float64(0.5678112374614321),
  'variance': np.float64(0.008132405171333372),
  'sharpe': np.float64(6.296431448487679)},
 np.float64(6.296431448487679),
 445)

In [24]:
fig = go.Figure()

fig.add_trace(go.Scatter(
    x=[p["variance"]**0.5 for p in good_portfolios],  # Convert variance to volatility
    y=[p["return"] for p in good_portfolios],
    mode='markers',
    marker=dict(
        color=[p["sharpe"] for p in good_portfolios],  # Sharpe Ratio
        showscale=True,
        size=7,
        line=dict(width=1),
        colorscale="RdBu",
        colorbar=dict(title="Sharpe<br>Ratio")
    ),
    hoverinfo='text',
    text=[
        f"Return: {p['return']:.3%}<br>Volatility: {p['variance']**0.5:.3f}<br>" +
        f"Sharpe Ratio: {p['return'] / (p['variance']**0.5):.3f}<br>" +
        "<br>".join([f"{p['tickers'][i]}: Weight={p['weights'][i]:.3f}" for i in range(len(p['tickers']))])
        for p in good_portfolios
    ],
    name="Portfolios"
))

fig.update_layout(
    title='Learning Convergence Portfolios',
    xaxis_title='Volatility (Standard Deviation)',
    yaxis_title='Annualized Return',
    legend=dict(y=5),
    width=1920,
    height=1080,
    font=dict(
        family="Cambria",
        size=18,
    )
)

fig.show()
#fig.write_html("LearningConvergenceRBA.html")
#fig.write_image("LearningConvergenceRBA.png", format='png', width=1920, height=1080)

fig.add_trace(go.Scatter(
    x=volatility,
    y=annualized_returns,
    mode='markers',
    hoverinfo='text',
    hovertext=[
        f"{name} <br>Volatility: {vol:.3f} <br>Returns: {ret:.3%} <br>Sharpe Ratio: {sr:.3f}"
        for name, vol, ret, sr in zip(names, volatility, annualized_returns, sharpe_ratios)
    ],
    marker=dict(
        color='green',
        size=5,
        line=dict(width=1)
    ),
    name="Individual Assets"
))

fig.update_layout(
    title='Learning Convergence Portfolios with Individual Assets',
    xaxis_title='Volatility (Standard Deviation)',
    yaxis_title='Annualized Return',
    legend=dict(y=5),
    width=1920,
    height=1080,
    font=dict(
        family="Cambria",
        size=18,
    )
)


fig.show()
#fig.write_html("LearningConvergenceRBA+Asset.html")
#fig.write_image("LearningConvergenceRBA+Asset.png", format='png', width=1920, height=1080)


In [25]:
sharpe_ratios = [portfolio['sharpe'] for portfolio in total_portfolios]

fig = go.Figure(data=go.Scatter(x=list(range(len(sharpe_ratios))), y=sharpe_ratios, mode='lines+markers'))
fig.update_layout(title='Sharpe Ratio Over Iterations',
                  xaxis_title='Iteration',
                  yaxis_title='Sharpe Ratio',
                  )
fig.show()

In [26]:
figC = go.Figure(fig2)
figC.add_trace(go.Scatter(
    x=[p["variance"]**0.5 for p in good_portfolios],  # Convert variance to volatility
    y=[p["return"] for p in good_portfolios],
    mode='markers',
    marker=dict(
        size=7,
        line=dict(width=1),
        color="Red",
    ),
    hoverinfo='text',
    text=[
        f"Return: {p['return']:.3%}<br>Volatility: {p['variance']**0.5:.3f}<br>" +
        f"Sharpe Ratio: {p['return'] / (p['variance']**0.5):.3f}<br>" +
        "<br>".join([f"{p['tickers'][i]}: Weight={p['weights'][i]:.3f}" for i in range(len(p['tickers']))])
        for p in good_portfolios
    ],
    name="Learning Convergence Portfolios"
))

figC.update_layout(
    title='Learning Convergence vs Monte Carlo vs Individual Assets',
    legend=dict(x=0.8, y=0.95),
    width=1920,
    height=1080,
    font=dict(
        family="Cambria",
        size=18,
    )
)

figC.show()
#figC.write_html("ComparisonOfLearningConvergence+MC.html")
#figC.write_image("ComparisonOfLearningConvergence+MC.png", format='png', width=1920, height=1080)

### Comparing V1 and V2

In [27]:
def run_MLRBA_multiple_times(names, cov, annualized_returns, num_runs, num_assets=8):

    def generate_rand_port(tickers, num_assets, num_runs):
        rand_port = []
        for _ in range (num_runs):
            base_portfolio = np.random.choice(list(tickers), num_assets, replace=False)
            rand_port.append(base_portfolio)

        return rand_port
    
    random_portfolios = generate_rand_port(names, num_assets=num_assets, num_runs=num_runs)

    total_good_portfolios_length_v1 = 0
    best_portfolios_v1 = []
    best_iterations_v1 = []
    
    total_good_portfolios_length_v2 = 0
    best_portfolios_v2 = []
    best_iterations_v2 = []
    
    for portfolio in random_portfolios:
        base_portfolio_v1, best_portfolio_v1, good_portfolios_v1, _, best_iteration_v1 = MLRBA_V1(names, cov, annualized_returns, base_portfolio=portfolio)
        total_good_portfolios_length_v1 += len(good_portfolios_v1)
        best_portfolios_v1.append(best_portfolio_v1)
        best_iterations_v1.append(best_iteration_v1)
        
        base_portfolio_v2, best_portfolio_v2, good_portfolios_v2, _, best_iteration_v2 = MLRBA_V2(names, cov, annualized_returns, base_portfolio=portfolio)
        total_good_portfolios_length_v2 += len(good_portfolios_v2)
        best_portfolios_v2.append(best_portfolio_v2)
        best_iterations_v2.append(best_iteration_v2)

        print(base_portfolio_v1['tickers'] == base_portfolio_v2['tickers'])

    average_length_v1 = total_good_portfolios_length_v1 / num_runs
    average_iteration_v1 = statistics.fmean(best_iterations_v1)
    std_dev_iteration_v1 = statistics.stdev(best_iterations_v1) if num_runs > 1 else 0

    average_length_v2 = total_good_portfolios_length_v2 / num_runs
    average_iteration_v2 = statistics.fmean(best_iterations_v2)
    std_dev_iteration_v2 = statistics.stdev(best_iterations_v2) if num_runs > 1 else 0

    
    results = {
        'v1': (base_portfolio_v1, average_length_v1, best_portfolios_v1, average_iteration_v1, std_dev_iteration_v1, best_iterations_v1),
        'v2': (base_portfolio_v2, average_length_v2, best_portfolios_v2, average_iteration_v2, std_dev_iteration_v2, best_iterations_v2)
    }
    
    return results

num_runs = 20
results = run_MLRBA_multiple_times(names, cov, annualized_returns, num_runs)

_, _, best_portfolios_v1, average_iteration_v1, std_dev_v1, best_iterations_v1 = results['v1']
_, _, best_portfolios_v2, average_iteration_v2, std_dev_v2, best_iterations_v2 = results['v2']

Portfolios Tested:   3%|▎         | 2502/100000 [00:15<10:07, 160.56it/s]


All assets have been tested or no improvement found.


Portfolios Tested:   1%|          | 711/100000 [00:04<10:29, 157.70it/s]


All assets have been tested or no improvement possible.
[ True  True  True  True  True  True  True  True]


Portfolios Tested:   3%|▎         | 3226/100000 [00:18<09:06, 177.24it/s]


All assets have been tested or no improvement found.


Portfolios Tested:   1%|          | 781/100000 [00:04<09:51, 167.68it/s]


All assets have been tested or no improvement possible.
[ True  True  True  True  True  True  True  True]


Portfolios Tested:   2%|▏         | 2300/100000 [00:14<10:07, 160.85it/s]


All assets have been tested or no improvement found.


Portfolios Tested:   1%|          | 607/100000 [00:04<11:31, 143.78it/s]


All assets have been tested or no improvement possible.
[ True  True  True  True  True  True  True  True]


Portfolios Tested:   4%|▍         | 4183/100000 [00:25<09:38, 165.60it/s]


All assets have been tested or no improvement found.


Portfolios Tested:   1%|          | 1003/100000 [00:06<09:58, 165.40it/s]


All assets have been tested or no improvement possible.
[ True  True  True  True  True  True  True  True]


Portfolios Tested:   3%|▎         | 3255/100000 [00:18<09:20, 172.75it/s]


All assets have been tested or no improvement found.


Portfolios Tested:   1%|          | 691/100000 [00:04<09:42, 170.53it/s]


All assets have been tested or no improvement possible.
[ True  True  True  True  True  True  True  True]


Portfolios Tested:   4%|▍         | 4324/100000 [00:25<09:29, 167.90it/s]


All assets have been tested or no improvement found.


Portfolios Tested:   1%|          | 979/100000 [00:06<10:09, 162.57it/s]


All assets have been tested or no improvement possible.
[ True  True  True  True  True  True  True  True]


Portfolios Tested:   4%|▍         | 4304/100000 [00:25<09:19, 171.11it/s]


All assets have been tested or no improvement found.


Portfolios Tested:   1%|          | 1030/100000 [00:06<10:14, 161.01it/s]


All assets have been tested or no improvement possible.
[ True  True  True  True  True  True  True  True]


Portfolios Tested:   4%|▎         | 3691/100000 [00:21<09:27, 169.74it/s]


All assets have been tested or no improvement found.


Portfolios Tested:   1%|          | 1051/100000 [00:06<10:30, 156.86it/s]


All assets have been tested or no improvement possible.
[ True  True  True  True  True  True  True  True]


Portfolios Tested:   4%|▍         | 3804/100000 [00:23<09:57, 161.13it/s]


All assets have been tested or no improvement found.


Portfolios Tested:   1%|          | 968/100000 [00:06<10:57, 150.65it/s]


All assets have been tested or no improvement possible.
[ True  True  True  True  True  True  True  True]


Portfolios Tested:   3%|▎         | 2806/100000 [00:17<10:23, 155.98it/s]


All assets have been tested or no improvement found.


Portfolios Tested:   1%|          | 821/100000 [00:05<10:22, 159.41it/s]


All assets have been tested or no improvement possible.
[ True  True  True  True  True  True  True  True]


Portfolios Tested:   3%|▎         | 2815/100000 [00:15<08:56, 181.24it/s]


All assets have been tested or no improvement found.


Portfolios Tested:   1%|          | 687/100000 [00:04<09:57, 166.26it/s]


All assets have been tested or no improvement possible.
[ True  True  True  True  True  True  True  True]


Portfolios Tested:   4%|▎         | 3522/100000 [00:21<09:42, 165.74it/s]


All assets have been tested or no improvement found.


Portfolios Tested:   1%|          | 731/100000 [00:04<10:21, 159.77it/s]


All assets have been tested or no improvement possible.
[ True  True  True  True  True  True  True  True]


Portfolios Tested:   3%|▎         | 3184/100000 [00:20<10:13, 157.75it/s]


All assets have been tested or no improvement found.


Portfolios Tested:   1%|          | 912/100000 [00:05<10:49, 152.62it/s]


All assets have been tested or no improvement possible.
[ True  True  True  True  True  True  True  True]


Portfolios Tested:   3%|▎         | 2529/100000 [00:14<09:09, 177.42it/s]


All assets have been tested or no improvement found.


Portfolios Tested:   1%|          | 654/100000 [00:04<10:07, 163.42it/s]


All assets have been tested or no improvement possible.
[ True  True  True  True  True  True  True  True]


Portfolios Tested:   4%|▎         | 3642/100000 [00:21<09:19, 172.13it/s]


All assets have been tested or no improvement found.


Portfolios Tested:   1%|          | 920/100000 [00:05<10:24, 158.60it/s]


All assets have been tested or no improvement possible.
[ True  True  True  True  True  True  True  True]


Portfolios Tested:   3%|▎         | 3390/100000 [00:20<09:32, 168.80it/s]


All assets have been tested or no improvement found.


Portfolios Tested:   1%|          | 938/100000 [00:05<10:21, 159.48it/s]


All assets have been tested or no improvement possible.
[ True  True  True  True  True  True  True  True]


Portfolios Tested:   4%|▍         | 3984/100000 [00:24<09:49, 162.79it/s]


All assets have been tested or no improvement found.


Portfolios Tested:   1%|          | 891/100000 [00:05<10:46, 153.18it/s]


All assets have been tested or no improvement possible.
[ True  True  True  True  True  True  True  True]


Portfolios Tested:   5%|▍         | 4504/100000 [00:26<09:26, 168.69it/s]


All assets have been tested or no improvement found.


Portfolios Tested:   1%|          | 1025/100000 [00:06<10:15, 160.68it/s]


All assets have been tested or no improvement possible.
[ True  True  True  True  True  True  True  True]


Portfolios Tested:   4%|▎         | 3523/100000 [00:21<09:38, 166.88it/s]


All assets have been tested or no improvement found.


Portfolios Tested:   1%|          | 850/100000 [00:05<10:15, 160.98it/s]


All assets have been tested or no improvement possible.
[ True  True  True  True  True  True  True  True]


Portfolios Tested:   3%|▎         | 2920/100000 [00:17<09:56, 162.64it/s]


All assets have been tested or no improvement found.


Portfolios Tested:   1%|          | 887/100000 [00:05<10:28, 157.69it/s]

All assets have been tested or no improvement possible.
[ True  True  True  True  True  True  True  True]





In [28]:
from plotly.subplots import make_subplots

avg_sharpe_v1 = np.mean([portfolio['sharpe'] for portfolio in best_portfolios_v1])
avg_sharpe_v2 = np.mean([portfolio['sharpe'] for portfolio in best_portfolios_v2])

fig = make_subplots(
    rows=1, cols=2,
    subplot_titles=[
        "Average Iterations to Find Best Portfolio",
        "Average Highest Sharpe Ratio"
    ]
)

fig.add_trace(go.Bar(
    x=['Standard Convergence', 'Learning Convergergence'],
    y=[average_iteration_v1, average_iteration_v2],
    name='Iterations',
    error_y=dict(type='data', array=[std_dev_v1, std_dev_v2], visible=True),
    width=0.4
), row=1, col=1)

fig.add_trace(go.Bar(
    x=['Standard Convergence', 'Learning Convergergence'],
    y=[avg_sharpe_v1, avg_sharpe_v2],
    name='Sharpe Ratio',
    width=0.4
), row=1, col=2)

# Axis titles
fig.update_xaxes(title_text='Method Version', row=1, col=1)
fig.update_yaxes(title_text='Average Iterations', row=1, col=1)

fig.update_xaxes(title_text='Method Version', row=1, col=2)
fig.update_yaxes(title_text='Average Sharpe Ratio', row=1, col=2)

# Layout and display
fig.update_layout(
    title_text='Standard Convergence vs Learning Convergence',
    showlegend=False,
    font=dict(
        family="Cambria",
        size=18,
    )
)

fig.show()
#fig.write_html("StandardvsLearning.html")
#fig.write_image("StandardvsLearning.png", format='png')

## 6.0 Portfolio Prediction using LSTM

In [81]:
class PortfolioPredictor:
    def __init__(self, raw_data_train, raw_data_test, best_portfolio, n_steps=1, epochs=50, batch_size=32):
        self.raw_data_train = raw_data_train
        self.raw_data_test = raw_data_test
        self.best_portfolio = best_portfolio
        self.n_steps = n_steps
        self.epochs = epochs
        self.batch_size = batch_size
        self.model = None
        self.history = None

    def preprocess_data(self):
        best_portfolio_data_train = self.raw_data_train[self.best_portfolio['tickers']]
        best_portfolio_data_test = self.raw_data_test[self.best_portfolio['tickers']]
        weights = np.array(self.best_portfolio['weights'])

        # Use a scaler fitted on a broader dataset so that training/test normalization is consistent
        self.scaler = MinMaxScaler(feature_range=(0, 1))
        # Fit on the entire raw_data (or on a fixed training period) for consistency
        full_data = pd.concat([best_portfolio_data_train, best_portfolio_data_test])
        self.scaler.fit(full_data)
        
        normalized_train_data = self.scaler.transform(best_portfolio_data_train)
        normalized_test_data = self.scaler.transform(best_portfolio_data_test)

        self.weighted_returns_train = np.dot(normalized_train_data, weights)
        self.weighted_returns_test = np.dot(normalized_test_data, weights)

    def create_datasets(self, data):
        X, y = [], []
        for i in range(len(data) - self.n_steps):
            v = data[i:(i + self.n_steps), :]
            X.append(v)
            y.append(data[i + self.n_steps, :])
        return np.array(X), np.array(y)

    def build_model(self):
        self.model = Sequential([
            LSTM(250, activation='relu', return_sequences=True),
            Dropout(0.2),
            LSTM(50, activation='relu', return_sequences=False),
            Dropout(0.2),
            Dense(1),
        ])

        def tf_weighted_mse(y_true, y_pred, power=3):
            n = tf.shape(y_true)[0]
            normalized_index = tf.cond(
                tf.equal(n, 1),
                lambda: tf.ones([n], dtype=tf.float32),
                lambda: tf.cast(tf.range(n), tf.float32) / tf.cast(n - 1, tf.float32)
            )
            weights = tf.pow(normalized_index, power)
            weights += 1e-6
            weights /= tf.reduce_sum(weights)
            
            squared_errors = tf.square(y_true - y_pred)
            weighted_squared_errors = weights * squared_errors
            return tf.reduce_mean(weighted_squared_errors)

        self.model.compile(optimizer='adam', loss=tf_weighted_mse)

    def train_model(self):
        self.X_train_weighted, self.y_train_weighted = self.create_datasets(self.weighted_returns_train.reshape(-1, 1))
        self.history = self.model.fit(self.X_train_weighted, self.y_train_weighted, epochs=self.epochs, batch_size=self.batch_size, validation_split=0.001, shuffle=False, verbose=0)

    def predict(self):
        X_test_weighted, y_test_weighted = self.create_datasets(self.weighted_returns_test.reshape(-1, 1))
        
        self.predictions = self.model.predict(X_test_weighted)
        self.y_test_weighted = y_test_weighted
        
        return self.predictions

    def normalize_cumulative_returns(self, data):
        data_series = pd.Series(data.flatten())
        pct_change = data_series.pct_change().fillna(0)
        cum_returns = (1 + pct_change).cumprod()
        normalized_returns = cum_returns * 100
        return normalized_returns

    def normalize_cumulative_returns_with_baseline(self, data, baseline):
        data_series = pd.Series(data.flatten())
        pct_change = data_series.pct_change().fillna(0)
        cum_returns = (1 + pct_change).cumprod()
        normalized_returns = cum_returns * baseline
        return normalized_returns

    def plot_loss(self):
        fig = go.Figure()
        fig.add_trace(go.Scatter(x=np.arange(1, len(self.history.history['loss'])+1), y=self.history.history['loss'], mode='lines', name='Training Loss'))
        fig.add_trace(go.Scatter(x=np.arange(1, len(self.history.history['val_loss'])+1), y=self.history.history['val_loss'], mode='lines', name='Validation Loss'))
        fig.update_layout(title='Training and Validation Loss Over Epochs',
                          xaxis_title='Epoch',
                          yaxis_title='Loss',
                          legend_title='Type of Loss')
        fig.show()
        
    def plot_predictions(self):
        normalized_train = self.normalize_cumulative_returns(self.y_train_weighted)
        training_end_value = normalized_train.iloc[-1]
        normalized_test = self.normalize_cumulative_returns_with_baseline(self.y_test_weighted, training_end_value)
        normalized_predicted = self.normalize_cumulative_returns_with_baseline(self.predictions, training_end_value)

        fig = go.Figure()
        fig.add_trace(go.Scatter(
            x=self.raw_data_train.index,
            y=normalized_train,
            mode='lines',
            name='Actual Training Returns'
        ))
        fig.add_trace(go.Scatter(
            x=self.raw_data_test.index[self.n_steps:],
            y=normalized_test,
            mode='lines',
            name='Actual Test Returns'
        ))
        fig.add_trace(go.Scatter(
            x=self.raw_data_test.index[self.n_steps:],
            y=normalized_predicted,
            mode='lines',
            name='Predicted Test Returns'
        ))
        fig.update_layout(
            title='Actual vs Predicted Weighted Portfolio Returns',
            xaxis_title='Date',
            yaxis_title='Normalized Returns',
            legend_title='Portfolio'
        )
        fig.show()

In [82]:
investment_length = 60
investment_length -= 1
new_end_date = add_days_to_date(end_date, investment_length)

raw_data, asset_errors, max_combination= fetch_raw_data_yf(assets, start_date, new_end_date)
names, annualized_returns, unweighted_annaulized_returns, weighted_returns_matrix, normal_returns_matrix, cov, correlation_matrix = get_matrices(raw_data)

split = len(raw_data.index) - investment_length
raw_data_train = raw_data.iloc[:split]
raw_data_test = raw_data.iloc[split:]

portfolio_predictor = PortfolioPredictor(raw_data_train, raw_data_test, best_portfolio, n_steps=3, epochs=30)

portfolio_predictor.preprocess_data()
portfolio_predictor.build_model()
portfolio_predictor.train_model()
prediction = portfolio_predictor.predict()   
portfolio_predictor.plot_loss()
portfolio_predictor.plot_predictions()

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%********

Omitted assets: ['BF.B', 'BRK.B']
Time to fetch data: 45.08 seconds
Max combination of assets with complete data: 498
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 155ms/step


In [None]:
def evaluate_portfolios_over_time(raw_data, window_size=5, num_windows=None, threshold=0.05, epochs=30):
    split = len(raw_data.index) - investment_length
    all_good_portfolios = []
    if num_windows is None:
        num_windows = investment_length // window_size
    
    previous_best_portfolio = None 

    for i in tqdm(range(num_windows)):
        curr_split = i * window_size

        loop_raw_data_train = raw_data.iloc[:split + curr_split]
        loop_raw_data_test = raw_data.iloc[split + curr_split:]
        loop_names, loop_annualized_returns, _, _, _, loop_cov, _ = get_matrices(loop_raw_data_train)
        
        _, loop_best_portfolio, loop_good_portfolios, _, _ = MLRBA_V2(loop_names, loop_cov, loop_annualized_returns)
        best_sharpe = loop_best_portfolio['sharpe']
        
        close_to_best = []
        if previous_best_portfolio is not None:
            close_to_best.append(previous_best_portfolio)
        close_to_best.append(loop_best_portfolio)
        
        for j in range(len(loop_best_portfolio)):
            difference = abs((best_sharpe - loop_good_portfolios[j]['sharpe']) / best_sharpe)
            if difference < threshold:
                close_to_best.append(loop_good_portfolios[j])

        print(f'Length of close to best is: {len(close_to_best)}')

        sharpe_list = [portfolio['sharpe'] for portfolio in close_to_best]
        print("Sharpe ratios (first is best_sharpe):", sharpe_list)
        
        portfolio_results = {}
        for id, portfolio in enumerate(close_to_best):
            portfolio_predictor = PortfolioPredictor(loop_raw_data_train, loop_raw_data_test, portfolio, n_steps=window_size, epochs=epochs)
            portfolio_predictor.preprocess_data()
            portfolio_predictor.build_model()
            portfolio_predictor.train_model()
            prediction = portfolio_predictor.predict() 

            if len(prediction) >= window_size:
                end_pred = prediction[window_size-1]
            else:
                end_pred = prediction[-1]
            
            percentage_diff = (end_pred - prediction[0]) / prediction[0]
            print(prediction[:min(window_size, len(prediction))], prediction[0], percentage_diff * 100)          
            
            portfolio_results[id] = percentage_diff

        best_id = None

        # Check if all predictions (percentage_diff) are negative
        if max(portfolio_results.values()) < 0:
            print("All percentage differences are negative. Choosing an empty portfolio (not holding anything).")
            predicted_best_portfolio = {}
        else:
            best_id = max(portfolio_results, key=portfolio_results.get)
            predicted_best_portfolio = close_to_best[best_id]
            previous_best_portfolio = predicted_best_portfolio
        
        start_date = loop_raw_data_test.index[0]
        end_date = loop_raw_data_test.index[window_size-1]
        
        all_good_portfolios.append({
            "portfolio": predicted_best_portfolio,
            "start_date": start_date,
            "end_date": end_date
        })
        if best_id is not None:
            print(f'Current iteration: {i}, the best portfolio found was portfolio: {best_id}')
        else:
            print(f'Current iteration: {i}, no portfolio selected (empty portfolio chosen).')
    
    return all_good_portfolios

In [84]:
all_good_portfolios = evaluate_portfolios_over_time(raw_data, window_size=5, num_windows=None, threshold=0.5, epochs=20)

Portfolios Tested:   1%|          | 568/100000 [00:03<09:41, 171.10it/s]


All assets have been tested or no improvement possible.
Length of close to best is: 6
Sharpe ratios (first is best_sharpe): [np.float64(6.153980807799063), np.float64(3.091975766930055), np.float64(3.198735473664291), np.float64(3.906707972423398), np.float64(4.379821598816636), np.float64(4.393786882298435)]
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 157ms/step
[[0.70620733]
 [0.709071  ]
 [0.7058082 ]
 [0.6966428 ]
 [0.6895444 ]] [0.70620733] [-2.359499]
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 155ms/step
[[0.93182325]
 [0.90752685]
 [0.8761602 ]
 [0.84142315]
 [0.80924773]] [0.93182325] [-13.154374]


Portfolios Tested:   0%|          | 124/100000 [12:13<164:00:12,  5.91s/it]


[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 157ms/step
[[0.8054052 ]
 [0.7844532 ]
 [0.75775206]
 [0.72767854]
 [0.70050806]] [0.8054052] [-13.024145]
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 166ms/step
[[0.8774135 ]
 [0.85870665]
 [0.83210826]
 [0.8026547 ]
 [0.77587247]] [0.8774135] [-11.572769]
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 168ms/step
[[0.7170722 ]
 [0.7029579 ]
 [0.68149126]
 [0.6574321 ]
 [0.63574743]] [0.7170722] [-11.341223]
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 164ms/step
[[0.80035526]
 [0.78582186]
 [0.7631425 ]
 [0.73727024]
 [0.71323895]] [0.80035526] [-10.884704]
All percentage differences are negative. Choosing an empty portfolio (not holding anything).
Current iteration: 0, no portfolio selected (empty portfolio chosen).


Portfolios Tested:   1%|          | 831/100000 [00:05<10:25, 158.61it/s]


All assets have been tested or no improvement possible.
Length of close to best is: 3
Sharpe ratios (first is best_sharpe): [np.float64(6.321990925272749), np.float64(3.557544346546154), np.float64(3.680188844315587)]
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 173ms/step
[[0.6991193 ]
 [0.7001763 ]
 [0.69699943]
 [0.693495  ]
 [0.6862671 ]] [0.6991193] [-1.8383489]
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 160ms/step
[[0.7485594 ]
 [0.7413449 ]
 [0.72772646]
 [0.7096593 ]
 [0.6896959 ]] [0.7485594] [-7.8635736]
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 159ms/step
[[0.6991016 ]
 [0.6930083 ]
 [0.68077207]
 [0.6651125 ]
 [0.6477564 ]] [0.6991016] [-7.344458]
All percentage differences are negative. Choosing an empty portfolio (not holding anything).
Current iteration: 1, no portfolio selected (empty portfolio chosen).


Portfolios Tested:   1%|          | 862/100000 [00:05<10:20, 159.79it/s]


All assets have been tested or no improvement possible.
Length of close to best is: 3
Sharpe ratios (first is best_sharpe): [np.float64(6.351720431268793), np.float64(3.4478009131564886), np.float64(3.5956894352494415)]
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 161ms/step
[[0.60034907]
 [0.5927152 ]
 [0.59625465]
 [0.6013892 ]
 [0.6035052 ]] [0.60034907] [0.52571505]
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 175ms/step
[[0.693327 ]
 [0.6957772]
 [0.7140952]
 [0.7266992]
 [0.7325693]] [0.693327] [5.659994]
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 167ms/step
[[0.6818177 ]
 [0.6871865 ]
 [0.7064205 ]
 [0.7193204 ]
 [0.72353405]] [0.6818177] [6.1184006]
Current iteration: 2, the best portfolio found was portfolio: 2


Portfolios Tested:   1%|          | 769/100000 [00:05<12:16, 134.81it/s]


All assets have been tested or no improvement possible.
Length of close to best is: 6
Sharpe ratios (first is best_sharpe): [np.float64(3.5956894352494415), np.float64(5.969545380047273), np.float64(2.9849007548285704), np.float64(3.5065028963069134), np.float64(3.6513205814014307), np.float64(4.068359325932084)]
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 229ms/step
[[0.67762345]
 [0.67107123]
 [0.6667828 ]
 [0.6726354 ]
 [0.68258816]] [0.67762345] [0.7326649]
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 237ms/step
[[0.76956946]
 [0.76226825]
 [0.75582224]
 [0.7549299 ]
 [0.76024586]] [0.76956946] [-1.2115341]
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 191ms/step
[[0.6208697 ]
 [0.61141384]
 [0.6003753 ]
 [0.59142303]
 [0.5880643 ]] [0.6208697] [-5.283779]
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 235ms/step
[[0.6406965 ]
 [0.6315247 ]
 [0.6218049 ]
 [0.61583376]
 [0.61581314]] [0.6406965] [-3.883803]
[1m2/2[0

Portfolios Tested:   1%|          | 816/100000 [00:06<12:37, 130.91it/s]


All assets have been tested or no improvement possible.
Length of close to best is: 4
Sharpe ratios (first is best_sharpe): [np.float64(3.5956894352494415), np.float64(6.217219688305028), np.float64(3.156634635580492), np.float64(3.606785024273933)]
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 215ms/step
[[0.7386992]
 [0.7485683]
 [0.7402317]
 [0.725195 ]
 [0.7227186]] [0.7386992] [-2.1633434]
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 215ms/step
[[0.76115376]
 [0.7704884 ]
 [0.77482516]
 [0.7788547 ]
 [0.7847449 ]] [0.76115376] [3.099395]
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 230ms/step
[[0.59480417]
 [0.59756935]
 [0.589133  ]
 [0.5892627 ]
 [0.5958555 ]] [0.59480417] [0.17674838]
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 232ms/step
[[0.677283  ]
 [0.68115634]
 [0.6744693 ]
 [0.67211366]
 [0.6814453 ]] [0.677283] [0.6145602]
Current iteration: 4, the best portfolio found was portfolio: 1


Portfolios Tested:   1%|          | 886/100000 [00:06<12:47, 129.19it/s]


All assets have been tested or no improvement possible.
Length of close to best is: 6
Sharpe ratios (first is best_sharpe): [np.float64(6.217219688305028), np.float64(6.221052158435961), np.float64(3.427124228703048), np.float64(3.851966492997067), np.float64(3.8519664930009005), np.float64(4.261499725511627)]
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 175ms/step
[[0.8083264]
 [0.813238 ]
 [0.8223761]
 [0.8320725]
 [0.8400098]] [0.8083264] [3.9196274]
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 175ms/step
[[0.695006  ]
 [0.6983765 ]
 [0.7066246 ]
 [0.7147673 ]
 [0.72135055]] [0.695006] [3.790548]
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 189ms/step
[[0.7415045 ]
 [0.7556576 ]
 [0.77468616]
 [0.7885939 ]
 [0.80315465]] [0.7415045] [8.314199]
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 176ms/step
[[0.7560329]
 [0.7665749]
 [0.7841457]
 [0.7957846]
 [0.8106648]] [0.7560329] [7.2261257]
[1m1/1[0m [32m━━━━━━━━━━━━

Portfolios Tested:   1%|          | 734/100000 [00:05<12:14, 135.22it/s]


All assets have been tested or no improvement possible.
Length of close to best is: 7
Sharpe ratios (first is best_sharpe): [np.float64(3.427124228703048), np.float64(5.909067323352231), np.float64(2.9926694114316104), np.float64(3.197872390022826), np.float64(3.4599141660028687), np.float64(3.6895550557098353), np.float64(3.6895550557098473)]
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 180ms/step
[[0.8374149 ]
 [0.8412259 ]
 [0.83264524]
 [0.82376504]
 [0.81712127]] [0.8374149] [-2.423369]
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 185ms/step
[[0.7929876 ]
 [0.7995548 ]
 [0.8005039 ]
 [0.7968428 ]
 [0.78450507]] [0.7929876] [-1.0696908]
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 192ms/step
[[0.7986445 ]
 [0.8022522 ]
 [0.7943183 ]
 [0.78159875]
 [0.76517814]] [0.7986445] [-4.1903925]
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 172ms/step
[[0.74828714]
 [0.7529836 ]
 [0.74697185]
 [0.7360368 ]
 [0.7215501 ]] [0.7

Portfolios Tested:   1%|          | 939/100000 [00:06<12:02, 137.08it/s]


All assets have been tested or no improvement possible.
Length of close to best is: 5
Sharpe ratios (first is best_sharpe): [np.float64(3.427124228703048), np.float64(6.073229821269254), np.float64(3.412368102259525), np.float64(3.55594449157502), np.float64(3.555944493849705)]
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 178ms/step
[[0.73600036]
 [0.7418423 ]
 [0.7530765 ]
 [0.7692691 ]
 [0.7766369 ]] [0.73600036] [5.521266]
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 195ms/step
[[0.7800058]
 [0.7623216]
 [0.757725 ]
 [0.7644447]
 [0.776434 ]] [0.7800058] [-0.45792073]
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 176ms/step
[[0.6194308 ]
 [0.6305812 ]
 [0.644203  ]
 [0.66193753]
 [0.6663824 ]] [0.6194308] [7.579797]
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 181ms/step
[[0.57949984]
 [0.589393  ]
 [0.6005778 ]
 [0.6140843 ]
 [0.6162136 ]] [0.57949984] [6.3354254]
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m 

Portfolios Tested:   1%|          | 805/100000 [00:06<12:44, 129.70it/s]


All assets have been tested or no improvement possible.
Length of close to best is: 3
Sharpe ratios (first is best_sharpe): [np.float64(3.412368102259525), np.float64(6.126618294058226), np.float64(3.3199773246428568)]
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 181ms/step
[[0.6336558 ]
 [0.6595507 ]
 [0.67476934]
 [0.68274736]
 [0.6946904 ]] [0.6336558] [9.632141]
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 176ms/step
[[0.8896656 ]
 [0.9082198 ]
 [0.91729605]
 [0.9197434 ]
 [0.9263662 ]] [0.8896656] [4.1252136]
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 176ms/step
[[0.7441435 ]
 [0.76364505]
 [0.76563025]
 [0.75418365]
 [0.72485125]] [0.7441435] [-2.5925424]
Current iteration: 8, the best portfolio found was portfolio: 0


Portfolios Tested:   1%|          | 686/100000 [00:05<13:43, 120.56it/s]


All assets have been tested or no improvement possible.
Length of close to best is: 4
Sharpe ratios (first is best_sharpe): [np.float64(3.412368102259525), np.float64(5.787295164861739), np.float64(3.192100711671374), np.float64(3.3816773522724164)]
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 157ms/step
[[0.71469724]
 [0.726319  ]
 [0.7360187 ]
 [0.75429463]
 [0.7641497 ]] [0.71469724] [6.919361]
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 161ms/step
[[0.8786286 ]
 [0.88529897]
 [0.89487815]
 [0.89827317]
 [0.8908518 ]] [0.8786286] [1.3911662]
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 159ms/step
[[0.64002955]
 [0.5897714 ]
 [0.577528  ]
 [0.5653759 ]
 [0.55551016]] [0.64002955] [-13.205544]
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 184ms/step
[[0.65882075]
 [0.6260026 ]
 [0.6191693 ]
 [0.6137941 ]
 [0.6071002 ]] [0.65882075] [-7.850475]
Current iteration: 9, the best portfolio found was portfolio: 0


Portfolios Tested:   1%|          | 711/100000 [00:04<11:23, 145.24it/s]


All assets have been tested or no improvement possible.
Length of close to best is: 7
Sharpe ratios (first is best_sharpe): [np.float64(3.412368102259525), np.float64(5.79520041211257), np.float64(2.997230941581279), np.float64(3.015108901741802), np.float64(3.46383779736459), np.float64(3.6547725943378024), np.float64(3.7102911540730257)]
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 204ms/step
[[0.81710863]
 [0.8299056 ]
 [0.83327854]
 [0.82737446]] [0.81710863] [1.25636]
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 173ms/step
[[0.7872118]
 [0.7740804]
 [0.7569987]
 [0.7480517]] [0.7872118] [-4.9745283]
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 176ms/step
[[0.79082507]
 [0.7863193 ]
 [0.780893  ]
 [0.7848377 ]] [0.79082507] [-0.75710124]
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 198ms/step
[[0.60899734]
 [0.60571563]
 [0.601661  ]
 [0.6045396 ]] [0.60899734] [-0.7319854]
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m

In [85]:
def extract_asset_returns(raw_data, assets, start_date, end_date):
    if not isinstance(raw_data.index, pd.DatetimeIndex):
        raw_data.index = pd.to_datetime(raw_data.index)

    filtered_data = raw_data.loc[start_date:end_date, assets]

    return filtered_data

def chain_portfolio_performance(weekly_series_list, starting_value=100):
    continuous_series = pd.Series()
    current_value = starting_value

    for week_series in weekly_series_list:
        # Normalize the week so that it starts at 1 (or current_value)
        week_normalized = week_series / week_series.iloc[0]
        # Scale the normalized week to start at current_value
        week_scaled = week_normalized * current_value
        # Update the current_value to the last value of this week
        current_value = week_scaled.iloc[-1]
        # Append the week_series to the continuous_series
        continuous_series = pd.concat([continuous_series, week_scaled])
    
    return continuous_series

ML_portfolio = []
initial_value = 100

for i in range(len(all_good_portfolios)):
    curr_best_portfolio = all_good_portfolios[i]['portfolio']
    p_start_date = all_good_portfolios[i]['start_date']
    p_end_date = all_good_portfolios[i]['end_date']
    
    if not curr_best_portfolio:
        if i == 0:
            previous_value = initial_value
        else:
            previous_value = ML_portfolio[-1].iloc[-1]
        # Create a series with the same index as in raw_data for the window duration, all at previous_value.
        window_index = raw_data.loc[p_start_date:p_end_date].index
        portfolio_daily_returns = pd.Series(previous_value, index=window_index)
    else:
        best_curr_port_assets = curr_best_portfolio['tickers']
        best_curr_port_assets_test_data = extract_asset_returns(raw_data, best_curr_port_assets, p_start_date, p_end_date)
        curr_best_portfolio_weights = curr_best_portfolio['weights']
        weighted_returns = best_curr_port_assets_test_data.mul(curr_best_portfolio_weights, axis='columns')
        portfolio_daily_returns = weighted_returns.sum(axis=1)
    
    ML_portfolio.append(portfolio_daily_returns)

ML_portfolio_streamed = chain_portfolio_performance(ML_portfolio, starting_value=initial_value)
ML_portfolio_streamed


The behavior of array concatenation with empty entries is deprecated. In a future version, this will no longer exclude empty items when determining the result dtype. To retain the old behavior, exclude the empty entries before the concat operation.



2024-12-03    100.000000
2024-12-04    100.000000
2024-12-05    100.000000
2024-12-06    100.000000
2024-12-09    100.000000
2024-12-10    100.000000
2024-12-11    100.000000
2024-12-12    100.000000
2024-12-13    100.000000
2024-12-16    100.000000
2024-12-17    100.000000
2024-12-18     96.834969
2024-12-19     99.246172
2024-12-20    102.066634
2024-12-23    103.381153
2024-12-24    103.381153
2024-12-26    103.552960
2024-12-27    101.470219
2024-12-30    100.946957
2024-12-31     99.560024
2025-01-02     99.560024
2025-01-03    100.742310
2025-01-06    100.957372
2025-01-07    100.556883
2025-01-08    101.636554
2025-01-10    101.636554
2025-01-13    101.153092
2025-01-14    102.123788
2025-01-15    102.412357
2025-01-16    104.543072
2025-01-17    104.543072
2025-01-21    104.543072
2025-01-22    104.543072
2025-01-23    104.543072
2025-01-24    104.543072
2025-01-27    104.543072
2025-01-28    108.492227
2025-01-29    108.635228
2025-01-30    113.186027
2025-01-31    110.801801


In [87]:
ML_daily_returns = ML_portfolio_streamed.pct_change()
ML_cumulative_returns = (1 + ML_daily_returns).cumprod()

ML_cumulative_returns.iloc[0] = 1
ML_portfolio_normalized = (ML_cumulative_returns / ML_cumulative_returns.iloc[0]) * 100

Nasdaq_comp = getNasdaq_comp(ML_portfolio_streamed.index[0], ML_portfolio_streamed.index[-1])

fig = go.Figure()

fig.add_trace(go.Scatter(
    x=Nasdaq_comp.index,
    y=Nasdaq_comp['Normalized'],
    mode='lines',
    name='Nasdaq Composite'
))

fig.add_trace(go.Scatter(
    x=ML_cumulative_returns.index,
    y=ML_portfolio_normalized,
    mode='lines',
    name='Portfolio Growth'
))

fig.update_layout(
    title='Comparison of Portfolio vs. Nasdaq Composite Growth',
    xaxis_title='Date',
    yaxis_title='Normalized Value (Base 100%)',
    xaxis=dict(
        type='date',
        tickformat='%b %Y',
        tickmode='auto'
    )
)

[*********************100%***********************]  1 of 1 completed

Omitted assets: []
Time to fetch data: 0.02 seconds
Max combination of assets with complete data: 1





## 7.0 Testing Against Others

In [88]:
best_port_assets = best_portfolio['tickers']
best_port_assets_test_data = raw_data_test.loc[:, best_port_assets]

Nasdaq_comp = getNasdaq_comp(ML_cumulative_returns.index[0], ML_cumulative_returns.index[-1])

best_portfolio_weights = best_portfolio['weights']
normalized_prices = best_port_assets_test_data.div(best_port_assets_test_data.iloc[0])
daily_returns = normalized_prices.pct_change()
weighted_returns = daily_returns.mul(best_portfolio_weights, axis='columns')
portfolio_daily_returns = weighted_returns.sum(axis=1)
portfolio_cumulative_returns = (1 + portfolio_daily_returns).cumprod()

portfolio_start = portfolio_cumulative_returns.iloc[0]
portfolio_normalized = (portfolio_cumulative_returns / portfolio_start) * 100

fig = go.Figure()

fig.add_trace(go.Scatter(
    x=Nasdaq_comp.index,
    y=Nasdaq_comp['Normalized'],
    mode='lines',
    name='Nasdaq Composite'
))

fig.add_trace(go.Scatter(
    x=ML_cumulative_returns.index,
    y=portfolio_normalized,
    mode='lines',
    name='Portfolio Growth'
))

fig.add_trace(go.Scatter(
    x=ML_cumulative_returns.index,
    y=ML_portfolio_normalized,
    mode='lines',
    name='Portfolio Growth'
))

fig.update_layout(
    title='Comparison of Portfolio vs. Nasdaq Composite Growth',
    xaxis_title='Date',
    yaxis_title='Normalized Value (Base 100)',
    xaxis=dict(
        type='date',
        tickformat='%b %Y',
        tickmode='auto'
    )
)

[*********************100%***********************]  1 of 1 completed

Omitted assets: []
Time to fetch data: 0.02 seconds
Max combination of assets with complete data: 1





## 8.0 Find Optimal Portfolio Size

In [47]:
all_portfolios, dominant_portfolios = MonteCarloRBA(names, cov, annualized_returns, 10000, 'sharpe', 3, 50)

100%|██████████| 10000/10000 [03:02<00:00, 54.89it/s]


In [48]:
rd_portfolio_sizes = [len(portfolio['tickers']) for portfolio in all_portfolios]
rd_volatility = [np.sqrt(portfolio['variance']) for portfolio in all_portfolios]
rd_returns = [portfolio['return'] for portfolio in all_portfolios]

volatility_by_size = defaultdict(list)
for size, vol, ret in zip(rd_portfolio_sizes, rd_volatility, rd_returns):
    volatility_by_size[size].append((vol, ret))

average_volatility = {size: np.mean([v[0] for v in vols]) for size, vols in volatility_by_size.items()}
average_returns = {size: np.mean([v[1] for v in vols]) for size, vols in volatility_by_size.items()}

sorted_sizes = sorted(average_volatility.keys())
sorted_average_vols = [average_volatility[size] for size in sorted_sizes]
sorted_average_rets = [average_returns[size] for size in sorted_sizes]

fig = go.Figure()
fig.add_trace(go.Scatter(
    x=sorted_sizes,
    y=sorted_average_vols,
    mode='lines',
    name='Average Volatility'
))

fig.update_layout(
    title='Average Volatility and Returns by Portfolio Size',
    xaxis_title='Number of Assets in Portfolio',
    yaxis_title='Average Value',
    xaxis=dict(type='category'),
)

fig.show()

In [None]:
all_portfolios, dominant_portfolios = MonteCarloRBA(names, cov, annualized_returns, 10000, 'vol', 50, 50)

100%|██████████| 10000/10000 [06:11<00:00, 26.94it/s]


In [54]:
rd_portfolio_sizes = [sum(weight > 0 for weight in portfolio['weights']) for portfolio in all_portfolios]

portfolio_size_counts = Counter(rd_portfolio_sizes)

sizes = sorted(portfolio_size_counts.keys())
counts = [portfolio_size_counts[size] for size in sizes]

fig = go.Figure(data=[go.Bar(x=sizes, y=counts)])

fig.update_layout(
    title='Distribution of Portfolio Sizes After Optimization',
    xaxis_title='Number of assets in portfolio after optimizing',
    yaxis_title='Number of Portfolios',
    xaxis=dict(type='category'),
    yaxis=dict(type='linear')
)

fig.show()

In [51]:
#fig.write_html("PortfolioSize.html")