# Investment Robo-advisor

## 1. Imports

In [1]:
import pandas as pd
import numpy as np
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import statistics
from tqdm import tqdm
from collections import defaultdict, Counter
import math
import random
from itertools import combinations
from sklearn.preprocessing import MinMaxScaler
import tensorflow as tf

from PortfolioPredictor import PortfolioPredictor, evaluate_portfolios_over_time, extract_asset_returns, chain_portfolio_performance
from fetchData import fetch_raw_data_yf, getSNP500, fetch_raw_data_yf_all, getNasdaq_comp
from LearningRBA import MLRBA_V1, MLRBA_V2
from MonteCarloRBA import MonteCarloRBA
from PortfolioFunction import maximize_sharpe, create_correlation_matrix, get_sharpe_ratio, get_matrices, maximize_sharpe_SLSQP
from LearningRBA import find_best_asset_to_remove, find_asset_to_add


## 2. Fetch Data

### Get all Nasdaq Stocks

In [2]:
assets = getSNP500()
start_date = pd.Timestamp("2021-01-01")
end_date = pd.Timestamp("2023-01-01")

In [3]:
raw_data, asset_errors, max_combination= fetch_raw_data_yf(assets, start_date, end_date)

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%********

Omitted assets: ['GEV', 'SOLV', 'BRK.B', 'KVUE', 'VLTO', 'BF.B', 'SW']
Time to fetch data: 77.60 seconds
Max combination of assets with complete data: 493





## 3. Mean, Volatility and Covariance

In [4]:
names, annualized_returns, unweighted_annaulized_returns, weighted_returns_matrix, normal_returns_matrix, cov, correlation_matrix = get_matrices(raw_data)

volatility = np.sqrt(np.diag(cov))
risk_free_rate=0
sharpe_ratios = (annualized_returns - risk_free_rate) / volatility

In [5]:
hover_texts = [
    f"<br>Symbol: {ticker} <br>Volatility: {vol:.3f} <br>Returns: {ret:.3%} <br>Sharpe Ratio: {sr:.3f}"
    for ticker, vol, ret, sr in zip(names, volatility, annualized_returns, sharpe_ratios)
]

fig = go.Figure(data=go.Scatter(
    x=volatility,
    y=annualized_returns,
    mode='markers',
    hoverinfo='text',
    hovertext=hover_texts,
    marker=dict(color=sharpe_ratios, colorscale = 'RdBu', size=6, line=dict(width=1), colorbar=dict(title="Sharpe<br>Ratio")
    )
))

fig.update_layout(
    title='Annual Performance of Individual Assets',
    xaxis_title='Volatility (Standard Deviation)',
    yaxis_title='Annualized Returns',
    font=dict(
        family="Cambria",
        size=18,
    )
)

fig.show()
#fig.write_html("PerformanceofIndividualAssets.html")
#fig.write_image("PerformanceofIndividualAssets.png", format='png', width=1920, height=1080)

### Demonstrating SLSQP

In [None]:
rand_assets = np.random.choice(list(names), 5, replace=False)

selected_returns = annualized_returns.loc[rand_assets].values
selected_covariances = cov.loc[rand_assets, rand_assets].values

optimized_weights, weights_history, sharpe_history = maximize_sharpe_SLSQP(selected_returns, selected_covariances)

In [None]:
fig = make_subplots(rows=1, cols=2, subplot_titles=("Portfolio Weights Over Iterations", "Sharpe Ratio Over Iterations"))

for i in range(len(selected_returns)):
    fig.add_trace(go.Scatter(x=list(range(len(weights_history))), 
                             y=[h[i] for h in weights_history], 
                             mode='lines+markers', 
                             name=f'Asset {i+1} Weight'), 
                  row=1, col=1)

fig.add_trace(go.Scatter(x=list(range(len(sharpe_history))), 
                         y=sharpe_history, 
                         mode='lines+markers', 
                         name='Sharpe Ratio'), 
              row=1, col=2)

fig.update_layout(title_text='Portfolio Optimization Analysis',
                  xaxis_title='Iteration',
                  yaxis_title='Weight',
                  legend_title='Assets',
                    font=dict(
                        family="Cambria",
                        size=18,
                    )
)

# Update xaxis and yaxis properties for Sharpe Ratio subplot
fig.update_xaxes(title_text="Iteration", row=1, col=2)
fig.update_yaxes(title_text="Sharpe Ratio", row=1, col=2)

fig.show()

#fig.write_html("SLSQPDemo.html")
#fig.write_image("SLSQPDemo.png", format='png', width=1920, height=1080)

## 4.0 Monte Carlo Method

In [6]:
all_portfolios, dominant_portfolios = MonteCarloRBA(names, cov, annualized_returns, 10000, min_assets=8, max_assets=8)

100%|██████████| 10000/10000 [00:16<00:00, 607.87it/s]


In [7]:
iterations = [portfolio['iteration'] for portfolio in dominant_portfolios]
counts = list(range(1, len(dominant_portfolios) + 1))

fig = go.Figure()

fig.add_trace(go.Scatter(x=iterations, y=counts,
                        mode='lines',
                        name='Dominant Portfolios',
                        line=dict(shape='spline')
))  

fig.update_layout(
    title='Growth of Dominant Portfolios Over Iterations',
    xaxis_title='Portfolios Generated',
    yaxis_title='Number of Dominant Portfolios Found',
    font=dict(
        family="Cambria",
        size=18,
    )
)

fig.show()
#fig.write_html("FrequencyOfDom.html")
#fig.write_image("FrequencyOfDom.png", format='png', width=1920, height=1080)

In [8]:
fig1 = go.Figure()

fig1.add_trace(go.Scatter(
    x=[np.sqrt(p["variance"]) for p in all_portfolios],
    y=[p["return"] for p in all_portfolios],
    mode='markers',
    marker=dict(
        color=[p["sharpe"] for p in all_portfolios],
        showscale=True,
        size=7,
        line=dict(width=1),
        colorscale="RdBu",
        colorbar=dict(title="Sharpe<br>Ratio")
    ),
    hoverinfo='text',
    text=[
        f"Return: {p['return']:.3%}<br>Volatility: {np.sqrt(p['variance']):.3f}<br>" +
        f"Sharpe Ratio: {p['return'] / (np.sqrt(p['variance'])):.3f}<br>" +
        "<br>".join([f"{p['tickers'][i]}: Weight={p['weights'][i]:.3f}" for i in range(len(p['tickers']))])
        for p in all_portfolios
    ]
))

fig1.update_layout(
    xaxis=dict(title='Volatility (Standard Deviation)'),
    yaxis=dict(title='Annualised Returns'),
    title='Monte Carlo Randomly Generated Portfolios',
    font=dict(
        family="Cambria",
        size=18,
    )
)
fig1.show()
#fig1.write_html("MonteCarlo.html")
#fig1.write_image("MonteCarlo.png", format='png', width=1920, height=1080)

In [9]:
fig2 = go.Figure()

fig2.add_trace(go.Scatter(
    x=[np.sqrt(p["variance"]) for p in dominant_portfolios],  # Convert variance to volatility
    y=[p["return"] for p in dominant_portfolios],
    mode='markers',
    marker=dict(
        size=7,
        line=dict(width=1),
        #showscale=True,
        #color=[p["return"] / (np.sqrt(p["variance"])) for p in dominant_portfolios],  # Sharpe Ratio
        #colorscale="RdBu",
        #colorbar=dict(title="Sharpe<br>Ratio")
    ),
    hoverinfo='text',
    text=[
        f"Return: {p['return']:.3%}<br>Volatility: {np.sqrt(p['variance']):.3f}<br>" +
        f"Sharpe Ratio: {p['sharpe']:.3f}<br>" +
        "<br>".join([f"{p['tickers'][i]}: Weight={p['weights'][i]:.3f}" for i in range(len(p['tickers']))])
        for p in dominant_portfolios
    ],
    name="Monte Carlo Portfolios"
))

fig2.add_trace(go.Scatter(
    x=volatility,
    y=annualized_returns,
    mode='markers',
    hoverinfo='text',
    hovertext=[
        f"{name} <br>Volatility: {vol:.3f} <br>Returns: {ret:.3%} <br>Sharpe Ratio: {sr:.3f}"
        for name, vol, ret, sr in zip(names, volatility, annualized_returns, sharpe_ratios)
    ],
    marker=dict(
        color='green',
        size=5,
        line=dict(width=1)
    ),
    name="Individual Assets"
))

fig2.update_layout(
    title='Monte Carlo Portfolios with Individual Assets',
    xaxis_title='Volatility (Standard Deviation)',
    yaxis_title='Annualized Return',
    legend=dict(y=5),
    font=dict(
        family="Cambria",
        size=18,
    )
)

fig2.show()
#fig2.write_html("MCMarkowitzBullet.html")
#fig2.write_image("MCMarkowitzBullet.png", format='png', width=1920, height=1080)

## 5.0 Machine Learning Method

### 5.1 Optimization Function Only

In [10]:
base_portfolio, best_portfolio, good_portfolios, total_portfolios, best_iteration = MLRBA_V1(names, cov, annualized_returns)
base_portfolio, best_portfolio, len(good_portfolios), len(total_portfolios), best_iteration

Portfolios Tested:   4%|▍         | 1998/50000 [00:04<01:43, 463.35it/s]

All assets have been tested or no improvement found.





({'tickers': array(['TROW', 'SWKS', 'EMR', 'ITW', 'UNP', 'KIM', 'CMCSA', 'MCHP'],
        dtype='<U5'),
  'weights': array([0.00000000e+00, 8.77006736e-17, 4.22309629e-02, 1.03520351e-16,
         0.00000000e+00, 9.57769037e-01, 0.00000000e+00, 0.00000000e+00]),
  'return': np.float64(0.35486666867739325),
  'variance': np.float64(0.08653812076878195),
  'sharpe': np.float64(1.2063170873162385),
  'iteration': 0},
 {'tickers': ['XOM', 'IRM', 'AZO', 'MCK', 'LLY', 'TPL', 'MPC', 'MRNA'],
  'weights': array([0.24185356, 0.0563104 , 0.147803  , 0.22636186, 0.22121547,
         0.02046003, 0.06447652, 0.02151916]),
  'return': np.float64(0.5275125719762419),
  'variance': np.float64(0.03338674512105757),
  'sharpe': np.float64(2.886993284118986),
  'iteration': 1513},
 54,
 1999,
 1512)

In [11]:
fig = go.Figure()

fig.add_trace(go.Scatter(
    x=[p["variance"]**0.5 for p in good_portfolios],  # Convert variance to volatility
    y=[p["return"] for p in good_portfolios],
    mode='markers',
    marker=dict(
        color=[p["sharpe"] for p in good_portfolios],  # Sharpe Ratio
        showscale=True,
        size=7,
        line=dict(width=1),
        colorscale="RdBu",
        colorbar=dict(title="Sharpe<br>Ratio")
    ),
    hoverinfo='text',
    text=[
        f"Return: {p['return']:.3%}<br>Volatility: {p['variance']**0.5:.3f}<br>" +
        f"Sharpe Ratio: {p['return'] / (p['variance']**0.5):.3f}<br>" +
        "<br>".join([f"{p['tickers'][i]}: Weight={p['weights'][i]:.3f}" for i in range(len(p['tickers']))])
        for p in good_portfolios
    ],
    name="Portfolios"
))

fig.update_layout(
    title='Convergence Strategy Generarted Portfolios',
    xaxis_title='Volatility (Standard Deviation)',
    yaxis_title='Annualized Return',
    legend=dict(y=5),
    font=dict(
        family="Cambria",
        size=18,
    )
)

fig.show()
#fig.write_html("ConvergenceRBA.html")
#fig.write_image("ConvergenceRBA.png", format='png', width=1920, height=1080)

fig.add_trace(go.Scatter(
    x=volatility,
    y=annualized_returns,
    mode='markers',
    hoverinfo='text',
    hovertext=[
        f"{name} <br>Volatility: {vol:.3f} <br>Returns: {ret:.3%} <br>Sharpe Ratio: {sr:.3f}"
        for name, vol, ret, sr in zip(names, volatility, annualized_returns, sharpe_ratios)
    ],
    marker=dict(
        color='green',
        size=5,
        line=dict(width=1)
    ),
    name="Individual Assets"
))

fig.update_layout(
    title='Convergence Strategy Generarted Portfolios with Individual Assets',
    xaxis_title='Volatility (Standard Deviation)',
    yaxis_title='Annualized Return',
    legend=dict(y=5),
    font=dict(
        family="Cambria",
        size=18,
    )
)


fig.show()

#fig.write_html("ConvergenceRBA+Asset.html")
#fig.write_image("ConvergenceRBA+Asset.png", format='png', width=1920, height=1080)


In [12]:
good_sharpes = [p['sharpe'] for p in good_portfolios]

fig = go.Figure(data=go.Scatter(x=list(range(len(good_sharpes))), y=good_sharpes, mode='markers+lines', name='Good Portfolios'))

fig.update_layout(
    title='Sharpe Ratios Gains as Improvement Portfolios Found',
    xaxis_title='Improvement Portfolio',
    yaxis_title='Sharpe Ratio',
    template='plotly_white',
    font=dict(
        family="Cambria",
        size=18,
    )
)

fig.show()
#fig.write_html("DiminishingSharpeRBA1.html")
#fig.write_image("DiminishingSharpeRBA1.png", format='png', width=1920, height=1080)

#### Comparing MLRBA_V1 with Monte Carlo

In [13]:
figC = go.Figure(fig2)
figC.add_trace(go.Scatter(
    x=[p["variance"]**0.5 for p in good_portfolios],  # Convert variance to volatility
    y=[p["return"] for p in good_portfolios],
    mode='markers',
    marker=dict(
        size=7,
        line=dict(width=1),
        color="Red",
    ),
    hoverinfo='text',
    text=[
        f"Return: {p['return']:.3%}<br>Volatility: {p['variance']**0.5:.3f}<br>" +
        f"Sharpe Ratio: {p['return'] / (p['variance']**0.5):.3f}<br>" +
        "<br>".join([f"{p['tickers'][i]}: Weight={p['weights'][i]:.3f}" for i in range(len(p['tickers']))])
        for p in good_portfolios
    ],
    name="Convergence Portfolios"
))

figC.update_layout(
    title='Convergence vs Monte Carlo vs Individual Assets',
    legend=dict(x=0.85, y=0.95),
    font=dict(
        family="Cambria",
        size=18,
    )
)

figC.show()
#figC.write_html("ComparisonOfConvergence+MC.html")
#figC.write_image("ComparisonOfConvergence+MC.png", format='png', width=1920, height=1080)

### 5.2 Reinforcement Weight Training

In [14]:
base_portfolio, best_portfolio, good_portfolios, total_portfolios, best_iteration = MLRBA_V2(names, cov, annualized_returns, correlation_matrix)
base_portfolio, best_portfolio, len(good_portfolios), len(total_portfolios), best_iteration

Portfolios Tested:   2%|▏         | 1132/50000 [00:02<01:43, 473.05it/s]

All assets have been tested or no improvement possible.





({'tickers': array(['NVDA', 'K', 'NTAP', 'EXC', 'OMC', 'V', 'HPE', 'IR'], dtype='<U5'),
  'weights': array([3.80517565e-17, 5.27483416e-02, 0.00000000e+00, 5.66463796e-01,
         1.92640261e-01, 0.00000000e+00, 1.88147601e-01, 0.00000000e+00]),
  'return': np.float64(0.2280294123435775),
  'variance': np.float64(0.03587768847047737),
  'sharpe': np.float64(1.2038673628141576),
  'iteration': 0},
 {'tickers': ['K', 'XOM', 'MPC', 'MCK', 'LLY', 'AZO', 'DVN', 'MRNA'],
  'weights': array([2.50597643e-17, 2.40886761e-01, 6.26531316e-02, 2.37933634e-01,
         2.41075789e-01, 1.59115047e-01, 3.19181414e-02, 2.64174961e-02]),
  'return': np.float64(0.539770692215369),
  'variance': np.float64(0.035264569426814954),
  'sharpe': np.float64(2.8743523674133784),
  'iteration': 647},
 45,
 1133,
 646)

In [15]:
fig = go.Figure()

fig.add_trace(go.Scatter(
    x=[p["variance"]**0.5 for p in good_portfolios],  # Convert variance to volatility
    y=[p["return"] for p in good_portfolios],
    mode='markers',
    marker=dict(
        color=[p["sharpe"] for p in good_portfolios],  # Sharpe Ratio
        showscale=True,
        size=7,
        line=dict(width=1),
        colorscale="RdBu",
        colorbar=dict(title="Sharpe<br>Ratio")
    ),
    hoverinfo='text',
    text=[
        f"Return: {p['return']:.3%}<br>Volatility: {p['variance']**0.5:.3f}<br>" +
        f"Sharpe Ratio: {p['return'] / (p['variance']**0.5):.3f}<br>" +
        "<br>".join([f"{p['tickers'][i]}: Weight={p['weights'][i]:.3f}" for i in range(len(p['tickers']))])
        for p in good_portfolios
    ],
    name="Portfolios"
))

fig.update_layout(
    title='Learning Convergence Portfolios',
    xaxis_title='Volatility (Standard Deviation)',
    yaxis_title='Annualized Return',
    legend=dict(y=5),
    font=dict(
        family="Cambria",
        size=18,
    )
)

fig.show()
#fig.write_html("LearningConvergenceRBA.html")
#fig.write_image("LearningConvergenceRBA.png", format='png', width=1920, height=1080)

fig.add_trace(go.Scatter(
    x=volatility,
    y=annualized_returns,
    mode='markers',
    hoverinfo='text',
    hovertext=[
        f"{name} <br>Volatility: {vol:.3f} <br>Returns: {ret:.3%} <br>Sharpe Ratio: {sr:.3f}"
        for name, vol, ret, sr in zip(names, volatility, annualized_returns, sharpe_ratios)
    ],
    marker=dict(
        color='green',
        size=5,
        line=dict(width=1)
    ),
    name="Individual Assets"
))

fig.update_layout(
    title='Learning Convergence Portfolios with Individual Assets',
    xaxis_title='Volatility (Standard Deviation)',
    yaxis_title='Annualized Return',
    legend=dict(y=5),
    font=dict(
        family="Cambria",
        size=18,
    )
)


fig.show()
#fig.write_html("LearningConvergenceRBA+Asset.html")
#fig.write_image("LearningConvergenceRBA+Asset.png", format='png', width=1920, height=1080)


In [16]:
good_sharpes = [p['sharpe'] for p in good_portfolios]

fig = go.Figure(data=go.Scatter(x=list(range(len(good_sharpes))), y=good_sharpes, mode='markers+lines', name='Good Portfolios'))

fig.update_layout(
    title='Sharpe Ratios Gains as Improvement Portfolios Found',
    xaxis_title='Improvement Portfolio',
    yaxis_title='Sharpe Ratio',
    template='plotly_white',
    font=dict(
        family="Cambria",
        size=18,
    )
)

fig.show()
#fig.write_html("DiminishingSharpeRBA2.html")
#fig.write_image("DiminishingSharpeRBA2.png", format='png', width=1920, height=1080)

#### Comparing MLRBA_V2 with Monte Carlo

In [17]:
figC = go.Figure(fig2)
figC.add_trace(go.Scatter(
    x=[p["variance"]**0.5 for p in good_portfolios],  # Convert variance to volatility
    y=[p["return"] for p in good_portfolios],
    mode='markers',
    marker=dict(
        size=7,
        line=dict(width=1),
        color="Red",
    ),
    hoverinfo='text',
    text=[
        f"Return: {p['return']:.3%}<br>Volatility: {p['variance']**0.5:.3f}<br>" +
        f"Sharpe Ratio: {p['return'] / (p['variance']**0.5):.3f}<br>" +
        "<br>".join([f"{p['tickers'][i]}: Weight={p['weights'][i]:.3f}" for i in range(len(p['tickers']))])
        for p in good_portfolios
    ],
    name="Learning Convergence Portfolios"
))

figC.update_layout(
    title='Learning Convergence vs Monte Carlo vs Individual Assets',
    legend=dict(x=0.8, y=0.95),
    font=dict(
        family="Cambria",
        size=18,
    )
)

figC.show()
#figC.write_html("ComparisonOfLearningConvergence+MC.html")
#figC.write_image("ComparisonOfLearningConvergence+MC.png", format='png', width=1920, height=1080)

### Comparing V1 and V2

In [18]:
def run_MLRBA_multiple_times(names, cov, annualized_returns, num_runs, num_assets=8):

    def generate_rand_port(tickers, num_assets, num_runs):
        rand_port = []
        for _ in range (num_runs):
            base_portfolio = np.random.choice(list(tickers), num_assets, replace=False)
            rand_port.append(base_portfolio)

        return rand_port
    
    random_portfolios = generate_rand_port(names, num_assets=num_assets, num_runs=num_runs)

    total_good_portfolios_length_v1 = 0
    best_portfolios_v1 = []
    best_iterations_v1 = []
    
    total_good_portfolios_length_v2 = 0
    best_portfolios_v2 = []
    best_iterations_v2 = []
    
    for portfolio in random_portfolios:
        base_portfolio_v1, best_portfolio_v1, good_portfolios_v1, _, best_iteration_v1 = MLRBA_V1(names, cov, annualized_returns, base_portfolio=portfolio)
        total_good_portfolios_length_v1 += len(good_portfolios_v1)
        best_portfolios_v1.append(best_portfolio_v1)
        best_iterations_v1.append(best_iteration_v1)
        
        base_portfolio_v2, best_portfolio_v2, good_portfolios_v2, _, best_iteration_v2 = MLRBA_V2(names, cov, annualized_returns, correlation_matrix, base_portfolio=portfolio)
        total_good_portfolios_length_v2 += len(good_portfolios_v2)
        best_portfolios_v2.append(best_portfolio_v2)
        best_iterations_v2.append(best_iteration_v2)

        print(base_portfolio_v1['tickers'] == base_portfolio_v2['tickers'])

    average_length_v1 = total_good_portfolios_length_v1 / num_runs
    average_iteration_v1 = statistics.fmean(best_iterations_v1)
    std_dev_iteration_v1 = statistics.stdev(best_iterations_v1) if num_runs > 1 else 0

    average_length_v2 = total_good_portfolios_length_v2 / num_runs
    average_iteration_v2 = statistics.fmean(best_iterations_v2)
    std_dev_iteration_v2 = statistics.stdev(best_iterations_v2) if num_runs > 1 else 0

    
    results = {
        'v1': (base_portfolio_v1, average_length_v1, best_portfolios_v1, average_iteration_v1, std_dev_iteration_v1, best_iterations_v1),
        'v2': (base_portfolio_v2, average_length_v2, best_portfolios_v2, average_iteration_v2, std_dev_iteration_v2, best_iterations_v2)
    }
    
    return results

num_runs = 20
results = run_MLRBA_multiple_times(names, cov, annualized_returns, num_runs)

_, _, best_portfolios_v1, average_iteration_v1, std_dev_v1, best_iterations_v1 = results['v1']
_, _, best_portfolios_v2, average_iteration_v2, std_dev_v2, best_iterations_v2 = results['v2']

Portfolios Tested:   4%|▍         | 1916/50000 [00:04<01:42, 467.94it/s]


All assets have been tested or no improvement found.


Portfolios Tested:   3%|▎         | 1337/50000 [00:02<01:43, 472.17it/s]


All assets have been tested or no improvement possible.
[ True  True  True  True  True  True  True  True]


Portfolios Tested:   5%|▍         | 2305/50000 [00:04<01:39, 478.70it/s]


All assets have been tested or no improvement found.


Portfolios Tested:   3%|▎         | 1253/50000 [00:02<01:50, 440.28it/s]

KeyboardInterrupt: 

Portfolios Tested:   3%|▎         | 1264/50000 [00:22<01:50, 440.28it/s]

In [None]:
from plotly.subplots import make_subplots

avg_sharpe_v1 = np.mean([portfolio['sharpe'] for portfolio in best_portfolios_v1])
avg_sharpe_v2 = np.mean([portfolio['sharpe'] for portfolio in best_portfolios_v2])

fig = make_subplots(
    rows=1, cols=2,
    subplot_titles=[
        "Average Iterations to Find Best Portfolio",
        "Average Highest Sharpe Ratio"
    ]
)

fig.add_trace(go.Bar(
    x=['Standard Convergence', 'Learning Convergergence'],
    y=[average_iteration_v1, average_iteration_v2],
    name='Iterations',
    error_y=dict(type='data', array=[std_dev_v1, std_dev_v2], visible=True),
    width=0.4
), row=1, col=1)

fig.add_trace(go.Bar(
    x=['Standard Convergence', 'Learning Convergergence'],
    y=[avg_sharpe_v1, avg_sharpe_v2],
    name='Sharpe Ratio',
    width=0.4
), row=1, col=2)

# Axis titles
fig.update_xaxes(title_text='Method Version', row=1, col=1)
fig.update_yaxes(title_text='Average Iterations', row=1, col=1)

fig.update_xaxes(title_text='Method Version', row=1, col=2)
fig.update_yaxes(title_text='Average Sharpe Ratio', row=1, col=2)

# Layout and display
fig.update_layout(
    title_text='Standard Convergence vs Learning Convergence',
    showlegend=False,
    font=dict(
        family="Cambria",
        size=18,
    )
)

fig.show()
#fig.write_html("StandardvsLearning.html")
#fig.write_image("StandardvsLearning.png", format='png', width=1920, height=1080)

## 6.0 Portfolio Prediction using LSTM

In [19]:
investment_length = 730
investment_end_date = end_date + pd.Timedelta(days=investment_length)

raw_data, asset_errors, max_combination= fetch_raw_data_yf(assets, start_date, investment_end_date)
names, annualized_returns, unweighted_annaulized_returns, weighted_returns_matrix, normal_returns_matrix, cov, correlation_matrix = get_matrices(raw_data)

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%********

Omitted assets: ['BRK.B', 'BF.B']
Time to fetch data: 77.46 seconds
Max combination of assets with complete data: 493


In [20]:
raw_data_train = raw_data.loc[:end_date]
raw_data_test = raw_data.loc[end_date:]

portfolio_predictor = PortfolioPredictor(raw_data_train, raw_data_test, best_portfolio, n_steps=5, epochs=30, activation_function='relu')

portfolio_predictor.preprocess_data()
portfolio_predictor.build_model()
portfolio_predictor.train_model()
prediction = portfolio_predictor.predict()   
portfolio_predictor.plot_loss()
portfolio_predictor.plot_predictions()
portfolio_predictor.compute_performance()

[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step


Mean Percentage Error:2.6556241346388108%, Accuracy: 97.34437586536119


(np.float64(2.6556241346388108), np.float64(97.34437586536119))

### Full Algorithm

In [None]:
investments = evaluate_portfolios_over_time(raw_data, end_date, window_size=5, epochs=30, length_of_investment = None, candidates_per_divison = 2, candidates_divison=3)

In [None]:
ML_portfolio = []
portfolio_holdings = []  # To store tickers held at each point
initial_value = 100

for i in range(len(investments)):
    curr_best_portfolio = investments[i]['portfolio']
    p_start_date = investments[i]['start_date']
    p_end_date = investments[i]['end_date']
    
    if not curr_best_portfolio:
        # No portfolio for this period
        if i == 0:
            previous_value = initial_value
        else:
            previous_value = ML_portfolio[-1].iloc[-1]
        window_index = raw_data.loc[p_start_date:p_end_date].index
        portfolio_daily_returns = pd.Series(previous_value, index=window_index)
        held_tickers = []  # Nothing held
    else:
        # Portfolio exists for this period
        best_curr_port_assets = curr_best_portfolio['tickers']
        best_curr_port_assets_test_data = extract_asset_returns(raw_data, best_curr_port_assets, p_start_date, p_end_date)
        curr_best_portfolio_weights = curr_best_portfolio['weights']
        weighted_returns = best_curr_port_assets_test_data.mul(curr_best_portfolio_weights, axis='columns')
        portfolio_daily_returns = weighted_returns.sum(axis=1)
        held_tickers = best_curr_port_assets

    ML_portfolio.append(portfolio_daily_returns)
    portfolio_holdings.append((p_start_date, p_end_date, held_tickers))

ML_portfolio_streamed = chain_portfolio_performance(ML_portfolio, starting_value=initial_value)

# Print tickers held at each point
for period_info in portfolio_holdings:
    start, end, tickers = period_info
    print(f"Held from {start} to {end}: {tickers}")

#Number of unique assets held
unique_assets = set()
for _, _, tickers in portfolio_holdings:
    unique_assets.update(tickers)

print(f"\nNumber of unique tickers held: {len(unique_assets)}")

ML_portfolio_streamed

In [None]:
ML_daily_returns = ML_portfolio_streamed.pct_change()
ML_cumulative_returns = (1 + ML_daily_returns).cumprod()

ML_cumulative_returns.iloc[0] = 1
ML_portfolio_normalized = (ML_cumulative_returns / ML_cumulative_returns.iloc[0]) * 100

Nasdaq_comp = getNasdaq_comp(ML_portfolio_streamed.index[0], ML_portfolio_streamed.index[-1])

fig = go.Figure()

fig.add_trace(go.Scatter(
    x=Nasdaq_comp.index,
    y=Nasdaq_comp['Normalized'],
    mode='lines',
    name='Nasdaq Composite'
))

fig.add_trace(go.Scatter(
    x=ML_cumulative_returns.index,
    y=ML_portfolio_normalized,
    mode='lines',
    name='Portfolio Growth'
))

fig.update_layout(
    title='Comparison of Portfolio vs. Nasdaq Composite Growth',
    xaxis_title='Date',
    yaxis_title='Normalized Value (Base 100)',
    xaxis=dict(
        type='date',
        tickformat='%b %Y',
        tickmode='auto'
    ),
    font=dict(
        family="Cambria",
        size=18,
    )
)
fig.show()
#fig.write_html("RBAVsNasdaq.html")
#fig.write_image("RBAVsNasdaq.png", format='png', width=1920, height=1080)

## 7.0 Testing Against Others

In [None]:
best_port_assets = best_portfolio['tickers']
best_port_assets_test_data = raw_data_test.loc[:, best_port_assets]

Nasdaq_comp = getNasdaq_comp(ML_cumulative_returns.index[0], ML_cumulative_returns.index[-1])

best_portfolio_weights = best_portfolio['weights']
normalized_prices = best_port_assets_test_data.div(best_port_assets_test_data.iloc[0])
daily_returns = normalized_prices.pct_change()
weighted_returns = daily_returns.mul(best_portfolio_weights, axis='columns')
portfolio_daily_returns = weighted_returns.sum(axis=1)
portfolio_cumulative_returns = (1 + portfolio_daily_returns).cumprod()

portfolio_start = portfolio_cumulative_returns.iloc[0]
portfolio_normalized = (portfolio_cumulative_returns / portfolio_start) * 100

fig = go.Figure()

fig.add_trace(go.Scatter(
    x=Nasdaq_comp.index,
    y=Nasdaq_comp['Normalized'],
    mode='lines',
    name='Nasdaq Composite'
))

fig.add_trace(go.Scatter(
    x=ML_cumulative_returns.index,
    y=ML_portfolio_normalized,
    mode='lines',
    name='Portfolio with Adjustments'
))

fig.add_trace(go.Scatter(
    x=portfolio_normalized.index,
    y=portfolio_normalized,
    mode='lines',
    name='Base Portfolio'
))

fig.update_layout(
    title='Comparison of Portfolio vs. Nasdaq Composite Growth : 2023-2025',
    xaxis_title='Date',
    yaxis_title='Normalized Returns (Base 100%)',
    xaxis=dict(
        type='date',
        tickformat='%b %Y',
        tickmode='auto'
    ),
    font=dict(
        family="Cambria",
        size=18,
    )
)
fig.show()
#fig.write_html("MLRBAvsRBAvsNasdaq.html")
#fig.write_image("MLRBAvsRBAvsNasdaq.png", format='png', width=1920, height=1080)

## 8.0 Find Optimal Portfolio Size

In [None]:
all_portfolios, dominant_portfolios = MonteCarloRBA(names, cov, annualized_returns, 10000, 'sharpe', 3, 50)

In [None]:
rd_portfolio_sizes = [len(portfolio['tickers']) for portfolio in all_portfolios]
rd_volatility = [np.sqrt(portfolio['variance']) for portfolio in all_portfolios]
rd_returns = [portfolio['return'] for portfolio in all_portfolios]

volatility_by_size = defaultdict(list)
for size, vol, ret in zip(rd_portfolio_sizes, rd_volatility, rd_returns):
    volatility_by_size[size].append((vol, ret))

average_volatility = {size: np.mean([v[0] for v in vols]) for size, vols in volatility_by_size.items()}
average_returns = {size: np.mean([v[1] for v in vols]) for size, vols in volatility_by_size.items()}

sorted_sizes = sorted(average_volatility.keys())
sorted_average_vols = [average_volatility[size] for size in sorted_sizes]
sorted_average_rets = [average_returns[size] for size in sorted_sizes]

fig = go.Figure()
fig.add_trace(go.Scatter(
    x=sorted_sizes,
    y=sorted_average_vols,
    mode='lines',
    name='Average Volatility'
))

fig.update_layout(
    title='Average Volatility and Returns by Portfolio Size',
    xaxis_title='Number of Assets in Portfolio',
    yaxis_title='Average Value',
    xaxis=dict(type='category'),
)

fig.show()

In [None]:
all_portfolios, dominant_portfolios = MonteCarloRBA(names, cov, annualized_returns, 10000, 'vol', 50, 50)

In [None]:
rd_portfolio_sizes = [sum(weight > 0 for weight in portfolio['weights']) for portfolio in all_portfolios]

portfolio_size_counts = Counter(rd_portfolio_sizes)

sizes = sorted(portfolio_size_counts.keys())
counts = [portfolio_size_counts[size] for size in sizes]

fig = go.Figure(data=[go.Bar(x=sizes, y=counts)])

fig.update_layout(
    title='Distribution of Portfolio Sizes After Optimization',
    xaxis_title='Number of assets in portfolio after optimizing',
    yaxis_title='Number of Portfolios',
    xaxis=dict(type='category'),
    yaxis=dict(type='linear')
)

fig.show()

In [None]:
#fig.write_html("PortfolioSize.html")