<a href="https://colab.research.google.com/github/LinusTC/RoboAdvisor/blob/main/MarkowitzBullet.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Markowitz Efficient Frontier

## 1. Imports

In [215]:
import pandas as pd
import numpy as np
import plotly.graph_objects as go
from tqdm import tqdm
from collections import defaultdict

from fetchData import fetch_raw_data_yf, get_matrices, getNasdaqStocks
from MonteCarloRBA import MonteCarloRBA
from portfolioFunction import maximize_sharpe, create_correlation_matrix, get_sharpe_ratio
from LearningRBA import find_best_asset_to_remove


## 2. Fetch Data

### Get all Nasdaq Stocks

In [216]:
assets= [
    "AAPL",  # Apple Inc.
    "MSFT",  # Microsoft Corporation
    "AMZN",  # Amazon.com Inc.
    "GOOGL", # Alphabet Inc. (Google) Class A
    "GOOG",  # Alphabet Inc. (Google) Class C
    "META",    # Meta Platforms Inc (formerly Facebook)
    "TSLA",  # Tesla Inc
    "UA", # Berkshire Hathaway Inc. Class B
    "JPM",   # JPMorgan Chase & Co.
    "V",     # Visa Inc.
    "JNJ",   # Johnson & Johnson
    "WMT",   # Walmart Inc.
    "PG",    # Procter & Gamble Co.
    "UNH",   # UnitedHealth Group Inc.
    "MA",    # Mastercard Inc.
    "NVDA",  # NVIDIA Corporation
    "HD",    # Home Depot Inc.
    "BAC",   # Bank of America Corp
    "DIS",   # Walt Disney Co
    "PYPL",  # PayPal Holdings
    "VZ",    # Verizon Communications Inc.
    "ADBE",  # Adobe Inc.
    "CMCSA", # Comcast Corporation
    "NFLX",  # Netflix Inc.
    "KO",    # Coca-Cola Co
    "NKE",   # NIKE Inc.
    "PFE",   # Pfizer Inc.
    "MRK",   # Merck & Co., Inc.
    "PEP",   # PepsiCo, Inc.
    "T",     # AT&T Inc.
    "ABT",   # Abbott Laboratories
    "CRM",   # Salesforce.com Inc.
    "ORCL",  # Oracle Corporation
    "ABBV",  # AbbVie Inc.
    "CSCO",  # Cisco Systems, Inc.
    "INTC",  # Intel Corporation
    "TMO",   # Thermo Fisher Scientific Inc.
    "XOM",   # Exxon Mobil Corporation
    "ACN",   # Accenture plc
    "LLY",   # Eli Lilly and Company
    "COST",  # Costco Wholesale Corporation
    "MCD",   # McDonald's Corp
    "DHR",   # Danaher Corporation
    "MDT",   # Medtronic plc
    "NEE",   # NextEra Energy, Inc.
    "BMY",   # Bristol-Myers Squibb Company
    "QCOM",  # Qualcomm Inc
    "CVX",   # Chevron Corporation
    "WFC",   # Wells Fargo & Co
    "LMT",    # Lockheed Martin Corporation
    "GS",   # Goldman Sachs Group, Inc.
    "MS",   # Morgan Stanley
    "IBM",  # International Business Machines Corporation
    "GE",   # General Electric Company
    "F",    # Ford Motor Company
    "GM",   # General Motors Company
    "UBER", # Uber Technologies, Inc.
    "LYFT", # Lyft, Inc.
    "SNAP", # Snap Inc.
    "TWTR", # Twitter, Inc.
    "SPOT", # Spotify Technology S.A.
    "AMD",  # Advanced Micro Devices, Inc.
    "TXN",  # Texas Instruments Incorporated
    "BABA", # Alibaba Group Holding Limited
    "SAP",  # SAP SE
    "HON",  # Honeywell International Inc.
    "BA",   # Boeing Company
    "RTX",  # Raytheon Technologies Corporation
    "CAT",  # Caterpillar Inc.
    "DE",   # Deere & Company
    "MMM",  # 3M Company
    "DUK",  # Duke Energy Corporation
    "SO",   # Southern Company
    "EXC",  # Exelon Corporation
    "NEE",  # NextEra Energy, Inc.
    "AEP",  # American Electric Power Company, Inc.
    "SRE",  # Sempra Energy
    "ETN",  # Eaton Corporation plc
    "EMR",  # Emerson Electric Co.
    "SYY",  # Sysco Corporation
    "KR",   # Kroger Co.
    "GIS",  # General Mills, Inc.
    "K",    # Kellogg Company
    "CPB",  # Campbell Soup Company
    "MO",   # Altria Group, Inc.
    "PM",   # Philip Morris International Inc.
    "BTI",  # British American Tobacco plc
    "RDY",  # Dr. Reddy's Laboratories Ltd.
    "GILD", # Gilead Sciences, Inc.
    "BIIB", # Biogen Inc.
    "CELG", # Celgene Corporation
    "AMGN", # Amgen Inc.
    "SYK",  # Stryker Corporation
    "BSX",  # Boston Scientific Corporation
    "ISRG", # Intuitive Surgical, Inc.
    "ZBH",  # Zimmer Biomet Holdings, Inc.
    "EW",   # Edwards Lifesciences Corporation
    "RMD",  # ResMed Inc.
    "VRTX", # Vertex Pharmaceuticals Incorporated
    "REGN",  # Regeneron Pharmaceuticals, Inc.
]

len(assets)

100

In [217]:
raw_data, asset_errors, max_combination= fetch_raw_data_yf(assets)

[*********************100%***********************]  99 of 99 completed

5 Failed downloads:
['SPOT', 'LYFT', 'UBER']: YFPricesMissingError('possibly delisted; no price data found  (1d 2015-01-01 -> 2018-01-01) (Yahoo error = "Data doesn\'t exist for startDate = 1420088400, endDate = 1514782800")')
['CELG', 'TWTR']: YFTzMissingError('possibly delisted; no timezone found')


Omitted assets ( 5 ):  ['SPOT', 'CELG', 'TWTR', 'LYFT', 'UBER']
Time to fetch data: 3.54 seconds


## 3. Mean, Volatility and Covariance

In [218]:
names, cov, annualized_returns = get_matrices(raw_data, max_combination, None)[0]
correlation_matrix = create_correlation_matrix(cov)
volatility = np.sqrt(np.diag(cov))

risk_free_rate=0
sharpe_ratios = (annualized_returns - risk_free_rate) / volatility

1it [00:00, 253.08it/s]


In [219]:
hover_texts = [
    f"{ticker} <br>Volatility: {vol:.3f} <br>Returns: {ret:.3%} <br>Sharpe Ratio: {sr:.3f}"
    for ticker, vol, ret, sr in zip(names, volatility, annualized_returns, sharpe_ratios)
]

fig = go.Figure(data=go.Scatter(
    x=volatility,
    y=annualized_returns,
    mode='markers',
    hoverinfo='text',
    hovertext=hover_texts,
    marker=dict(color=sharpe_ratios, colorscale = 'RdBu', size=6, line=dict(width=1), colorbar=dict(title="Sharpe<br>Ratio")
    )
))

fig.update_layout(
    title='Markowitz Mean Varience Model',
    xaxis_title='Volatility (Standard Deviation)',
    yaxis_title='Annualized Returns',
)

fig.show()


## 4.0 Monte Carlo Method

In [220]:
all_portfolios, dominant_portfolios = MonteCarloRBA(names, cov, annualized_returns, 1000)

100%|██████████| 1000/1000 [00:02<00:00, 407.32it/s]


In [221]:
print (len(dominant_portfolios) ,len(all_portfolios))

77 1000


In [222]:
fig1 = go.Figure()

fig1.add_trace(go.Scatter(
    x=[np.sqrt(p["variance"]) for p in all_portfolios],
    y=[p["return"] for p in all_portfolios],
    mode='markers',
    marker=dict(
        color=[p["sharpe"] for p in all_portfolios],
        showscale=True,
        size=7,
        line=dict(width=1),
        colorscale="RdBu",
        colorbar=dict(title="Sharpe<br>Ratio")
    ),
    hoverinfo='text',
    text=[
        f"Return: {p['return']:.3%}<br>Volatility: {np.sqrt(p['variance']):.3f}<br>" +
        f"Sharpe Ratio: {p['return'] / (np.sqrt(p['variance'])):.3f}<br>" +
        "<br>".join([f"{p['tickers'][i]}: Weight={p['weights'][i]:.3f}" for i in range(len(p['tickers']))])
        for p in all_portfolios
    ]
))

fig1.update_layout(
    xaxis=dict(title='Volatility (Standard Deviation)'),
    yaxis=dict(title='Annualised Returns'),
    title='Sample of Random Portfolios'
)

fig1.show()

In [223]:
fig2 = go.Figure()

fig2.add_trace(go.Scatter(
    x=[np.sqrt(p["variance"]) for p in dominant_portfolios],  # Convert variance to volatility
    y=[p["return"] for p in dominant_portfolios],
    mode='markers',
    marker=dict(
        color=[p["return"] / (np.sqrt(p["variance"])) for p in dominant_portfolios],  # Sharpe Ratio
        showscale=True,
        size=7,
        line=dict(width=1),
        colorscale="RdBu",
        colorbar=dict(title="Sharpe<br>Ratio")
    ),
    hoverinfo='text',
    text=[
        f"Return: {p['return']:.3%}<br>Volatility: {np.sqrt(p['variance']):.3f}<br>" +
        f"Sharpe Ratio: {p['sharpe']:.3f}<br>" +
        "<br>".join([f"{p['tickers'][i]}: Weight={p['weights'][i]:.3f}" for i in range(len(p['tickers']))])
        for p in dominant_portfolios
    ],
    name="Portfolios"
))

fig2.add_trace(go.Scatter(
    x=volatility,
    y=annualized_returns,
    mode='markers',
    hoverinfo='text',
    hovertext=[
        f"{name} <br>Volatility: {vol:.3f} <br>Returns: {ret:.3%} <br>Sharpe Ratio: {sr:.3f}"
        for name, vol, ret, sr in zip(names, volatility, annualized_returns, sharpe_ratios)
    ],
    marker=dict(
        color='brown',
        size=5,
        symbol='triangle-up',  # Sets the marker shape to a triangle
        line=dict(width=1)
    ),
    name="Individual Assets"
))

fig2.update_layout(
    title='Sample of Random Portfolios',
    xaxis_title='Volatility (Standard Deviation)',
    yaxis_title='Annualized Return',
    legend=dict(y=5
    )
)


fig2.show()

## 5.0 ML Method

In [None]:
def MLRBA(ticker, covariances, returns, num_iterations=1000, risk_free_rate = 0, return_power = 1, std_power = 1, return_weight=2/5, corr_weight=1/5, vol_weight= 2/5):
    num_assets =  8
    base_portfolio = np.random.choice(list(ticker), num_assets, replace=False)
    base_portfolio = list(ticker)[:num_assets]
    highest_weighted_sharpe = -np.inf

    all_portfolios = []

    tested_assets = set()

    def _get_portfolio_stats (portfolio_assets, risk_free_rate = 0):
        p_asset_ret = returns.loc[portfolio_assets].values
        p_asset_var = covariances.loc[portfolio_assets, portfolio_assets].values
        best_p_weights = maximize_sharpe(p_asset_ret, p_asset_var)
        p_ret = np.dot(best_p_weights,p_asset_ret)
        p_var = np.dot(best_p_weights, p_asset_var @ best_p_weights)
        sharpe = get_sharpe_ratio(p_ret, p_var, risk_free_rate, return_power, std_power)

        return p_asset_ret, p_asset_var, sharpe, p_ret, p_var, best_p_weights

    def _update_portfolios_array(portfolios, assets, weights, p_ret, p_var):
        portfolios.append({
            "tickers": assets,
            "weights": weights,
            "return": p_ret,
            "variance": p_var,
            "sharpe": (p_ret-risk_free_rate)/np.sqrt(p_var),
        })

    curr_ret, curr_var, curr_weighted_sharpe, curr_p_return, curr_p_variance, curr_p_weights = _get_portfolio_stats(base_portfolio, risk_free_rate)
    _update_portfolios_array(all_portfolios, base_portfolio, curr_p_weights, curr_p_return, curr_p_variance)

    good_portfolios = all_portfolios.copy()
    best_portfolio = base_portfolio.copy()

    highest_weighted_sharpe = curr_weighted_sharpe
    for _ in tqdm(range(num_iterations)):
        asset_to_remove = find_best_asset_to_remove(best_portfolio, curr_var, curr_ret, return_weight, corr_weight, vol_weight)     #most_correlated_asset, _, _ = find_correlation_matrix(portfolio, curr_variances)
        new_portfolio = [str(asset) for asset in best_portfolio if asset != asset_to_remove]

        ranked_assets = find_asset_to_add(new_portfolio, ticker, covariances, returns, return_weight, corr_weight, vol_weight)         # Find the next best asset to add to the portfolio
        asset_to_add = ranked_assets.index[0]

        for asset in ranked_assets.index:
            if asset not in tested_assets:
                asset_to_add = asset
                break

        new_portfolio.append(asset_to_add)
        tested_assets.add(asset_to_add)

        # Substitute in and measure portfolio performance based on sharpe ratio
        new_returns, new_var, new_weighted_sharpe, new_p_return, new_p_variance, new_p_weights = _get_portfolio_stats(new_portfolio, risk_free_rate)

        _update_portfolios_array(all_portfolios, new_portfolio, new_p_weights, new_p_return, new_p_variance)

        if new_weighted_sharpe > highest_weighted_sharpe:
            highest_weighted_sharpe = new_weighted_sharpe
            best_portfolio = new_portfolio
            curr_ret, curr_var = new_returns, new_var

            _update_portfolios_array(good_portfolios, new_portfolio, new_p_weights, new_p_return, new_p_variance)

            tested_assets.clear()

        # If Sharpe ratio was worse, then move on to the next least correlated asset
        # If Sharpe ratio is better, set as new base portfolio, and repeat the process for num_iterations times
        # Adjust the sharpe ratio, maybe more emphasis on returns/volatility
        # Update weights to value return or corr
        # See how many iterations it takes to get here, whats a good threshold/stopping point
        # Backtesting
        # Train a model to maybe predict the sharpe ratio of a portfolio

    base_details = good_portfolios[0]
    best_details = good_portfolios[-1]

    return base_details, best_details, good_portfolios, all_portfolios

base_portfolio, best_portfolio, good_portfolios, total_portfolios = MLRBA(names, cov, annualized_returns, 300)
base_portfolio, best_portfolio


100%|██████████| 300/300 [00:02<00:00, 135.12it/s]


({'tickers': ['BA_Close',
   'MCD_Close',
   'T_Close',
   'SO_Close',
   'CPB_Close',
   'UA_Close',
   'CVX_Close',
   'VZ_Close'],
  'weights': array([4.25009815e-01, 4.36521607e-01, 3.78130817e-15, 0.00000000e+00,
         0.00000000e+00, 1.39181695e-14, 1.38468578e-01, 0.00000000e+00]),
  'return': np.float64(0.43812138941036305),
  'variance': np.float64(0.010073871734639592),
  'sharpe': np.float64(4.3651206088251815)},
 {'tickers': ['BA_Close',
   'MCD_Close',
   'SO_Close',
   'CVX_Close',
   'ABBV_Close',
   'UNH_Close',
   'WMT_Close',
   'CAT_Close'],
  'weights': array([2.41173955e-01, 1.18220137e-01, 5.76581256e-16, 5.63014338e-02,
         2.05201879e-01, 9.58477524e-02, 1.53583598e-01, 1.29671244e-01]),
  'return': np.float64(0.49024817463473286),
  'variance': np.float64(0.007244346183071566),
  'sharpe': np.float64(5.759919151053737)})

In [225]:
fig = go.Figure()

fig.add_trace(go.Scatter(
    x=[p["variance"]**0.5 for p in total_portfolios],  # Convert variance to volatility
    y=[p["return"] for p in total_portfolios],
    mode='markers',
    marker=dict(
        color=[p["sharpe"] for p in total_portfolios],  # Sharpe Ratio
        showscale=True,
        size=7,
        line=dict(width=1),
        colorscale="RdBu",
        colorbar=dict(title="Sharpe<br>Ratio")
    ),
    hoverinfo='text',
    text=[
        f"Return: {p['return']:.3%}<br>Volatility: {p['variance']**0.5:.3f}<br>" +
        f"Sharpe Ratio: {p['return'] / (p['variance']**0.5):.3f}<br>" +
        "<br>".join([f"{p['tickers'][i]}: Weight={p['weights'][i]:.3f}" for i in range(len(p['tickers']))])
        for p in total_portfolios
    ],
    name="Portfolios"
))

fig.add_trace(go.Scatter(
    x=volatility,
    y=annualized_returns,
    mode='markers',
    hoverinfo='text',
    hovertext=[
        f"{name} <br>Volatility: {vol:.3f} <br>Returns: {ret:.3%} <br>Sharpe Ratio: {sr:.3f}"
        for name, vol, ret, sr in zip(names, volatility, annualized_returns, sharpe_ratios)
    ],
    marker=dict(
        color='brown',
        size=5,
        symbol='triangle-up',  # Sets the marker shape to a triangle
        line=dict(width=1)
    ),
    name="Individual Assets"
))

fig.update_layout(
    title='Sample of Random Portfolios',
    xaxis_title='Volatility (Standard Deviation)',
    yaxis_title='Annualized Return',
    legend=dict(y=5
    )
)


fig.show()

In [226]:
sharpe_ratios = [portfolio['sharpe'] for portfolio in total_portfolios]

fig = go.Figure(data=go.Scatter(x=list(range(len(sharpe_ratios))), y=sharpe_ratios, mode='lines+markers'))
fig.update_layout(title='Sharpe Ratio Over Iterations',
                  xaxis_title='Iteration',
                  yaxis_title='Sharpe Ratio',
                  )
fig.show()

## 6.0 Find Optimal Portfolio Size

In [227]:
all_portfolios, dominant_portfolios = MonteCarloRBA(names, cov, annualized_returns, 50000, "random", 3, 50)

100%|██████████| 50000/50000 [00:29<00:00, 1687.10it/s]


In [228]:
portfolio_sizes = [len(portfolio['tickers']) for portfolio in all_portfolios]
volatility = [np.sqrt(portfolio['variance']) for portfolio in all_portfolios]
returns = [portfolio['return'] for portfolio in all_portfolios]

volatility_by_size = defaultdict(list)
for size, vol, ret in zip(portfolio_sizes, volatility, returns):
    volatility_by_size[size].append((vol, ret))

average_volatility = {size: np.mean([v[0] for v in vols]) for size, vols in volatility_by_size.items()}
average_returns = {size: np.mean([v[1] for v in vols]) for size, vols in volatility_by_size.items()}

sorted_sizes = sorted(average_volatility.keys())
sorted_average_vols = [average_volatility[size] for size in sorted_sizes]
sorted_average_rets = [average_returns[size] for size in sorted_sizes]

fig = go.Figure()
fig.add_trace(go.Scatter(
    x=sorted_sizes,
    y=sorted_average_vols,
    mode='lines',
    name='Average Volatility'
))
fig.add_trace(go.Scatter(
    x=sorted_sizes,
    y=sorted_average_rets,
    mode='lines',
    name='Average Returns'
))

fig.update_layout(
    title='Average Volatility and Returns by Portfolio Size',
    xaxis_title='Number of Assets in Portfolio',
    yaxis_title='Average Value',
    xaxis=dict(type='category'),
)

fig.show()
