# Markowitz Efficient Frontier

## 1. Imports

In [127]:
import pandas as pd
import numpy as np
import yfinance as yf
import plotly.graph_objects as go
from tqdm import tqdm
import scipy.optimize

from fetchData import fetch_raw_data_yf, get_matrices, getNasdaqStocks
from MonteCarloRBA import MonteCarloRBA
from portfolioFunction import maximize_sharpe, create_correlation_matrix
from LearningRBA import find_best_asset_to_remove


## 2. Fetch Data

### Get all Nasdaq Stocks

In [128]:
assets= [
    "AAPL",  # Apple Inc.
    "MSFT",  # Microsoft Corporation
    "AMZN",  # Amazon.com Inc.
    "GOOGL", # Alphabet Inc. (Google) Class A
    "GOOG",  # Alphabet Inc. (Google) Class C
    "META",    # Meta Platforms Inc (formerly Facebook)
    "TSLA",  # Tesla Inc
    "UA", # Berkshire Hathaway Inc. Class B
    "JPM",   # JPMorgan Chase & Co.
    "V",     # Visa Inc.
    "JNJ",   # Johnson & Johnson
    "WMT",   # Walmart Inc.
    "PG",    # Procter & Gamble Co.
    "UNH",   # UnitedHealth Group Inc.
    "MA",    # Mastercard Inc.
    "NVDA",  # NVIDIA Corporation
    "HD",    # Home Depot Inc.
    "BAC",   # Bank of America Corp
    "DIS",   # Walt Disney Co
    "PYPL",  # PayPal Holdings
    "VZ",    # Verizon Communications Inc.
    "ADBE",  # Adobe Inc.
    "CMCSA", # Comcast Corporation
    "NFLX",  # Netflix Inc.
    "KO",    # Coca-Cola Co
    "NKE",   # NIKE Inc.
    "PFE",   # Pfizer Inc.
    "MRK",   # Merck & Co., Inc.
    "PEP",   # PepsiCo, Inc.
    "T",     # AT&T Inc.
    "ABT",   # Abbott Laboratories
    "CRM",   # Salesforce.com Inc.
    "ORCL",  # Oracle Corporation
    "ABBV",  # AbbVie Inc.
    "CSCO",  # Cisco Systems, Inc.
    "INTC",  # Intel Corporation
    "TMO",   # Thermo Fisher Scientific Inc.
    "XOM",   # Exxon Mobil Corporation
    "ACN",   # Accenture plc
    "LLY",   # Eli Lilly and Company
    "COST",  # Costco Wholesale Corporation
    "MCD",   # McDonald's Corp
    "DHR",   # Danaher Corporation
    "MDT",   # Medtronic plc
    "NEE",   # NextEra Energy, Inc.
    "BMY",   # Bristol-Myers Squibb Company
    "QCOM",  # Qualcomm Inc
    "CVX",   # Chevron Corporation
    "WFC",   # Wells Fargo & Co
    "LMT",    # Lockheed Martin Corporation
    "GS",   # Goldman Sachs Group, Inc.
    "MS",   # Morgan Stanley
    "IBM",  # International Business Machines Corporation
    "GE",   # General Electric Company
    "F",    # Ford Motor Company
    "GM",   # General Motors Company
    "UBER", # Uber Technologies, Inc.
    "LYFT", # Lyft, Inc.
    "SNAP", # Snap Inc.
    "TWTR", # Twitter, Inc.
    "SPOT", # Spotify Technology S.A.
    "AMD",  # Advanced Micro Devices, Inc.
    "TXN",  # Texas Instruments Incorporated
    "BABA", # Alibaba Group Holding Limited
    "SAP",  # SAP SE
    "HON",  # Honeywell International Inc.
    "BA",   # Boeing Company
    "RTX",  # Raytheon Technologies Corporation
    "CAT",  # Caterpillar Inc.
    "DE",   # Deere & Company
    "MMM",  # 3M Company
    "DUK",  # Duke Energy Corporation
    "SO",   # Southern Company
    "EXC",  # Exelon Corporation
    "NEE",  # NextEra Energy, Inc.
    "AEP",  # American Electric Power Company, Inc.
    "SRE",  # Sempra Energy
    "ETN",  # Eaton Corporation plc
    "EMR",  # Emerson Electric Co.
    "SYY",  # Sysco Corporation
    "KR",   # Kroger Co.
    "GIS",  # General Mills, Inc.
    "K",    # Kellogg Company
    "CPB",  # Campbell Soup Company
    "MO",   # Altria Group, Inc.
    "PM",   # Philip Morris International Inc.
    "BTI",  # British American Tobacco plc
    "RDY",  # Dr. Reddy's Laboratories Ltd.
    "GILD", # Gilead Sciences, Inc.
    "BIIB", # Biogen Inc.
    "CELG", # Celgene Corporation
    "AMGN", # Amgen Inc.
    "SYK",  # Stryker Corporation
    "BSX",  # Boston Scientific Corporation
    "ISRG", # Intuitive Surgical, Inc.
    "ZBH",  # Zimmer Biomet Holdings, Inc.
    "EW",   # Edwards Lifesciences Corporation
    "RMD",  # ResMed Inc.
    "VRTX", # Vertex Pharmaceuticals Incorporated
    "REGN",  # Regeneron Pharmaceuticals, Inc.
]

len(assets)

100

In [129]:
raw_data, asset_errors, max_combination= fetch_raw_data_yf(assets)

[*********************100%***********************]  99 of 99 completed

5 Failed downloads:
['UBER', 'SPOT', 'LYFT']: YFPricesMissingError('possibly delisted; no price data found  (1d 2015-01-01 -> 2018-01-01) (Yahoo error = "Data doesn\'t exist for startDate = 1420088400, endDate = 1514782800")')
['CELG', 'TWTR']: YFTzMissingError('possibly delisted; no timezone found')


Omitted assets ( 5 ):  ['SPOT', 'CELG', 'UBER', 'TWTR', 'LYFT']
Time to fetch data: 3.16 seconds


## 3. Mean, Volatility and Covariance

In [130]:
names, cov, annualized_returns = get_matrices(raw_data, max_combination, None)[0]
correlation_matrix = create_correlation_matrix(cov)
volatility = np.sqrt(np.diag(cov))

risk_free_rate=0 
sharpe_ratios = (annualized_returns - risk_free_rate) / volatility

In [131]:
hover_texts = [
    f"{ticker} <br>Volatility: {vol:.2f} <br>Returns: {ret:.2%} <br>Sharpe Ratio: {sr:.2f}"
    for ticker, vol, ret, sr in zip(names, volatility, annualized_returns, sharpe_ratios)
]

fig = go.Figure(data=go.Scatter(
    x=volatility, 
    y=annualized_returns, 
    mode='markers',
    hoverinfo='text',
    hovertext=hover_texts,
    marker=dict(color=sharpe_ratios, colorscale = 'RdBu', size=6, line=dict(width=1), colorbar=dict(title="Sharpe<br>Ratio")
    )
))

fig.update_layout(
    title='Markowitz Mean Varience Model',
    xaxis_title='Volatility (Standard Deviation)',
    yaxis_title='Annualized Returns',
)

fig.show()


## 4.0 Monte Carlo Method

In [132]:
all_portfolios, dominant_portfolios = MonteCarloRBA(names, cov, annualized_returns, 1000)

100%|██████████| 1000/1000 [00:02<00:00, 382.58it/s]


In [133]:
print (len(dominant_portfolios) ,len(all_portfolios))

55 1000


In [134]:
fig1 = go.Figure()

fig1.add_trace(go.Scatter(
    x=[p["variance"]**0.5 for p in all_portfolios],
    y=[p["return"] for p in all_portfolios],
    mode='markers',
    marker=dict(
        color=[p["return"] / (p["variance"]**0.5) for p in all_portfolios],
        showscale=True,
        size=7,
        line=dict(width=1),
        colorscale="RdBu",
        colorbar=dict(title="Sharpe<br>Ratio")
    ),
    hoverinfo='text',
    text=[
        f"Return: {p['return']:.2%}<br>Volatility: {p['variance']**0.5:.2f}<br>" +
        "<br>".join([f"{p['tickers'][i]}: Weight={p['weights'][i]:.2f}" for i in range(len(p['tickers']))])
        for p in all_portfolios
    ]
))

fig1.update_layout(
    xaxis=dict(title='Volatility (Standard Deviation)'),
    yaxis=dict(title='Annualised Returns'),
    title='Sample of Random Portfolios'
)

fig1.show()

In [135]:
fig2 = go.Figure()

fig2.add_trace(go.Scatter(
    x=[p["variance"]**0.5 for p in dominant_portfolios],  # Convert variance to volatility
    y=[p["return"] for p in dominant_portfolios],
    mode='markers',
    marker=dict(
        color=[p["return"] / (p["variance"]**0.5) for p in dominant_portfolios],  # Sharpe Ratio
        showscale=True,
        size=7,
        line=dict(width=1),
        colorscale="RdBu",
        colorbar=dict(title="Sharpe<br>Ratio")
    ),
    hoverinfo='text',
    text=[
        f"Return: {p['return']:.2%}<br>Volatility: {p['variance']**0.5:.2f}<br>" +
        "<br>".join([f"{p['tickers'][i]}: Weight={p['weights'][i]:.2f}" for i in range(len(p['tickers']))])
        for p in dominant_portfolios
    ],
    name="Portfolios"
))


fig2.add_trace(go.Scatter(
    x=volatility, 
    y=annualized_returns,
    mode='markers',
    hoverinfo='text',
    hovertext=[
        f"{name} <br>Volatility: {vol:.2f} <br>Returns: {ret:.2%} <br>Sharpe Ratio: {sr:.2f}"
        for name, vol, ret, sr in zip(names, volatility, annualized_returns, sharpe_ratios)
    ],
    marker=dict(
        color='brown',
        size=5,
        symbol='triangle-up',  # Sets the marker shape to a triangle
        line=dict(width=1)
    ),
    name="Individual Assets"
))

fig2.update_layout(
    title='Sample of Random Portfolios',
    xaxis_title='Volatility (Standard Deviation)',
    yaxis_title='Annualized Return',
    legend=dict(y=5
    )
)


fig2.show()

## 5.0 ML Method

In [143]:
def MLRBA(ticker, covariances, returns, num_iterations=10000, risk_free_rate = 0):
    num_assets = 5
    base_portfolio = np.random.choice(list(ticker), num_assets, replace=False)
    highest_sharpe = -np.inf
    sharpe_ratios = []
    #base_portfolio = list(ticker)[:num_assets]

    # Asset calculations
    curr_ret = returns.loc[base_portfolio].values
    curr_var = covariances.loc[base_portfolio, base_portfolio].values
    asset_weights = maximize_sharpe(curr_ret, curr_var)
    # Portfolio Calculations
    curr_portfolio_returns = np.dot(asset_weights, curr_ret)
    curr_portfolio_var = np.dot(asset_weights, curr_var @ asset_weights)

    highest_sharpe = (curr_portfolio_returns - risk_free_rate) / np.sqrt(curr_portfolio_var)
    sharpe_ratios.append(highest_sharpe)
    print(sharpe_ratios)
    print(base_portfolio)

    tested_assets = set()

    for _ in tqdm(range(num_iterations)):
        asset_to_remove = find_best_asset_to_remove(base_portfolio, curr_var, curr_ret)
        '''most_correlated_asset, _, _ = find_correlation_matrix(portfolio, curr_variances)'''

        #print(base_portfolio)
        #print(asset_to_remove)
        portfolio = [str(asset) for asset in base_portfolio if asset != asset_to_remove]
        #print(portfolio)

        ranked_assets = find_asset_to_add(portfolio, ticker, covariances, returns)         # Find the next best asset to add to the portfolio
        asset_to_add = ranked_assets.index[0]

        #print(asset_to_add)
        for asset in ranked_assets.index:
            if asset not in tested_assets:
                asset_to_add = asset
                break
        
        portfolio.append(asset_to_add)
        tested_assets.add(asset_to_add)
        #print(portfolio)

        # Substitute in and measure portfolio performance based on sharpe ratio
        new_returns = returns.loc[portfolio].values
        new_var = covariances.loc[portfolio, portfolio].values
        asset_weights = maximize_sharpe(new_returns, new_var)

        # Portfolio Calculations
        new_portfolio_returns = np.dot(asset_weights, new_returns)
        new_portfolio_var = np.dot(asset_weights, new_var @ asset_weights)
        new_sharpe_ratio = (new_portfolio_returns - risk_free_rate) / np.sqrt(new_portfolio_var)

        if new_sharpe_ratio > highest_sharpe:
            sharpe_ratios.append(new_sharpe_ratio)
            highest_sharpe = new_sharpe_ratio
            base_portfolio = portfolio
            tested_assets.clear()

        # If Sharpe ratio was worse, then move on to the next least correlated asset
        # If Sharpe ratio is better, set as new base portfolio, and repeat the process for num_iterations times

    return portfolio, highest_sharpe, sharpe_ratios

def find_asset_to_add(portfolio_assets, all_assets, all_covariance, all_returns, return_weight=0.2, corr_weight=0.8):
    remaining_assets = [asset for asset in all_assets if asset not in portfolio_assets]
    
    corr_matrix = create_correlation_matrix(all_covariance)
    avg_corrs = corr_matrix.loc[remaining_assets, portfolio_assets].mean(axis=1)
    
    norm_corr = (avg_corrs - avg_corrs.min()) / (avg_corrs.max() - avg_corrs.min())
    norm_returns = (all_returns.loc[remaining_assets] - all_returns.min()) / (all_returns.max() - all_returns.min())

    combined_score = corr_weight * norm_corr + return_weight * norm_returns
    
    ranked_assets = combined_score.sort_values(ascending=False)
    
    return ranked_assets

portfolio, highest_sharpe, sharpe_ratios = MLRBA(names, cov, annualized_returns, 1000)
portfolio, highest_sharpe, sharpe_ratios


[np.float64(1.1691399666337663)]
['SNAP_Close' 'DHR_Close' 'BAC_Close' 'LLY_Close' 'BSX_Close']


100%|██████████| 1000/1000 [00:04<00:00, 239.27it/s]


(['BA_Close', 'PYPL_Close', 'META_Close', 'V_Close', 'MA_Close'],
 np.float64(4.764533885870018),
 [np.float64(1.1691399666337663),
  np.float64(3.1304220401255507),
  np.float64(3.231212438335536),
  np.float64(3.487978961991482),
  np.float64(3.53723818299531),
  np.float64(3.80496988182076),
  np.float64(3.8049698818236215),
  np.float64(3.8273143646862353),
  np.float64(3.827314364686705),
  np.float64(3.9098176259561455),
  np.float64(3.9098176259584005),
  np.float64(4.068926151957772),
  np.float64(4.068926151972387),
  np.float64(4.06892615197337),
  np.float64(4.08690062624838),
  np.float64(4.568512475185374),
  np.float64(4.7119297920020164),
  np.float64(4.764533695522531),
  np.float64(4.764533885869194),
  np.float64(4.764533885870018)])