# First portfolio optimization using Mean-Variance method

In [1]:
import yfinance as yf
import pandas as pd 
import numpy as np
import datetime

In [2]:
from pypfopt.expected_returns import mean_historical_return
from pypfopt.risk_models import CovarianceShrinkage
from pypfopt.efficient_frontier import EfficientFrontier
from pypfopt.discrete_allocation import DiscreteAllocation, get_latest_prices

Now, let’s pull stocks for the following industries:

Healthcare: Moderna (MRNA), Pfizer (PFE), Johnson & Johnson (JNJ)

Tech: Google (GOOGL), Apple (AAPL)

Retail: Costco (COST), Walmart (WMT),  Kroger Co (KR)

Finance: JPMorgan Chase & Co (JPM), Bank of America (BAC)

In [3]:
# Ticker universe, grouped by the sectors listed above.
tickers = [
    # Healthcare
    'MRNA', 'PFE', 'JNJ',
    # Tech
    'GOOGL', 'AAPL',
    # Retail
    'COST', 'WMT', 'KR',
    # Finance
    'JPM', 'BAC',
]

Define a helper function for downloading stock data, and set the start and end dates for retrieving it.

In [4]:
# Bounds of the price-history window requested from the data provider.
# NOTE(review): the last row in the recorded run is 2025-04-03, so `end`
# appears to be exclusive - confirm against the yfinance documentation.
start = datetime.datetime(year=2024, month=11, day=15)
end = datetime.datetime(year=2025, month=4, day=4)

In [5]:
def get_stocks(tickers, start, end):
    """Download closing prices for the given tickers from Yahoo Finance.

    Parameters
    ----------
    tickers : str or list of str
        Ticker symbol(s) to download.
    start, end : datetime-like
        Bounds of the download window, forwarded to ``yf.download``.

    Returns
    -------
    pandas.DataFrame
        A flat table with a ``Date`` column followed by one column of
        closing prices per ticker.
    """
    # Pin auto_adjust explicitly: yfinance changed its default to True, so
    # relying on the default prints a notice and makes the prices depend on
    # which yfinance version happens to be installed.
    raw = yf.download(tickers, start=start, end=end, auto_adjust=True)

    # Selecting the top-level "Close" label leaves one column per ticker, so
    # no tuple-based renaming of a MultiIndex is needed afterwards.
    close = raw["Close"]
    if isinstance(close, pd.Series):
        # Flat (single-ticker) result: keep the DataFrame return type and
        # label the column with the ticker symbol.
        symbol = tickers if isinstance(tickers, str) else tickers[0]
        close = close.to_frame(name=symbol)

    close = close.copy()
    close.index.name = "Date"    # becomes the name of the column created by reset_index
    close.columns.name = None    # drop the leftover column-level label
    return close.reset_index()

Download the stocks, convert the 'Date' column to datetime format, and set it as the index column.

In [6]:
# Fetch the closing prices, make sure 'Date' holds real datetimes,
# and index the price table by it.
portfolio = (
    get_stocks(tickers, start=start, end=end)
    .assign(Date=lambda prices: pd.to_datetime(prices['Date']))
    .set_index('Date')
)

YF.download() has changed argument auto_adjust default to True


[*********************100%***********************]  10 of 10 completed


Now, let’s calculate the expected returns and the covariance matrix, and store them in the variables mu and S, respectively:

In [7]:
# Covariance matrix of the assets, estimated with Ledoit-Wolf shrinkage.
S = CovarianceShrinkage(prices=portfolio).ledoit_wolf()
# Expected return per ticker, estimated from the historical prices.
mu = mean_historical_return(prices=portfolio)

The Sharpe ratio is the ratio between a portfolio's return (in excess of the risk-free rate) and its risk (volatility). The lower the risk and the higher the returns, the higher the Sharpe ratio.

The algorithm looks for the maximum Sharpe ratio, which translates to the portfolio with the highest return per unit of risk. Ultimately, the higher the Sharpe ratio, the better the risk-adjusted performance of the portfolio.

In [8]:
# Mean-variance optimiser built from the expected returns and the covariance estimate.
ef = EfficientFrontier(expected_returns=mu, cov_matrix=S)
# Solve for the portfolio with the maximum Sharpe ratio.
weights = ef.max_sharpe()

# clean_weights() tidies the raw optimiser output for reporting
# (the printed values are rounded and negligible positions show as 0.0).
cleaned_weights = ef.clean_weights()
print({ticker: weight for ticker, weight in cleaned_weights.items()})

{'AAPL': 0.0, 'BAC': 0.0, 'COST': 0.19311, 'GOOGL': 0.0, 'JNJ': 0.04043, 'JPM': 0.0, 'KR': 0.76646, 'MRNA': 0.0, 'PFE': 0.0, 'WMT': 0.0}


In [9]:
# Report the optimised portfolio's expected annual return, annual volatility and
# Sharpe ratio: verbose=True prints them, and the returned tuple is shown as the cell result.
ef.portfolio_performance(verbose=True)

Expected annual return: 60.6%
Annual volatility: 21.0%
Sharpe Ratio: 2.79


(0.6060398995048926, 0.21002831300023747, 2.7902899905891734)

Finally, let’s convert the weights into actual allocation values (i.e., how many shares of each stock to buy). For our allocation, let’s consider an investment amount of $100,000:

In [10]:
# Latest available price of every ticker in the price table (the recorded run
# uses the 2025-04-03 row); needed to turn weights into share counts.
latest_prices = get_latest_prices(prices=portfolio)
print(latest_prices)

AAPL     203.190002
BAC       37.220001
COST     967.080017
GOOGL    150.720001
JNJ      159.820007
JPM      227.290009
KR        70.739998
MRNA      25.730000
PFE       24.290001
WMT       87.260002
Name: 2025-04-03 00:00:00, dtype: float64


In [11]:
# Turn the fractional weights into whole-share counts for a $100,000 budget.
# The cleaned weights are used instead of the raw optimiser output so that the
# allocation is based on the same weights the notebook reports, and so that
# numerical noise around zero (including tiny negative values) cannot be
# mistaken for real - or short - positions.
da = DiscreteAllocation(cleaned_weights, latest_prices, total_portfolio_value=100_000)

# greedy_portfolio() yields the number of shares per ticker and the cash left uninvested.
allocation, leftover = da.greedy_portfolio()
print("Discrete allocation:", allocation)
print("Funds remaining: ${:.2f}".format(leftover))

Discrete allocation: {'KR': 1083, 'COST': 20, 'JNJ': 25}
Funds remaining: $51.48


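Of course, this return is inflated and is not likely to hold up in the future: it is extrapolated from less than five months of price history and concentrated in just three stocks.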

Mean-variance optimization doesn’t perform very well in practice since it makes many simplifying assumptions, such as returns being normally distributed, and it needs an invertible covariance matrix. Fortunately, methods like Hierarchical Risk Parity (HRP) address these limitations.

# First portfolio optimization using Hierarchical Risk Parity method

In [12]:
from pypfopt import HRPOpt

We then need to calculate the returns:

In [13]:
# Period-over-period percentage change of each ticker; the first row has no
# previous price to compare against, so it is dropped together with any other
# row containing a missing value.
returns = portfolio.pct_change().dropna(axis=0, how='any')
returns.head(5)

Unnamed: 0_level_0,AAPL,BAC,COST,GOOGL,JNJ,JPM,KR,MRNA,PFE,WMT
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2024-11-18,0.013422,-0.000642,0.013714,0.016291,0.005,-0.001141,0.000689,0.072185,0.002419,-0.002018
2024-11-19,0.00114,-0.006635,0.011571,0.016087,-0.011436,-0.007917,0.000689,-0.056188,0.009654,0.029971
2024-11-20,0.003154,-0.007541,-0.002225,-0.012014,0.000719,-0.009503,-0.008434,-0.009386,-0.006375,0.006698
2024-11-21,-0.002096,0.008684,0.029706,-0.047449,0.01561,0.01653,0.016837,0.035463,0.007618,0.013879
2024-11-22,0.005908,0.011623,0.008748,-0.017121,-0.002122,0.015485,0.010925,0.074771,0.020692,0.023193


Then run the optimization algorithm to get the weights

In [14]:
# Hierarchical Risk Parity is fitted on the return series rather than on prices.
hrp = HRPOpt(returns=returns)
# optimize() returns the portfolio weight assigned to each ticker.
hrp_weights = hrp.optimize()

In [15]:
# verbose=True prints the expected annual return, annual volatility and Sharpe ratio.
hrp.portfolio_performance(verbose=True)
# Show the HRP weights as a plain dict, one entry per ticker.
print({ticker: weight for ticker, weight in hrp_weights.items()})

Expected annual return: 5.6%
Annual volatility: 12.2%
Sharpe Ratio: 0.29
{'AAPL': 0.05228331083949059, 'BAC': 0.07378326510395987, 'COST': 0.11088342480841161, 'GOOGL': 0.10011528266316701, 'JNJ': 0.14761060853326138, 'JPM': 0.10114497633458923, 'KR': 0.18145181296267565, 'MRNA': 0.017056536700147982, 'PFE': 0.15225643926607305, 'WMT': 0.0634143427882236}


This result is much more reasonable and more likely to hold up in the future since HRP is not as sensitive to outliers as mean variance optimization is. 

Finally, let’s calculate the discrete allocation using our weights:

In [16]:
# Convert the HRP weights into whole-share counts for the same $100,000 budget.
da_hrp = DiscreteAllocation(
    weights=hrp_weights,
    latest_prices=latest_prices,
    total_portfolio_value=100000,
)

# Note: `allocation` and `leftover` are rebound here, replacing the
# values from the mean-variance allocation computed earlier.
allocation, leftover = da_hrp.greedy_portfolio()
print("Discrete allocation (HRP):", allocation)
print("Funds remaining (HRP): ${:.2f}".format(leftover))

Discrete allocation (HRP): {'KR': 257, 'PFE': 627, 'JNJ': 93, 'COST': 11, 'JPM': 45, 'GOOGL': 67, 'BAC': 198, 'WMT': 73, 'AAPL': 26, 'MRNA': 67}
Funds remaining (HRP): $16.17


While the performance decreased, we can be more confident that this model will perform just as well when we refresh our data.