In [1]:
pip install --upgrade yfinance==0.2.54



In [3]:
pip install PyPortfolioOpt

Collecting PyPortfolioOpt
  Downloading pyportfolioopt-1.5.6-py3-none-any.whl.metadata (22 kB)
Collecting ecos<3.0.0,>=2.0.14 (from PyPortfolioOpt)
  Downloading ecos-2.0.14-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (8.0 kB)
Downloading pyportfolioopt-1.5.6-py3-none-any.whl (62 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m62.7/62.7 kB[0m [31m4.5 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading ecos-2.0.14-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (220 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m220.1/220.1 kB[0m [31m11.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: ecos, PyPortfolioOpt
Successfully installed PyPortfolioOpt-1.5.6 ecos-2.0.14


In [16]:
import yfinance as yf
import pandas as pd
import numpy as np
import cvxpy as cp
from pypfopt import EfficientFrontier, risk_models, expected_returns, EfficientCVaR, black_litterman
from pypfopt.black_litterman import BlackLittermanModel

In [49]:
# Define 50 tickers (note: 'FB' replaced by 'META')
tickers = [
    "AAPL", "MSFT", "GOOG", "BRK-B", "JPM", "JNJ"
]

riskfree = 0.05

# Download historical adjusted close prices (2020-2025)
data = yf.download(tickers, start="2015-01-01", end="2025-01-01")["Close"]
data = data.dropna(axis=1)  # Remove any tickers with missing data

# Split data into training (in-sample) and testing (out-of-sample) periods.
# In-sample: 2020-01-01 to 2023-01-01; Out-of-sample: 2023-01-02 to 2025-01-01
train_data = data.loc["2015-01-01":"2023-01-01"]
test_data = data.loc["2023-01-02":"2025-01-01"]

[*********************100%***********************]  6 of 6 completed


In [54]:
# Calculate expected returns and covariance matrix from the training data.
S_train = risk_models.sample_cov(train_data)
viewdict = {"AAPL": 0.1, "MSFT": 0.1, "GOOG": 0.1, "BRK-B": 0.1, "JPM": 0.1, "JNJ": 0.1}
bl = BlackLittermanModel(S_train, absolute_views=viewdict) # Create BlackLittermanModel instance
mu_train = bl.bl_returns() # Get the Black-Litterman expected returns as a numpy array
print(mu_train)

Ticker
AAPL     0.085806
BRK-B    0.073354
GOOG     0.082228
JNJ      0.064448
JPM      0.086699
MSFT     0.087517
dtype: float64


In [55]:
with open('tickers.txt', 'w') as f:
    for ticker in tickers:
        f.write(ticker + '\n')

In [56]:


def optimize_portfolio(method, mu, S, riskfree, objective = 'max_sharpe', reg = 1e-2, expret = 0.1):
    """
    Build an efficient frontier and add a custom objective/constraint
    based on the regularization method.
    """
    ef = EfficientFrontier(mu, S, weight_bounds=(0, 1))

    if method == 'l1':
        # L1 regularisation encourages sparsity.
        l1_reg = reg
        ef.add_objective(lambda w: l1_reg * cp.norm1(w))
    elif method == 'l2':
        # L2 regularisation penalizes large weights.
        l2_reg = reg
        ef.add_objective(lambda w: l2_reg * cp.sum_squares(w))
    elif method == 'vanilla':
        pass
    else:
        raise ValueError("Unknown method. Choose 'l1', 'l2', or 'diversification' or 'vanilla'.")

    if objective == 'max_sharpe':
        weights = ef.max_sharpe(risk_free_rate=riskfree)
        #weights = ef.efficient_return(0.1)
        #cleaned_weights = ef.clean_weights()
        # Get the in-sample portfolio performance (annual return, volatility, Sharpe ratio)
        ret_train, vol_train, sr_train = ef.portfolio_performance(verbose=False, risk_free_rate=riskfree)
        return weights, ret_train, vol_train, sr_train

    elif objective == 'optvol':
        weights = ef.efficient_return(expret)
        ret_train, vol_train, sr_train = ef.portfolio_performance(verbose=False, risk_free_rate=riskfree)
        return weights, ret_train, vol_train, sr_train

def optimize_cvar(mu, train_data, maxcvar = 0.1, l2_reg = 1e-2):
        ef = EfficientCVaR(mu, train_data, beta=0.95, weight_bounds=(0, 1))
        ef.add_objective(lambda w: l2_reg * cp.sum_squares(w))
        weights = ef.efficient_risk(maxcvar)
        ret_train, vol_train = ef.portfolio_performance(verbose=False)
        return weights, ret_train, vol_train


def compute_out_of_sample_return(weights, test_data):
    """
    Given a weights dictionary and test period price data, compute the realized
    annualized portfolio return.
    """
    # Convert weights dictionary to a Series (for proper alignment)
    weights_series = pd.Series(weights)
    # Compute daily returns for the test period.
    test_returns = test_data.pct_change().dropna()
    # Calculate the portfolio's daily return (aligned by tickers)
    portfolio_daily_returns = (test_returns * weights_series).sum(axis=1)
    # Annualize the return (assume 252 trading days)
    realized_return = (1 + portfolio_daily_returns).prod() ** (252 / len(portfolio_daily_returns)) - 1
    return realized_return


In [57]:
import warnings
warnings.filterwarnings('ignore')
# List of regularization methods to evaluate.
methods = ['vanilla', 'l1', 'l2']
results = {}

print("Evaluating portfolio performance for different regularization methods:\n")
for method in methods:
    mindiff = 1000
    regs = np.logspace(-5, 2, num=100) if method != 'vanilla' else [0]
    for reg in regs:
        weights, exp_return, vol, sr = optimize_portfolio(method, mu_train, S_train, riskfree=riskfree, reg=reg, objective='max_sharpe', expret=0.2)
        realized_return = compute_out_of_sample_return(weights, test_data)
        diff = abs(exp_return - realized_return)
        #print((reg, diff))

        if diff < mindiff:
            results[method] = {'expected_return': exp_return,
                            'realized_return': realized_return,
                            'difference': diff,
                            'weights': weights,
                            'reg': reg}
            mindiff = diff

    print(f"Method: {method}")
    print(f"  Optimized Weights: {results[method]['weights']}")
    print(f"  Expected (in-sample) annual return: {results[method]['expected_return']:.2%}")
    print(f"  Realized (out-of-sample) annual return: {results[method]['realized_return']:.2%}")
    print(f"  Absolute Difference: {mindiff:.2%}")
    print(f"  Regularization parameter: {results[method]['reg']}")

# Determine the method with the smallest difference.
best_method = min(results, key=lambda m: results[m]['difference'])

print(best_method, "is the best method")
print(f"  Absolute Difference: {mindiff:.2%}")
print(f"  Regularization parameter: {results[method]['reg']}")

# Determine the method with the smallest difference.
best_method = min(results, key=lambda m: results[m]['difference'])

print(best_method, "is the best method")

Evaluating portfolio performance for different regularization methods:

Method: vanilla
  Optimized Weights: OrderedDict([('AAPL', 0.1665403721958118), ('BRK-B', 0.0), ('GOOG', 0.0403011680811109), ('JNJ', 0.0), ('JPM', 0.4246334610279978), ('MSFT', 0.3685249986950796)])
  Expected (in-sample) annual return: 8.67%
  Realized (out-of-sample) annual return: 38.93%
  Absolute Difference: 30.26%
  Regularization parameter: 0
Method: l1
  Optimized Weights: OrderedDict([('AAPL', 9.883265273e-07), ('BRK-B', 1.870322225e-07), ('GOOG', 7.804993124e-07), ('JNJ', -3.478110573e-07), ('JPM', 9.962381637e-07), ('MSFT', 0.9999973991138352)])
  Expected (in-sample) annual return: 8.75%
  Realized (out-of-sample) annual return: 33.96%
  Absolute Difference: 25.20%
  Regularization parameter: 100.0
Method: l2
  Optimized Weights: OrderedDict([('AAPL', 0.1988495599072251), ('BRK-B', 0.1296963552910806), ('GOOG', 0.1789561504458888), ('JNJ', 0.080222488285034), ('JPM', 0.2038936070202921), ('MSFT', 0.208

In [58]:
weights_vanilla, exp_return_vanilla, cvar_vanilla = optimize_cvar(mu_train, train_data, maxcvar = 0.03, l2_reg=0)
realized_return_vanilla = compute_out_of_sample_return(weights_vanilla, test_data)
diff_vanilla = abs(exp_return_vanilla - realized_return_vanilla)

In [59]:
regs = np.logspace(-1, 2, num=100)
mindiff = 1000

for reg in regs:
    weights, exp_return, cvar = optimize_cvar(mu_train, train_data, maxcvar = 0.03, l2_reg=reg)
    realized_return = compute_out_of_sample_return(weights, test_data)
    diff = abs(exp_return - realized_return)
    #print((reg, diff))

    if diff < mindiff:
        results = {'expected_return': exp_return,
                        'realized_return': realized_return,
                        'difference': diff,
                        'weights': weights,
                        'reg': reg}
        mindiff = diff

In [60]:
print(f"  Optimized Weights: {results['weights']}")
print(f"  Expected (in-sample) annual return: {results['expected_return']:.2%}")
print(f"  Realized (out-of-sample) annual return: {results['realized_return']:.2%}")
print(f"  Regularization parameter: {results['reg']}")
print(f"  Absolute Difference: {mindiff:.2%}\n")
print(f"  Absolute Difference (Vanilla method): {diff_vanilla:.2%}")

  Optimized Weights: OrderedDict([('AAPL', 0.1666956533422822), ('BRK-B', 0.1666333959147198), ('GOOG', 0.166677762689284), ('JNJ', 0.1665888614855804), ('JPM', 0.1667001192144062), ('MSFT', 0.1667042073537274)])
  Expected (in-sample) annual return: 8.00%
  Realized (out-of-sample) annual return: 29.39%
  Regularization parameter: 100.0
  Absolute Difference: 21.39%

  Absolute Difference (Vanilla method): 25.23%
