In [1]:
pip install PyPortfolioOpt

Collecting PyPortfolioOpt
  Downloading pyportfolioopt-1.5.6-py3-none-any.whl.metadata (22 kB)
Collecting ecos<3.0.0,>=2.0.14 (from PyPortfolioOpt)
  Downloading ecos-2.0.14-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (8.0 kB)
Downloading pyportfolioopt-1.5.6-py3-none-any.whl (62 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m62.7/62.7 kB[0m [31m3.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading ecos-2.0.14-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (220 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m220.1/220.1 kB[0m [31m4.1 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: ecos, PyPortfolioOpt
Successfully installed PyPortfolioOpt-1.5.6 ecos-2.0.14


In [174]:
import yfinance as yf
import pandas as pd
import numpy as np
import cvxpy as cp
from pypfopt import EfficientFrontier, risk_models, expected_returns, EfficientCVaR

# Define 50 tickers (note: 'FB' replaced by 'META')
tickers = [
    "AAPL", "MSFT", "GOOG", "BRK-B", "JPM", "JNJ",
    "V", "PG", "UNH", "HD", "MA", "BAC", "VZ",
    "ADBE", "CMCSA", "T", "INTC", "CSCO", "PFE", "CRM", "ABT", "TMO",
    "CVX", "MRK", "ORCL", "KO", "ACN", "MCD", "NKE", "WMT", "NEE",
    "IBM", "TXN", "COST", "LIN", "SBUX", "PM", "LOW", "QCOM", "BMY"
]

riskfree = 0.05

# Download historical adjusted close prices (2020-2025)
data = yf.download(tickers, start="2015-01-01", end="2025-01-01")["Close"]
data = data.dropna(axis=1)  # Remove any tickers with missing data

# Split data into training (in-sample) and testing (out-of-sample) periods.
# In-sample: 2020-01-01 to 2023-01-01; Out-of-sample: 2023-01-02 to 2025-01-01
train_data = data.loc["2015-01-01":"2023-01-01"]
test_data = data.loc["2023-01-02":"2025-01-01"]

# Calculate expected returns and covariance matrix from the training data.
mu_train = expected_returns.mean_historical_return(train_data)
S_train = risk_models.sample_cov(train_data)

with open('tickers.txt', 'w') as f:
    for ticker in tickers:
        f.write(ticker + '\n')

[*********************100%***********************]  40 of 40 completed


In [150]:


def optimize_portfolio(method, mu, S, riskfree, objective = 'max_sharpe', reg = 1e-2, expret = 0.1):
    """
    Build an efficient frontier and add a custom objective/constraint
    based on the regularization method.
    """
    ef = EfficientFrontier(mu, S, weight_bounds=(0, 1))

    if method == 'l1':
        # L1 regularisation encourages sparsity.
        l1_reg = reg
        ef.add_objective(lambda w: l1_reg * cp.norm1(w))
    elif method == 'l2':
        # L2 regularisation penalizes large weights.
        l2_reg = reg
        ef.add_objective(lambda w: l2_reg * cp.sum_squares(w))
    elif method == 'vanilla':
        pass
    else:
        raise ValueError("Unknown method. Choose 'l1', 'l2', or 'diversification' or 'vanilla'.")

    if objective == 'max_sharpe':
        weights = ef.max_sharpe(risk_free_rate=riskfree)
        #weights = ef.efficient_return(0.1)
        #cleaned_weights = ef.clean_weights()
        # Get the in-sample portfolio performance (annual return, volatility, Sharpe ratio)
        ret_train, vol_train, sr_train = ef.portfolio_performance(verbose=False, risk_free_rate=riskfree)
        return weights, ret_train, vol_train, sr_train
    
    elif objective == 'optvol':
        weights = ef.efficient_return(expret)
        ret_train, vol_train, sr_train = ef.portfolio_performance(verbose=False, risk_free_rate=riskfree)
        return weights, ret_train, vol_train, sr_train
    
def optimize_cvar(mu, train_data, maxcvar = 0.1, l2_reg = 1e-2):
        ef = EfficientCVaR(mu, train_data, beta=0.95, weight_bounds=(0, 1))
        ef.add_objective(lambda w: l2_reg * cp.sum_squares(w))
        weights = ef.efficient_risk(maxcvar)
        ret_train, vol_train = ef.portfolio_performance(verbose=False)
        return weights, ret_train, vol_train


def compute_out_of_sample_return(weights, test_data):
    """
    Given a weights dictionary and test period price data, compute the realized
    annualized portfolio return.
    """
    # Convert weights dictionary to a Series (for proper alignment)
    weights_series = pd.Series(weights)
    # Compute daily returns for the test period.
    test_returns = test_data.pct_change().dropna()
    # Calculate the portfolio's daily return (aligned by tickers)
    portfolio_daily_returns = (test_returns * weights_series).sum(axis=1)
    # Annualize the return (assume 252 trading days)
    realized_return = (1 + portfolio_daily_returns).prod() ** (252 / len(portfolio_daily_returns)) - 1
    return realized_return


In [180]:

# List of regularization methods to evaluate.
methods = ['vanilla', 'l1', 'l2']
results = {}

print("Evaluating portfolio performance for different regularization methods:\n")
for method in methods:
    mindiff = 1000
    regs = np.logspace(-5, 2, num=100) if method != 'vanilla' else [0]
    for reg in regs:
        weights, exp_return, vol, sr = optimize_portfolio(method, mu_train, S_train, riskfree=riskfree, reg=reg, objective='max_sharpe', expret=0.2)
        realized_return = compute_out_of_sample_return(weights, test_data)
        diff = abs(exp_return - realized_return)
        #print((reg, diff))
        
        if diff < mindiff:
            results[method] = {'expected_return': exp_return,
                            'realized_return': realized_return,
                            'difference': diff,
                            'weights': weights,
                            'reg': reg}
            mindiff = diff

    print(f"Method: {method}")
    print(f"  Optimized Weights: {results[method]["weights"]}")
    print(f"  Expected (in-sample) annual return: {results[method]['expected_return']:.2%}")
    print(f"  Realized (out-of-sample) annual return: {results[method]['realized_return']:.2%}")
    print(f"  Absolute Difference: {mindiff:.2%}")
    print(f"  Regularization parameter: {results[method]['reg']}")

# Determine the method with the smallest difference.
best_method = min(results, key=lambda m: results[m]['difference'])

print(best_method, "is the best method")

Evaluating portfolio performance for different regularization methods:

Method: vanilla
  Optimized Weights: OrderedDict({'AAPL': 0.0271259937472007, 'ABT': 0.0, 'ACN': 0.0, 'ADBE': 0.0, 'BAC': 0.0, 'BMY': 0.0, 'BRK-B': 0.0, 'CMCSA': 0.0, 'COST': 0.1587815985631094, 'CRM': 0.0, 'CSCO': 0.0, 'CVX': 0.0, 'GOOG': 0.0, 'HD': 0.0, 'IBM': 0.0, 'INTC': 0.0, 'JNJ': 0.0, 'JPM': 0.0, 'KO': 0.0, 'LIN': 0.0, 'LOW': 0.0, 'MA': 0.0, 'MCD': 0.0763776881605165, 'MRK': 0.0, 'MSFT': 0.1790315821297693, 'NEE': 0.1361182871065492, 'NKE': 0.0, 'ORCL': 0.0, 'PFE': 0.0, 'PG': 0.0, 'PM': 0.0, 'QCOM': 0.0, 'SBUX': 0.0, 'T': 0.0, 'TMO': 0.0756154848950002, 'TXN': 0.0, 'UNH': 0.3469493653978547, 'V': 0.0, 'VZ': 0.0, 'WMT': 0.0})
  Expected (in-sample) annual return: 21.98%
  Realized (out-of-sample) annual return: 14.63%
  Absolute Difference: 7.35%
  Regularization parameter: 0




Method: l1
  Optimized Weights: OrderedDict({'AAPL': 0.0271165057458006, 'ABT': 2.4185296313e-06, 'ACN': 2.7293657321e-06, 'ADBE': 1.4278498331e-06, 'BAC': 6.8153953987e-06, 'BMY': 3.4689644842e-06, 'BRK-B': 4.107376443e-06, 'CMCSA': 5.5736029469e-06, 'COST': 0.1587488725829986, 'CRM': 4.4569936471e-06, 'CSCO': 4.4370099564e-06, 'CVX': 5.2759685636e-06, 'GOOG': 6.576113224e-07, 'HD': 2.0550539638e-06, 'IBM': 6.0758503519e-06, 'INTC': 8.9715283206e-06, 'JNJ': 2.1064142274e-06, 'JPM': 5.2110724266e-06, 'KO': 2.6499987088e-06, 'LIN': 2.8127560874e-06, 'LOW': 2.8806758927e-06, 'MA': 2.4892572204e-06, 'MCD': 0.076364729676053, 'MRK': 1.4144419052e-06, 'MSFT': 0.1790114599293497, 'NEE': 0.1360930524088249, 'NKE': 3.4175328251e-06, 'ORCL': 4.0999122469e-06, 'PFE': 2.2591232936e-06, 'PG': 2.0435741352e-06, 'PM': 3.407367161e-06, 'QCOM': 6.241816176e-06, 'SBUX': 2.9232255014e-06, 'T': 4.9526209686e-06, 'TMO': 0.0756115703385862, 'TXN': 2.856313792e-06, 'UNH': 0.3469350174874895, 'V': 2.91325120



Method: l2
  Optimized Weights: OrderedDict({'AAPL': 0.0560508377455185, 'ABT': 0.0274297351356488, 'ACN': 0.0358334466231584, 'ADBE': 0.0498674247165601, 'BAC': 0.0150499900918088, 'BMY': 0.0003663104290884, 'BRK-B': 0.0135703380248551, 'CMCSA': 0.0, 'COST': 0.0412855597690989, 'CRM': 0.0165850191510379, 'CSCO': 0.0160921603378941, 'CVX': 0.0171496174993006, 'GOOG': 0.0352475563958725, 'HD': 0.0388887684733322, 'IBM': 0.0, 'INTC': 0.0, 'JNJ': 0.0142795075407111, 'JPM': 0.0244530729755254, 'KO': 0.0112060881369027, 'LIN': 0.0290854380742478, 'LOW': 0.0351713307773884, 'MA': 0.0458265635007449, 'MCD': 0.0367436796192238, 'MRK': 0.0238876688019492, 'MSFT': 0.0612244468916689, 'NEE': 0.041144728490563, 'NKE': 0.0247183126217168, 'ORCL': 0.0140067300119599, 'PFE': 0.0183735917932423, 'PG': 0.0146155478742547, 'PM': 0.0095443328424702, 'QCOM': 0.0093558595187892, 'SBUX': 0.0270486153951383, 'T': 0.0, 'TMO': 0.0475746766039808, 'TXN': 0.0406463738087842, 'UNH': 0.062008736301274, 'V': 0.0341



In [169]:
weights, exp_return, cvar = optimize_cvar(mu_train, train_data, maxcvar = 0.03, l2_reg=0.2)

In [183]:
regs = np.logspace(-1, 2, num=100)
mindiff = 1000

for reg in regs:
    print(reg)
    weights, exp_return, cvar = optimize_cvar(mu_train, train_data, maxcvar = 0.03, l2_reg=reg)
    realized_return = compute_out_of_sample_return(weights, test_data)
    diff = abs(exp_return - realized_return)
    #print((reg, diff))
    
    if diff < mindiff:
        results = {'expected_return': exp_return,
                        'realized_return': realized_return,
                        'difference': diff,
                        'weights': weights,
                        'reg': reg}
        mindiff = diff

0.1
0.10722672220103231
0.11497569953977356
0.12328467394420663
0.1321941148466029
0.1417474162926805
0.1519911082952934
0.16297508346206444
0.17475284000076838
0.1873817422860384
0.20092330025650468
0.2154434690031884
0.23101297000831597
0.24770763559917108
0.26560877829466867
0.2848035868435802
0.30538555088334157
0.32745491628777285
0.3511191734215131
0.3764935806792468
0.40370172585965547
0.43287612810830584
0.464158883361278
0.49770235643321115
0.533669923120631
0.5722367659350217
0.6135907273413173
0.6579332246575681
0.7054802310718644
0.7564633275546289
0.8111308307896873
0.8697490026177834
0.9326033468832199
1.0
1.072267222010323
1.1497569953977356
1.232846739442066
1.3219411484660293
1.4174741629268055
1.5199110829529339
1.6297508346206444
1.7475284000076838
1.8738174228603839
2.009233002565047
2.1544346900318843
2.31012970008316
2.4770763559917115
2.656087782946687
2.848035868435802
3.0538555088334154
3.2745491628777286
3.511191734215131
3.7649358067924674
4.037017258596556
4

In [171]:
realized_return = compute_out_of_sample_return(weights, test_data)
diff = abs(exp_return - realized_return)

In [185]:
print(f"  Optimized Weights: {results['weights']}")
print(f"  Expected (in-sample) annual return: {results['expected_return']:.2%}")
print(f"  Realized (out-of-sample) annual return: {results['realized_return']:.2%}")
print(f"  Absolute Difference: {mindiff:.2%}\n")
print(f"  Regularization parameter: {results['reg']}")

  Optimized Weights: OrderedDict({'AAPL': 0.0575438626683218, 'ABT': 0.0273349566958778, 'ACN': 0.0362708299050119, 'ADBE': 0.0511628368420917, 'BAC': 0.0145468160612708, 'BMY': 0.0, 'BRK-B': 0.0127369112589356, 'CMCSA': 0.0, 'COST': 0.0416838739362528, 'CRM': 0.0162606853849242, 'CSCO': 0.0155258815879014, 'CVX': 0.0165768999790522, 'GOOG': 0.0356823983306244, 'HD': 0.039404346126125, 'IBM': 0.0, 'INTC': 0.0, 'JNJ': 0.0132695103378669, 'JPM': 0.024311440391501, 'KO': 0.0100701762630397, 'LIN': 0.0290552080337846, 'LOW': 0.0355787817309863, 'MA': 0.0468525930996757, 'MCD': 0.0369182840484091, 'MRK': 0.0233602498818598, 'MSFT': 0.0629975694156902, 'NEE': 0.041501902392496, 'NKE': 0.0245853165139718, 'ORCL': 0.0132754221077498, 'PFE': 0.0176006723647655, 'PG': 0.0136328174021531, 'PM': 0.0083685804588123, 'QCOM': 0.0086445558192815, 'SBUX': 0.0270131938813797, 'T': 0.0, 'TMO': 0.0484413652120223, 'TXN': 0.0413637888285007, 'UNH': 0.0635614510059261, 'V': 0.0345467758736504, 'VZ': 0.0, 'W