In [1]:
import yfinance as yf
import pandas as pd
import numpy as np
import cvxpy as cp
from pypfopt import EfficientFrontier, risk_models, expected_returns, EfficientCVaR

# Define 50 tickers (note: 'FB' replaced by 'META')
tickers = [
    "AAPL", "MSFT", "GOOG", "BRK-B", "JPM", "JNJ",
    "V", "PG", "UNH", "HD", "MA", "BAC", "VZ",
    "ADBE", "CMCSA", "T", "INTC", "CSCO", "PFE", "CRM", "ABT", "TMO",
    "CVX", "MRK", "ORCL", "KO", "ACN", "MCD", "NKE", "WMT", "NEE",
    "IBM", "TXN", "COST", "LIN", "SBUX", "PM", "LOW", "QCOM", "BMY"
]

riskfree = 0.05

# Download historical adjusted close prices (2020-2025)
data = yf.download(tickers, start="2015-01-01", end="2025-01-01")["Close"]
data = data.dropna(axis=1)  # Remove any tickers with missing data

# Split data into training (in-sample) and testing (out-of-sample) periods.
# In-sample: 2020-01-01 to 2023-01-01; Out-of-sample: 2023-01-02 to 2025-01-01
train_data = data.loc["2015-01-01":"2023-01-01"]
test_data = data.loc["2023-01-02":"2025-01-01"]

# Calculate expected returns and covariance matrix from the training data.
mu_train = expected_returns.mean_historical_return(train_data)
S_train = risk_models.sample_cov(train_data)

with open('tickers.txt', 'w') as f:
    for ticker in tickers:
        f.write(ticker + '\n')

YF.download() has changed argument auto_adjust default to True


[*********************100%***********************]  40 of 40 completed


In [2]:


def optimize_portfolio(method, mu, S, riskfree, objective = 'max_sharpe', reg = 1e-2, expret = 0.1):
    """
    Build an efficient frontier and add a custom objective/constraint
    based on the regularization method.
    """
    ef = EfficientFrontier(mu, S, weight_bounds=(0, 1))

    if method == 'l1':
        # L1 regularisation encourages sparsity.
        l1_reg = reg
        ef.add_objective(lambda w: l1_reg * cp.norm1(w))
    elif method == 'l2':
        # L2 regularisation penalizes large weights.
        l2_reg = reg
        ef.add_objective(lambda w: l2_reg * cp.sum_squares(w))
    elif method == 'vanilla':
        pass
    else:
        raise ValueError("Unknown method. Choose 'l1', 'l2', or 'diversification' or 'vanilla'.")

    if objective == 'max_sharpe':
        weights = ef.max_sharpe(risk_free_rate=riskfree)
        #weights = ef.efficient_return(0.1)
        #cleaned_weights = ef.clean_weights()
        # Get the in-sample portfolio performance (annual return, volatility, Sharpe ratio)
        ret_train, vol_train, sr_train = ef.portfolio_performance(verbose=False, risk_free_rate=riskfree)
        return weights, ret_train, vol_train, sr_train
    
    elif objective == 'optvol':
        weights = ef.efficient_return(expret)
        ret_train, vol_train, sr_train = ef.portfolio_performance(verbose=False, risk_free_rate=riskfree)
        return weights, ret_train, vol_train, sr_train
    
def optimize_cvar(mu, train_data, maxcvar = 0.1, l2_reg = 1e-2):
        ef = EfficientCVaR(mu, train_data, beta=0.95, weight_bounds=(0, 1))
        ef.add_objective(lambda w: l2_reg * cp.sum_squares(w))
        weights = ef.efficient_risk(maxcvar)
        ret_train, vol_train = ef.portfolio_performance(verbose=False)
        return weights, ret_train, vol_train


def compute_out_of_sample_return(weights, test_data):
    """
    Given a weights dictionary and test period price data, compute the realized
    annualized portfolio return.
    """
    # Convert weights dictionary to a Series (for proper alignment)
    weights_series = pd.Series(weights)
    # Compute daily returns for the test period.
    test_returns = test_data.pct_change().dropna()
    # Calculate the portfolio's daily return (aligned by tickers)
    portfolio_daily_returns = (test_returns * weights_series).sum(axis=1)
    # Annualize the return (assume 252 trading days)
    realized_return = (1 + portfolio_daily_returns).prod() ** (252 / len(portfolio_daily_returns)) - 1
    return realized_return


In [4]:
import warnings
warnings.filterwarnings('ignore')
# List of regularization methods to evaluate.
methods = ['vanilla', 'l1', 'l2']
results = {}

print("Evaluating portfolio performance for different regularization methods:\n")
for method in methods:
    mindiff = 1000
    regs = np.logspace(-5, 2, num=100) if method != 'vanilla' else [0]
    for reg in regs:
        weights, exp_return, vol, sr = optimize_portfolio(method, mu_train, S_train, riskfree=riskfree, reg=reg, objective='max_sharpe', expret=0.2)
        realized_return = compute_out_of_sample_return(weights, test_data)
        diff = abs(exp_return - realized_return)
        #print((reg, diff))
        
        if diff < mindiff:
            results[method] = {'expected_return': exp_return,
                            'realized_return': realized_return,
                            'difference': diff,
                            'weights': weights,
                            'reg': reg}
            mindiff = diff

    print(f"Method: {method}")
    print(f"  Optimized Weights: {results[method]["weights"]}")
    print(f"  Expected (in-sample) annual return: {results[method]['expected_return']:.2%}")
    print(f"  Realized (out-of-sample) annual return: {results[method]['realized_return']:.2%}")
    print(f"  Absolute Difference: {mindiff:.2%}")
    print(f"  Regularization parameter: {results[method]['reg']}")

# Determine the method with the smallest difference.
best_method = min(results, key=lambda m: results[m]['difference'])

print(best_method, "is the best method")

Evaluating portfolio performance for different regularization methods:

Method: vanilla
  Optimized Weights: OrderedDict({'AAPL': 0.0271253557970373, 'ABT': 0.0, 'ACN': 0.0, 'ADBE': 0.0, 'BAC': 0.0, 'BMY': 0.0, 'BRK-B': 0.0, 'CMCSA': 0.0, 'COST': 0.1587830871655542, 'CRM': 0.0, 'CSCO': 0.0, 'CVX': 0.0, 'GOOG': 0.0, 'HD': 0.0, 'IBM': 0.0, 'INTC': 0.0, 'JNJ': 0.0, 'JPM': 0.0, 'KO': 0.0, 'LIN': 0.0, 'LOW': 0.0, 'MA': 0.0, 'MCD': 0.076377837889138, 'MRK': 0.0, 'MSFT': 0.1790313166093607, 'NEE': 0.1361175965496113, 'NKE': 0.0, 'ORCL': 0.0, 'PFE': 0.0, 'PG': 0.0, 'PM': 0.0, 'QCOM': 0.0, 'SBUX': 0.0, 'T': 0.0, 'TMO': 0.075615450312436, 'TXN': 0.0, 'UNH': 0.3469493556768627, 'V': 0.0, 'VZ': 0.0, 'WMT': 0.0})
  Expected (in-sample) annual return: 21.98%
  Realized (out-of-sample) annual return: 14.63%
  Absolute Difference: 7.35%
  Regularization parameter: 0
Method: l1
  Optimized Weights: OrderedDict({'AAPL': 0.0271158676660409, 'ABT': 2.4185477374e-06, 'ACN': 2.7293833121e-06, 'ADBE': 1.4278

In [8]:
weights_vanilla, exp_return_vanilla, cvar_vanilla = optimize_cvar(mu_train, train_data, maxcvar = 0.03, l2_reg=0)
realized_return_vanilla = compute_out_of_sample_return(weights_vanilla, test_data)
diff_vanilla = abs(exp_return_vanilla - realized_return_vanilla)

In [9]:
regs = np.logspace(-1, 2, num=100)
mindiff = 1000

for reg in regs:
    weights, exp_return, cvar = optimize_cvar(mu_train, train_data, maxcvar = 0.03, l2_reg=reg)
    realized_return = compute_out_of_sample_return(weights, test_data)
    diff = abs(exp_return - realized_return)
    #print((reg, diff))
    
    if diff < mindiff:
        results = {'expected_return': exp_return,
                        'realized_return': realized_return,
                        'difference': diff,
                        'weights': weights,
                        'reg': reg}
        mindiff = diff

In [10]:
print(f"  Optimized Weights: {results['weights']}")
print(f"  Expected (in-sample) annual return: {results['expected_return']:.2%}")
print(f"  Realized (out-of-sample) annual return: {results['realized_return']:.2%}")
print(f"  Regularization parameter: {results['reg']}")
print(f"  Absolute Difference: {mindiff:.2%}\n")
print(f"  Absolute Difference (Vanilla method): {diff_vanilla:.2%}")

  Optimized Weights: OrderedDict({'AAPL': 0.0575438508296273, 'ABT': 0.0273349666097927, 'ACN': 0.0362708293130116, 'ADBE': 0.0511628370549773, 'BAC': 0.0145468334820685, 'BMY': 0.0, 'BRK-B': 0.0127369114718212, 'CMCSA': 0.0, 'COST': 0.0416838938878327, 'CRM': 0.0162606879196893, 'CSCO': 0.0155258821953638, 'CVX': 0.0165768773334086, 'GOOG': 0.03568239854351, 'HD': 0.0394043418253542, 'IBM': 0.0, 'INTC': 0.0, 'JNJ': 0.0132695120617937, 'JPM': 0.0243114377968952, 'KO': 0.0100701625489579, 'LIN': 0.0290552115852786, 'LOW': 0.0355787521294269, 'MA': 0.0468525845285433, 'MCD': 0.0369182922591989, 'MRK': 0.0233602412867675, 'MSFT': 0.062997565098668, 'NEE': 0.0415018983142338, 'NKE': 0.0245853282503374, 'ORCL': 0.0132754314239257, 'PFE': 0.017600671407502, 'PG': 0.0136328379520229, 'PM': 0.0083685878949728, 'QCOM': 0.0086445560321671, 'SBUX': 0.0270131887291182, 'T': 0.0, 'TMO': 0.0484413532578156, 'TXN': 0.0413637984288914, 'UNH': 0.063561461864784, 'V': 0.034546766593519, 'VZ': 0.0, 'WMT'