# Black-Litterman Model

In [1]:
!pip install pandas_ta



In [2]:
# Imports
import pandas as pd
import pandas_ta as ta
import pandas_datareader.data as web

import numpy as np
import matplotlib.pyplot as plt
import yfinance as yf
from tqdm import tqdm

from pypfopt.expected_returns import mean_historical_return
from pypfopt.risk_models import CovarianceShrinkage
from pypfopt import black_litterman
from pypfopt.black_litterman import BlackLittermanModel
from pypfopt.efficient_frontier import EfficientFrontier

from sktime.split import temporal_train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import ConfusionMatrixDisplay
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
from sklearn.dummy import DummyRegressor

### Step 1: Pull Ticker Data and Macro Data

In [3]:
# Inputs and Assumptions
tickers = ['AAPL', 'NVDA', 'META', 'TSLA', 'GOOG', 'JPM', 'V', 'WMT', 'OXY']
market_ticker = 'SPY'
risk_free_rate = 0.02
start_date = '2020-01-01'
# end_date = '2025-12-01'
tau = 0.05

In [4]:
# Fetch Price Data
prices_df = yf.download(tickers, start = start_date)['Close']
prices_df.dropna(inplace = True)
prices_df

[*********************100%***********************]  9 of 9 completed


Ticker,AAPL,GOOG,JPM,META,NVDA,OXY,TSLA,V,WMT
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2020-01-02,72.468262,67.859550,119.573402,208.324783,5.971077,38.862629,28.684000,183.186203,36.433842
2020-01-03,71.763702,67.526550,117.995438,207.222488,5.875504,39.802719,29.534000,181.729324,36.112198
2020-01-06,72.335541,69.191551,117.901611,211.125229,5.900145,41.116997,30.102667,181.336319,36.038677
2020-01-07,71.995361,69.148369,115.897232,211.582001,5.971574,41.317791,31.270666,180.857117,35.704784
2020-01-08,73.153503,69.693291,116.801308,213.727051,5.982775,41.053101,32.809334,183.953033,35.582264
...,...,...,...,...,...,...,...,...,...
2025-12-19,273.670013,308.609985,317.209991,658.770020,180.990005,39.619999,481.200012,349.250000,114.360001
2025-12-22,270.970001,311.329987,323.089996,661.500000,183.690002,40.270000,488.730011,352.089996,112.599998
2025-12-23,272.359985,315.679993,325.929993,664.940002,189.210007,40.099998,485.559998,353.380005,110.900002
2025-12-24,273.809998,315.670013,329.170013,667.549988,188.610001,40.000000,485.399994,355.140015,111.610001


In [5]:
# Fetch Return Data
returns_df = prices_df.pct_change()
returns_df.dropna(inplace = True)
returns_df

Ticker,AAPL,GOOG,JPM,META,NVDA,OXY,TSLA,V,WMT
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2020-01-03,-0.009722,-0.004907,-0.013197,-0.005291,-0.016006,0.024190,0.029633,-0.007953,-0.008828
2020-01-06,0.007968,0.024657,-0.000795,0.018834,0.004194,0.033020,0.019255,-0.002163,-0.002036
2020-01-07,-0.004703,-0.000624,-0.017000,0.002164,0.012106,0.004883,0.038801,-0.002643,-0.009265
2020-01-08,0.016086,0.007880,0.007801,0.010138,0.001876,-0.006406,0.049205,0.017118,-0.003431
2020-01-09,0.021241,0.011044,0.003651,0.014311,0.010983,0.008671,-0.021945,0.006930,0.010331
...,...,...,...,...,...,...,...,...,...
2025-12-19,0.005437,0.016000,0.013450,-0.008548,0.039336,-0.002769,-0.004489,0.009364,-0.004093
2025-12-22,-0.009866,0.008814,0.018537,0.004144,0.014918,0.016406,0.015648,0.008132,-0.015390
2025-12-23,0.005130,0.013972,0.008790,0.005200,0.030051,-0.004222,-0.006486,0.003664,-0.015098
2025-12-24,0.005324,-0.000032,0.009941,0.003925,-0.003171,-0.002494,-0.000330,0.004981,0.006402


In [6]:
# Pull Macro Data from FRED
fred_tickers = ["T10Y2Y", "DFF", "PCEPI"]
fred_df = web.DataReader(fred_tickers, "fred", start_date)
fred_df.columns = ['2s10s', 'FFR', 'PCE']

# Pull Index Data from yfinance
index_tickers = ['^VIX', 'GC=F', 'CL=F']
index_df = yf.download(index_tickers, start = start_date)['Close']
index_df.columns = ['Vol', 'Gold', 'Oil']

# Merge DataFrames and Consolidate
macros_df = pd.concat([fred_df, index_df], axis = 1)
macros_df.ffill(inplace = True)
macros_df.dropna(inplace = True)
macros_df

[*********************100%***********************]  3 of 3 completed


Unnamed: 0,2s10s,FFR,PCE,Vol,Gold,Oil
2020-01-02,0.30,1.55,104.490,61.180000,1524.500000,12.47
2020-01-03,0.27,1.55,104.490,63.049999,1549.199951,14.02
2020-01-04,0.27,1.55,104.490,63.049999,1549.199951,14.02
2020-01-05,0.27,1.55,104.490,63.049999,1549.199951,14.02
2020-01-06,0.27,1.55,104.490,63.270000,1566.199951,13.85
...,...,...,...,...,...,...
2025-12-22,0.73,3.64,127.625,58.009998,4444.600098,14.08
2025-12-23,0.70,3.64,127.625,58.380001,4482.799805,14.00
2025-12-24,0.68,3.64,127.625,58.349998,4480.600098,13.47
2025-12-25,0.68,3.64,127.625,58.349998,4480.600098,13.47


### Step 2: Calculate Covariance Matrix $\Sigma$

In [7]:
# Use Ledoit-Wolf Shrinkage for the Covariance Matrix
cov_matrix = CovarianceShrinkage(prices_df).ledoit_wolf()
cov_matrix

Ticker,AAPL,GOOG,JPM,META,NVDA,OXY,TSLA,V,WMT
Ticker,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
AAPL,0.1031,0.062785,0.04129,0.075256,0.096009,0.05029,0.10177,0.048278,0.026624
GOOG,0.062785,0.106206,0.040146,0.085054,0.096793,0.047586,0.089082,0.043709,0.019933
JPM,0.04129,0.040146,0.1004,0.04378,0.055888,0.092125,0.06141,0.050996,0.018555
META,0.075256,0.085054,0.04378,0.192182,0.122425,0.04346,0.101854,0.050035,0.023461
NVDA,0.096009,0.096793,0.055888,0.122425,0.281254,0.059776,0.164926,0.063249,0.026849
OXY,0.05029,0.047586,0.092125,0.04346,0.059776,0.346488,0.077651,0.057596,0.013242
TSLA,0.10177,0.089082,0.06141,0.101854,0.164926,0.077651,0.438282,0.062551,0.026571
V,0.048278,0.043709,0.050996,0.050035,0.063249,0.057596,0.062551,0.075688,0.019167
WMT,0.026624,0.019933,0.018555,0.023461,0.026849,0.013242,0.026571,0.019167,0.055743


### Step 3: Calculate Prior Expected Returns Vector $\Pi$

In [8]:
# Pull Market Cap Data
market_caps = {}

for ticker in tickers:
    ticker_info = yf.Ticker(ticker).info
    market_caps[ticker] = ticker_info.get('marketCap')
    
market_caps

{'AAPL': 4057362333696,
 'NVDA': 4638834163712,
 'META': 1671840661504,
 'TSLA': 1580396052480,
 'GOOG': 3802144702464,
 'JPM': 901671813120,
 'V': 685113540608,
 'WMT': 890886356992,
 'OXY': 39260635136}

In [9]:
# Pull Market Prices
market_prices_df = yf.Ticker(market_ticker).history(start = start_date)['Close']

# Calculate Market-Implied Risk Premium
delta = black_litterman.market_implied_risk_aversion(market_prices_df)

# Compute Priors
market_priors = black_litterman.market_implied_prior_returns(market_caps, delta, cov_matrix, risk_free_rate)
market_priors

Ticker
AAPL    0.326497
GOOG    0.324694
JPM     0.204776
META    0.386394
NVDA    0.566428
OXY     0.227538
TSLA    0.536914
V       0.217132
WMT     0.116923
dtype: float64

### Step 4: Determine the Views Vector $Q$

In [10]:
def engineer_features_target(ticker_prices):
    '''
    Creates Features and a Target for a Ticker
    Parameters: Ticker Prices
    Returns: Tuple with
        A DataFrame with Feature Columns and a Next Day Returns Target Column
        A Series with Current Day Feature Data
    '''

    # Initialize a New DataFrame
    ticker_df = pd.DataFrame()
    
    # Create New Features
    ticker_df['RSI'] = ta.rsi(ticker_prices, length=14)
    ticker_df['SMA50'] = ta.sma(ticker_prices, length=50)
    ticker_df['Trend Ratio'] = ticker_prices / ticker_df['SMA50']
    ticker_df['Vol20'] = ticker_prices.pct_change().rolling(20).std()

    # Add in Macro Data
    ticker_df = ticker_df.join(macros_df)

    # Add in Target Column
    ticker_df['Next Day Returns'] =  ticker_prices.pct_change().shift(-1)

    # Extract Current Day Data
    current_features = ticker_df.iloc[-1].drop(['Next Day Returns'])

    # Data Cleaning
    ticker_df.dropna(inplace = True)
    
    return (ticker_df, current_features)

In [11]:
# Initialize Dictionaries of DataFrames to Keep Feature Target Data and Current Feature Data for Each Ticker 
features_target_data = {}
current_features = {}

# Iterate Across Tickers and Create Features
for ticker in tickers:
    features_target_data[ticker], current_features[ticker] = engineer_features_target(prices_df[ticker])

# Verify Shapes
for ticker in features_target_data.keys():
    print(f'{ticker} Shape: {features_target_data[ticker].shape}')

AAPL Shape: (1455, 11)
NVDA Shape: (1455, 11)
META Shape: (1455, 11)
TSLA Shape: (1455, 11)
GOOG Shape: (1455, 11)
JPM Shape: (1455, 11)
V Shape: (1455, 11)
WMT Shape: (1455, 11)
OXY Shape: (1455, 11)


In [12]:
def model_create_evaluate(ticker_features_target):
    # Create and Evaluate a Predictive Model
    # Parameters: A DataFrame of Feature Columns and a Target Column for Next Day Returns
    # Returns: A Dictionary with a Predictive Model, Optimal Parameters, Performance Metrics, and Dummy Metrics

    # Define features (X) and target (y)
    X = ticker_features_target.drop(columns = ['Next Day Returns'])
    y = ticker_features_target['Next Day Returns']

    # Temporal Train Test Split
    y_train, y_test, X_train, X_test = temporal_train_test_split(y = y, X = X)

    # Create Regressor
    regressor = RandomForestRegressor(random_state = 22)
    regressor.fit(X_train, y_train)

    # Perform a Grid Search for Optimal Parameters
    regressor_params = {'n_estimators': np.arange(1, 20, 2), 'max_features': np.arange(1, 20, 2)}
    regressor_grid_search = GridSearchCV(estimator = regressor, param_grid = regressor_params)
    regressor_grid_search.fit(X_train, y_train)
    
    # Evaluate
    y_preds = regressor_grid_search.predict(X_test)
    rmse = np.sqrt(mean_squared_error(y_test, y_preds))
    mae = mean_absolute_error(y_test, y_preds)
    r2 = r2_score(y_test, y_preds)
    mda = (np.sign(y_test) == np.sign(y_preds)).mean()

    # Establish Baseline
    dummy = DummyRegressor()
    dummy.fit(X_train, y_train)
    dummy_y_preds = dummy.predict(X_test)
    dummy_rmse = np.sqrt(mean_squared_error(y_test, dummy_y_preds))
    dummy_mae = mean_absolute_error(y_test, dummy_y_preds)
    dummy_r2 = r2_score(y_test, dummy_y_preds)
    dummy_mda = (np.sign(y_test) == np.sign(y_preds)).mean()
    
    return {
        'Model': regressor_grid_search,
        'Optimal Params': regressor_grid_search.best_params_,
        'RMSE': rmse,
        'MAE': mae,
        'R2': r2,
        'MDA': mda,
        'Dummy RMSE': dummy_rmse,
        'Dummy MAE': dummy_mae,
        'Dummy R2': dummy_r2,
        'Dummy MDA': dummy_mda
    }

In [13]:
# Initialize a List of DataFrames to Keep Model Data
models_data = {}

# Iterate Across Tickers
for ticker in tqdm(tickers):
    models_data[ticker] = model_create_evaluate(features_target_data[ticker])

# Organize in a DataFrame
models_df = pd.DataFrame(models_data).T
models_df

100%|██████████| 9/9 [02:20<00:00, 15.58s/it]


Unnamed: 0,Model,Optimal Params,RMSE,MAE,R2,MDA,Dummy RMSE,Dummy MAE,Dummy R2,Dummy MDA
AAPL,GridSearchCV(estimator=RandomForestRegressor(r...,"{'max_features': 1, 'n_estimators': 13}",0.023714,0.01802,-0.663221,0.543956,0.018402,0.011717,-0.001561,0.543956
NVDA,GridSearchCV(estimator=RandomForestRegressor(r...,"{'max_features': 1, 'n_estimators': 19}",0.039214,0.030225,-0.500585,0.535714,0.032058,0.022947,-0.002854,0.535714
META,GridSearchCV(estimator=RandomForestRegressor(r...,"{'max_features': 7, 'n_estimators': 17}",0.036508,0.031034,-1.661912,0.486264,0.022379,0.0153,-0.000181,0.486264
TSLA,GridSearchCV(estimator=RandomForestRegressor(r...,"{'max_features': 1, 'n_estimators': 19}",0.043508,0.031718,-0.088578,0.502747,0.0417,0.030482,-2e-06,0.502747
GOOG,GridSearchCV(estimator=RandomForestRegressor(r...,"{'max_features': 1, 'n_estimators': 19}",0.020799,0.01532,-0.14728,0.5,0.019423,0.014234,-0.000505,0.5
JPM,GridSearchCV(estimator=RandomForestRegressor(r...,"{'max_features': 1, 'n_estimators': 9}",0.017653,0.011822,-0.172179,0.57967,0.01631,0.010865,-0.000655,0.57967
V,GridSearchCV(estimator=RandomForestRegressor(r...,"{'max_features': 1, 'n_estimators': 17}",0.014337,0.010231,-0.094963,0.483516,0.013705,0.009252,-0.000543,0.483516
WMT,GridSearchCV(estimator=RandomForestRegressor(r...,"{'max_features': 1, 'n_estimators': 19}",0.015171,0.011061,-0.064281,0.521978,0.014723,0.010219,-0.002372,0.521978
OXY,GridSearchCV(estimator=RandomForestRegressor(r...,"{'max_features': 1, 'n_estimators': 13}",0.024193,0.01803,-0.350172,0.472527,0.021031,0.014618,-0.020315,0.472527


In [14]:
# Initialize a Dictionary to Record Predicted Next Day Returns from the Current Day Features
predicted_returns_data = {}

# Iterate Across Tickers
for ticker in tickers:
    predicted_returns_data[ticker] = models_df.loc[ticker]['Model'].predict(pd.DataFrame([current_features[ticker]]))

# Organize in a Data Frame
predicted_returns_df = pd.DataFrame(predicted_returns_data).T
predicted_returns_df.columns = ['Predicted Returns']
predicted_returns_df

Unnamed: 0,Predicted Returns
AAPL,0.012539
NVDA,0.021855
META,-0.041141
TSLA,-0.010475
GOOG,0.004057
JPM,0.004006
V,0.001206
WMT,0.009187
OXY,4.6e-05


In [15]:
# Compile Predictions into a Views Vector
Q = predicted_returns_df

### Step 5: Set the Picking Matrix $P$

In [16]:
P = np.eye(len(tickers))
P

array([[1., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 1., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 1., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 1., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 1., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 1., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 1.]])

### Step 6: Calculate the Uncertainty Matrix $\Omega$

In [17]:
# Use Idzorek’s Method with a Vector of View Confidences instead of the Uncertainty Matrix
view_confidences = models_df['MDA'].where(models_df['MDA'] > 0.5, 0)
view_confidences

AAPL    0.543956
NVDA    0.535714
META           0
TSLA    0.502747
GOOG           0
JPM      0.57967
V              0
WMT     0.521978
OXY            0
Name: MDA, dtype: object

### Step 7: Apply the Black-Litterman Model

In [18]:
# Apply Black-Litterman
bl = BlackLittermanModel(cov_matrix, pi = market_priors, Q = Q, P = P, omega = 'idzorek', view_confidences = view_confidences.to_numpy(), tau = tau)

# Compute Returns
bl_returns = pd.DataFrame(bl.bl_returns())
bl_returns.columns = ['Returns']
bl_returns

Unnamed: 0_level_0,Returns
Ticker,Unnamed: 1_level_1
AAPL,0.096406
GOOG,0.096213
JPM,0.063961
META,0.098235
NVDA,0.278286
OXY,0.035842
TSLA,0.259345
V,0.052151
WMT,0.048013


In [19]:
# Compute Weights
bl_raw_weights = bl.bl_weights(risk_aversion = delta) 

# Clean Weights
bl_weights = bl.clean_weights()
bl_weights

# Use Efficient Frontier to Output Portfolio Weights
ef = EfficientFrontier(bl.bl_returns(), bl.bl_cov())
ef.max_sharpe()
final_weights = pd.Series(ef.clean_weights()).to_frame('Weights')
final_weights

Unnamed: 0,Weights
AAPL,0.0
GOOG,0.0
JPM,0.0
META,0.0
NVDA,0.56034
OXY,0.0
TSLA,0.19042
V,0.0
WMT,0.24924
