In [1]:
!pip install PyPortfolioOpt



In [2]:
import numpy as np
import pandas as pd
import yfinance as yf
import matplotlib.pyplot as plt
from pypfopt import black_litterman, risk_models
from pypfopt import BlackLittermanModel, plotting
from pypfopt.efficient_frontier import EfficientFrontier
from pypfopt import risk_models, expected_returns

# APT
**1. Inputs**
- Creating the inputs, fetching historical data from assets, macroeconomic data

In [4]:
# This block generates a df for all the asset returns on a quarterly, adjusted basis
##User input for our asset tickers
tickers = 'BYD DRIV IE000MCVFK47 KC=F COCHINSHIP NH'
df1 = yf.download(tickers=tickers, start='2020-01-01', end='2025-01-01', interval='1d')

# Select Adjusted Close and resample to quarterly frequency
df1 = df1['Close']  # Select 'Adj Close' column directly
df1 = df1.resample('QE').last()  # Resample to quarterly frequency

# Calculate quarterly returns
asset_returns_df = df1.pct_change().dropna()  # Calculate percentage change and drop nil values
asset_returns_df

[*********************100%***********************]  3 of 3 completed


Ticker,BYD,DRIV,IE000MCVFK47
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2021-09-30,0.028785,-0.031438,0.001037
2021-12-31,0.036516,0.114951,-0.004343
2022-03-31,0.005552,-0.097281,-0.064327
2022-06-30,-0.241496,-0.217202,-0.081376
2022-09-30,-0.039224,-0.062878,-0.033204
2022-12-31,0.147367,-0.00535,0.006199
2023-03-31,0.179068,0.199799,0.014992
2023-06-30,0.084387,0.09983,0.003035
2023-09-30,-0.120894,-0.097926,-0.003591
2023-12-31,0.031955,0.059695,0.06025


In [5]:
# Mount Google Drive
from google.colab import drive
drive.mount('/content/drive')

# User Input for our macro indicators
date1 = '2015-01-01'
date2 = '2023-12-31'
factors = {
    "Unemployment Rate (USA)": "/content/drive/My Drive/EconDB/RealGDPUSA.csv",

}

# Create an empty dictionary to store the data
data = {}

# Fetch the data for each indicator factor
for name, filepath in factors.items():
    df2 = pd.read_csv(filepath, index_col='Date', parse_dates=['Date'])
    data[name] = df2

# Align all data to ensure the same number of time periods
data_aligned = pd.concat(data.values(), axis=1).dropna()
data_aligned.columns = list(data.keys())

# Filter only the dates required
data_aligned = data_aligned.loc[date1:date2]

# Resample factor data to quarterly frequency, NOTE: last() vs mean() could change data output
data_aligned = data_aligned.resample('QE').last()

# Calculate quarterly returns
factors_returns_df = data_aligned.pct_change().dropna()

factors_returns_df


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


  factors_returns_df = data_aligned.pct_change().dropna()


Unnamed: 0_level_0,Unemployment Rate (USA)
Date,Unnamed: 1_level_1
2015-06-30,0.0
2015-09-30,0.0
2015-12-31,0.0
2016-03-31,0.021816
2016-06-30,0.0
2016-09-30,0.0
2016-12-31,0.0
2017-03-31,0.029941
2017-06-30,0.0
2017-09-30,0.0


In [6]:
#This block aligns the two df (asset, and factor returns) into two matrices
common_index = asset_returns_df.index.intersection(factors_returns_df.index) #Finds the list of shared dates between the two matrices

#Then filters only the data with overlapping dates, keep
aligned_asset_returns = asset_returns_df.loc[common_index]
aligned_factors_returns = factors_returns_df.loc[common_index]

# Convert to matrices for further processing
aligned_asset_matrix = aligned_asset_returns.values
aligned_factors_matrix = aligned_factors_returns.values

# Verify alignment and output
print("Aligned Asset Returns Matrix Shape:", aligned_asset_matrix.shape)
print("Aligned Factor Returns Matrix Shape:", aligned_factors_matrix.shape)

print("\nAligned Asset Returns Matrix:")
print(aligned_asset_matrix)
print("\nAligned Indicator Matrix:")
print(aligned_factors_matrix)


Aligned Asset Returns Matrix Shape: (7, 3)
Aligned Factor Returns Matrix Shape: (7, 1)

Aligned Asset Returns Matrix:
[[ 0.02878498 -0.03143753  0.00103719]
 [ 0.03651588  0.1149507  -0.00434273]
 [ 0.00555163 -0.09728145 -0.06432672]
 [-0.24149566 -0.21720165 -0.08137568]
 [-0.03922398 -0.06287839 -0.03320419]
 [ 0.14736724 -0.00534982  0.00619944]
 [ 0.17906838  0.19979875  0.01499219]]

Aligned Indicator Matrix:
[[0.        ]
 [0.        ]
 [0.01316334]
 [0.        ]
 [0.        ]
 [0.        ]
 [0.03196212]]


**2. APT Regression**

In [7]:
#This block implements linear regression based on the APT formula to calculate the systematic (alpha and beta) and idiosyncractic risk factors
import numpy as np
from sklearn.linear_model import LinearRegression

n_factors = 3  # Number of factors
n_assets = 5  # Number of assets

# Placeholder for results
alphas = []
betas = []
residuals = []

# performing linear regression to determine alpha, beta, and epsilon for each asset
for i in range(n_assets):
    model = LinearRegression()
    model.fit(aligned_factors_matrix, aligned_asset_matrix[:, i])  # Regress asset returns on factors

    alpha = model.intercept_  # Intercept (alpha)
    beta = model.coef_  # Factor loadings (betas)
    #calculating residuals (epsilon) below
    predicted_returns = model.predict(aligned_factors_matrix)
    residual = aligned_asset_matrix[:, i] - predicted_returns

    alphas.append(alpha)
    betas.append(beta)
    residuals.append(residual)

# Variance of residuals (idiosyncratic risk)
idiosyncratic_variances = [np.var(residual) for residual in residuals]

# Display results
print("Alphas (Intercepts):", alphas)
print("\nBetas (Factor Loadings):", np.array(betas))
print("\nIdiosyncratic Variances:", idiosyncratic_variances)

IndexError: index 3 is out of bounds for axis 1 with size 3

In [None]:
#This block takes the input from APT then converts to covariance matrix of assets
# Convert betas to a matrix
B = np.array(betas)  # Factor loadings matrix (n_assets x n_factors)

# Variance of residuals (idiosyncratic risk)
idiosyncratic_variances = [np.var(residual) for residual in residuals]
Sigma_epsilon = np.diag(idiosyncratic_variances)  # Diagonal matrix of idiosyncratic variances

# Compute factor covariance matrix (Sigma_F)
Sigma_F = np.cov(aligned_factors_matrix, rowvar=False)  # Covariance of factor returns

# Calculate total covariance matrix
# Covariance matrix = B * Sigma_F * B.T + Sigma_epsilon
covariance_matrix = B @ Sigma_F @ B.T + Sigma_epsilon

# Display results
print("Alphas (Intercepts):", alphas)
print("\nBetas (Factor Loadings):", B)
print("\nIdiosyncratic Variances:", idiosyncratic_variances)
print("\nTotal Covariance Matrix:")
print(covariance_matrix)


In [None]:
# This block calculates the expected asset returns from APT inputs
expected_factor_returns = np.mean(aligned_factors_matrix, axis=0)
expected_asset_returns = alphas + np.dot(B, expected_factor_returns)

# Display results
print("\nExpected asset returns:")
print(expected_asset_returns)

# Black-Litterman Optimization
**1. Inputs**
- Creating the inputs, fetching historical data from assets, macroeconomic data

**1. Priors**
- Inputting data and creating market implied market returns


In [None]:
# Example matrices (replace these with your actual matrices)
# Covariance matrix columns/rows represent the assets
assets = ["Asset 1", "Asset 2", "Asset 3", "Asset 4"]
expected_returns = np.array([0.05, 0.07, 0.1, 0.03])  # Replace with your own data
cov_matrix = pd.DataFrame(
    [
        [0.1, 0.02, 0.04, 0.01],
        [0.02, 0.08, 0.01, 0.02],
        [0.04, 0.01, 0.12, 0.03],
        [0.01, 0.02, 0.03, 0.07]
    ],
    index=assets,  # Rows correspond to asset names
    columns=assets  # Columns correspond to asset names
)

# Convert market_caps dictionary to a Pandas Series with matching indices
market_caps = pd.Series({
    "Asset 1": 1e10,
    "Asset 2": 1.5e10,
    "Asset 3": 2e10,
    "Asset 4": 5e9
})

# Ensure the market_caps index matches the assets in the covariance matrix
market_caps = market_caps.reindex(assets)


# Market-implied risk aversion parameter (using a risk-free rate of 0.02)
spy_data = yf.download("SPY", start="2020-01-01", end="2025-01-01")
market_prices = spy_data["Close"]
risk_free_rate = 0.02
delta = black_litterman.market_implied_risk_aversion(market_prices, risk_free_rate=risk_free_rate)

# Compute the market-implied prior returns
market_prior = black_litterman.market_implied_prior_returns(market_caps, delta, cov_matrix)

In [None]:
plotting.plot_covariance(cov_matrix, plot_correlation=True);


In [None]:
# Visualize Market-Implied Prior Returns
plt.figure(figsize=(8, 4))
plt.title("Market-Implied Prior Returns", fontsize=14)
plt.bar(market_caps.keys(), market_prior, color='lightgreen')
plt.ylabel("Expected Returns")
plt.xlabel("Assets")
plt.show()

**2. Views**
- Combine our subjective macroeconomic analysis views into the model, either absolute or relative

In [None]:
# Example: Absolute view that Asset 1 will return 6%, and relative view that Asset 2 will outperform Asset 3 by 2%
absolute_views = {
    "Asset 1": 0.06
}


# Define the picking matrix (P) and views vector (Q)
P = np.array([
    [1, -1, 0, 0],  # View 1: A outperforms B by 3%
    [0, 0, -1, 1]   # View 2: D outperforms C by 1%
])

Q = np.array([
    0.03,  # Expected outperformance of A over B
    -0.01  # Expected underperformance of C relative to D
])

relative_views = {"Asset 2": 0.02, "Asset 3": -0.02}  # Example relative views (optional)

# Create Black-Litterman model instance
bl = BlackLittermanModel(
    cov_matrix=cov_matrix,
    pi=market_prior,
    absolute_views=absolute_views
)

# Calculate the posterior returns and covariance matrix
posterior_returns = bl.bl_returns()
posterior_cov = bl.bl_cov()

In [None]:
fig, ax = plt.subplots(figsize=(7,7))
im = ax.imshow(bl.omega)

# We want to show all ticks...
ax.set_xticks(np.arange(len(bl.tickers)))
ax.set_yticks(np.arange(len(bl.tickers)))

ax.set_xticklabels(bl.tickers)
ax.set_yticklabels(bl.tickers)
plt.show()

rets_df = pd.DataFrame([market_prior, posterior_returns, pd.Series(absolute_views), expected_asset_returns],
             index=["Prior", "Posterior", "Views"]).T
rets_df

rets_df.plot.bar(figsize=(12,8));

**3. Optimization**
- Optimization with efficient frontier

In [None]:
# Calculate the posterior returns and covariance matrix
posterior_returns = bl.bl_returns()
posterior_cov = bl.bl_cov()

# Optimize the portfolio using Efficient Frontier
ef = EfficientFrontier(posterior_returns, posterior_cov)
weights = ef.max_sharpe()
cleaned_weights = ef.clean_weights()



In [None]:
# Display the portfolio weights and performance
print("Optimized Portfolio Weights:", cleaned_weights)
performance = ef.portfolio_performance(verbose=True)

# Visualize Optimized Portfolio Weights
plt.figure(figsize=(8, 4))
plt.title("Optimized Portfolio Weights", fontsize=14)
plt.bar(cleaned_weights.keys(), cleaned_weights.values(), color='orange')
plt.ylabel("Portfolio Weight")
plt.xlabel("Assets")
plt.show()