In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import statsmodels.api as sm
from scipy.optimize import minimize


In [4]:
# Load the inputs. The two return files use their first column as a
# date-parsed index; the weights table uses its first column as a plain index.
asset_returns = pd.read_csv(
    "/content/asset_returns.csv",
    index_col=0,
    parse_dates=True,
)
market_returns = pd.read_csv(
    "/content/market_returns.csv",
    index_col=0,
    parse_dates=True,
)
# NOTE(review): the file name is spelled "markowits" — kept as-is; confirm it
# matches the file on disk.
markowitz_weights = pd.read_csv(
    "/content/markowits_weights.csv",
    index_col=0,
)

In [5]:
# Containers for the per-stock regression outputs (filled in by the
# regression loop, keyed by asset column name).
alphas, betas, residual_vars = {}, {}, {}

In [9]:
# Show the name of the first market_returns column; the regression loop uses
# this label to pick the beta coefficient out of the fitted parameters.
market_returns.columns[0]

'^NSEI'

In [11]:
# Single-index regression per stock: r_stock = alpha + beta * r_market + eps.
# The design matrix (market returns plus an intercept column) and the market
# column label do not depend on the stock, so they are built once here rather
# than being recomputed on every loop iteration.
market_design = sm.add_constant(market_returns)
market_col = market_returns.columns[0]

for stock in asset_returns.columns:
    model = sm.OLS(asset_returns[stock], market_design).fit()

    alphas[stock] = model.params['const']
    betas[stock] = model.params[market_col]
    # Residual variance scaled by 252 — presumably trading days per year,
    # i.e. annualising daily returns; confirm the data frequency.
    residual_vars[stock] = model.resid.var() * 252

# Display intercepts and slopes side by side, one row per stock.
pd.DataFrame({
    "alpha": alphas,
    "betas": betas
})


Unnamed: 0,alpha,betas
HDFCBANK.NS,-8.7e-05,1.053165
ICICIBANK.NS,0.000142,1.341838
INFY.NS,0.000439,0.830596
RELIANCE.NS,0.000238,1.102143
TCS.NS,0.000461,0.69181


In [20]:
# Turn the per-stock dicts into Series (indexed by stock) so they can feed the
# matrix algebra that builds the SIM covariance, then display both.
betas, residual_vars = pd.Series(betas), pd.Series(residual_vars)
(betas, residual_vars)

(HDFCBANK.NS     1.053165
 ICICIBANK.NS    1.341838
 INFY.NS         0.830596
 RELIANCE.NS     1.102143
 TCS.NS          0.691810
 dtype: float64,
 HDFCBANK.NS     0.027644
 ICICIBANK.NS    0.046732
 INFY.NS         0.056366
 RELIANCE.NS     0.047751
 TCS.NS          0.046165
 dtype: float64)

7️⃣ WHY THIS IS POWERFUL (AND WHY IT EXISTS)
Markowitz covariance:

Needs $\frac{N(N-1)}{2}$ covariances (plus $N$ variances)

Noisy, unstable for large N

SIM covariance:

Needs:

N betas

1 market variance

N residual variances

Much more parsimonious

More stable out-of-sample

📌 This is why SIM is used in:

Large portfolios

Teaching CAPM-based optimization

Risk models as a first layer

In [15]:
# Sanity check: one beta per asset is expected (shape is (number_of_assets,)).
betas.shape

(5,)

In [19]:
# Variance of each market_returns column, scaled by 252 (presumably
# annualising daily data — confirm the frequency).
market_var = 252 * market_returns.var()

In [28]:
# SIM covariance: beta * beta^T * sigma_m^2 + diag(residual variances).
# market_var is a label-indexed Series (one entry per market column), so its
# first entry is taken positionally with .iloc. Plain market_var[0] relies on
# the deprecated integer-position fallback and emits a FutureWarning.
sim_cov = np.outer(betas, betas) * market_var.iloc[0] + np.diag(residual_vars)
type(sim_cov)

  sim_cov=np.outer(betas,betas)*market_var[0]+np.diag(residual_vars)


numpy.ndarray

In [30]:
# Attach asset labels to both axes of the SIM covariance matrix and display it.
asset_labels = asset_returns.columns
sim_cov = pd.DataFrame(data=sim_cov, index=asset_labels, columns=asset_labels)
sim_cov

Unnamed: 0,HDFCBANK.NS,ICICIBANK.NS,INFY.NS,RELIANCE.NS,TCS.NS
HDFCBANK.NS,0.065044,0.047652,0.029496,0.03914,0.024568
ICICIBANK.NS,0.047652,0.107445,0.037581,0.049868,0.031302
INFY.NS,0.029496,0.037581,0.079629,0.030868,0.019376
RELIANCE.NS,0.03914,0.049868,0.030868,0.088711,0.02571
TCS.NS,0.024568,0.031302,0.019376,0.02571,0.062304


In [32]:
# Mean return per asset scaled by 252 (presumably annualised — confirm the
# data frequency), and the number of assets being optimised over.
mean_returns = 252 * asset_returns.mean()
num_assets = mean_returns.shape[0]

Where they differ (the important part)
Markowitz Σ

$$\Sigma = \text{sample covariance of returns}$$

Purely statistical

No economic structure

Every pair of assets estimated independently

SIM Σ

$$\Sigma = \beta\beta^\top \sigma_m^2 + D$$

where $D$ is the diagonal matrix of residual variances.

Factor structure

Assumes common market driver

Residual risks are uncorrelated

This is not just a different estimator — it's a model of reality.

In [38]:
def sim_portfolio_vol(weights, cov=None):
    """Return the portfolio volatility sqrt(w' * cov * w).

    Parameters
    ----------
    weights : array-like
        Portfolio weights, one per asset, in the same order as the rows and
        columns of the covariance matrix. Lists and tuples are converted to
        float arrays; arrays and pandas objects are used as given.
    cov : array-like or DataFrame, optional
        Covariance matrix to evaluate against. When omitted, the
        notebook-level ``sim_cov`` is used, so existing single-argument
        calls (including the optimiser's) behave as before.

    Returns
    -------
    scalar
        Square root of the quadratic form ``weights.T @ cov @ weights``.
    """
    if cov is None:
        # Resolved at call time so the function always sees the current sim_cov.
        cov = sim_cov
    if isinstance(weights, (list, tuple)):
        # Plain sequences have no .T / @ support; promote them to an ndarray.
        weights = np.asarray(weights, dtype=float)
    return np.sqrt(weights.T @ cov @ weights)


In [43]:
# Optimiser inputs: weights must sum to one (equality constraint), each weight
# is bounded to [0, 1], and the search starts from equal weights.
constraints = {
    'type': 'eq',
    'fun': lambda w: sum(w) - 1,
}
bounds = ((0, 1),) * num_assets
init_guess = np.full(num_assets, 1 / num_assets)

In [45]:
# Minimum-volatility portfolio under the SIM covariance, solved with SLSQP
# subject to the bounds and sum-to-one constraint defined for the optimiser.
sim_mvp = minimize(
    fun=sim_portfolio_vol,
    x0=init_guess,
    method='SLSQP',
    bounds=bounds,
    constraints=constraints,
)
sim_mvp

     message: Optimization terminated successfully
     success: True
      status: 0
         fun: 0.19465824987607452
           x: [ 2.570e-01  0.000e+00  2.412e-01  1.192e-01  3.826e-01]
         nit: 7
         jac: [ 1.947e-01  2.015e-01  1.946e-01  1.948e-01  1.947e-01]
        nfev: 42
        njev: 7
 multipliers: [ 1.947e-01]

In [46]:
# Display the Markowitz weights loaded from CSV, for reference against the SIM result.
markowitz_weights

Unnamed: 0,MVP,Tangency
HDFCBANK.NS,0.33442,0.03863
ICICIBANK.NS,0.041839,0.173128
INFY.NS,0.126029,0.198186
RELIANCE.NS,0.159182,0.212151
TCS.NS,0.33853,0.377905


In [47]:
# Optimal weight vector returned by the SIM minimum-volatility optimisation.
sim_weights = sim_mvp.x

In [48]:
# Side-by-side table of the Markowitz and SIM minimum-variance weights,
# one row per asset (rows follow mean_returns.index).
weight_columns = {
    "Markowitz_MVP": markowitz_weights["MVP"],
    "SIM_MVP": sim_weights,
}
comparison = pd.DataFrame(data=weight_columns, index=mean_returns.index)


In [49]:
# Display the Markowitz vs SIM weight comparison table.
comparison

Unnamed: 0,Markowitz_MVP,SIM_MVP
HDFCBANK.NS,0.33442,0.25697
ICICIBANK.NS,0.041839,0.0
INFY.NS,0.126029,0.241169
RELIANCE.NS,0.159182,0.119243
TCS.NS,0.33853,0.382618


In [50]:
# Sample covariance of asset returns scaled by 252 (presumably annualised —
# confirm the data frequency); used to evaluate the Markowitz portfolio.
markowitz_cov = 252 * asset_returns.cov()

In [60]:
# Inspect the type of the Markowitz weight column.
# NOTE(review): looks like a leftover debugging probe — consider removing.
type(comparison["Markowitz_MVP"])

In [61]:
# Volatility of the Markowitz minimum-variance weights under the sample
# covariance: sqrt(w' * Sigma * w).
mvp_weights = comparison["Markowitz_MVP"]
markowitz_vol = np.sqrt(mvp_weights @ markowitz_cov @ mvp_weights)
markowitz_vol

np.float64(0.1934811131577193)

In [63]:
# Volatility of the SIM minimum-variance weights, evaluated with sim_portfolio_vol.
sim_vol = sim_portfolio_vol(sim_weights)
sim_vol

np.float64(0.19465824987607452)

In [64]:
# Persist the Markowitz vs SIM weight comparison table to CSV.
comparison.to_csv("/content/sim_vs_markowitz_weights.csv")