## Problem 1


In [1]:
#chap4.py
import pandas as pd
import qrpm_funcs as qf
#Get 3 currencies until the end of
#previous year. Form sample covariance matrix
#and do simple efficient frontier calculations

lastday=qf.LastYearEnd()
#Swiss franc, pound sterling, Japanese Yen
seriesnames=['DEXSZUS','DEXUSUK','DEXJPUS']
cdates,ratematrix=qf.GetFREDMatrix(seriesnames,enddate=lastday)
multipliers=[-1,1,-1]

lgdates,difflgs=qf.levels_to_log_returns(cdates,ratematrix,multipliers) # dates and log-returns

In [2]:
import numpy as np

ind = lgdates.index('2020-12-31')
L = np.array(difflgs[:ind+1]) #Learning set
H = np.array(difflgs[ind+1:]) #Hold-out set
m = np.mean(L,axis=0)
c = np.cov(L.T)
#display the output
#vectors and matrices are in fractional units;
#    fraction*100=percent
#    fraction*10000=basis point
#    (fraction^2)*10000=percent^2
np.set_printoptions(precision=4)
print("The Learning Set From",lgdates[0],"to",lgdates[ind],"(",len(lgdates[:ind+1]),"observations):")
print("\nMeans:",m*10000,"bps/day")
print("(CHF, GBP, JPY)\n")
print("     ",c[0]*10000)
print("C(L)=",c[1]*10000)
print("     ",c[2]*10000)
print(f'(%/day)\N{SUPERSCRIPT TWO} units')

The Learning Set From 1971-01-05 to 2020-12-31 ( 12535 observations):

Means: [ 1.2503 -0.4632  0.9918] bps/day
(CHF, GBP, JPY)

      [0.5052 0.2434 0.2158]
C(L)= [0.2434 0.3579 0.1147]
      [0.2158 0.1147 0.4065]
(%/day)² units


In [3]:
#invert the c matrix, which is in (fraction/day)^2 units
#so ci (c-inverse) is in (days/fraction)^2 units
ci=np.linalg.inv(c)
print("         ",ci[0]/10000)
print(f'C-inv(L)=',ci[1]/10000)
print("         ",ci[2]/10000)
print(f'(days/%)\N{SUPERSCRIPT TWO} units')

          [ 3.469  -1.9455 -1.2927]
C-inv(L)= [-1.9455  4.1635 -0.1423]
          [-1.2927 -0.1423  3.1864]
(days/%)² units


##### Below are calculation of $u^{\intercal}C^{-1}u$, $u^{\intercal}C^{-1}m$, and $m^{\intercal}C^{-1}m$ 

In [4]:
#sum entries in ci
uciu=np.sum(ci) 
print(f'u\'(C-inv)u =',uciu/10000,f'(days/%)\N{SUPERSCRIPT TWO}')
ucim=np.sum(ci @ m)
print(f'u\'(C-inv)m =',ucim,'days')
mcim = m @ (ci @ m)
print(f'm\'(C-inv)m =',mcim*10000,'bps')

u'(C-inv)u = 4.058162372655762 (days/%)²
u'(C-inv)m = 1.0643541086758752 days
m'(C-inv)m = 8.628306834187681 bps


##### Solutions for three-currency minimum-variance portfolio

Equation 4.6:
$$
w = \lambda (I-\frac{C^{-1}J}{u^{\intercal} C^{-1}u})C^{-1}m + \frac{C^{-1}u}{u^{\intercal}C^{-1}u}
$$

$\lambda$ is a varying parameter that moves along the efficient frontier.

In [5]:
#Vectors for equation 4.6
u=[1]*3
vec2=(ci @ u)/uciu
vec1=np.subtract(ci @ m,vec2*ucim)
print(vec1,vec2)

lambdacoeff=(uciu*mcim-ucim*ucim)/uciu
constmu=ucim/uciu
print(lambdacoeff*10000, constmu*10000)

[ 3.8957 -5.0463  1.1506] [0.0569 0.5115 0.4316]
8.349153472486716 0.2622748946290524


##### So the the minimum-variance matrix is:

$$
w_1 =  [3.8957\lambda+0.0569 , -5.0463\lambda+0.5115 ,  1.1506\lambda+0.4316] 
$$

##### Now using Ledoit-Wolf shrinkage with $s = \frac{1}{3}$

In [6]:
n = 3
s = 1/3
u = np.array([[1,1,1]])
J = u.T @ u
I = np.identity(3)
S = np.zeros((3,3))
for i in range(3):
    S[i][i] = (c[i][i])**(1/2)
Si = np.linalg.inv(S)  
# Sample correlation matrix
R = Si @ c @Si
# the constant correlation matrix
lu = (int(u @ R @ u.T) - n) / (n*(n-1))
c_rho = S @ (I + lu * (J-I)) @ S
# Ledoit-Wolf shrinkage estimator
c_lw = s*c_rho + (1-s)*c
print('The Ledoit-Wolf constant-correlation covariance shrinkage estimator is:\n',c_lw)
print('The original constant-correlation covariance matrix is:\n',c)

The Ledoit-Wolf constant-correlation covariance shrinkage estimator is:
 [[5.0522e-05 2.0954e-05 1.9424e-05]
 [2.0954e-05 3.5786e-05 1.1887e-05]
 [1.9424e-05 1.1887e-05 4.0651e-05]]
The original constant-correlation covariance matrix is:
 [[5.0522e-05 2.4345e-05 2.1582e-05]
 [2.4345e-05 3.5786e-05 1.1474e-05]
 [2.1582e-05 1.1474e-05 4.0651e-05]]


In [7]:
ci_lw = np.linalg.inv(c_lw)
uciu_lw = np.sum(ci_lw) 
ucim_lw = np.sum(ci_lw @ m)
mcim_lw = m @ (ci_lw @ m)

u=[1]*3
vec2_lw=(ci_lw @ u)/uciu_lw
vec1_lw=np.subtract(ci_lw @ m,vec2_lw*ucim_lw)
print(vec1_lw, vec2_lw)
lambdacoeff_lw = (uciu_lw*mcim_lw-ucim_lw*ucim_lw)/uciu_lw
constmu_lw = ucim_lw/uciu_lw

[ 3.139  -4.5685  1.4295] [0.1341 0.469  0.3968]


##### So the the minimum-variance matrix is:

$$
w_2 =  [3.139\lambda+0.1341 , -4.5685\lambda+0.469 ,  1.4295\lambda+0.3968] 
$$

##### Next we calculate the portfolio variances on holdout sample $H$ with $w_1$ and $w_2$.

In [8]:
# covariance matrix of H
c_H = np.cov(H.T)

def get_variance(lbd):
    w1 = np.array([3.8957*lbd+0.0569, -5.0463*lbd+0.5115, 1.1506*lbd+0.4316])
    w2 = np.array([3.139*lbd+0.1341, -4.5685*lbd+0.469, 1.4295*lbd+0.3968])
    var1 = w1@c_H@w1.T
    var2 = w2@c_H@w2.T
    return f'lambda = {lbd}, variances are w1:{var1}, w2:{var2}' 

for lbd in range(1,20):
    print(get_variance(lbd/20))

lambda = 0.05, variances are w1:8.824792523311604e-06, w2:8.947836383194718e-06
lambda = 0.1, variances are w1:1.0497288422632765e-05, w2:1.04550586306161e-05
lambda = 0.15, variances are w1:1.416780615916463e-05, w2:1.3602672842185565e-05
lambda = 0.2, variances are w1:1.9836345732907206e-05, w2:1.839067901790312e-05
lambda = 0.25, variances are w1:2.7502907143860492e-05, w2:2.4819077157768748e-05
lambda = 0.3, variances are w1:3.716749039202448e-05, w2:3.288786726178246e-05
lambda = 0.35, variances are w1:4.8830095477399185e-05, w2:4.2597049329944256e-05
lambda = 0.4, variances are w1:6.249072239998459e-05, w2:5.3946623362254144e-05
lambda = 0.45, variances are w1:7.814937115978073e-05, w2:6.693658935871213e-05
lambda = 0.5, variances are w1:9.580604175678752e-05, w2:8.156694731931818e-05
lambda = 0.55, variances are w1:0.00011546073419100512, w2:9.78376972440723e-05
lambda = 0.6, variances are w1:0.0001371134484624333, w2:0.00011574883913297451
lambda = 0.65, variances are w1:0.0001

##### We can see that in most cases $w_2$ has smaller variance than $w_1$ in the holdout sample $H$, which shows the effects of the shrunk matrix.

## Problem 2


In [9]:
# equal weight
w3 = np.array([[1/3,1/3,1/3]])
r3 = m@w3.T
nominator = uciu*r3-ucim
denominator = uciu*mcim-ucim*ucim
lbd = nominator/denominator

u=[1]*3
vec2=(ci @ u)/uciu
vec1=np.subtract(ci @ m,vec2*ucim)
we = lbd*vec1 + vec2

print("w3: ",w3)
print("we: ",we)

from math import sqrt
# expected return of each asset on Holdout
m_H = np.mean(H,axis=0)
# returns
mu3 = m_H @ w3.T
mue = m_H @ we.T
print("w3 return in bps/day: ", mu3*10000)
print("we return in bps/day: ", mue*10000)
# standard deviations
sigma3 = sqrt(w3 @ c_H @ w3.T)
sigmae = sqrt(we @ c_H @ we.T)
print("std of w3 in %/day: ", sigma3*10000)
print("std of we in %/day: ", sigmae*10000)

w3:  [[0.3333 0.3333 0.3333]]
we:  [0.2112 0.3116 0.4772]
w3 return in bps/day:  [-2.0841]
we return in bps/day:  -2.541830657233333
std of w3 in %/day:  30.443491964159456
std of we in %/day:  29.54312435507896


Based on the results above, we conclude during holdout set 'H':

$w_e$ has the lower standard deviation  while $w_3$ has higher return.

## Problem 4

In [10]:
import pandas as pd
import numpy as np
import qrpm_funcs as qf

lastday=qf.LastYearEnd()
lastyear = int(lastday[:4])
periods = (lastyear-1926)*12+6

fac_url='http://mba.tuck.dartmouth.edu/pages/faculty/ken.french/ftp/F-F_Research_Data_Factors_CSV.zip'
df_fac = pd.read_csv(fac_url, skiprows=3, nrows=periods, usecols=[0, 1, 2, 3])
df_fac.rename(columns={df_fac.columns[0] : 'yearmon'}, inplace=True)

ind_url='http://mba.tuck.dartmouth.edu/pages/faculty/ken.french/ftp/5_Industry_Portfolios_CSV.zip'
df_ind = pd.read_csv(ind_url, skiprows=11, nrows=periods)
df_ind.rename(columns={df_ind.columns[0] : 'yearmon'}, inplace=True)

df_8factor=pd.merge(df_fac,df_ind,on='yearmon')

df_8factor = pd.concat([df_8factor["yearmon"], \
            np.log(1+df_8factor[df_8factor.columns[1:]]/100.)],axis=1)

df_8factor = df_8factor[df_8factor['yearmon']>201112]
df_8factor = df_8factor.reset_index(drop=True)
df_8factor

Unnamed: 0,yearmon,Mkt-RF,SMB,HML,Cnsmr,Manuf,HiTec,Hlth,Other
0,201201,0.049266,0.020195,-0.009747,0.032951,0.029073,0.068686,0.033531,0.069806
1,201202,0.043251,-0.018673,0.004291,0.036043,0.042868,0.060625,0.012521,0.041238
2,201203,0.030626,-0.006421,0.011336,0.039701,-0.006622,0.040758,0.038644,0.047837
3,201204,-0.008536,-0.004209,-0.007831,0.002397,-0.006320,-0.013187,0.006081,-0.016739
4,201205,-0.063899,0.000700,-0.010758,-0.033453,-0.079801,-0.065499,-0.033040,-0.080451
...,...,...,...,...,...,...,...,...,...
115,202108,0.028685,-0.004209,-0.001501,0.016955,0.007968,0.042676,0.028101,0.025570
116,202109,-0.044684,0.007075,0.049552,-0.033350,-0.028811,-0.063579,-0.061982,-0.022859
117,202110,0.064382,-0.023781,-0.004912,0.082777,0.058363,0.065226,0.022251,0.069992
118,202111,-0.015621,-0.013288,-0.004510,0.005087,-0.018673,0.003494,-0.043847,-0.050872


In [11]:
excel_file_path = 'ratio_data.xlsx' 
df_stocks = pd.read_excel(excel_file_path)
df_stocks = pd.concat([df_stocks["Date"], \
            np.log(1+df_stocks[df_stocks.columns[1:]]/100.)],axis=1)
df_stocks = df_stocks.reset_index(drop=True)
df_stocks

Unnamed: 0,Date,AAPL,AMZN,ED,F,JNJ,JPM,ORCL,TSLA,V,WMT,XOM
0,2021-12-31,0.010685,0.009463,0.010929,0.010765,0.010911,0.009920,0.009565,0.009189,0.011122,0.010277,0.010174
1,2021-11-30,0.010990,0.010346,0.010346,0.011229,0.009590,0.009306,0.009414,0.010224,0.009124,0.009368,0.009363
2,2021-10-29,0.010531,0.010214,0.010334,0.011990,0.010035,0.010387,0.010989,0.014263,0.009462,0.010663,0.010901
3,2021-09-30,0.009276,0.009420,0.009575,0.010809,0.009285,0.010182,0.009728,0.010485,0.009676,0.009367,0.010731
4,2021-08-31,0.010371,0.010376,0.010277,0.009297,0.010063,0.010483,0.010176,0.010649,0.009268,0.010374,0.009568
...,...,...,...,...,...,...,...,...,...,...,...,...
115,2012-05-31,0.009844,0.009139,0.010205,0.009318,0.009637,0.007683,0.008963,0.008865,0.009341,0.011186,0.009128
116,2012-04-30,0.009693,0.011386,0.010125,0.009037,0.009821,0.009365,0.010052,0.008857,0.010368,0.009580,0.009906
117,2012-03-30,0.010992,0.011207,0.010005,0.010030,0.010084,0.011650,0.009920,0.011085,0.010089,0.010374,0.009977
118,2012-02-29,0.011813,0.009199,0.009907,0.009918,0.009912,0.010465,0.010315,0.011427,0.011519,0.009582,0.010333


In [12]:
# OLS 
import statsmodels.api as sm
X = df_8factor[['Mkt-RF','SMB','HML','Cnsmr','Manuf','HiTec','Hlth ','Other']] 
X = sm.add_constant(X)
stocks = ['AAPL','AMZN','ED','F','JNJ','JPM','ORCL','TSLA','V','WMT','XOM'] 
r2 = {}
for name in stocks:
    model = sm.OLS(df_stocks[name],X)
    result = model.fit()
    print('In %s, absolute t-values of 8 factors are\n'% name, abs(result.tvalues[1:]))
    r2[name]=result.rsquared

In AAPL, absolute t-values of 8 factors are
 Mkt-RF    1.469565
SMB       0.663552
HML       0.590189
Cnsmr     2.263448
Manuf     1.189612
HiTec     1.245751
Hlth      1.426080
Other     1.029122
dtype: float64
In AMZN, absolute t-values of 8 factors are
 Mkt-RF    0.037043
SMB       0.656320
HML       1.027253
Cnsmr     0.109708
Manuf     0.080067
HiTec     0.272726
Hlth      0.207226
Other     0.461278
dtype: float64
In ED, absolute t-values of 8 factors are
 Mkt-RF    0.789394
SMB       0.153807
HML       0.582659
Cnsmr     0.867976
Manuf     0.624084
HiTec     0.894597
Hlth      0.228965
Other     0.824979
dtype: float64
In F, absolute t-values of 8 factors are
 Mkt-RF    1.010803
SMB       1.176870
HML       1.179428
Cnsmr     0.958854
Manuf     0.736115
HiTec     1.080728
Hlth      1.140768
Other     0.708809
dtype: float64
In JNJ, absolute t-values of 8 factors are
 Mkt-RF    0.511910
SMB       0.807321
HML       0.295641
Cnsmr     0.837166
Manuf     1.258238
HiTec     0.409471

Factor that has maximum absolute t value in each stock:
'AAPL': Cnsmr, 
'AMZN': HML, 
'ED': HiTec, 
'F':HML, 
'JNJ': Manuf,
'JPM': Other,
'ORCL': Manuf,
'TSLA': Manuf, 
'V': HML, 
'WMT': SMB, 
'XOM': HML

R square is not high in this dataset, possibly because that the 8 factors cannot do a good job in explaining individual stock returns.