# 量化投资范式总结

Objective function (to be minimized) = factor risk + idiosyncratic risk - expected portfolio return + transaction costs

FAQ:
1. 如何处理禁止卖空的约束（$h \ge 0$）？将组合优化转化为带不等式约束的二次规划问题

## 风险因子模型

$$Var\left( r_{p}\right)  =h^{T}\left( BFB^{T}+S\right)h$$

- B - 风险因子暴露度矩阵（资产数 × 因子数；下文代码中的 `B` 按“因子 × 资产”存储，因此代码里写作 `B.T F B`）
- F - 风险因子协方差矩阵
- S - 特异性风险矩阵（由各资产残差方差构成的对角矩阵）
- h - 资产权重向量

### 生成风险因子模型的两种方式

- 商业风险因子模型
- PCA风险因子模型

In [21]:
import pandas as pd
import numpy as np
from sklearn.decomposition import PCA

In [41]:
# PCA risk-factor model example, based on the stocks included in the
# CSI All Share index (中证全指), 2019 data.
# NOTE: unpickling can execute arbitrary code; only load pickle files that
# come from a trusted source.
return_df = pd.read_pickle('../a_return_pct.pkl')
# Per the output shown below: one row per date, one column per stock.
print(return_df.shape)
return_df.head()

(243, 3384)


Unnamed: 0,平安银行,万科Ａ,国农科技,世纪星源,深振业Ａ,全新好,神州高铁,中国宝安,深物业A,南 玻Ａ,...,兆易创新,康德莱,中电电机,艾华集团,麦迪科技,至正股份,洛阳钼业,继峰股份,方盛制药,读者传媒
2019-01-03,0.009793,0.007113,0.011831,0.003745,0.0,-0.035714,-0.010309,-0.013953,0.007503,0.007538,...,-0.010033,-0.00155,-0.004057,-0.033811,-0.021107,-0.025683,0.008086,0.018041,0.011038,0.0
2019-01-04,0.050647,0.035729,0.021538,0.026119,0.021359,0.021711,0.018229,0.03066,0.017021,0.022444,...,0.025845,0.029503,0.014257,0.029692,0.003125,0.029164,0.024064,-0.005063,0.030568,0.050526
2019-01-07,-0.001026,0.004813,0.037349,0.032727,0.015209,0.0075,0.040921,0.018307,0.026151,0.007317,...,0.035732,0.0,0.016064,0.009269,-0.000935,0.025068,0.015666,0.0,0.033898,0.014028
2019-01-08,-0.008214,-0.001996,-0.02381,-0.003521,-0.003745,-0.026055,0.017199,0.01573,-0.007136,0.0,...,-0.027027,0.054299,0.027668,0.015306,0.005613,0.004785,-0.010283,-0.001272,0.008197,-0.001976
2019-01-09,0.028986,0.0132,-0.001785,0.010601,0.007519,0.033121,-0.012077,-0.00885,-0.001027,0.0,...,0.015033,-0.02289,-0.011538,-0.018593,-0.00093,0.005291,0.002597,0.056051,0.026423,-0.00198


In [42]:
# Number of principal components kept; each one is used as a risk factor below.
n_factor = 20

# Fit PCA on the return panel. The trailing fit() call is left as a bare
# expression so the notebook displays the fitted estimator.
pca_model = PCA(n_components=n_factor, svd_solver='full')
pca_model.fit(return_df)

PCA(copy=True, iterated_power='auto', n_components=20, random_state=None,
    svd_solver='full', tol=0.0, whiten=False)

In [43]:
# Factor exposure (loading) matrix built from the fitted components:
# one row per factor (0 .. n_factor-1), one column per stock.
B = pd.DataFrame(pca_model.components_, index=range(n_factor), columns=return_df.columns)
print(B.shape)
B.head()

(20, 3384)


Unnamed: 0,平安银行,万科Ａ,国农科技,世纪星源,深振业Ａ,全新好,神州高铁,中国宝安,深物业A,南 玻Ａ,...,兆易创新,康德莱,中电电机,艾华集团,麦迪科技,至正股份,洛阳钼业,继峰股份,方盛制药,读者传媒
0,-0.010613,-0.008877,-0.013888,-0.01836,-0.015376,-0.017011,-0.015965,-0.022324,-0.015516,-0.017067,...,-0.022682,-0.012763,-0.018954,-0.012701,-0.011689,-0.016469,-0.014356,-0.014505,-0.023268,-0.021462
1,-0.018951,-0.021256,-0.013525,-0.002052,-0.013395,-0.006224,-0.009324,-0.004145,-0.011841,-0.004515,...,0.054933,-0.00343,-0.007466,0.017699,0.013575,-0.00514,-0.019126,-0.012925,-0.023783,-0.003136
2,0.038749,0.033979,0.002002,-0.011823,0.011013,-0.009408,-0.005577,-0.018934,0.006159,-0.006906,...,0.027047,-0.004981,-0.010865,0.013698,0.009503,-0.014437,-0.005204,-0.000759,-0.001747,0.003355
3,-0.022296,-0.026341,0.02186,-0.012882,-0.009339,-0.003017,-0.008989,-0.010302,-0.001256,-0.01326,...,-0.014066,0.002038,0.009529,0.004147,-0.01104,0.009361,-0.011576,0.016224,0.022604,-0.028377
4,0.005558,-0.005267,-0.016215,-0.004311,-0.014981,-0.021331,-0.007886,-0.005168,-0.016373,0.007648,...,0.002801,-0.007515,0.021989,-0.001317,0.02644,-0.009635,-0.005958,0.006087,0.017325,-0.004842


In [44]:
# Multiplier used to annualize the variances below
# (presumably 252 trading days per year -- confirm).
ann_factor = 252
# Factor returns: the return panel projected onto the fitted components.
f = pca_model.transform(return_df)
print(f.shape)
# Factor covariance matrix: the sample variance (ddof=1) of each factor
# return on the diagonal, scaled by ann_factor; off-diagonal entries are zero.
F = np.diag(f.var(axis=0, ddof=1)) * ann_factor
print(F.shape)
F

(243, 20)
(20, 20)


array([[209.79870812,   0.        ,   0.        ,   0.        ,
          0.        ,   0.        ,   0.        ,   0.        ,
          0.        ,   0.        ,   0.        ,   0.        ,
          0.        ,   0.        ,   0.        ,   0.        ,
          0.        ,   0.        ,   0.        ,   0.        ],
       [  0.        ,  15.45765793,   0.        ,   0.        ,
          0.        ,   0.        ,   0.        ,   0.        ,
          0.        ,   0.        ,   0.        ,   0.        ,
          0.        ,   0.        ,   0.        ,   0.        ,
          0.        ,   0.        ,   0.        ,   0.        ],
       [  0.        ,   0.        ,   9.7370178 ,   0.        ,
          0.        ,   0.        ,   0.        ,   0.        ,
          0.        ,   0.        ,   0.        ,   0.        ,
          0.        ,   0.        ,   0.        ,   0.        ,
          0.        ,   0.        ,   0.        ,   0.        ],
       [  0.        ,   0.        ,  

In [45]:
# Idiosyncratic (specific) risk.
# Residual returns: what remains of each stock's return after subtracting
# the factor-explained part f @ B.
# NOTE(review): PCA.transform presumably centers the data, in which case s
# still contains each column's mean. The variance below is unaffected by a
# constant offset -- confirm before reusing s elsewhere.
s = return_df - np.dot(f, B)
print(s.shape)
# Diagonal matrix of per-stock residual variances, scaled by ann_factor and
# labelled by stock on both axes.
S = pd.DataFrame(np.diag(s.var(axis=0)) * ann_factor, index=return_df.columns, columns=return_df.columns)
print(S.shape)
S.head()

(243, 3384)
(3384, 3384)


Unnamed: 0,平安银行,万科Ａ,国农科技,世纪星源,深振业Ａ,全新好,神州高铁,中国宝安,深物业A,南 玻Ａ,...,兆易创新,康德莱,中电电机,艾华集团,麦迪科技,至正股份,洛阳钼业,继峰股份,方盛制药,读者传媒
平安银行,0.047533,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
万科Ａ,0.0,0.035708,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
国农科技,0.0,0.0,0.162331,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
世纪星源,0.0,0.0,0.0,0.087438,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
深振业Ａ,0.0,0.0,0.0,0.0,0.030001,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [53]:
# Full asset covariance matrix: factor part plus specific part S.
# B is stored factors-by-stocks here, hence the B.T on the left-hand side.
full_risk = np.dot(np.dot(B.T, F), B) + S
full_risk

Unnamed: 0,平安银行,万科Ａ,国农科技,世纪星源,深振业Ａ,全新好,神州高铁,中国宝安,深物业A,南 玻Ａ,...,兆易创新,康德莱,中电电机,艾华集团,麦迪科技,至正股份,洛阳钼业,继峰股份,方盛制药,读者传媒
平安银行,0.101342,0.047218,0.027102,0.036299,0.041672,0.029904,0.037042,0.045246,0.035805,0.039627,...,0.048875,0.022485,0.037434,0.026663,0.022411,0.031991,0.042013,0.032410,0.051682,0.054437
万科Ａ,0.047218,0.086257,0.024616,0.033593,0.040383,0.022378,0.034440,0.040725,0.034457,0.034961,...,0.033953,0.019857,0.033835,0.022999,0.019688,0.026728,0.037337,0.029216,0.035808,0.051265
国农科技,0.027102,0.024616,0.235439,0.059739,0.049409,0.059930,0.046034,0.069920,0.053059,0.044668,...,0.056513,0.043176,0.062463,0.035628,0.027722,0.046669,0.040947,0.042008,0.083385,0.052866
世纪星源,0.036299,0.033593,0.059739,0.168317,0.061191,0.063817,0.061625,0.093005,0.061876,0.068866,...,0.087043,0.047792,0.071255,0.047647,0.046625,0.063514,0.054067,0.052620,0.090601,0.081638
深振业Ａ,0.041672,0.040383,0.049409,0.061191,0.088282,0.055027,0.054888,0.070857,0.058401,0.055028,...,0.065870,0.042769,0.059878,0.039361,0.036891,0.052907,0.050954,0.048489,0.071277,0.072608
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
至正股份,0.031991,0.026728,0.046669,0.063514,0.052907,0.058868,0.056905,0.080904,0.053596,0.059175,...,0.071307,0.044794,0.063834,0.041564,0.031752,0.099848,0.054688,0.051032,0.077716,0.073362
洛阳钼业,0.042013,0.037337,0.040947,0.054067,0.050954,0.048088,0.052422,0.070025,0.044927,0.056310,...,0.056363,0.038705,0.060051,0.033948,0.025509,0.054688,0.114718,0.046165,0.063952,0.064783
继峰股份,0.032410,0.029216,0.042008,0.052620,0.048489,0.050823,0.050666,0.064179,0.049441,0.054290,...,0.055676,0.040669,0.070122,0.038129,0.036690,0.051032,0.046165,0.154133,0.081535,0.065569
方盛制药,0.051682,0.035808,0.083385,0.090601,0.071277,0.083926,0.072436,0.105735,0.075706,0.087139,...,0.089927,0.068037,0.092189,0.053184,0.066715,0.077716,0.063952,0.081535,0.394883,0.098939


In [56]:
# Equal-weight portfolio: hold every stock with weight 1 / N, where N is the
# number of rows of full_risk.
h = np.full(len(full_risk), 1 / len(full_risk))
h

array([0.00029551, 0.00029551, 0.00029551, ..., 0.00029551, 0.00029551,
       0.00029551])

In [58]:
# Total portfolio risk: the quadratic form h^T * full_risk * h (a variance)
# for the weight vector h.
np.dot(np.dot(h, full_risk), h)

0.05821928038560485

## Alpha 模型