## 8. 如何对选股因子进行优化？

## 目录
1. 选择Alpha003作为优化对象
2. 如何设置优化目标？
3. 如何进行参数设置？
4. 如何输出最优结果？

## 选择Alpha006作为优化对象

In [1]:
import numpy as np
import pandas as pd

def correlation(x,y,window=10):
    return x.rolling(window).corr(y)

def rank(df):
    return df.rank(axis=1, pct=True)

class alphas(object):
    def __init__(self, pn_data):
        """
        :传入参数 pn_data: pandas.Panel
        """
        # 获取历史数据
        self.open = pd.DataFrame(pn_data.minor_xs('open'), dtype=np.float64)
        self.volume = pd.DataFrame(pn_data.minor_xs('volume'), dtype=np.float64)

    # alpha006: (-1 * correlation(open, volume, 10))
    def alpha006(self, c=10):
        alpha = -1 * correlation(self.open, self.volume, c)
        return alpha

#传入数据

if __name__ == '__main__':
    import pandas as pd
    import tushare as ts

    codes = ['000001', '601318', '600029', '000089', '000402', 
             '000895', '600006', '000858', '600036', '600050']
    stocks_dict = {}
    for c in codes:
        stock = ts.get_k_data(c, start='2016-01-01', end='2016-12-31', ktype='D', autype='qfq')
        stock.index = pd.to_datetime(stock['date'], format='%Y-%m-%d')
        stock.pop('date')
        stocks_dict[c] = stock
    
    pn = pd.Panel(stocks_dict)
    prices = pn.minor_xs('close')
    alpha = alphas(pn).alpha006()
    
    
    #转为Alphalens的数据格式
    alpha = alpha.stack()
    print prices.tail()
    print alpha.tail()

           000001 000089 000402  000858  000895 600006 600029 600036 600050  \
date                                                                          
2016-12-26   9.12    8.1  10.66  33.653  19.355   7.12   7.07  17.75   7.81   
2016-12-27   9.08   8.11  10.58  33.643   19.43   7.04   7.03  17.71   7.55   
2016-12-28   9.06   7.97  10.43  33.222   19.26   6.93   6.97  17.62   7.69   
2016-12-29   9.08    7.9  10.34  33.163  19.534   6.92   6.99  17.49   7.54   
2016-12-30    9.1      8   10.3   33.78  19.771   6.88   7.02   17.6   7.31   

           601318  
date               
2016-12-26  35.12  
2016-12-27  35.26  
2016-12-28  35.29  
2016-12-29  35.13  
2016-12-30  35.43  
date              
2016-12-30  600006   -0.229996
            600029    0.035984
            600036   -0.293132
            600050    0.075360
            601318   -0.296788
dtype: float64


## 如何设置优化目标？

In [2]:
import alphalens

def opt(c, q):
    alpha = alphas(pn).alpha006(c)
    factor_data = alpha.stack()
    factor_data = alphalens.utils.get_clean_factor_and_forward_returns(factor_data, prices, quantiles=q)
    mean_return_by_q, std_by_q = alphalens.performance.mean_return_by_quantile(factor_data)
    sharpe = mean_return_by_q/std_by_q
    dct = sharpe.iloc[0].to_dict()
    dct.update({'cor_period': c, 'quantile': q})
    return dct

## 如何进行参数设置？

In [3]:
import itertools

dict_list = []
for param in itertools.product(range(2, 6), range(3, 6)):
    dct = opt(*param)
    for i in [1, 5, 10]:
        dct[str(i)] = dct.pop(i)
        dict_list.append(dct)
# print dict_list

result = pd.DataFrame(dict_list)

print result

           1        10         5  cor_period  quantile
0   0.913825  1.539255  0.331380           2         3
1   0.913825  1.539255  0.331380           2         3
2   0.913825  1.539255  0.331380           2         3
3   0.760644  1.594924  0.356017           2         4
4   0.760644  1.594924  0.356017           2         4
5   0.760644  1.594924  0.356017           2         4
6   1.171541  2.171023  0.826320           2         5
7   1.171541  2.171023  0.826320           2         5
8   1.171541  2.171023  0.826320           2         5
9  -0.251774  0.801417  0.331025           3         3
10 -0.251774  0.801417  0.331025           3         3
11 -0.251774  0.801417  0.331025           3         3
12 -0.306966  1.299807  0.702793           3         4
13 -0.306966  1.299807  0.702793           3         4
14 -0.306966  1.299807  0.702793           3         4
15 -0.219944  1.088090 -0.077157           3         5
16 -0.219944  1.088090 -0.077157           3         5
17 -0.2199

## 如何输出最优结果？

In [4]:
print result.nlargest(5, columns='1')
print result.nlargest(5, columns='5')
print result.nlargest(5, columns='10')

           1        10         5  cor_period  quantile
6   1.171541  2.171023  0.826320           2         5
7   1.171541  2.171023  0.826320           2         5
8   1.171541  2.171023  0.826320           2         5
24  1.072324  1.035241  0.151935           4         5
25  1.072324  1.035241  0.151935           4         5
           1        10         5  cor_period  quantile
6   1.171541  2.171023  0.826320           2         5
7   1.171541  2.171023  0.826320           2         5
8   1.171541  2.171023  0.826320           2         5
12 -0.306966  1.299807  0.702793           3         4
13 -0.306966  1.299807  0.702793           3         4
          1        10         5  cor_period  quantile
6  1.171541  2.171023  0.826320           2         5
7  1.171541  2.171023  0.826320           2         5
8  1.171541  2.171023  0.826320           2         5
3  0.760644  1.594924  0.356017           2         4
4  0.760644  1.594924  0.356017           2         4
