In [74]:
from hmmlearn.hmm import GaussianHMM
import numpy as np
from matplotlib import cm, pyplot as plt
import matplotlib.dates as dates
import pandas as pd
from datetime import timedelta,date

import warnings
warnings.filterwarnings("ignore")

def GHMM_Generate(Sec):
    """Fit a 4-state Gaussian HMM on daily data of ``Sec`` and label its states.

    Daily close, volume and money between 2006-1-1 and 2014-12-30 are fetched
    with ``get_price`` and turned into seven features per day: the 5-row and
    20-row log changes of close, volume and money, plus the 20-row rolling
    standard deviation of the close's percentage change.  A full-covariance
    ``GaussianHMM`` is fitted on these features and used to decode a hidden
    state for every training day.

    Four fixed calendar windows are then inspected; for each one the state
    that occurs most often inside the window is taken as the label for that
    regime.  Nothing enforces that the four labels are distinct.

    Args:
        Sec: security code, passed to ``get_price`` unchanged.

    Returns:
        Tuple ``(model, state_BigLoss, state_BigBonus, state_MinorLoss,
        state_MinorBonus)``: the fitted model followed by the four state ids.

    Raises:
        IndexError: if one of the calendar windows contains no decoded day.
    """
    # Imported locally: the file-level import only brings in ``timedelta`` and
    # ``date``, so the bare name ``datetime`` used below was otherwise unbound.
    import datetime

    beginDate = '2006-1-1'
    endDate = '2014-12-30'
    n_state = 4

    data_raw = get_price(Sec, start_date=beginDate, end_date=endDate, frequency='daily',
                         fields=['close', 'volume', 'money'], fq="pre")

    def log_change(field, lag):
        # log(x[t]) - log(x[t - lag]) for every row that has a value `lag` rows back.
        values = np.array(data_raw[field])
        return np.log(values[lag:]) - np.log(values[:-lag])

    logRet_5 = log_change('close', 5)
    logRet_20 = log_change('close', 20)
    logVol_5 = log_change('volume', 5)
    logVol_20 = log_change('volume', 20)
    logMoney_5 = log_change('money', 5)
    logMoney_20 = log_change('money', 20)

    std = pd.rolling_std(data_raw['close'].pct_change(), 20)

    # Keep everything except the first 50 rows.  The features have different
    # lengths, so each one is aligned on its tail; the longest lookback above
    # is about 20 rows, hence every kept row has all seven features.
    data_len = len(data_raw['close']) - 50

    Train_Data = np.column_stack([logRet_5[-data_len:],
                                  logRet_20[-data_len:],
                                  logVol_5[-data_len:],
                                  logVol_20[-data_len:],
                                  logMoney_5[-data_len:],
                                  logMoney_20[-data_len:],
                                  std[-data_len:]])

    Date = pd.to_datetime(data_raw.index[-data_len:])

    model = GaussianHMM(n_components=n_state, covariance_type="full", n_iter=2000).fit(Train_Data)

    hidden_states = model.predict(Train_Data)

    # One row per training day: its date and the decoded state id.
    state_pd = pd.concat([pd.DataFrame(Date[-data_len:]),
                          pd.DataFrame(list(hidden_states[-data_len:]))], axis=1)
    state_pd.columns = ['date', 'state']

    def dominant_state(window_start, window_end):
        # Most frequent state strictly between the two dates (both bounds exclusive).
        lower = datetime.datetime.strptime(window_start, "%Y-%m-%d")
        upper = datetime.datetime.strptime(window_end, "%Y-%m-%d")
        in_window = (state_pd['date'] > lower) & (state_pd['date'] < upper)
        state_choose = state_pd[in_window]['state'].value_counts()
        return state_choose[state_choose == state_choose.max()].index[0]

    state_BigLoss = dominant_state("2008-1-1", "2008-10-31")
    state_BigBonus = dominant_state("2007-1-1", "2007-10-1")
    state_MinorLoss = dominant_state("2011-1-1", "2013-1-1")
    state_MinorBonus = dominant_state("2014-6-1", "2014-12-31")

    return model, state_BigLoss, state_BigBonus, state_MinorLoss, state_MinorBonus


def single_state_estimation(Sec,model_read,enddate = "2018-12-20"):
    """Decode the most recent hidden state of ``Sec`` and return its label.

    Args:
        Sec: security code, passed to ``get_price`` unchanged.
        model_read: indexable result of ``GHMM_Generate``: position 0 is the
            fitted HMM, positions 1-4 are the state ids for big loss,
            big bonus, minor loss and minor bonus.
        enddate: last date of the 100-row daily history that is fetched.

    Returns:
        ``'bl'``, ``'bb'``, ``'ml'`` or ``'mb'`` when the last decoded state
        equals the corresponding id (checked in that order), otherwise ``0``.
    """
    HMM_model = model_read[0]
    # Order matters: when two labels share a state id the earlier entry wins.
    labelled_states = (
        (model_read[1], 'bl'),
        (model_read[2], 'bb'),
        (model_read[3], 'ml'),
        (model_read[4], 'mb'),
    )

    # Build the same seven features the model was fitted on.
    data_raw = get_price(Sec, count=100, end_date=enddate, frequency='daily',
                         fields=['close', 'volume', 'money'], fq="pre")

    def log_change(field, lag):
        # log(x[t]) - log(x[t - lag]) for every row that has a value `lag` rows back.
        series = data_raw[field]
        return np.log(np.array(series[lag:])) - np.log(np.array(series[:-lag]))

    volatility = pd.rolling_std(data_raw['close'].pct_change(), 20)

    # Only the last (rows - 50) observations are decoded; every feature is
    # aligned on its tail because the features have different lengths.
    keep = len(data_raw['close']) - 50

    features = [
        log_change('close', 5),
        log_change('close', 20),
        log_change('volume', 5),
        log_change('volume', 20),
        log_change('money', 5),
        log_change('money', 20),
        volatility,
    ]
    observations = np.column_stack([feature[-keep:] for feature in features])

    # The state of the last observation is the current state.
    current_states = HMM_model.predict(observations)[-1]

    for state_id, label in labelled_states:
        if current_states == state_id:
            return label
    return 0
    


其中需要调整

1、操作时间实际是在T时刻

2、判断的依据是T时刻往前追溯的状态

3、市场收益率应该是T时刻往后5天的收益率

4、调整后的策略收益率应该是往后5天的收益率乘以仓位系数

In [75]:
# Build the state sequence: fit the HMM once, then decode one state per trading day.
model_HMM = GHMM_Generate('000300.XSHG')

state_BigLoss, state_BigBonus, state_MinorLoss, state_MinorBonus = model_HMM[1:5]

operate_date_list = get_price('000300.XSHG', start_date='2015-1-1', end_date='2018-8-31',
                              frequency='daily', fields=['close', 'volume', 'money'],
                              fq="pre")['close'].index

State_List = []

for operate_date in operate_date_list:
    # The history handed to the model ends on the operating date itself.
    current_state = single_state_estimation('000300.XSHG', model_HMM, enddate=operate_date)
    State_List.append([operate_date, current_state])

# Convert to a pandas table indexed by date.
State_pd = pd.DataFrame(State_List)
State_pd.columns = ['date', 'state']
State_pd = State_pd.set_index("date")


# Build the price series over the same date span.
Date = State_pd.index

s_date, e_date = Date[0], Date[-1]
market_close = get_price('000300.XSHG', start_date=s_date, end_date=e_date,
                         frequency='daily', fields=['close'], fq="pre")['close']

market_close_pre = market_close.shift(1)   # previous row's close
market_close_diff = market_close.diff()    # close minus previous row's close

data_total_pd = pd.concat([market_close, market_close_pre, market_close_diff], axis=1)
data_total_pd.columns = ['price_raw', 'price_pre', 'price_diff']

# The state column comes from the states decoded by the Markov model above.
data_total_pd['state'] = State_pd

# Position-size coefficient; starts at 1 and is reassigned by every search iteration.
alpha = np.ones(len(market_close_diff))
data_total_pd['alpha'] = alpha

# Score; starts at 0 and is reassigned by every search iteration.
score = np.zeros(len(market_close_diff))
data_total_pd['score'] = score

# Daily log return of the market: log(close / previous close).
data_total_pd['market_yield'] = np.log(data_total_pd['price_raw'] / data_total_pd['price_pre'])

# Return after scaling by the position size; filled in by the search.
data_total_pd['adjust_yield'] = np.zeros(len(market_close_diff))

In [76]:
# Bare expression: shows the assembled table as the notebook cell's output.
data_total_pd

Unnamed: 0,price_raw,price_pre,price_diff,state,alpha,score,market_yield,adjust_yield
2015-01-05,3641.541,,,bb,1,0,,0
2015-01-06,3641.059,3641.541,-0.482,bb,1,0,-0.000132,0
2015-01-07,3643.790,3641.059,2.731,bb,1,0,0.000750,0
2015-01-08,3559.259,3643.790,-84.531,bb,1,0,-0.023472,0
2015-01-09,3546.723,3559.259,-12.536,bb,1,0,-0.003528,0
2015-01-12,3513.576,3546.723,-33.147,bb,1,0,-0.009390,0
2015-01-13,3514.040,3513.576,0.464,bb,1,0,0.000132,0
2015-01-14,3502.423,3514.040,-11.617,bb,1,0,-0.003311,0
2015-01-15,3604.121,3502.423,101.698,bb,1,0,0.028623,0
2015-01-16,3635.146,3604.121,31.025,bb,1,0,0.008571,0


In [77]:
# Save the assembled table as CSV text under 'data_total.csv'.
# NOTE(review): write_file is not defined in this file; presumably it is
# supplied by the hosting research platform -- confirm.
write_file('data_total.csv', data_total_pd.to_csv(), append=False)


In [66]:
def cal_score(data_total_pd,n,score_BigBonus,score_BigLoss,score_MinorBonus,score_MinorLoss):
    """Fill the ``score`` column with a trailing-window sum of per-state scores.

    Each row is worth ``score_BigBonus`` / ``score_BigLoss`` /
    ``score_MinorBonus`` / ``score_MinorLoss`` when its ``state`` is
    ``"bb"`` / ``"bl"`` / ``"mb"`` / ``"ml"``, and 0 for anything else.
    The score of a row dated ``d`` is the sum of those values over all rows
    whose index lies in ``(d - n days, d]``.  The window is measured in
    calendar days, not in rows, so ``n = 0`` yields an all-zero column.

    The sums are computed from one prefix sum plus two binary searches per
    row instead of re-filtering and iterating the frame for every date.
    For integer weights the result is identical to the row-by-row loop;
    float weights may differ by rounding.

    Args:
        data_total_pd: frame with a date-like index and a ``state`` column.
            It is modified in place.
        n: window length in calendar days.
        score_BigBonus: value of a ``"bb"`` row.
        score_BigLoss: value of a ``"bl"`` row.
        score_MinorBonus: value of a ``"mb"`` row.
        score_MinorLoss: value of a ``"ml"`` row.

    Returns:
        The same ``data_total_pd`` object with ``score`` overwritten.
    """
    state_points = {
        "bb": score_BigBonus,
        "bl": score_BigLoss,
        "mb": score_MinorBonus,
        "ml": score_MinorLoss,
    }
    row_points = np.array([state_points.get(state, 0) for state in data_total_pd['state']])

    dates = data_total_pd.index
    date_values = dates.values

    # Work on a date-sorted view so binary search is valid even when the
    # frame itself is not sorted; the stable sort keeps duplicates in order.
    order = np.argsort(date_values, kind='mergesort')
    sorted_dates = date_values[order]

    # prefix[k] is the total of the first k rows in date order.
    prefix = np.concatenate(([0], np.cumsum(row_points[order])))

    # Rows with index <= d end at `upper`; rows with index <= d - n days end
    # at `lower`.  Their difference is the half-open window (d - n days, d].
    upper = np.searchsorted(sorted_dates, date_values, side='right')
    lower = np.searchsorted(sorted_dates, (dates - timedelta(days=n)).values, side='right')
    # A negative n would put `lower` past `upper`; that window is empty.
    lower = np.minimum(lower, upper)

    data_total_pd['score'] = prefix[upper] - prefix[lower]
    return data_total_pd


In [None]:
# Normalising constant for the position size: alpha = score / total_score.
# NOTE(review): the original comment described a 100-point cap while the value
# is 10 -- confirm which one is intended.
total_score = 10

# Grid search over the four state scores and the window length.
# Every accepted combination is stored as
# [n_windows, score_BigBonus, score_BigLoss, score_MinorBonus, score_MinorLoss,
#  win_rate, total adjusted yield].
para_list = []

# The benchmark does not depend on the searched parameters, so total it once.
# Series.sum() skips NaN; the first row of market_yield is NaN (it has no
# previous close), and the built-in sum() would turn the whole total into NaN,
# which makes the comparison below False for every combination.
market_total_yield = data_total_pd['market_yield'].sum()

for score_BigLoss in range(-10,3):
    for score_BigBonus in range(-3,10):
        for score_MinorLoss in range(-10,3):
            for score_MinorBonus in range(-3,10):
                for n_windows in range(0,10):

                    # 'score', 'alpha' and 'adjust_yield' are fully overwritten
                    # below, so no separate reset is needed per iteration.
                    data_total_pd = cal_score(data_total_pd,n_windows,score_BigBonus,score_BigLoss,score_MinorBonus,score_MinorLoss)

                    # Position size: score / total_score when the score is
                    # positive, otherwise stay out.  float() forces true
                    # division regardless of the Python version.
                    data_total_pd['alpha'] = np.where(data_total_pd['score'] > 0,
                                                      data_total_pd['score'] / float(total_score),
                                                      0)

                    # Log return of holding `alpha` of the market for the day.
                    data_total_pd['adjust_yield'] = \
                        np.log(1 + data_total_pd['alpha'] * data_total_pd['price_diff'] / data_total_pd['price_pre'])

                    # Win rate: share of days on which the adjusted return
                    # beats the market return (NaN rows count as not beating).
                    beats_market = data_total_pd['adjust_yield'] > data_total_pd['market_yield']
                    win_rate = float(beats_market.sum()) / float(len(data_total_pd['adjust_yield']))

                    # NaN-skipping total, for the same reason as the benchmark.
                    adjust_total_yield = data_total_pd['adjust_yield'].sum()

                    if adjust_total_yield > market_total_yield and win_rate > 0.6:
                        para_list.append([n_windows,score_BigBonus,score_BigLoss,score_MinorBonus,score_MinorLoss,win_rate,adjust_total_yield])
                        # Report the hit; a single parenthesised argument
                        # prints the same under Python 2 and Python 3.
                        print("找到参数组：n为%d,score_BigBonus为%d,score_BigLoss为%d,score_MinorBonus为%d,score_MinorLoss为%d，胜率为%f，收益率为%f"
                              % (n_windows,score_BigBonus,score_BigLoss,score_MinorBonus,score_MinorLoss,win_rate,adjust_total_yield))



In [None]:
# Collect the accepted parameter sets into a table and save it as CSV text.
# The column names go to the constructor so that an empty para_list yields an
# empty table with the right header; assigning seven names to the column-less
# frame built from an empty list raises a length-mismatch error instead.
para_pd = pd.DataFrame(para_list,
                       columns=["n_windows",'score_BigBonus','score_BigLoss','score_MinorBonus','score_MinorLoss','胜率','收益率'])

write_file('best_para_HMM.csv', para_pd.to_csv(), append=False)


In [None]:
# Bare expression: shows the table of accepted parameter sets as the cell's output.
para_pd