In [None]:
from jqdata import *
import numpy as np
import pandas as pd
import empyrical as ep
from dateutil.parser import parse
import itertools
from datetime import datetime,timedelta
from dateutil.relativedelta import relativedelta
import calendar

股票池过滤

In [None]:
def get_stockpool(symbol, dt):
    """Return the filtered constituents of a stock universe as of ``dt``.

    Args:
        symbol: ``'CSI300'`` for the CSI 300 index (``000300.XSHG``),
            ``'A'`` for the union of ``000001.XSHG`` and ``399107.XSHE``
            constituents, or any index code accepted by
            ``get_index_stocks``.
        dt: As-of date forwarded to the data API and to every filter.

    Returns:
        list: Security codes that survive, in order, ``del_st_stock``,
        ``del_iponday`` and ``del_pause`` with their default arguments.
    """
    if symbol == 'CSI300':
        stockList = get_index_stocks('000300.XSHG', date=dt)
    elif symbol == 'A':
        # 'A' is not an index code, so it must not fall through to the generic
        # branch. The two-index union is the combination the author had left
        # commented out in this function.
        stockList = (get_index_stocks('000001.XSHG', date=dt)
                     + get_index_stocks('399107.XSHE', date=dt))
    else:
        stockList = get_index_stocks(symbol, date=dt)

    stockList = del_st_stock(stockList, dt)  # drop ST-flagged stocks
    stockList = del_iponday(stockList, dt)   # drop stocks listed 60 days or less
    stockList = del_pause(stockList, dt)     # drop stocks paused on all of the last 10 bars
    return stockList

def del_st_stock(securities, dt):
    """Return the codes in ``securities`` that are not flagged ST at ``dt``.

    Fetches one row of the ``is_st`` extra field ending at ``dt`` and keeps
    only the columns whose value equals ``False``.
    """
    st_flags = get_extras('is_st', securities, end_date=dt, df=True, count=1)

    # Blank out every cell that is not exactly False, then discard any
    # column left with a missing value.
    only_false = st_flags.where(st_flags == False)
    survivors = only_false.dropna(axis=1)

    return survivors.columns.tolist()
    
def del_iponday(securities, dt, N = 60):
    """Return the securities listed for strictly more than ``N`` calendar days.

    Args:
        securities: Iterable of security codes.
        dt: As-of date. May be a ``datetime.date``, ``datetime.datetime``,
            ``pandas.Timestamp`` or a date string; it is reduced to a plain
            date before the subtraction.
        N: Minimum number of calendar days since listing (exclusive).

    Returns:
        list: Codes whose ``start_date`` is more than ``N`` days before
        ``dt``. Codes for which ``get_security_info`` returns ``None`` are
        dropped. Duplicated codes appear once, in first-seen order.
    """
    # ``datetime.datetime - datetime.date`` raises TypeError, so normalise
    # the as-of value to a date before comparing it with ``start_date``.
    as_of = pd.Timestamp(dt).date()

    start_dates = {}
    for code in securities:
        info = get_security_info(code, date=dt)  # one lookup per code
        if info is not None:
            start_dates[code] = info.start_date

    return [code for code, listed_on in start_dates.items()
            if (as_of - listed_on).days > N]

def del_pause(securities, dt, N = 10):
    """Drop securities that were paused on every one of the last ``N`` bars.

    The ``paused`` field is summed per code over the ``N`` bars ending at
    ``dt``; a code is kept when that sum is below ``N``.
    """
    bars = get_price(securities, end_date=dt, count=N, fields='paused', panel=False)
    days_paused = bars.groupby('code')['paused'].sum()

    traded_at_least_once = days_paused < N
    return days_paused.loc[traded_at_least_once].index.tolist()

def high_limit_filter(context, stocklist):
    """Remove stocks whose opening price is at or above the limit-up price.

    ``context`` is accepted for signature compatibility and is not read.
    """
    snapshot = get_current_data()
    kept = []
    for code in stocklist:
        quote = snapshot[code]
        opened_at_limit = quote.day_open >= quote.high_limit
        if not opened_at_limit:
            kept.append(code)
    return kept

构造Q因子函数

In [None]:
def _smart_money_q(bars, k):
    """Return VWAP(smart-money minutes) / VWAP(all minutes) for one stock."""
    ranked = bars.sort_values(by='S_factor', ascending=False)
    # Missing volume is treated as zero so the running total stays monotonic.
    volume = ranked['volume'].fillna(0)
    total_volume = volume.sum()
    if not total_volume > 0:
        return np.nan

    # Smart money: the highest-S minutes whose cumulative volume stays within
    # the first k share of total volume.
    is_smart = volume.cumsum() <= total_volume * k
    smart_volume = volume[is_smart].sum()
    if not smart_volume > 0:
        return np.nan

    turnover = ranked['close'] * volume
    vwap_smart = turnover[is_smart].sum() / smart_volume
    vwap_all = turnover.sum() / total_volume
    return vwap_smart / vwap_all


def get_Q(stocklist, dt, N=10, k=0.2):
    """Compute the smart-money Q factor from minute bars ending at ``dt``.

    For every minute bar, ``S = 100000 * |close / open - 1| / sqrt(volume)``.
    Per stock, bars are sorted by S in descending order; the leading bars
    whose cumulative volume does not exceed ``k`` of the total volume are
    the "smart money". Q is the volume-weighted average close of those bars
    divided by the volume-weighted average close of all bars.

    Args:
        stocklist: Security codes to evaluate.
        dt: End of the minute-bar window.
        N: Window length in trading days (``240 * N`` minute bars).
        k: Share of total volume attributed to smart money.

    Returns:
        pandas.DataFrame: One float64 column ``'Q_factor'`` indexed by
        ``stocklist``. The value is NaN for a code with no bars, zero total
        volume, or no bar inside the smart-money share.
    """
    if len(stocklist) == 0:
        return pd.DataFrame(columns=['Q_factor'], index=stocklist)

    data = get_price(stocklist, end_date=dt, count=240 * N,
                     frequency='1m', fields=['open', 'close', 'volume'], panel=False)
    data['S_factor'] = (100000 * (data['close'] / data['open'] - 1).abs()
                        / (data['volume'] ** 0.5))

    q_by_code = {code: _smart_money_q(bars, k)
                 for code, bars in data.groupby('code')}

    # Reindexing keeps the caller's order and yields NaN, not KeyError, for
    # codes that came back without any bars.
    q_values = pd.Series(q_by_code, dtype='float64').reindex(stocklist)
    return pd.DataFrame({'Q_factor': q_values})

对Q因子的选股能力进行判断



In [None]:
def next_lastday(dt):
    """Return the last calendar day of the month following ``dt``'s month.

    The result has the same type as ``dt`` and keeps any time-of-day
    component. It is computed from the calendar, so it is correct for any
    day of the month, not only when ``dt`` is itself a month-end.

    Args:
        dt: A ``datetime.date``, ``datetime.datetime`` or ``pandas.Timestamp``.

    Returns:
        ``dt`` moved to the last day of the next month.
    """
    if dt.month == 12:
        year, month = dt.year + 1, 1
    else:
        year, month = dt.year, dt.month + 1

    last_day = calendar.monthrange(year, month)[1]
    return dt.replace(year=year, month=month, day=last_day)

In [None]:
def get_rankIC(start_date, end_date):
    """Compute, print a summary of, and plot the monthly RankIC of the Q factor.

    For every month-end between ``start_date`` and ``end_date`` the Q factor
    of the ``'A'`` stock pool is rank-correlated (Spearman) with each
    stock's open-to-close return over the following monthly bar.

    Args:
        start_date: Start of the month-end range (passed to ``pd.date_range``).
        end_date: End of the month-end range.

    Returns:
        pandas.DataFrame: Columns ``'month'`` (``'%Y-%m'``) and ``'RankIC'``.

    Side effects:
        Prints how many months have RankIC above 0.05, below -0.05, or in
        between, and draws a bar chart of the series.
    """
    date_list = pd.date_range(start=start_date, end=end_date, freq='M')
    RankIC_dict = {'month': [], 'RankIC': []}

    for dt in date_list:
        # Last trading day on or before the calendar month-end.
        trade_day = get_trade_days(end_date=dt, count=1)[0]
        stocklist = get_stockpool('A', trade_day)

        factor = get_Q(stocklist, dt)['Q_factor'].astype('float64')

        next_month_data = get_bars(stocklist, count=1, unit='1M',
                                   fields=['date', 'open', 'close'],
                                   include_now=True, end_dt=next_lastday(dt), df=True)
        next_month_data['pct_change'] = next_month_data['close'] / next_month_data['open'] - 1
        next_month_data['date'] = dt
        # The frame comes back with a two-level index; drop the inner level
        # and pivot so the return series is indexed by security code.
        df_reset = next_month_data.reset_index(level=1, drop=True).reset_index()
        next_month_return = df_reset.pivot(index='date', columns='index', values='pct_change')
        next_month_return = next_month_return.iloc[0].astype('float64')

        # RankIC is a rank correlation, so use Spearman rather than Pearson.
        corr = factor.corr(next_month_return, method='spearman')
        RankIC_dict['month'].append(dt.strftime('%Y-%m'))
        RankIC_dict['RankIC'].append(corr)

    RankIC_df = pd.DataFrame(RankIC_dict)

    IC_pst = RankIC_df[RankIC_df['RankIC'] > 0.05]
    IC_ngt = RankIC_df[RankIC_df['RankIC'] < -0.05]
    IC_uncorr = RankIC_df[(RankIC_df['RankIC'] >= -0.05) & (RankIC_df['RankIC'] <= 0.05)]
    print('显著为负: ',IC_ngt.shape[0])
    print('显著为正: ',IC_pst.shape[0])
    print('不显著:',IC_uncorr.shape[0])

    # NOTE(review): `plt` is not imported in the visible cells; confirm that
    # matplotlib.pyplot is available in the notebook namespace.
    fig = plt.figure(figsize=(14, 5))
    ax1 = fig.add_subplot(111)
    ax1.bar(IC_uncorr.index, IC_uncorr['RankIC'], align='center', width=0.5, color='pink')
    ax1.bar(IC_pst.index, IC_pst['RankIC'], align='center', width=0.5, color='r')
    ax1.bar(IC_ngt.index, IC_ngt['RankIC'], align='center', width=0.5, color='b')
    ax1.set_ylabel(u"Rank IC")
    ax1.set_title(u"因子 RankIC 的月度序列")
    ax1.set_xlabel(u"时间" )
    ax1.grid(axis='y')
    plt.xticks(RankIC_df.index, RankIC_df['month'], rotation=90)

    return RankIC_df

输出RankIC图

In [None]:
# Compute the monthly RankIC table for month-ends between 2013-04-30 and
# 2016-05-31, then display it as the cell output.
M = get_rankIC('2013-04-30','2016-05-31')
M

构建多空策略


In [None]:
# Strategy: sort stocks by Q factor from low to high and split them into five
# groups; go long the first group and short the fifth.
# The platform does not support shorting, so only the first group is bought,
# with the money split equally across its stocks.
def initialize(context):
    """Configure the backtest and schedule the monthly rebalance."""
    # Trade at real prices and guard against using future data.
    for option_name in ('use_real_price', 'avoid_future_data'):
        set_option(option_name, True)
    set_benchmark('000300.XSHG')

    # No slippage or order-cost override is applied here; the calls below
    # are disabled.
    #set_slippage(FixedSlippage(0))
    #set_order_cost(OrderCost(open_tax=0,close_tax=0.001,open_commission=0.0003,
                             #close_commission=0.0003,close_today_commission=0,
        #min_commission=5),type='stock')

    run_monthly(
        market_open,
        monthday=1,
        time='9:30',
        reference_security='000300.XSHG',
    )
    
def market_open(context):
    """Monthly rebalance: hold the lowest-Q fifth of the CSI 300 pool.

    Steps:
        1. Build the filtered CSI 300 pool and rank it by Q factor ascending.
        2. Take the first ``len(pool) // 5`` stocks as the target group and
           drop those that opened at or above their limit-up price.
        3. Close every held position that is not in the target group.
        4. Buy each target stock not already held, spending
           ``available_cash / group_size`` per stock.

    Stores the group size in ``g.size`` and the target list in ``g.list``,
    and prints the result of every order plus the final positions.
    """
    stocklist = get_stockpool('CSI300', context.current_dt)
    Q_df = get_Q(stocklist, dt=context.current_dt).sort_values(by='Q_factor', ascending=True)
    g.size = len(Q_df) // 5
    # iloc's end bound is exclusive, so [:g.size] is exactly one full group.
    g.list = Q_df.iloc[:g.size].index.tolist()
    g.list = high_limit_filter(context, g.list)

    # Close first. Iterate over a snapshot of the held codes because placing
    # orders may modify the positions mapping.
    for stock in list(context.portfolio.positions):
        if stock not in g.list:
            order1 = order_target(stock, 0)
            if order1 is not None:
                print('卖出股票：%s 下单数量：%s 成交数量：%s'%(stock, order1.amount, order1.filled))
            else:
                print('卖出股票%s失败!' % (stock))

    cash = context.portfolio.available_cash
    print('可用现金为%s' %cash)
    # g.size is 0 when the pool has fewer than five stocks; g.list is then
    # empty, so the per-stock value is never used.
    value = cash / g.size if g.size else 0

    # Then open.
    for stock in g.list:
        if stock not in context.portfolio.positions:
            order2 = order_value(stock, value)
            if order2 is not None:
                print('买入股票：%s 下单数量：%s 成交数量：%s'%(stock, order2.amount, order2.filled))
            else:
                print('买入股票%s失败!' % (stock))
    print('----------------------------')
    print('当前仓位情况为')
    print(context.portfolio.positions)
            
        

    
    
    
    
    