# 模拟投资策略

## 1. 数据预处理

In [1]:
import pandas as pd
import numpy as np
from keras.models import load_model

# 全局变量定义
model_path = '../models/lstm_model.h5'
cols = [0,1,2,3,4,6,7,9,12,13,14]
feanum=6 # 一共有多少特征
window=7 # 时间窗设置

In [2]:
# 读取测试集数据
df = pd.read_csv("../data/test.csv", usecols=cols)
stocks_code = df["kdcode"].unique()
stock_num = len(stocks_code)

# 根据股票代码划分数据
for i, stock_i in enumerate(stocks_code):
    stock_i_data = df[df['kdcode'].isin([stock_i])]
    exec("stockDf%s = stock_i_data" % i)
    exec("stockDf%s.set_index(['dt'], inplace = True)" % i)
    exec("stockDf%s.index = pd.to_datetime(stockDf%s.index)" % (i, i))

In [3]:
stockDf0

Unnamed: 0_level_0,kdcode,price_mean1,price_mean5,price_mean10,pct_mean3,pct_mean5,close_mean5,adj_close,adj_open,label
dt,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2020-01-02,000001.SZ,0.007344,0.007478,0.007507,0.194480,0.198148,0.120051,0.116052,0.117903,1.0
2020-01-03,000001.SZ,0.007521,0.007545,0.007550,0.208026,0.201087,0.121080,0.118188,0.119959,1.0
2020-01-06,000001.SZ,0.007516,0.007584,0.007582,0.208543,0.195127,0.121717,0.117430,0.120456,0.0
2020-01-07,000001.SZ,0.007502,0.007650,0.007616,0.195698,0.198194,0.122557,0.117981,0.121306,1.0
2020-01-08,000001.SZ,0.007338,0.007677,0.007628,0.166769,0.190307,0.122861,0.114606,0.120385,0.0
...,...,...,...,...,...,...,...,...,...,...
2020-12-25,000001.SZ,0.007945,0.008300,0.008513,0.190352,0.178923,0.133137,0.126231,0.131527,0.0
2020-12-28,000001.SZ,0.008214,0.008337,0.008498,0.214577,0.197138,0.133976,0.131906,0.129796,1.0
2020-12-29,000001.SZ,0.008499,0.008442,0.008535,0.216137,0.211104,0.135848,0.134148,0.135926,1.0
2020-12-30,000001.SZ,0.008430,0.008546,0.008545,0.224525,0.209616,0.137616,0.134358,0.136864,1.0


In [4]:
# 读取并处理出各股票的开盘价、收盘价未归一化的数据
# 读取数据
df2 = pd.read_csv("strategyTestData.csv")
stocks_code2 = df2["kdcode"].unique()
stock_num2 = len(stocks_code2)

# 根据股票代码划分数据
for i, stock_i in enumerate(stocks_code2):
    stock_i_data = df2[df2['kdcode'].isin([stock_i])]
    exec("PriceMsgDf%s = stock_i_data" % i)
    exec("PriceMsgDf%s.set_index(['dt'], inplace = True)" % i)
    exec("PriceMsgDf%s.index = pd.to_datetime(stockDf%s.index)" % (i, i))

In [5]:
PriceMsgDf81

Unnamed: 0_level_0,kdcode,adj_close,adj_open
dt,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2020-01-02,603833.SH,118.470896,118.795807
2020-01-03,603833.SH,118.328747,118.795807
2020-01-06,603833.SH,115.749761,115.749761
2020-01-07,603833.SH,118.389668,117.272784
2020-01-08,603833.SH,118.186598,117.029100
...,...,...,...
2020-12-25,603833.SH,171.287863,176.289910
2020-12-28,603833.SH,180.231351,170.671565
2020-12-29,603833.SH,179.686715,178.726437
2020-12-30,603833.SH,182.653546,180.947977


## 2. 模拟投资策略

In [6]:
model = load_model(model_path) # 加载模型

In [7]:
now_money = 1000000 # 初始总钱
position_num = 5 # 最多持有股票的数量
threshold = 0.55 # 表示预测为涨的阈值

# 判断index股票在day日是否可以交易
# param: index 第几只股票
#        day 哪一天
#        pList 所有股票预测出的概率
def canBuyOrSell(stock_index, day, pList):
    # 判断是否停牌
    day = pd.to_datetime(day)
    res = nowData[nowData.index.isin([day])]
    if len(res) == 0:
        return False
    # 判断pList中index股票p是否等于nan
    for [p,ind] in pList:
        if ind == stock_index and np.isnan(p):
            return False
    return True

# 策略
PriorList = [] # 每天最优持有的股票
tot = len(stockDf0.index[:-window + 1]) # 总天数
allPSetList = [] # 每天股票与概率的字典的列表
for (t,st_date) in enumerate(stockDf0.index[:-window + 1]):
    date_period = pd.date_range(start = st_date, periods = window + 1, freq = "B")
    today = date_period[-1]
    pList = [] # 当天所有股票的概率 [[p,index],...[]]
    pDict = dict()
    for stock_index in range(stock_num):
        # 1. 处理为LSTM输入的数据
        exec("inputData = stockDf%s[stockDf%s.index.isin(date_period[:-1])]" % (stock_index, stock_index))
        inputData = inputData.iloc[:,[1,2,3,4,5,6]]
        inputData = inputData.values
        inputData = inputData.reshape((1,inputData.shape[0],feanum))
        # 2. 模型预测结果
        if inputData.shape[1] == 0: # Todo 包含停牌数据预测的处理
            p = np.nan
        else:
            p = model.predict(inputData)[:,0][0]
        pList.append([p,stock_index])
        pDict[stock_index] = p
    pList.sort(reverse = True) # 从高到低排序
    allPSetList.append(pDict)
    
    # 将canBuyOrSell并且p大于threshold的股票加入到PriList中
    tmpList = []
    for i in range(stock_num):
        exec("nowData = stockDf%s.copy()" % i)
        if canBuyOrSell(pList[i][1], today, pList) and pList[i][0] > threshold:
            tmpList.append(pList[i][1])
    tmpList = tmpList[:min(5, len(tmpList))]
    PriorList.append(tmpList)
    print("tot:", tot, t + 1, tmpList)

tot: 237 1 []
tot: 237 2 [74]
tot: 237 3 [45]
tot: 237 4 [0]
tot: 237 5 [43, 45]
tot: 237 6 []
tot: 237 7 [22]
tot: 237 8 [22, 36, 3, 0, 74]
tot: 237 9 []
tot: 237 10 []
tot: 237 11 []
tot: 237 12 []
tot: 237 13 []
tot: 237 14 []
tot: 237 15 []
tot: 237 16 [74, 3, 56, 16, 22]
tot: 237 17 [54, 58, 3, 41, 35]
tot: 237 18 []
tot: 237 19 []
tot: 237 20 []
tot: 237 21 []
tot: 237 22 []
tot: 237 23 [80, 39, 79]
tot: 237 24 [80]
tot: 237 25 []
tot: 237 26 [80, 39]
tot: 237 27 [40]
tot: 237 28 [15, 39, 46, 8]
tot: 237 29 [80, 39, 9, 50, 8]
tot: 237 30 [80, 50, 74, 3, 39]
tot: 237 31 [13, 20, 45, 39, 8]
tot: 237 32 [50, 51, 15]
tot: 237 33 [13, 51, 14, 39, 80]
tot: 237 34 [48, 75, 21, 8]
tot: 237 35 [50, 14, 13, 48, 8]
tot: 237 36 [51, 13, 50, 8, 20]
tot: 237 37 [48, 23, 31]
tot: 237 38 [48, 13, 14]
tot: 237 39 [40, 68, 14, 23, 20]
tot: 237 40 [14, 40, 45, 39, 80]
tot: 237 41 [80, 14, 48, 31, 1]
tot: 237 42 [40, 77, 23, 17, 3]
tot: 237 43 [77, 14, 45, 3, 39]
tot: 237 44 [3, 49, 40, 32, 7]


  pList.sort(reverse = True)
  pList.sort(reverse = True)


tot: 237 45 [14, 39, 21, 40, 36]
tot: 237 46 [14, 40, 36, 7, 21]
tot: 237 47 [40, 35, 21, 13, 37]
tot: 237 48 [0, 7, 4]
tot: 237 49 [4, 63, 68, 49, 51]
tot: 237 50 [63, 51, 36, 80, 50]
tot: 237 51 [74, 68, 50, 15, 23]
tot: 237 52 [51]
tot: 237 53 [51, 22, 5, 13]
tot: 237 54 [16]
tot: 237 55 []
tot: 237 56 [22]
tot: 237 57 [15]
tot: 237 58 []
tot: 237 59 []
tot: 237 60 [36]
tot: 237 61 [51, 13, 48]
tot: 237 62 [7, 8]
tot: 237 63 []
tot: 237 64 []
tot: 237 65 [23, 7]
tot: 237 66 [7, 3, 39]
tot: 237 67 [7]
tot: 237 68 [23]
tot: 237 69 [39, 77, 8, 15, 50]
tot: 237 70 [75]
tot: 237 71 [39]
tot: 237 72 [39, 38, 56]
tot: 237 73 []
tot: 237 74 []
tot: 237 75 []
tot: 237 76 []
tot: 237 77 []
tot: 237 78 []
tot: 237 79 []
tot: 237 80 [17, 74, 39, 38]
tot: 237 81 []
tot: 237 82 [14, 36]
tot: 237 83 []
tot: 237 84 [39, 31, 18]
tot: 237 85 [51, 14, 31, 23, 36]
tot: 237 86 [80, 39, 13, 0, 56]
tot: 237 87 [48, 51, 14]
tot: 237 88 []
tot: 237 89 [19, 48, 50, 14, 80]
tot: 237 90 [19, 39, 31, 48, 38]
to

In [44]:
# 回测
now_money = 1000000 # 初始总钱
nowStock = [] # 当前持有的股票
buyOrSellNum = 1300 # 买或卖buyOrSellNum股
for (t,st_date) in enumerate(stockDf0.index[:-window]):
    date_period = pd.date_range(start = st_date, periods = window + 1, freq = "B")
    today = date_period[-1]
    pNowStock = [] # 当前持有股票的涨跌概率
    pSet = allPSetList[t] # 当天所有股票涨跌概率
    # 求当前持有股票的涨跌概率
    for stock in nowStock:
        p = pSet[stock]
        pNowStock.append([p, stock])
    # 卖掉所有持有的可以操作的股票
    print(t, "卖前", now_money, nowStock)
    for [p, stock_idx] in pNowStock:
        if canBuyOrSell(stock_idx, today, pNowStock):
            exec("res = PriceMsgDf%s[PriceMsgDf%s.index.isin([today])]" % (stock_idx, stock_idx))
            if len(res) == 0:
                continue
            open_price = res['adj_open'].values[0]
            nowStock.remove(stock_idx)
            now_money += open_price * buyOrSellNum
    print(t, "卖后", now_money, nowStock)
    
    # 尽可能多的买下所有当天最优的持有股票
    DestList = PriorList[t]
    for stock_idx in DestList:
        exec("res = PriceMsgDf%s[PriceMsgDf%s.index.isin([today])]" % (stock_idx, stock_idx))
        if len(res) == 0:
            continue
        open_price = res['adj_open'].values[0]
        if now_money > (open_price * buyOrSellNum):
            now_money -= open_price * buyOrSellNum
            nowStock.append(stock_idx)
    print(t, "买后", now_money, nowStock)
    
# 计算最终受益
print(today)
for stock_idx in nowStock:
    exec("res = PriceMsgDf%s[PriceMsgDf%s.index.isin([today])]" % (stock_idx, stock_idx))
    if len(res) == 0:
            continue
    open_price = res['adj_open'].values[0]
    now_money += open_price * buyOrSellNum

print("final total money:", now_money)

0 卖前 1000000 []
0 卖后 1000000 []
0 买后 1000000 []
1 卖前 1000000 []
1 卖后 1000000 []
1 买后 988271.33123416 [74]
2 卖前 988271.33123416 [74]
2 卖后 999792.27153412 []
2 买后 302092.07014306996 [45]
3 卖前 302092.07014306996 [45]
3 卖后 1004858.3490928699 []
3 买后 1004858.3490928699 []
4 卖前 1004858.3490928699 []
4 卖后 1004858.3490928699 []
4 买后 196953.53318935994 [43, 45]
5 卖前 196953.53318935994 [43, 45]
5 卖后 1012463.6486453699 []
5 买后 1012463.6486453699 []
6 卖前 1012463.6486453699 []
6 卖后 1012463.6486453699 []
6 买后 707189.8114313099 [22]
7 卖前 707189.8114313099 [22]
7 卖后 1012028.9404400098 []
7 买后 419416.88917682995 [22, 36, 74]
8 卖前 419416.88917682995 [22, 36, 74]
8 卖后 1006036.4828873699 []
8 买后 1006036.4828873699 []
9 卖前 1006036.4828873699 []
9 卖后 1006036.4828873699 []
9 买后 1006036.4828873699 []
10 卖前 1006036.4828873699 []
10 卖后 1006036.4828873699 []
10 买后 1006036.4828873699 []
11 卖前 1006036.4828873699 []
11 卖后 1006036.4828873699 []
11 买后 1006036.4828873699 []
12 卖前 1006036.4828873699 []
12 卖后 1006036.48

138 卖后 1295746.4865984211 []
138 买后 880022.446696341 [51]
139 卖前 880022.446696341 [51]
139 卖后 1307013.888746241 []
139 买后 86692.7433389813 [38, 71, 68]
140 卖前 86692.7433389813 [38, 71, 68]
140 卖后 1280966.4666465814 []
140 买后 136143.63310258137 [38]
141 卖前 136143.63310258137 [38]
141 卖后 1237020.0417460212 []
141 买后 123982.84860386129 [22, 45]
142 卖前 123982.84860386129 [22, 45]
142 卖后 1232336.6514366313 []
142 买后 150294.42489343137 [38]
143 卖前 150294.42489343137 [38]
143 卖后 1239907.2540455514 []
143 买后 1239907.2540455514 []
144 卖前 1239907.2540455514 []
144 卖后 1239907.2540455514 []
144 买后 1239907.2540455514 []
145 卖前 1239907.2540455514 []
145 卖后 1239907.2540455514 []
145 买后 168943.9581495514 [38]
146 卖前 168943.9581495514 [38]
146 卖后 1162354.7395151514 []
146 买后 461248.9518380514 [22, 43, 19]
147 卖前 461248.9518380514 [22, 43, 19]
147 卖后 1176231.4154105713 []
147 买后 160847.42159469135 [38]
148 卖前 160847.42159469135 [38]
148 卖后 1158505.1263750517 []
148 买后 233389.37869685178 [23]
149 卖前 2333

final total money: 1191629.0830539805


## 单元测试

In [None]:
# # 单元测试1 测试canBuyOrSell函数

# ppList = [[np.nan, 22], [0.59513134, 56], [0.5911934, 3], [0.56857383, 74], [0.5606358, 17], [0.5588775, 63], [0.558492, 19], [0.557015, 0], [0.5553074, 9], [0.55123156, 12], [0.54988277, 16], [0.5480421, 2], [0.54735094, 70], [0.5437275, 49], [0.54151696, 47], [0.54024804, 45], [0.54002637, 39], [0.53971076, 30], [0.5383594, 31], [0.5382879, 36], [0.5356826, 21], [0.5341861, 81], [0.53320944, 40], [0.53318095, 37], [0.53308034, 42], [0.52598095, 55], [0.52560496, 10], [0.52555954, 69], [0.5246366, 33], [0.5243099, 52], [0.5231771, 41], [0.5219507, 66], [0.5217557, 35], [0.5212795, 38], [0.5198239, 25], [0.5172703, 53], [0.514381, 32], [0.51411617, 61], [0.51250756, 6], [0.5116291, 24], [0.50913256, 11], [0.5075126, 54], [0.5067256, 71], [0.50646174, 48], [0.5046982, 7], [0.501216, 34], [0.5010961, 68], [0.5009133, 57], [0.49952868, 60], [0.49559373, 64], [0.4954788, 65], [0.49384275, 72], [0.4913066, 4], [0.4911093, 58], [0.48906785, 18], [0.48707077, 23], [0.48694274, 44], [0.48640633, 8], [0.48550946, 26], [0.4818525, 75], [0.48141715, 20], [0.47996283, 14], [0.47836575, 5], [0.47822124, 1], [0.47653505, 28], [0.4753328, 46], [0.47372493, 43], [0.47319898, 15], [0.47246543, 62], [0.4718764, 76], [0.47153977, 73], [0.47098902, 78], [0.4689083, 80], [0.46864882, 51], [0.46791682, 59], [0.46549064, 67], [0.4626018, 27], [0.46060893, 13], [0.46038687, 79], [0.4581696, 50], [0.4568532, 29], [0.45347258, 77]]

# # 判断index股票在day日是否可以交易
# # param: index 第几只股票
# #        day 哪一天
# #        pList 所有股票预测出的概率
# def canBuyOrSell(stock_index, day, pList):
#     # 判断是否停牌
#     day = pd.to_datetime(day)
#     res = nowData[nowData.index.isin([day])]
#     if len(res) == 0:
#         return False
#     # 判断pList中index股票p是否等于nan
#     for [p,ind] in pList:
#         if ind == stock_index and np.isnan(p):
#             return False
#     return True

# input_day = pd.to_datetime('2020-01-02')
# exec("nowData = stockDf%s.copy()" % 21)
# canBuyOrSell(21, input_day, ppList)