In [10]:
from __future__ import division      #除数可以显示为float

from six import StringIO    #使用聚宽readfile函数
import numpy as np
import pandas as pd

import time                 #使用time stamp
import datetime             

import matplotlib.pyplot as plt

import math

import talib



In [11]:
# 特征向量
Col_Name = ['EMA_5', 'EMA_10', 'EMA_gap', 'KDJ_K', 'KDJ_D', 'KDJ_J', 'RSI', 'MACD_dif', 'MACD_dea', 'MACD_macd',
                'MOM_12', 'MOM_25', 'MOM_gap', 'Long_Short_Rate_OBV', 'Volume']

# 生成总的列表
Col_Total = []
for col in Col_Name:
    Col_Total.append(col)
    
    # 生成前两天的值
    for day_count_i in range(1, 3):
        Col_Total.append(str(str(col) + "_pre" + str(day_count_i)))

        
Data_Total_pd = pd.read_csv("price_list.csv")

stock_list = set(list(Data_Total_pd['code']))

In [12]:
# 计算相应的值
for stock_name in stock_list:
    # 作为样本不需要生成价格、胜败关系，直接从后续MACD等指标开始

    ################################   3、生成MACD信息   ###################################################
    Data_Total_pd["MACD_dif"] = None
    Data_Total_pd["MACD_dea"] = None
    Data_Total_pd["MACD_macd"] = None


    for stock_name in stock_list:
        dif = []
        dea = []
        macd = []

        macd_price = Data_Total_pd.loc[Data_Total_pd['code'] == stock_name,"close"]

        dif, dea, macd = talib.MACD(np.array(macd_price), fastperiod=5, slowperiod=10, signalperiod=5)

        Data_Total_pd.loc[Data_Total_pd['code'] == stock_name,"MACD_dif"] = np.array(dif)
        Data_Total_pd.loc[Data_Total_pd['code'] == stock_name,"MACD_dea"] = np.array(dea)
        Data_Total_pd.loc[Data_Total_pd['code'] == stock_name,"MACD_macd"] = np.array(macd)
    

    ################################   4、生成均线信息   ###################################################
    Data_Total_pd["EMA_5"] = None
    Data_Total_pd["EMA_10"] = None
    Data_Total_pd["EMA_gap"] = None

    for stock_name in stock_list:
        ema_5 = []
        ema_10 = []
        ema_gap = []


        ema_price = Data_Total_pd.loc[Data_Total_pd['code'] == stock_name,"close"]
        ema_5 = talib.EMA(np.array(ema_price), timeperiod=5)
        ema_10 = talib.EMA(np.array(ema_price), timeperiod=10)
        ema_gap = talib.EMA(np.array(ema_price), timeperiod=5) - talib.EMA(np.array(ema_price), timeperiod=10)


        Data_Total_pd.loc[Data_Total_pd['code'] == stock_name,"EMA_5"] = np.array(ema_5)
        Data_Total_pd.loc[Data_Total_pd['code'] == stock_name,"EMA_10"] = np.array(ema_10)
        Data_Total_pd.loc[Data_Total_pd['code'] == stock_name,"EMA_gap"] = np.array(ema_gap)


    ################################   5、生成KDJ指标   ###################################################
    Data_Total_pd["KDJ_K"] = None
    Data_Total_pd["KDJ_D"] = None    
    Data_Total_pd["KDJ_J"] = None

    for stock_name in set(stock_list):
        K_values = []
        D_values = []
        J_values = []

        kdj_price = Data_Total_pd.loc[Data_Total_pd['code'] == stock_name,['high','low','close']]
        kdj_price = kdj_price.fillna(0)

            #                  (Today's Close - LowestLow)
            # FASTK(Kperiod) = --------------------------- * 100
            #                   (HighestHigh - LowestLow)

            # FASTD(FastDperiod) = MA Smoothed FASTK over FastDperiod

            # SLOWK(SlowKperiod) = MA Smoothed FASTK over SlowKperiod

            # SLOWD(SlowDperiod) = MA Smoothed SLOWK over SlowDperiod

        K_values, D_values = talib.STOCH(kdj_price['high'].values,
                                           kdj_price['low'].values,
                                           kdj_price['close'].values,
                                           fastk_period=9,
                                           slowk_period=3,
                                           slowk_matype=0,
                                           slowd_period=3,
                                           slowd_matype=0)


        J_values = 3 * np.array(K_values) - 2 * np.array(D_values)

        Data_Total_pd.loc[Data_Total_pd['code'] == stock_name,"KDJ_K"] = np.array(K_values)
        Data_Total_pd.loc[Data_Total_pd['code'] == stock_name,"KDJ_D"] = np.array(D_values)
        Data_Total_pd.loc[Data_Total_pd['code'] == stock_name,"KDJ_J"] = np.array(J_values)

    
    ################################   6、开始计算RSI指标   ###################################################
    Data_Total_pd["RSI"] = None

    for stock_name in set(stock_list):
        rsi_values = []

        rsi_price = Data_Total_pd.loc[Data_Total_pd['code'] == stock_name,['high','low','close']]

        rsi_price = rsi_price.fillna(0)
        
        rsi_values = talib.RSI(np.array(rsi_price['close']), 12)       #RSI的天数一般是6、12、24

        Data_Total_pd.loc[Data_Total_pd['code'] == stock_name,"RSI"] = np.array(rsi_values)

    Data_Total_pd = Data_Total_pd.fillna(0)
    
    ################################   7、开始计算动量指标“MOM”   ###################################################
    Data_Total_pd["MOM_12"] = None
    Data_Total_pd["MOM_25"] = None
    # 选择一条10日均线作为中间线，判断
    Data_Total_pd["MOM_gap"] = None


    for stock_name in set(stock_list):

        MOM_values = []
        MOM_gap_values = []

        mom_price = list(Data_Total_pd[Data_Total_pd['code'] == stock_name]['close'])


        MOM_12_values = talib.MOM(np.array(mom_price), timeperiod = 12)

        MOM_25_values = talib.MOM(np.array(mom_price), timeperiod = 25)

        MOM_gap_values = MOM_25_values - MOM_12_values

        Data_Total_pd.loc[Data_Total_pd['code'] == stock_name,"MOM_12"] = np.array(MOM_12_values)
        Data_Total_pd.loc[Data_Total_pd['code'] == stock_name,"MOM_25"] = np.array(MOM_25_values)

        Data_Total_pd.loc[Data_Total_pd['code'] == stock_name,"MOM_gap"] = np.array(MOM_gap_values)
    
   
    ################################   8、开始计算能量指标OBV   ###################################################
    Data_Total_pd["Long_Short_Rate_OBV"] = None

    for stock_name in set(stock_list):
        OBV_values = []


        obv_price = Data_Total_pd.loc[Data_Total_pd['code'] == stock_name,['high','low','close','volume']]

        obv_price = obv_price.fillna(0)

        # 通过价格进行调整
        Long_Short_Rate_OBV_values = []


        OBV_gap = 0 
        for i in range(0,len(obv_price['close'])):
            OBV_gap = ((obv_price['close'].values[i]-obv_price['low'].values[i]) \
                           -(obv_price['high'].values[i]-obv_price['close'].values[i])) \
                          /(obv_price['high'].values[i]-obv_price['low'].values[i])


            if np.isnan(OBV_gap):
                OBV_gap = 0

            if i == 0:
                Long_Short_Rate_OBV_values.append(obv_price['volume'].values[i])
            else:
                Long_Short_Rate_OBV_values.append(float(OBV_gap)*float(obv_price['volume'].values[i]))

        Data_Total_pd.loc[Data_Total_pd['code'] == stock_name,"Long_Short_Rate_OBV"] = np.array(Long_Short_Rate_OBV_values)       

    
    ################################  10、开始计算成交量   ###################################################
    Data_Total_pd["Volume"] = None

    for stock_name in set(stock_list):

        Volume_price = Data_Total_pd.loc[Data_Total_pd['code'] == stock_name,['high','low','close','volume']]

        Data_Total_pd.loc[Data_Total_pd['code'] == stock_name,"Volume"] = np.array(Volume_price['volume'])       
    
    Data_Total_pd = Data_Total_pd.fillna(0)

    
    ################################   9、写入文件   ###################################################    
    file_name = "raw_data_sample.csv"
    Data_Total_pd.to_csv(file_name)
    



In [13]:
# 初始化赋值
Data_Total = pd.DataFrame(columns = Col_Total)

file_name = "raw_data_sample.csv"
Data_Total[Col_Name] = pd.read_csv(file_name)[Col_Name]

Data_Total[['code','date']] =  pd.read_csv(file_name).loc[:,['code','date']]

Stock_list = set(list(Data_Total['code']))

In [14]:
##############################################  1、 赋值前1-2天的值      ############################################################

for col in Col_Name:
        
    # 修改列属性为float,之后数据透视需要
    Data_Total[col] = Data_Total[col].astype(float)
    
    for day_count_i in range(1, 3):

        Data_Total.loc[:, str(str(col) + "_pre" + str(day_count_i))] = None
            
        # 修改列属性为float,之后数据透视需要
        Data_Total[str(str(col) + "_pre" + str(day_count_i))] = Data_Total[str(str(col) + "_pre" + str(day_count_i))].astype(float)
            
        for stock_name in Stock_list:
            temp = []
            temp = list(Data_Total.loc[Data_Total['code'] == stock_name, col])
            for i in range(0, day_count_i):
                temp.insert(0,0)
                    
            Data_Total.loc[Data_Total['code'] == stock_name, str(str(col) + "_pre" + str(day_count_i))] = np.array(temp[:-day_count_i])

############################################  2、 删除无效数据，更改数据类型      ############################################################

# 删除前60的数据，后10的数据不用删除，没有胜败的关系
for stock_name in Stock_list:
    Data_Total = Data_Total.drop(Data_Total[Data_Total['code'] == stock_name].iloc[:60].index, axis=0)



In [15]:
# normalize_by_list使用索引表对数据进行归一化，生成模型可识别的样本
# file_name：索引文件名，记录均值方差，需遵守相应表格列名称规则
# sample_data_pd：pandas数据结构，仅一行，记录样本归一化前的原始数据
# stock_name：股票名，用于去索引文件中查找对应的数据
# cols：列名，归一化的相应列名

def normalize_by_list(file_name,sample_data_pd,stock_name,cols):
    # ret_pd是返回的pd数组
    ret_pd = pd.DataFrame(columns = cols)
    
    # 创建列，赋值为None
    ret_pd[cols] = None
    
    # 提取平均值
    MS_pd = pd.DataFrame()
    MS_pd = pd.read_csv(file_name)
        
    # 逐列计算对应归一化的值
    for col in cols:
        mean = 0
        std = 0
        
        # 查找表中的数据
        mean = MS_pd.loc[MS_pd['code']==stock_name,str(col+"_mean")]
        std = MS_pd.loc[MS_pd['code']==stock_name,str(col+"_std")]

        # 计算
        val = (float(sample_data_pd.loc[:,col].values[0]) - mean)/(0.5*std)
        
        temp = 0
        if abs(math.floor(val)) <=3:
            temp = math.floor(val)
        else:
            temp = 4*val/abs(val)

        ret_pd.loc[:,col] = np.array([float(temp)])

    return ret_pd



In [46]:
for stock_name in Stock_list:
    file_name = "MS_List_Index.csv"
#     print stock_name
#     print Data_Total.loc[Data_Total['code'] == stock_name,'date'].iloc[-1:,]
#     print str(Data_Total.loc[Data_Total['code'] == stock_name,'date'].iloc[-1:,])
    print Data_Total.loc[Data_Total['code'] == stock_name,'date'].iloc[100:101,].values[0].replace("/","-")
#     print str.replace("is", "was")
    normalize_by_list(file_name,Data_Total.loc[Data_Total['code'] == stock_name,Col_Total].iloc[-70:-71,:],stock_name,Col_Total).to_csv(str(stock_name)+str(Data_Total.loc[Data_Total['code'] == stock_name,'date'].iloc[-50:-51,].values[0].replace("/","-"))+".csv")

IndexError: index 0 is out of bounds for axis 0 with size 0

In [35]:



# 提取最后一行作为相应的特征向量

# 结合MS_List_Index归一化向量

In [None]:
# 读取模块进行预测

In [None]:
# 进入聚宽进行验证