In [1]:
import numpy as np
import pandas as pd
import TAF
import datetime
import talib 
import matplotlib.pylab as plt
import seaborn as sns
% matplotlib inline

In [2]:
# Load the bar data from 'HS300_15m.csv' and pull out the columns that
# TAF.get_factors consumes.
factors = pd.read_csv('HS300_15m.csv')

index = factors['index']
Open, Close, High, Low, Volume = (
    factors[column].values
    for column in ('open', 'close', 'high', 'low', 'volume')
)

# Build the factor table, then keep only the trailing (700 + 11) * 16 rows.
# NOTE(review): presumably 16 rows per trading day with 11 leading days kept
# as warm-up for the recurrent model -- confirm against TAF / the data file.
factors = TAF.get_factors(
    index, Open, Close, High, Low, Volume, drop=True
).iloc[-(700 + 11) * 16:]

In [3]:
# First 10 characters of the index label at row 11*16 (the first row after
# the leading 11*16 rows) and of the last row give the label date range.
start_date, end_date = (
    factors.index[position][:10] for position in (11*16, -1)
)

print ('开始时间', start_date)
print ('结束时间', end_date)

开始时间 2014-02-25
结束时间 2016-12-30


In [4]:
rolling = 88

# Daily bars; the unnamed first CSV column becomes the 'tradeDate' key.
targets = pd.read_csv('HS300_1d.csv').rename(columns={'Unnamed: 0':'tradeDate'})

# Return of the close 5 rows ahead relative to the current close.
targets['returns'] = targets['close'].shift(-5) / targets['close'] - 1.
# Every row starts in the middle class (1).
targets['labels'] = 1

# Band of +/- half a rolling standard deviation around the rolling mean of
# the returns, both taken over the same `rolling`-row window.
returns_window = targets['returns'].rolling(rolling)
band_center = returns_window.mean()
band_half_width = 0.5 * returns_window.std()
targets['upper_boundary'] = band_center + band_half_width
targets['lower_boundary'] = band_center - band_half_width

# Rows without a full window or without a forward return are discarded
# before the outer classes are assigned: 2 at/above the upper boundary,
# 0 at/below the lower boundary (the 0 assignment runs last).
targets.dropna(inplace=True)
targets.loc[targets['returns'] >= targets['upper_boundary'], 'labels'] = 2
targets.loc[targets['returns'] <= targets['lower_boundary'], 'labels'] = 0

# Keep only the label column for the date range covered by the factors.
targets = targets.set_index('tradeDate').loc[start_date:end_date, 'labels']

###  输入数据

In [5]:
# Insert a batch axis of size 1: (rows, n_factors) -> (rows, 1, n_factors).
# The factor count is read from the frame rather than hard-coded as 58, so a
# change in the factor set cannot silently scramble rows across features.
inputs = np.array(factors).reshape(-1, 1, factors.shape[1])

def dense_to_one_hot(labels_dense, num_classes=None):
    """Convert integer class labels to a one-hot encoded matrix.

    Args:
        labels_dense: array-like (NumPy array, list or pandas Series) of
            non-negative integer class ids. It is flattened before encoding.
        num_classes: optional number of columns in the result. Defaults to
            ``max(label) + 1`` so that every class id has its own column even
            when some ids do not occur in ``labels_dense``.

    Returns:
        A float ndarray of shape ``(n_labels, num_classes)`` holding a single
        1 per row, in the column given by that row's label.

    Raises:
        ValueError: if a label is negative or not a whole number, or if
            ``num_classes`` is too small for the largest label.
    """
    labels = np.asarray(labels_dense).ravel()

    # Accept float-typed whole numbers (e.g. 1.0) but refuse real fractions,
    # which cannot be used as column indices.
    if not np.issubdtype(labels.dtype, np.integer):
        as_int = labels.astype(np.int64)
        if not np.array_equal(as_int, labels):
            raise ValueError("labels_dense must contain whole numbers")
        labels = as_int

    if labels.size and labels.min() < 0:
        raise ValueError("labels_dense must be non-negative")

    # Size the encoding by the largest id, not by the number of distinct ids:
    # counting distinct values under-allocates when a class is missing.
    required = int(labels.max()) + 1 if labels.size else 0
    if num_classes is None:
        num_classes = required
    elif num_classes < required:
        raise ValueError("num_classes is smaller than the largest label + 1")

    labels_one_hot = np.zeros((labels.size, num_classes))
    labels_one_hot[np.arange(labels.size), labels] = 1
    return labels_one_hot

# One-hot encode the labels and insert a batch axis of size 1.
targets = np.expand_dims(dense_to_one_hot(targets), axis=1)

# Hold out the last 100 target rows. Inputs are split at 100*16 rows from the
# end; the test inputs additionally keep the 11*16 rows in front of that cut.
# NOTE(review): presumably 16 input rows per target row (15-minute bars per
# trading day) -- confirm against the data files.
train_inputs, test_inputs = inputs[:-100*16], inputs[-100*16 - 11 * 16:]
train_targets, test_targets = targets[:-100], targets[-100:]

# Row index of the last entry in every group of 16 input rows, skipping the
# first 11 groups; these are the time steps whose outputs are scored.
train_gather_list = np.arange(train_inputs.shape[0]).reshape([-1,16])[11:, -1]
test_gather_list = np.arange(test_inputs.shape[0]).reshape([-1,16])[11:, -1]

### 1层LSTM

In [6]:
import os
import tensorflow as tf

from FixDNCore import DNCore_L1
from FixACT import ACTCore

In [7]:
class Classifier_DNC_BasicLSTM_L1(object):
    """Sequence classifier: a ``DNCore_L1`` core wrapped in an ``ACTCore`` cell.

    The constructor builds the TensorFlow 1.x graph (placeholders, the
    ``tf.nn.dynamic_rnn`` unroll, loss and accuracy) and opens a
    ``tf.InteractiveSession``. Call :meth:`close` to release the session.
    """

    def __init__(self,
                 inputs,
                 targets,
                 gather_list=None,
                 batch_size=1,
                 hidden_size=10,
                 memory_size=10,
                 threshold=0.99,
                 pondering_coefficient = 1e-2,
                 num_reads=3,
                 num_writes=1):
        """Build the inference graph, the loss and the accuracy metric.

        Args:
            inputs: training inputs indexed [time, batch, features]; kept and
                fed to the graph by :meth:`fit`.
            targets: training targets indexed [time, batch, classes], used as
                the labels of a softmax cross-entropy.
            gather_list: optional indices along the time axis. When given,
                only those time steps of the RNN output are scored against
                ``targets``; otherwise every time step is scored.
            batch_size: batch dimension of the placeholders and initial state.
            hidden_size: forwarded to ``DNCore_L1``.
            memory_size: forwarded to ``DNCore_L1``.
            threshold: forwarded to ``ACTCore``.
            pondering_coefficient: weight applied to the ACT iteration and
                remainder outputs before they are added to the loss.
            num_reads: forwarded to ``DNCore_L1`` as ``num_read_heads``.
            num_writes: forwarded to ``DNCore_L1`` as ``num_write_heads``.
        """
        self._tmp_inputs = inputs
        self._tmp_targets = targets
        self._in_length = inputs.shape[0]
        self._in_width = inputs.shape[2]
        self._out_length = targets.shape[0]
        self._out_width = targets.shape[2]
        self._batch_size = batch_size

        self._sess = tf.InteractiveSession()

        # The time dimension is left unspecified so that pred() can be fed
        # sequences whose length differs from the training data.
        self._inputs = tf.placeholder(dtype=tf.float32,
                                      shape=[None, self._batch_size, self._in_width],
                                      name='inputs')
        self._targets = tf.placeholder(dtype=tf.float32,
                                       shape=[None, self._batch_size, self._out_width],
                                       name='targets')

        act_core = DNCore_L1( hidden_size=hidden_size,
                              memory_size=memory_size,
                              word_size=self._in_width,
                              num_read_heads=num_reads,
                              num_write_heads=num_writes)
        self._InferenceCell = ACTCore(core=act_core,
                                      output_size=self._out_width,
                                      threshold=threshold,
                                      get_state_for_halting=self._get_hidden_state)

        self._initial_state = self._InferenceCell.initial_state(self._batch_size)

        tmp, act_final_cumul_state = \
        tf.nn.dynamic_rnn(cell=self._InferenceCell,
                          inputs=self._inputs,
                          initial_state=self._initial_state,
                          time_major=True)
        act_output, (act_final_iteration, act_final_remainder) = tmp

        self._pred_outputs = act_output
        if gather_list is not None:
            out_sequences = tf.gather(act_output, gather_list)
        else:
            # Score every time step of the RNN output (not the core module
            # object, which is not a tensor).
            out_sequences = act_output

        pondering_cost = (act_final_iteration + act_final_remainder) * pondering_coefficient
        rnn_cost = tf.nn.softmax_cross_entropy_with_logits(
            labels=self._targets, logits=out_sequences)
        self._cost = tf.reduce_mean(rnn_cost) + tf.reduce_mean(pondering_cost)

        self._pred = tf.nn.softmax(out_sequences, dim=2)
        correct_pred = tf.equal(tf.argmax(self._pred,2), tf.argmax(self._targets,2))
        self._accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

    def _get_hidden_state(self, state):
        """Return the first element of the controller-state pair in ``state[0]``.

        Passed to ``ACTCore`` as ``get_state_for_halting``.
        """
        controller_state = state[0]
        next_state, next_cell = controller_state
        return next_state

    def fit(self,
            training_iters =1e2,
            learning_rate = 1e-4,
            optimizer_epsilon = 1e-10,
            max_gard_norm = 50):
        """Train on the inputs/targets given to the constructor.

        Every iteration feeds the whole training sequence once and prints the
        loss and accuracy.

        Args:
            training_iters: number of optimisation steps (converted to int).
            learning_rate: RMSProp learning rate.
            optimizer_epsilon: RMSProp epsilon.
            max_gard_norm: global-norm threshold for gradient clipping.
        """
        # NOTE(review): the optimizer and the 'global_step' variable are
        # created on every call and all variables are re-initialised, so this
        # looks intended to be called once per instance -- confirm.

        # Set up optimizer with global norm clipping.
        trainable_variables = tf.trainable_variables()
        grads, _ = tf.clip_by_global_norm(
            tf.gradients(self._cost, trainable_variables), max_gard_norm)
        global_step = tf.get_variable(
            name="global_step",
            shape=[],
            dtype=tf.int64,
            initializer=tf.zeros_initializer(),
            trainable=False,
            collections=[tf.GraphKeys.GLOBAL_VARIABLES, tf.GraphKeys.GLOBAL_STEP])

        optimizer = tf.train.RMSPropOptimizer(
            learning_rate=learning_rate, epsilon=optimizer_epsilon)
        self._train_step = optimizer.apply_gradients(
            zip(grads, trainable_variables), global_step=global_step)

        self._sess.run(tf.global_variables_initializer())
        # Built-in int() replaces the removed NumPy alias np.int.
        for scope in range(int(training_iters)):
            _, loss, acc = self._sess.run([self._train_step, self._cost, self._accuracy],
                                     feed_dict = {self._inputs:self._tmp_inputs,
                                                  self._targets:self._tmp_targets})
            print (scope, '  loss--', loss, '  acc--', acc)
        print ("Optimization Finished!")

    def close(self):
        """Close the TensorFlow session owned by this instance."""
        self._sess.close()
        print ('结束进程，清理tensorflow内存/显存占用')

    def pred(self, inputs, gather_list=None):
        """Run the network on ``inputs`` and return its predictions.

        Args:
            inputs: array indexed [time, batch, features] fed to the input
                placeholder.
            gather_list: optional indices along the time axis; when given,
                only those time steps are returned.

        Returns:
            ``[probability, classification]``: the softmax over the last axis
            of the (optionally gathered) RNN output, and its argmax.
        """
        # NOTE: the gather/softmax/argmax ops below are added to the graph on
        # every call.
        output_pred = self._pred_outputs
        if gather_list is not None:
            output_pred = tf.gather(output_pred, gather_list)
        probability = tf.nn.softmax(output_pred)
        classification = tf.argmax(probability, axis=-1)

        return self._sess.run([probability, classification],feed_dict = {self._inputs:inputs})

In [8]:
# Build the model on the training split, scoring only the gathered time
# steps, then run 5 optimisation steps.
a = Classifier_DNC_BasicLSTM_L1(inputs=train_inputs,
                                targets=train_targets,
                                gather_list=train_gather_list)
a.fit(training_iters=5, learning_rate=1e-1)

0   loss-- 1.12566   acc-- 0.338333
1   loss-- 1.12085   acc-- 0.353333
2   loss-- 1.11663   acc-- 0.378333
3   loss-- 1.11299   acc-- 0.391667
4   loss-- 1.1098   acc-- 0.395


In [9]:
# Class probabilities (b) and predicted class ids (c) at the gathered steps.
b, c = a.pred(train_inputs, train_gather_list)
b = pd.DataFrame(np.squeeze(b))

# True class ids recovered from the one-hot training targets.
t = np.argmax(train_targets, axis=-1)

# Table of true label, predicted label, then one probability column per class.
tmp = pd.DataFrame(np.column_stack([t.flatten(), c.flatten()]),
                   columns=['targets','pred'])
tmp = pd.concat([tmp, b], axis=1)

In [10]:
# Preview the first five rows of the comparison table.
tmp.head(n=5)

Unnamed: 0,targets,pred,0,1,2
0,2,1,0.257946,0.373783,0.36827
1,1,2,0.252376,0.336031,0.411593
2,2,1,0.281975,0.368138,0.349887
3,1,1,0.321836,0.351986,0.326178
4,0,0,0.368033,0.334293,0.297674


### 3层LSTM

In [12]:
# Close the first model's InteractiveSession before discarding its graph:
# resetting the default graph while a session is still active is documented
# by TensorFlow as undefined behaviour, and the session would otherwise leak.
a.close()
tf.reset_default_graph()
from FixDNCore import DNCore_L3
from FixACT import ACTCore

In [13]:
class Classifier_DNC_BasicLSTM_L3(object):
    """Sequence classifier: a ``DNCore_L3`` core wrapped in an ``ACTCore`` cell.

    The constructor builds the TensorFlow 1.x graph (placeholders, the
    ``tf.nn.dynamic_rnn`` unroll, loss and accuracy) and opens a
    ``tf.InteractiveSession``. Call :meth:`close` to release the session.
    """

    def __init__(self,
                 inputs,
                 targets,
                 gather_list=None,
                 batch_size=1,
                 hidden_size=10,
                 memory_size=10,
                 threshold=0.99,
                 pondering_coefficient = 1e-2,
                 num_reads=3,
                 num_writes=1):
        """Build the inference graph, the loss and the accuracy metric.

        Args:
            inputs: training inputs indexed [time, batch, features]; kept and
                fed to the graph by :meth:`fit`.
            targets: training targets indexed [time, batch, classes], used as
                the labels of a softmax cross-entropy.
            gather_list: optional indices along the time axis. When given,
                only those time steps of the RNN output are scored against
                ``targets``; otherwise every time step is scored.
            batch_size: batch dimension of the placeholders and initial state.
            hidden_size: forwarded to ``DNCore_L3``.
            memory_size: forwarded to ``DNCore_L3``.
            threshold: forwarded to ``ACTCore``.
            pondering_coefficient: weight applied to the ACT iteration and
                remainder outputs before they are added to the loss.
            num_reads: forwarded to ``DNCore_L3`` as ``num_read_heads``.
            num_writes: forwarded to ``DNCore_L3`` as ``num_write_heads``.
        """
        self._tmp_inputs = inputs
        self._tmp_targets = targets
        # Time dimensions are left unspecified so sequences of any length can
        # be fed to the placeholders.
        self._in_length = None
        self._in_width = inputs.shape[2]
        self._out_length = None
        self._out_width = targets.shape[2]
        self._batch_size = batch_size

        self._sess = tf.InteractiveSession()

        self._inputs = tf.placeholder(dtype=tf.float32,
                                      shape=[self._in_length, self._batch_size, self._in_width],
                                      name='inputs')
        self._targets = tf.placeholder(dtype=tf.float32,
                                       shape=[self._out_length, self._batch_size, self._out_width],
                                       name='targets')

        act_core = DNCore_L3( hidden_size=hidden_size,
                              memory_size=memory_size,
                              word_size=self._in_width,
                              num_read_heads=num_reads,
                              num_write_heads=num_writes)
        self._InferenceCell = ACTCore(core=act_core,
                                      output_size=self._out_width,
                                      threshold=threshold,
                                      get_state_for_halting=self._get_hidden_state)

        self._initial_state = self._InferenceCell.initial_state(self._batch_size)

        tmp, act_final_cumul_state = \
        tf.nn.dynamic_rnn(cell=self._InferenceCell,
                          inputs=self._inputs,
                          initial_state=self._initial_state,
                          time_major=True)
        act_output, (act_final_iteration, act_final_remainder) = tmp

        self._pred_outputs = act_output
        if gather_list is not None:
            out_sequences = tf.gather(act_output, gather_list)
        else:
            # Score every time step of the RNN output (not the core module
            # object, which is not a tensor).
            out_sequences = act_output

        pondering_cost = (act_final_iteration + act_final_remainder) * pondering_coefficient
        rnn_cost = tf.nn.softmax_cross_entropy_with_logits(
            labels=self._targets, logits=out_sequences)
        self._cost = tf.reduce_mean(rnn_cost) + tf.reduce_mean(pondering_cost)

        self._pred = tf.nn.softmax(out_sequences, dim=2)
        correct_pred = tf.equal(tf.argmax(self._pred,2), tf.argmax(self._targets,2))
        self._accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

    # TODO: function still to be revisited.
    def _get_hidden_state(self, state):
        """Concatenate the first element of each controller layer's state pair.

        ``state`` is unpacked as (controller_state, access_state,
        read_vectors) and ``controller_state`` as three (state, cell) pairs,
        one per layer. Passed to ``ACTCore`` as ``get_state_for_halting``.
        """
        controller_state, access_state, read_vectors = state
        layer_1, layer_2, layer_3 = controller_state
        L1_next_state, L1_next_cell = layer_1
        L2_next_state, L2_next_cell = layer_2
        L3_next_state, L3_next_cell = layer_3
        return tf.concat([L1_next_state, L2_next_state, L3_next_state], axis=-1)

    def fit(self,
            training_iters =1e2,
            learning_rate = 1e-4,
            optimizer_epsilon = 1e-10,
            max_gard_norm = 50):
        """Train on the inputs/targets given to the constructor.

        Every iteration feeds the whole training sequence once and prints the
        loss and accuracy.

        Args:
            training_iters: number of optimisation steps (converted to int).
            learning_rate: RMSProp learning rate.
            optimizer_epsilon: RMSProp epsilon.
            max_gard_norm: global-norm threshold for gradient clipping.
        """
        # NOTE(review): the optimizer and the 'global_step' variable are
        # created on every call and all variables are re-initialised, so this
        # looks intended to be called once per instance -- confirm.

        # Set up optimizer with global norm clipping.
        trainable_variables = tf.trainable_variables()
        grads, _ = tf.clip_by_global_norm(
            tf.gradients(self._cost, trainable_variables), max_gard_norm)
        global_step = tf.get_variable(
            name="global_step",
            shape=[],
            dtype=tf.int64,
            initializer=tf.zeros_initializer(),
            trainable=False,
            collections=[tf.GraphKeys.GLOBAL_VARIABLES, tf.GraphKeys.GLOBAL_STEP])

        optimizer = tf.train.RMSPropOptimizer(
            learning_rate=learning_rate, epsilon=optimizer_epsilon)
        self._train_step = optimizer.apply_gradients(
            zip(grads, trainable_variables), global_step=global_step)

        self._sess.run(tf.global_variables_initializer())
        # Built-in int() replaces the removed NumPy alias np.int.
        for scope in range(int(training_iters)):
            _, loss, acc = self._sess.run([self._train_step, self._cost, self._accuracy],
                                     feed_dict = {self._inputs:self._tmp_inputs,
                                                  self._targets:self._tmp_targets})
            print (scope, '  loss--', loss, '  acc--', acc)
        print ("Optimization Finished!")

    def close(self):
        """Close the TensorFlow session owned by this instance."""
        self._sess.close()
        print ('结束进程，清理tensorflow内存/显存占用')

    def pred(self, inputs, gather_list=None):
        """Run the network on ``inputs`` and return its predictions.

        Args:
            inputs: array indexed [time, batch, features] fed to the input
                placeholder.
            gather_list: optional indices along the time axis; when given,
                only those time steps are returned.

        Returns:
            ``[probability, classification]``: the softmax over the last axis
            of the (optionally gathered) RNN output, and its argmax.
        """
        # NOTE: the gather/softmax/argmax ops below are added to the graph on
        # every call.
        output_pred = self._pred_outputs
        if gather_list is not None:
            output_pred = tf.gather(output_pred, gather_list)
        probability = tf.nn.softmax(output_pred)
        classification = tf.argmax(probability, axis=-1)

        return self._sess.run([probability, classification],feed_dict = {self._inputs:inputs})

In [14]:
# Build the model on the training split, scoring only the gathered time
# steps, then run 5 optimisation steps.
la = Classifier_DNC_BasicLSTM_L3(inputs=train_inputs,
                                 targets=train_targets,
                                 gather_list=train_gather_list)
la.fit(training_iters=5, learning_rate=1e-1)



  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


0   loss-- 1.14674   acc-- 0.283333
1   loss-- 1.13386   acc-- 0.331667
2   loss-- 1.12391   acc-- 0.358333
3   loss-- 1.11623   acc-- 0.388333
4   loss-- 1.11026   acc-- 0.4
Optimization Finished!


In [15]:
# Class probabilities (b) and predicted class ids (c) at the gathered steps.
b, c = la.pred(train_inputs, train_gather_list)
b = pd.DataFrame(np.squeeze(b))

# True class ids recovered from the one-hot training targets.
t = np.argmax(train_targets, axis=-1)

# Table of true label, predicted label, then one probability column per class.
tmp = pd.DataFrame(np.column_stack([t.flatten(), c.flatten()]),
                   columns=['targets','pred'])
tmp = pd.concat([tmp, b], axis=1)

tmp.head(n=5)

Unnamed: 0,targets,pred,0,1,2
0,2,2,0.255015,0.369511,0.375474
1,1,2,0.276327,0.344161,0.379513
2,2,0,0.336804,0.334562,0.328634
3,1,1,0.27673,0.370372,0.352898
4,0,1,0.333317,0.377212,0.289471
