In [1]:
import numpy as np
import datetime
import pandas as pd
import time
import sys
sys.path.append("..")

from emulator.env_factor import get_factors

from params import Smp_Size
from params import ROLLING_PERIOD_PARAM


def fix_data(path):
    """Load a quotes CSV and index it by its parsed timestamp column.

    The file is read as GBK-encoded text with the C parser. The unnamed
    first column (``'Unnamed: 0'``) is treated as the timestamp: it is
    parsed with ``pd.to_datetime`` into a ``trading_point`` column and the
    raw text column is dropped.

    Parameters
    ----------
    path : str
        Location of the CSV file.

    Returns
    -------
    pandas.DataFrame
        The data indexed by ``trading_point``. The ``trading_point`` column
        itself is also kept as a regular (last) column.
    """
    raw = pd.read_csv(path, encoding="gbk", engine='c')
    raw = raw.rename(columns={'Unnamed: 0': 'trading_time'})

    # Add the parsed timestamps as a new column, then discard the raw text.
    stamped = raw.assign(trading_point=pd.to_datetime(raw.trading_time))
    stamped = stamped.drop(columns=['trading_time'])

    # Passing the Series (not the column name) keeps the column in the frame.
    return stamped.set_index(stamped.trading_point)


def high2low(tmp, freq):
    """Aggregate minute bars downloaded from RiceQuant into lower-frequency bars.

    (Original note dated 2017-08-11.)

    Parameters
    ----------
    tmp : pandas.DataFrame
        Bars with ``open``, ``high``, ``low``, ``close`` and ``volume``
        columns. Each column is resampled, so the index must be one that
        ``resample`` accepts (e.g. a DatetimeIndex).
    freq : str
        Target resampling frequency, e.g. ``'15min'``.

    Returns
    -------
    pandas.DataFrame
        Columns ``open, high, low, close, volume`` at the target frequency,
        with every row containing a missing value removed.
    """
    # Price fields: resample each column on its own, take the matching
    # column of the OHLC aggregate, and drop bins that hold no data.
    price_fields = ('open', 'high', 'low', 'close')
    price_parts = [
        tmp[field].resample(freq).ohlc()[field].dropna()
        for field in price_fields
    ]
    bars = pd.concat(price_parts, axis=1)

    # Volume: summed within each bin.
    volume = tmp['volume'].resample(freq).sum().dropna()

    combined = pd.concat([bars, volume], axis=1)
    return combined.dropna()



#Samples_Size = 350

# ---------------------------------------------------------------------------
# Load the raw bars and resample them to 15-minute bars.
# time.clock() was removed in Python 3.8; time.perf_counter() is the
# standard-library replacement for measuring elapsed time.
# ---------------------------------------------------------------------------
start = time.perf_counter()
quotes = fix_data('./HS300.csv')
quotes = high2low(quotes, '15min')
# NOTE(review): despite its name, daily_quotes is resampled at the same
# 15-minute frequency as `quotes` -- confirm whether daily bars were intended.
daily_quotes = high2low(quotes, '15min')
end = time.perf_counter()
print('reading csv time %s '%(end-start))

# Reference dates (today, tomorrow, 60 days ago); they are only printed here.
today = datetime.date.today()
print(today)
today = datetime.date.today()+datetime.timedelta(days = +1)
print(today)
today = datetime.date.today()+datetime.timedelta(days = -60)
print(today)

# Plain arrays handed to the factor generator. Volume is deliberately not
# extracted: the get_factors call below does not take it.
Index = quotes.index
High = quotes.high.values
Low = quotes.low.values
Close = quotes.close.values
Open = quotes.open.values
print('init quotes shape')
print(quotes.shape)

# ---------------------------------------------------------------------------
# Build the factor table and the matching quote sample.
# `start` is reset once here; both timings printed below are measured from
# this point (the second one is therefore cumulative).
# ---------------------------------------------------------------------------
start = time.perf_counter()
factors = get_factors(Index, Open, Close, High, Low, rolling=150, drop=True)

print('init factors shape')
print(factors.shape)

# Log return from the current bar's close to the next bar's close; the last
# bar has no successor and is removed by dropna().
daily_quotes['returns'] = np.log(daily_quotes['close'].shift(-1) / daily_quotes['close'])
daily_quotes = daily_quotes.dropna()

start_date = pd.to_datetime('2014-03-11')
print(start_date)
end_date = pd.to_datetime('2018-12-29')
daily_quotes = daily_quotes.loc[start_date:end_date]

print('init daily shape')
print(daily_quotes.shape)

# NOTE(review): the first 100 quote rows are dropped while the factor table
# keeps the full date range, so quote row i is paired below with the factor
# window starting at factor row i -- confirm this offset is intended.
daily_quotes = daily_quotes.iloc[100:]
factors = factors.loc[start_date:end_date]

print('smp daily shape')
print(daily_quotes.shape)
print('smp factors shape')
print(factors.shape)

end = time.perf_counter()
print('gen state array time %s '%(end-start))

# ---------------------------------------------------------------------------
# Stack one rolling window of factors per quote row.
# ---------------------------------------------------------------------------
n_windows = len(daily_quotes)
required_rows = n_windows - 1 + ROLLING_PERIOD_PARAM
if n_windows == 0 or len(factors) < required_rows:
    # Without this check the failure would surface as a shape-mismatch error
    # from np.concatenate / reshape further down.
    raise ValueError(
        "need at least one quote row and %d factor rows to build %d windows "
        "of length %d, got %d factor rows"
        % (required_rows, n_windows, ROLLING_PERIOD_PARAM, len(factors))
    )

# Each entry has shape (1, ROLLING_PERIOD_PARAM, n_factors).
fac_list = [
    np.expand_dims(np.array(factors.iloc[i:i + ROLLING_PERIOD_PARAM]), axis=0)
    for i in range(n_windows)
]

fac_array = np.concatenate(fac_list, axis=0)
# (windows, rows, factors) -> (windows, 1, rows, factors) -> (windows, rows, factors, 1)
shape = [fac_array.shape[0], 1, ROLLING_PERIOD_PARAM, fac_array.shape[2]]
fac_array = fac_array.reshape(shape)
fac_array = np.transpose(fac_array, [0, 2, 3, 1])

print(fac_array.shape)

end = time.perf_counter()
print('reshape state array time %s '%(end-start))


# Module-level handles read by the Account environment.
DATE_QUOTES = daily_quotes
DATA_FAC = fac_array
print(DATA_FAC.shape)

reading csv time 0.8873440000000001 
2018-07-23
2018-07-24
2018-05-24
init quotes shape
(26244, 5)
init factors shape
(26045, 42)
2014-03-11 00:00:00
init daily shape
(12419, 6)
smp daily shape
(12319, 6)
smp factors shape
(12420, 42)
gen state array time 0.17076599999999997 
(12319, 56, 42, 1)
reshape state array time 1.5743400000000003 
(12319, 56, 42, 1)


In [2]:


from IPython.display import clear_output

import matplotlib.pyplot as plt
import seaborn as sns
sns.set_style('whitegrid')
%matplotlib inline

from params import *


In [3]:

class Account(object):
    """Trading account that acts as the step-based environment for the agent.

    The account holds at most one position at a time, tracked by ``flags``
    (1 = long, -1 = short, 0 = flat). Trades are executed at the current
    bar's open multiplied by ``CONTRACT_MULTIPLIER``; the position is marked
    to the same bar's close to compute the reward.

    Parameters
    ----------
    quotes : mapping of str -> pandas.Series, optional
        Must provide ``'close'`` and ``'open'`` Series. Defaults to the
        module-level ``DATE_QUOTES``.
    observations : array-like, optional
        Indexed by step; each entry is transposed with axes ``[2, 0, 1]``
        before being returned. Defaults to the module-level ``DATA_FAC``.
    max_steps : int, optional
        The episode ends once the step counter exceeds this value. Defaults
        to the module-level ``Smp_Size`` (looked up at each step).
    """

    INITIAL_CASH = 1e5          # cash at the start of every episode
    CONTRACT_MULTIPLIER = 10    # notional = open price * multiplier
    COMMISSION = 6              # flat cost charged on every executed trade
    MIN_TOTAL_VALUE = 4000      # episode ends when total value drops below this

    def __init__(self, quotes=None, observations=None, max_steps=None):
        quotes = DATE_QUOTES if quotes is None else quotes
        self.data_close = quotes['close']
        self.data_open = quotes['open']
        self.data_observation = DATA_FAC if observations is None else observations
        self.action_space = ['long', 'short', 'close']
        # Proportional cost rate applied to the traded notional (0 = none).
        self.free = 0
        self._max_steps = max_steps
        # Also creates the history buffers, so step() works before reset().
        self._reset_account()

    def _reset_account(self):
        """Put counters, balances and history buffers back to episode start."""
        self.step_counter = 0
        self.cash = self.INITIAL_CASH
        self.position = 0
        self.total_value = self.cash + self.position
        self.flags = 0
        self.buffer_reward = []
        self.buffer_value = []
        self.buffer_action = []
        self.buffer_cash = []

    def reset(self):
        """Start a new episode and return the first observation."""
        self._reset_account()
        return self._get_initial_state()

    def _get_initial_state(self):
        """Return observation 0 with its axes reordered to ``[2, 0, 1]``."""
        return np.transpose(self.data_observation[0], [2, 0, 1])

    def get_action_space(self):
        """Return the list of action names accepted by ``step_op``."""
        return self.action_space

    def _notional(self):
        """Value of one position at the current bar's open price."""
        # .iloc: step_counter is a position, not a label of the Series index.
        return self.data_open.iloc[self.step_counter] * self.CONTRACT_MULTIPLIER

    def long(self):
        """Open a long position: pay the notional plus costs."""
        self.flags = 1
        quotes = self._notional()
        # The flat commission is a cost, so it is added to the amount paid.
        self.cash -= quotes * (1 + self.free) + self.COMMISSION
        self.position = quotes

    def short(self):
        """Open a short position: receive the notional minus costs."""
        self.flags = -1
        quotes = self._notional()
        self.cash += quotes * (1 - self.free) - self.COMMISSION
        self.position = - quotes

    def keep(self):
        """Hold the current position and re-mark it at the current open."""
        quotes = self._notional()
        self.position = quotes * self.flags

    def close_long(self):
        """Sell an open long position: receive the notional minus costs."""
        self.flags = 0
        quotes = self._notional()
        self.cash += quotes * (1 - self.free) - self.COMMISSION
        self.position = 0

    def close_short(self):
        """Buy back an open short position: pay the notional plus costs."""
        self.flags = 0
        quotes = self._notional()
        # The flat commission is a cost, so it is added to the amount paid.
        self.cash -= quotes * (1 + self.free) + self.COMMISSION
        self.position = 0

    def step_op(self, action, printable):
        """Apply a named action and advance one step.

        Parameters
        ----------
        action : str
            One of ``'long'``, ``'short'``, ``'close'``.
        printable : bool
            When true, print the action, the bar's open and close, and the
            reward.

        Returns
        -------
        tuple
            ``(next_observation, reward, done)`` where the observation has
            its axes reordered to ``[2, 0, 1]`` and ``reward`` is the change
            in total value (cash plus position marked at the close).

        Raises
        ------
        ValueError
            If ``action`` is not one of the three action names.
        """
        if action == 'long':
            if self.flags == 0:
                self.long()
            elif self.flags == -1:
                # Only flattens the short; opening a long takes another step.
                self.close_short()
            else:
                self.keep()

        elif action == 'close':
            if self.flags == 1:
                self.close_long()
            elif self.flags == -1:
                self.close_short()
            # Already flat: nothing to do.

        elif action == 'short':
            if self.flags == 0:
                self.short()
            elif self.flags == 1:
                # Only flattens the long; opening a short takes another step.
                self.close_long()
            else:
                self.keep()
        else:
            raise ValueError("action should be elements of ['long', 'short', 'close']")

        traded_step = self.step_counter
        position = self.data_close.iloc[traded_step] * self.CONTRACT_MULTIPLIER * self.flags
        reward = self.cash + position - self.total_value
        self.total_value = position + self.cash
        self.step_counter += 1

        step_limit = Smp_Size if self._max_steps is None else self._max_steps
        done = bool(self.total_value < self.MIN_TOTAL_VALUE
                    or self.step_counter > step_limit)

        # When the data runs out, end the episode and hand back the last
        # available observation instead of indexing past the end.
        last_index = len(self.data_observation) - 1
        if self.step_counter > last_index:
            done = True
            next_observation = self.data_observation[last_index]
        else:
            next_observation = self.data_observation[self.step_counter]

        if printable:
            print(action, self.data_open.iloc[traded_step], self.data_close.iloc[traded_step], reward)

        self.buffer_action.append(action)
        self.buffer_reward.append(reward)
        self.buffer_value.append(self.total_value)
        self.buffer_cash.append(self.cash)

        return np.transpose(next_observation, [2, 0, 1]), reward, done

    def step(self, action, printable = False):
        """Apply an action given by index: 0 = long, 1 = short, 2 = close.

        The comparisons use ``==`` so that numpy integers and one-element
        arrays are accepted as well as plain ints.

        Raises
        ------
        ValueError
            If ``action`` is not 0, 1 or 2.
        """
        if action == 0:
            return self.step_op('long', printable)
        elif action == 1:
            return self.step_op('short', printable)
        elif action == 2:
            return self.step_op('close', printable)
        else:
            raise ValueError("action should be one of [0,1,2]")

    def plot_data(self):
        """Return the episode history as a DataFrame.

        Columns are ``value``, ``reward``, ``cash`` and ``action``; rows are
        labelled with the first ``len(history)`` entries of the close-price
        index. Building the frame column-wise keeps the numeric columns
        numeric.
        """
        df = pd.DataFrame({
            "value": self.buffer_value,
            "reward": self.buffer_reward,
            "cash": self.buffer_cash,
            "action": self.buffer_action,
        })
        df.index = self.data_close.index[:len(df)]
        return df
    



In [4]:
# Extend the import path *before* importing from the project packages, and
# skip the append when the entry is already present so that re-running the
# cell does not keep growing sys.path.
# NOTE(review): presumably the `agent` package lives in the parent
# directory -- confirm.
import sys
if ".." not in sys.path:
    sys.path.append("..")

from agent.main import Agent

# Build the environment and use its first observation to size the agent.
env = Account()
state = env.reset()
image_shape = state.shape
print(image_shape)

# The second argument was the literal 3, which equals the number of entries
# in the environment's action space; derive it so the two cannot drift apart.
# NOTE(review): assumes Agent's second argument is the action count -- confirm.
agent = Agent(image_shape, len(env.get_action_space()))
agent.load_model()

(1, 56, 42)
(1, 56, 42)
3
(1, 56, 42)
3


In [20]:
# Start a fresh episode.
state = env.reset()

# Take the most recent factor window, kept as a length-1 batch.
ss = DATA_FAC[-1:]
print(ss.shape)

# Drop the batch axis and reorder the remaining axes to [2, 0, 1], the same
# layout the environment returns from reset()/step().
ss = ss[0].transpose([2, 0, 1])
print(ss.shape)

# Query the agent on a batch containing just this window.
# NOTE(review): the meaning of the second argument (1) is defined by
# Agent.get_epsilon_policy, which is not visible here -- confirm.
action = agent.get_epsilon_policy(ss[np.newaxis], 1)
print(action)

(1, 56, 42, 1)
(1, 56, 42)
[1]
