In [1]:
import numpy as np
import pandas as pd
import time
import matplotlib.pyplot as plt
import seaborn as sns
import random
import requests
sns.set()

In [2]:
def get_state(data, t, n):
    d = t - n + 1
    block = data[d : t + 1] if d >= 0 else -d * [data[0]] + data[0 : t + 1]
    res = []
    for i in range(n - 1):
        res.append(block[i + 1] - block[i])
    return np.array([res])

In [3]:
class Deep_Evolution_Strategy:

    inputs = None

    def __init__(
        self, weights, reward_function, population_size, sigma, learning_rate
    ):
        self.weights = weights
        self.reward_function = reward_function
        self.population_size = population_size
        self.sigma = sigma
        self.learning_rate = learning_rate

    def _get_weight_from_population(self, weights, population):
        weights_population = []
        for index, i in enumerate(population):
            jittered = self.sigma * i
            weights_population.append(weights[index] + jittered)
        return weights_population

    def get_weights(self):
        return self.weights

    def train(self, epoch = 100, print_every = 1):
        lasttime = time.time()
        for i in range(epoch):
            population = []
            rewards = np.zeros(self.population_size)
            for k in range(self.population_size):
                x = []
                for w in self.weights:
                    x.append(np.random.randn(*w.shape))
                population.append(x)
            for k in range(self.population_size):
                weights_population = self._get_weight_from_population(
                    self.weights, population[k]
                )
                rewards[k] = self.reward_function(weights_population)
            rewards = (rewards - np.mean(rewards)) / np.std(rewards)
            for index, w in enumerate(self.weights):
                A = np.array([p[index] for p in population])
                self.weights[index] = (
                    w
                    + self.learning_rate
                    / (self.population_size * self.sigma)
                    * np.dot(A.T, rewards).T
                )
            if (i + 1) % print_every == 0:
                print(
                    'iter %d. reward: %f'
                    % (i + 1, self.reward_function(self.weights))
                )
        print('time taken to train:', time.time() - lasttime, 'seconds')


class Model:
    def __init__(self, input_size, layer_size, output_size):
        self.weights = [
            np.random.randn(input_size, layer_size),
            np.random.randn(layer_size, output_size),
            np.random.randn(layer_size, 1),
            np.random.randn(1, layer_size),
        ]

    def predict(self, inputs):
        feed = np.dot(inputs, self.weights[0]) + self.weights[-1]
        decision = np.dot(feed, self.weights[1])
        buy = np.dot(feed, self.weights[2])
        return decision, buy

    def get_weights(self):
        return self.weights

    def set_weights(self, weights):
        self.weights = weights

In [4]:
class Agent:

    POPULATION_SIZE = 15
    SIGMA = 0.1
    LEARNING_RATE = 0.03

    def __init__(self, model, money, max_buy, max_sell):
        self.model = model
        self.initial_money = money
        self.max_buy = max_buy
        self.max_sell = max_sell
        self.es = Deep_Evolution_Strategy(
            self.model.get_weights(),
            self.get_reward,
            self.POPULATION_SIZE,
            self.SIGMA,
            self.LEARNING_RATE,
        )

    def act(self, sequence):
        decision, buy = self.model.predict(np.array(sequence))
        return np.argmax(decision[0]), int(buy[0])

    def get_reward(self, weights):
        initial_money = self.initial_money
        starting_money = initial_money
        self.model.weights = weights
        state = get_state(close, 0, window_size + 1)
        inventory = []
        quantity = 0
        for t in range(0, l, skip):
            action, buy = self.act(state)
            next_state = get_state(close, t + 1, window_size + 1)
            if action == 1 and initial_money >= close[t]:
                if buy < 0:
                    buy = 1
                if buy > self.max_buy:
                    buy_units = self.max_buy
                else:
                    buy_units = buy
                total_buy = buy_units * close[t]
                initial_money -= total_buy
                inventory.append(total_buy)
                quantity += buy_units
            elif action == 2 and len(inventory) > 0:
                if quantity > self.max_sell:
                    sell_units = self.max_sell
                else:
                    sell_units = quantity
                quantity -= sell_units
                total_sell = sell_units * close[t]
                initial_money += total_sell

            state = next_state
        return ((initial_money - starting_money) / starting_money) * 100

    def fit(self, iterations, checkpoint):
        self.es.train(iterations, print_every = checkpoint)

    def buy(self):
        time_now = time.strftime('%Y-%m-%d %H:%M:%S',time.localtime(time.time()))
        initial_money = self.initial_money
        state = get_state(close, 0, window_size + 1)
        starting_money = initial_money
        states_sell = []
        states_buy = []
        inventory = []
        quantity = 0
        for t in range(0, l, skip):
            action, buy = self.act(state)
            next_state = get_state(close, t + 1, window_size + 1)
            if action == 1 and initial_money >= close[t]:
                if buy < 0:
                    buy = 1
                if buy > self.max_buy:
                    buy_units = self.max_buy
                else:
                    buy_units = buy
                total_buy = buy_units * close[t]
                initial_money -= total_buy
                inventory.append(total_buy)
                quantity += buy_units
                states_buy.append(t)
                if t>=l-1:
                    print(
                        '\r%s: buy %d units at price %f, total balance %f'
                        % (time_now, buy_units, total_buy, initial_money), end= "" 
                    )
            elif action == 2 and len(inventory) > 0:
                bought_price = inventory.pop(0)
                if quantity > self.max_sell:
                    sell_units = self.max_sell
                else:
                    sell_units = quantity
                if sell_units < 1:
                    if t>=l-1:
                        print('\r%s, nothing to do,'% (time_now),end="")
                    continue
                quantity -= sell_units
                total_sell = sell_units * close[t]
                initial_money += total_sell
                states_sell.append(t)
                try:
                    invest = ((total_sell - bought_price) / bought_price) * 100
                except:
                    invest = 0
                if t>=l-1:
                    print(
                        '\r%s, sell %d units at price %f, investment %f %%, total balance %f, '
                        % (time_now, sell_units, total_sell, invest, initial_money),end=""
                    )
            else:
                if t>=l-1:
                    print('\r%s, nothing to do,'% (time_now),end="")
            state = next_state

#         invest = ((initial_money - starting_money) / starting_money) * 100
#         print(
#             '\ntotal gained %f, total investment %f %%, hold %f units'
#             % (initial_money - starting_money, invest, quantity)
#         )
        
#         plt.figure(figsize = (15, 5))
        
#         plt.plot(close, label = 'true close', c = 'g')
#         plt.plot(close, 'X', label = 'predict buy', 
#                  markevery = states_buy, c = 'b')
#         plt.plot(close, 'o', label = 'predict sell', 
#                  markevery = states_sell, c = 'r')
#         plt.legend()
#         plt.show()

In [5]:
# data
# filename = '47#IFL8' # 沪深主连
filename = '47#IH50' # 上证50 ???
# filename = '47#IC500' # 中证500
# filename = 'NYKAU' # 纽约黄金期货价格
# filename = '30#AUL9' # 黄金指数
# filename = 'SH#601952' # SKNF
# filename = 'SH#512880'
if filename == '47#IC500':
    url_sina = 'http://hq.sinajs.cn/list=sh000905'
    url_126 = 'http://img1.money.126.net/data/hs/kline/day/history/'+time.strftime('%Y',time.localtime(time.time()))+'/0000905.json'
if filename == '47#IH50':
    url_sina = 'http://hq.sinajs.cn/list=sh000016'
    url_126 = 'http://img1.money.126.net/data/hs/kline/day/history/'+time.strftime('%Y',time.localtime(time.time()))+'/0000016.json'
if filename == 'SH#601952':
    url_sina = 'http://hq.sinajs.cn/list=sh601952'
    url_126 = 'http://img1.money.126.net/data/hs/kline/day/history/'+time.strftime('%Y',time.localtime(time.time()))+'/0601952.json'
if filename == 'SH#512880':
    url_sina = 'http://hq.sinajs.cn/list=sh512880'
    url_126 = 'http://img1.money.126.net/data/hs/kline/day/history/'+time.strftime('%Y',time.localtime(time.time()))+'/0512880.json'


In [6]:
df = pd.read_csv('../dataset/'+filename+'.csv')
date = df.Date.values.tolist()

In [7]:
# download data to csv
import json

headers = {'User-Agent': 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'}
response = requests.get(url_126,headers = headers)
response.encoding = 'utf-8'
data_now = response.json()

for ind, dd in enumerate(date):
    if data_now['data'][0][0] == dd.replace('/',''):
        break
        
df = df.drop(labels=range(ind,len(df)),axis=0) 
        
for _, dd in enumerate(data_now['data']):
    dins = {'Date':dd[0][:4]+'/'+dd[0][4:6]+'/'+dd[0][6:],
            'Open':dd[1],
            'High':dd[3],
            'Low':dd[4],
            'Close':dd[2],
            'Adj Close':dd[2],
            'Volume':dd[5]}
    df = df.append(pd.DataFrame(dins, index=[0]))
df.to_csv('../dataset/'+filename+'.csv',index=False,sep=',')

In [8]:
money = 10000 # set the parameter initmoney
if money == 10000:
    pars = ''
else:
    pars = str(money)
pars = ''
# weight
# filename = '47#IFL8' # 沪深主连
# filename = '47#IH50' # 上证50 ???
filename = '47#IC500' # 中证500
# filename = 'NYKAU' # 纽约黄金期货价格
# filename = '30#AUL9' # 黄金指数
# filename = 'SH#601952' # SKNF
# filename = 'SH#512880'

In [9]:
df = pd.read_csv('../dataset/'+filename+'.csv')
df.tail()

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
1290,2020/08/19,6782.59,6782.59,6661.99,6666.27,6666.27,19430913600
1291,2020/08/20,6622.01,6672.49,6570.69,6593.78,6593.78,15764625800
1292,2020/08/21,6628.9,6686.02,6589.2,6643.94,6643.94,14296349300
1293,2020/08/24,6667.3,6702.75,6589.93,6688.96,6688.96,14302035400
1294,2020/08/25,6698.12,6734.44,6631.69,6652.55,6652.55,14709813000


In [10]:
close = (1*df.Close.values).tolist()#[1000:]
date = (df.Date.values).tolist()#[1000:]
while True:
    time_now = float(time.strftime('%H',time.localtime(time.time())))+\
               float(time.strftime('%M',time.localtime(time.time())))/60
    response = requests.get(url_sina,headers = headers)
    response.encoding = 'utf-8'
    data_now = response.text.split(',')
    date_now = time.strftime('%Y/%m/%d',time.localtime(time.time()))

    if date[-1].replace('-','/') == date_now:
        close.pop()
        date.pop()
    close.append(float(data_now[3]))
    date.append(date_now)

    close.append(close[-1])
    date.append(date[-1])

    window_size = 30
    skip = 1
    l = len(close) - 1
    initial_money = money
    
    model = Model(window_size, 500, 3)
    model.set_weights(np.load('../log/free_agent/'+pars+filename+'_evolution_strategy_agent.npy',
                              allow_pickle=True).tolist())
    agent = Agent(model, money, 5, 5)
    
    agent.buy()
    
    close.pop()
    date.pop()
    
    if time_now>=15 or time_now<=9.5 or (time_now>11.5 and time_now<13.5):
        print("\nNot trading time.")
        break
    time.sleep( 5 )

2020-08-25 08:16:14: buy 5 units at price 16554.194500, total balance 213880.285500
Not trading time.


In [11]:
print(date[-10:])
print(close[-10:])

print(time.strftime('%Y/%m/%d',time.localtime(time.time())))
date_now = time.strftime('%Y/%m/%d',time.localtime(time.time()))
print(','.join([date_now, data_now[1],data_now[4],data_now[5],data_now[3],data_now[3],data_now[8]]))

['2020/08/12', '2020/08/13', '2020/08/14', '2020/08/17', '2020/08/18', '2020/08/19', '2020/08/20', '2020/08/21', '2020/08/24', '2020/08/25']
[6528.4, 6552.66, 6623.56, 6744.55, 6789.4, 6666.27, 6593.78, 6643.94, 6688.96, 3310.8389]
2020/08/25
2020/08/25,3312.0329,3335.0343,3300.1984,3310.8389,3310.8389,28449955


In [12]:
print("balance:"+str(1000000-(3298.72)*5-6652.55*5))
print("total:"+str(1000000+(3310.84-3298.72)*5+(6652.55-6652.55)*5))
print(time.strftime('%H:%M:%S',time.localtime(time.time())))
 

balance:950243.65
total:1000060.6
08:16:16
