In [2]:
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
import seaborn as sns
sns.set()

In [8]:
# Load the gold price history; the CSV is decoded as GBK.
df = pd.read_csv('../dataset/gold_data.csv', encoding='GBK')
# Preview the first rows to check that the columns parsed as expected.
df.head()

Unnamed: 0,Date,Close,Open,High,Low,Vol,Per
0,2012年1月20日,1663.7,1652.8,1664.3,1652.8,0.01K,0.58%
1,2012年1月23日,1678.0,1676.9,1680.0,1669.9,0.01K,0.86%
2,2012年1月24日,1664.2,1666.9,1668.2,1664.4,0.01K,-0.82%
3,2012年1月25日,1699.8,1673.0,1706.2,1658.3,0.24K,2.14%
4,2012年1月26日,1726.3,1729.0,1729.0,1721.0,0.02K,1.56%


In [4]:
from collections import deque
import random


class Agent:
    """Q-learning trading agent over a one-dimensional price series.

    The state at time ``t`` is the vector of the last ``window_size``
    one-step price differences.  A single-hidden-layer network maps a state
    to one value per action (0 = hold, 1 = buy one unit, 2 = sell one unit);
    those values are used as Q-value estimates.

    The constructor uses the TensorFlow 1.x graph API (``tf.placeholder``,
    ``tf.layers``, ``tf.InteractiveSession``), so the module-level name ``tf``
    must be bound to a TF1-compatible API (for example
    ``tensorflow.compat.v1`` with v2 behaviour disabled) before an ``Agent``
    is instantiated.
    """

    def __init__(self, state_size, window_size, trend, skip, batch_size):
        """Build the Q-network and initialise the replay memory.

        Args:
            state_size: Width of the network input; must equal the length of
                the vectors returned by ``get_state`` (i.e. ``window_size``).
            window_size: Number of price differences in one state.
            trend: Sequence of prices (indexable, with ``len``).
            skip: Step between consecutive time indices visited in
                ``train`` and ``buy``.
            batch_size: Upper bound on the number of most recent transitions
                replayed after every step.
        """
        self.state_size = state_size
        self.window_size = window_size
        self.half_window = window_size // 2
        self.trend = trend
        self.skip = skip
        self.action_size = 3
        self.batch_size = batch_size
        # Bounded replay buffer: the oldest transitions are dropped first.
        self.memory = deque(maxlen = 1000)
        self.inventory = []

        # Discount factor and epsilon-greedy exploration schedule.
        self.gamma = 0.95
        self.epsilon = 0.5
        self.epsilon_min = 0.01
        self.epsilon_decay = 0.999

        tf.reset_default_graph()
        self.sess = tf.InteractiveSession()
        self.X = tf.placeholder(tf.float32, [None, self.state_size])
        self.Y = tf.placeholder(tf.float32, [None, self.action_size])
        feed = tf.layers.dense(self.X, 256, activation = tf.nn.relu)
        # One linear output per action; no softmax is applied, the raw values
        # are regressed against the Q-targets built in ``replay``.
        self.logits = tf.layers.dense(feed, self.action_size)
        # Mean squared error between target and predicted action values.
        self.cost = tf.reduce_mean(tf.square(self.Y - self.logits))
        # Plain gradient descent on that loss.
        self.optimizer = tf.train.GradientDescentOptimizer(1e-5).minimize(
            self.cost
        )
        self.sess.run(tf.global_variables_initializer())

    def act(self, state):
        """Pick an action epsilon-greedily.

        With probability ``self.epsilon`` a uniformly random action is
        returned; otherwise the action with the largest network output for
        ``state`` (an array of shape ``(1, state_size)``).
        """
        if random.random() <= self.epsilon:
            return random.randrange(self.action_size)
        return np.argmax(
            self.sess.run(self.logits, feed_dict = {self.X: state})[0]
        )

    def get_state(self, t):
        """Return the state at time index ``t`` as a ``(1, window_size)`` array.

        The state holds the consecutive differences of the ``window_size + 1``
        prices ending at ``t``.  When fewer prices are available, the series
        is left-padded with its first price, so the padded differences are 0.
        """
        window_size = self.window_size + 1
        d = t - window_size + 1
        if d >= 0:
            block = self.trend[d : t + 1]
        else:
            # list(...) keeps the concatenation valid for non-list sequences
            # such as numpy arrays, where ``+`` would not mean "append".
            block = -d * [self.trend[0]] + list(self.trend[0 : t + 1])
        res = [block[i + 1] - block[i] for i in range(window_size - 1)]
        return np.array([res])

    def replay(self, batch_size):
        """Run one gradient step on the ``batch_size`` most recent transitions.

        For each transition ``(state, action, reward, next_state, done)`` the
        target for the taken action is ``reward`` plus, when ``done`` is
        false, ``gamma * max(Q(next_state))``; targets of the other actions
        are the current predictions.  Epsilon is decayed once per call while
        it is above ``epsilon_min``.

        Returns:
            The loss value computed by the training step.
        """
        memory_len = len(self.memory)
        # The deque does not support slicing, so index the tail explicitly.
        mini_batch = [
            self.memory[i] for i in range(memory_len - batch_size, memory_len)
        ]
        replay_size = len(mini_batch)
        X = np.empty((replay_size, self.state_size))
        Y = np.empty((replay_size, self.action_size))
        # Stored states have shape (1, state_size); [0] strips the batch axis.
        states = np.array([a[0][0] for a in mini_batch])
        new_states = np.array([a[3][0] for a in mini_batch])
        Q = self.sess.run(self.logits, feed_dict = {self.X: states})
        Q_new = self.sess.run(self.logits, feed_dict = {self.X: new_states})
        # Build one regression target per transition in the mini-batch.
        for i, (state, action, reward, _next_state, done) in enumerate(mini_batch):
            target = Q[i]
            target[action] = reward
            if not done:
                target[action] += self.gamma * np.amax(Q_new[i])
            X[i] = state
            Y[i] = target
        cost, _ = self.sess.run(
            [self.cost, self.optimizer], feed_dict = {self.X: X, self.Y: Y}
        )
        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay
        return cost

    def buy(self, initial_money):
        """Walk the price series once, trading according to ``act``.

        Actions are still chosen through ``act`` and are therefore subject to
        the current epsilon.  Every trade is printed.  Units are sold in the
        order they were bought (FIFO).

        Args:
            initial_money: Starting cash balance.

        Returns:
            ``(states_buy, states_sell, total_gains, invest)``: the time
            indices of buys and sells, the change in cash balance, and that
            change as a percentage of ``initial_money``.
        """
        starting_money = initial_money
        states_sell = []
        states_buy = []
        inventory = []
        state = self.get_state(0)
        for t in range(0, len(self.trend) - 1, self.skip):
            action = self.act(state)
            next_state = self.get_state(t + 1)
            # Use the agent's own series, not a notebook-level global.
            price = self.trend[t]

            # Buy only with enough cash and not within the last half window.
            if action == 1 and initial_money >= price and t < (len(self.trend) - self.half_window):
                inventory.append(price)
                initial_money -= price
                states_buy.append(t)
                print('day %d: buy 1 unit at price %f, total balance %f'% (t, price, initial_money))

            elif action == 2 and len(inventory):
                bought_price = inventory.pop(0)
                initial_money += price
                states_sell.append(t)
                try:
                    invest = ((price - bought_price) / bought_price) * 100
                except ZeroDivisionError:
                    # A zero purchase price has no defined return; report 0.
                    invest = 0
                print(
                    'day %d, sell 1 unit at price %f, investment %f %%, total balance %f,'
                    % (t, price, invest, initial_money)
                )

            state = next_state
        invest = ((initial_money - starting_money) / starting_money) * 100
        total_gains = initial_money - starting_money
        return states_buy, states_sell, total_gains, invest

    def train(self, iterations, checkpoint, initial_money):
        """Train the Q-network for ``iterations`` passes over the series.

        After every step the transition is appended to the replay memory and
        ``replay`` is called.  Every ``checkpoint`` iterations the realised
        profit, last loss and final cash balance are printed.

        Args:
            iterations: Number of passes (episodes) over ``self.trend``.
            checkpoint: Print a progress line every this many iterations.
            initial_money: Cash balance at the start of every episode.
        """
        for i in range(iterations):
            total_profit = 0  # realised profit of this episode
            inventory = []  # purchase prices of the units currently held
            state = self.get_state(0)
            starting_money = initial_money  # running cash balance
            # Defined up front so the checkpoint print cannot hit an unbound
            # name when the series is too short for the loop to run.
            cost = float('nan')
            for t in range(0, len(self.trend) - 1, self.skip):
                action = self.act(state)
                next_state = self.get_state(t + 1)

                # Buy only with enough cash and not within the last half window.
                if action == 1 and starting_money >= self.trend[t] and t < (len(self.trend) - self.half_window):
                    inventory.append(self.trend[t])
                    starting_money -= self.trend[t]

                # Sell only when at least one unit is held.
                elif action == 2 and len(inventory) > 0:
                    bought_price = inventory.pop(0)  # oldest purchase (FIFO)
                    total_profit += self.trend[t] - bought_price
                    starting_money += self.trend[t]

                # Reward: relative change of the cash balance since the start
                # of the episode (not the return of a single trade).
                invest = ((starting_money - initial_money) / initial_money)

                # Store (state, action, reward, next_state, done); a step is
                # flagged as done whenever cash is below the starting amount.
                self.memory.append((state, action, invest,
                                    next_state, starting_money < initial_money))
                state = next_state
                batch_size = min(self.batch_size, len(self.memory))
                cost = self.replay(batch_size)

            # Progress report every ``checkpoint`` iterations.
            if (i+1) % checkpoint == 0:
                print('epoch: %d, total rewards: %.3f, cost: %f, total money: %f'%(i + 1, total_profit, cost,
                                                                                  starting_money))

0


In [10]:
# Rebind `tf` to the TF1-compatible API before the Agent is built:
# Agent.__init__ calls tf.placeholder / tf.layers / tf.InteractiveSession.
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()
# Closing prices as a plain Python list (Agent.get_state concatenates lists).
close = df.Close.values.tolist()
# Experiment settings.
initial_money = 10000
window_size = 30
skip = 1
batch_size = 32
# state_size equals window_size: get_state returns window_size differences.
agent = Agent(state_size = window_size, 
              window_size = window_size, 
              trend = close, 
              skip = skip, 
              batch_size = batch_size)
# Train for 200 passes over the series, printing progress every 10 passes.
agent.train(iterations = 200, checkpoint = 10, initial_money = initial_money)

Instructions for updating:
Use keras.layers.Dense instead.
Instructions for updating:
Please use `layer.__call__` method instead.
epoch: 10, total rewards: 1850.650000.3, cost: 1.138304, total money: 11850.650000
epoch: 20, total rewards: 870.750000.3, cost: 0.465532, total money: 10870.750000
epoch: 30, total rewards: 379.800000.3, cost: 0.267367, total money: 10379.800000
epoch: 40, total rewards: 918.150000.3, cost: 0.198401, total money: 10918.150000
epoch: 50, total rewards: 82.950000.3, cost: 0.156209, total money: 10082.950000
epoch: 60, total rewards: 564.660000.3, cost: 0.113436, total money: 10564.660000
epoch: 70, total rewards: 586.510000.3, cost: 0.095100, total money: 10586.510000
epoch: 80, total rewards: 1112.460000.3, cost: 0.088213, total money: 11112.460000
epoch: 90, total rewards: 236.160000.3, cost: 0.063571, total money: 10236.160000
epoch: 100, total rewards: 711.060000.3, cost: 0.053318, total money: 10711.060000
epoch: 110, total rewards: 561.660000.3, cost: 0

In [11]:
# Replay the trained agent over the same price series, collecting the buy/sell
# time indices, the change in cash balance, and that change in percent.
states_buy, states_sell, total_gains, invest = agent.buy(initial_money = initial_money)

day 1: buy 1 unit at price 1678.000000, total balance 8322.000000
day 3, sell 1 unit at price 1699.800000, investment 1.299166 %, total balance 10021.800000,
day 4: buy 1 unit at price 1726.300000, total balance 8295.500000
day 7, sell 1 unit at price 1737.800000, investment 0.666165 %, total balance 10033.300000,
day 8: buy 1 unit at price 1747.100000, total balance 8286.200000
day 10: buy 1 unit at price 1737.900000, total balance 6548.300000
day 11: buy 1 unit at price 1722.800000, total balance 4825.500000
day 12, sell 1 unit at price 1746.400000, investment -0.040066 %, total balance 6571.900000,
day 14, sell 1 unit at price 1739.000000, investment 0.063295 %, total balance 8310.900000,
day 15: buy 1 unit at price 1723.300000, total balance 6587.600000
day 16: buy 1 unit at price 1723.000000, total balance 4864.600000
day 17: buy 1 unit at price 1715.900000, total balance 3148.700000
day 18, sell 1 unit at price 1726.300000, investment 0.203158 %, total balance 4875.000000,
day 19

day 443, sell 1 unit at price 1322.700000, investment 1.208968 %, total balance 8165.900000,
day 445, sell 1 unit at price 1315.700000, investment 3.354281 %, total balance 9481.600000,
day 449: buy 1 unit at price 1352.400000, total balance 8129.200000
day 450: buy 1 unit at price 1352.000000, total balance 6777.200000
day 452, sell 1 unit at price 1349.000000, investment -0.251405 %, total balance 8126.200000,
day 453, sell 1 unit at price 1323.600000, investment -2.100592 %, total balance 9449.800000,
day 462: buy 1 unit at price 1268.300000, total balance 8181.500000
day 463: buy 1 unit at price 1286.200000, total balance 6895.300000
day 465, sell 1 unit at price 1272.200000, investment 0.307498 %, total balance 8167.500000,
day 467, sell 1 unit at price 1257.900000, investment -2.200280 %, total balance 9425.400000,
day 471: buy 1 unit at price 1241.400000, total balance 8184.000000
day 472, sell 1 unit at price 1237.800000, investment -0.289995 %, total balance 9421.800000,
day 4

day 732, sell 1 unit at price 1314.500000, investment -0.053224 %, total balance 9425.700000,
day 735: buy 1 unit at price 1332.500000, total balance 8093.200000
day 737, sell 1 unit at price 1325.800000, investment -0.502814 %, total balance 9419.000000,
day 739: buy 1 unit at price 1310.800000, total balance 8108.200000
day 741, sell 1 unit at price 1350.200000, investment 3.005798 %, total balance 9458.400000,
day 742: buy 1 unit at price 1345.500000, total balance 8112.900000
day 743: buy 1 unit at price 1342.300000, total balance 6770.600000
day 744, sell 1 unit at price 1333.500000, investment -0.891862 %, total balance 8104.100000,
day 745: buy 1 unit at price 1317.700000, total balance 6786.400000
day 747: buy 1 unit at price 1302.300000, total balance 5484.100000
day 748, sell 1 unit at price 1303.900000, investment -2.860761 %, total balance 6788.000000,
day 751: buy 1 unit at price 1289.700000, total balance 5498.300000
day 752: buy 1 unit at price 1285.600000, total balance

day 1173, sell 1 unit at price 1398.800000, investment 0.967230 %, total balance 4127.050000,
day 1175, sell 1 unit at price 1416.200000, investment 2.645503 %, total balance 5543.250000,
day 1176: buy 1 unit at price 1429.400000, total balance 4113.850000
day 1177: buy 1 unit at price 1423.900000, total balance 2689.950000
day 1178, sell 1 unit at price 1427.900000, investment 2.564287 %, total balance 4117.850000,
day 1180, sell 1 unit at price 1405.300000, investment 1.737494 %, total balance 5523.150000,
day 1181, sell 1 unit at price 1410.600000, investment 1.336207 %, total balance 6933.750000,
day 1182, sell 1 unit at price 1415.600000, investment -0.965440 %, total balance 8349.350000,
day 1185, sell 1 unit at price 1410.600000, investment -0.934054 %, total balance 9759.950000,
day 1192: buy 1 unit at price 1390.400000, total balance 8369.550000
day 1193, sell 1 unit at price 1384.000000, investment -0.460299 %, total balance 9753.550000,
day 1194: buy 1 unit at price 1383.200

day 1641, sell 1 unit at price 1414.700000, investment -0.576288 %, total balance 8519.130000,
day 1642: buy 1 unit at price 1414.200000, total balance 7104.930000
day 1643, sell 1 unit at price 1413.800000, investment 0.604853 %, total balance 8518.730000,
day 1646, sell 1 unit at price 1420.400000, investment 0.438410 %, total balance 9939.130000,
day 1648: buy 1 unit at price 1389.800000, total balance 8549.330000
day 1649, sell 1 unit at price 1392.100000, investment 0.165491 %, total balance 9941.430000,
day 1650: buy 1 unit at price 1390.000000, total balance 8551.430000
day 1654: buy 1 unit at price 1389.200000, total balance 7162.230000
day 1660: buy 1 unit at price 1396.700000, total balance 5765.530000
day 1661, sell 1 unit at price 1395.000000, investment 0.359712 %, total balance 7160.530000,
day 1662, sell 1 unit at price 1390.100000, investment 0.064785 %, total balance 8550.630000,
day 1666: buy 1 unit at price 1394.800000, total balance 7155.830000
day 1667: buy 1 unit 

day 2112: buy 1 unit at price 1599.600000, total balance 3865.980000
day 2113, sell 1 unit at price 1606.900000, investment 0.180798 %, total balance 5472.880000,
day 2114: buy 1 unit at price 1586.250000, total balance 3886.630000
day 2115, sell 1 unit at price 1587.050000, investment -0.254541 %, total balance 5473.680000,
day 2116, sell 1 unit at price 1624.000000, investment 1.984426 %, total balance 7097.680000,
day 2117: buy 1 unit at price 1632.200000, total balance 5465.480000
day 2118, sell 1 unit at price 1640.800000, investment 2.575644 %, total balance 7106.280000,
day 2119, sell 1 unit at price 1669.100000, investment 5.223010 %, total balance 8775.380000,
day 2120: buy 1 unit at price 1697.100000, total balance 7078.280000
day 2121: buy 1 unit at price 1670.200000, total balance 5408.080000
day 2122, sell 1 unit at price 1662.900000, investment 1.880897 %, total balance 7070.980000,
day 2123, sell 1 unit at price 1661.400000, investment -2.103588 %, total balance 8732.380

In [1]:
# Price curve with the agent's buy (^) and sell (v) points overlaid.
# Needs `close`, `states_buy`, `states_sell`, `total_gains` and `invest`
# from the cells above, plus the matplotlib import from the first cell.
fig, ax = plt.subplots(figsize=(15, 5))
ax.plot(close, color='r', lw=2.)
signal_styles = (
    ('^', 'm', 'buying signal', states_buy),
    ('v', 'k', 'selling signal', states_sell),
)
for marker, colour, caption, positions in signal_styles:
    ax.plot(close, marker, markersize=10, color=colour, label=caption, markevery=positions)
ax.set_title('total gains %f, total investment %f%%'%(total_gains, invest))
ax.legend()
plt.show()

NameError: name 'plt' is not defined