# Cryptocurrency Trader Agent
Deep Q-Learning Implementation

In [3]:
from environment.env import Coin
from environment.portfolio import Portfolio
from environment.simulator import Simulator
from environment.simulator import Action

### Neural Networks

In [4]:
# Q Value Function Approximator
# Neural Network Implementation

from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import Adam
from keras import backend as K

class QValue_NN:
    
    # init neural network
    def __init__(self, state_size, action_size, units):
        # define input and target shapes
        self._state_size = state_size
        self._action_size = action_size

        self._units = units
        
        # init model
        self._model = self._build_model()
    
    
    # define loss function
    def _huber_loss(self, target, prediction):
        # sqrt(1+error^2)-1
        error = prediction - target
        return K.mean(K.sqrt(1+K.square(error))-1, axis=-1)

    
    # neural net for Deep-Q Learning Model
    def _build_model(self):
        model = Sequential()
        model.add(Dense(self._units, input_dim=self._state_size, activation='relu'))
        model.add(Dense(self._units, activation='relu'))
        model.add(Dense(self._action_size, activation='linear'))
        model.compile(loss=self._huber_loss, optimizer='adam')
        return model

    
    # online training
    def train(self, state, qvalues):
        state_reshape = np.reshape(state, [1, len(state)])
        self._model.fit(state_reshape, qvalues, epochs=1, verbose=0)
    
    
    # get q-values based on state
    def predict(self, state):
        state_reshape = np.reshape(state, [1, len(state)])
        return self._model.predict(state_reshape)
    
    def set_weights(self, model_weights):
        self._model.set_weights(model_weights)
        
    def get_weights(self):
        return self._model.get_weights()
    

### Deep Q Learning Implementation

In [5]:
# Cryptocurrency Trader Q-Learning Implementation

import random
import numpy as np
from collections import deque

class Crypto_Trader:

    def __init__(self, gamma = 0.95, epsilon = 1.0, epsilon_min = 0.01, epsilon_decay = 0.99, num_episodes = 1000,
                num_neutron = 24, num_coins_per_order = 100, init_capital = 1000, coin_name = 'ethereum'):

        self.memory = deque(maxlen=2000)
        self.batch_size = 32
        
        # Reward Discount Rate
        self.gamma = gamma

        # Esiplon (exploration factor)
        self.epsilon = epsilon
        
        self.epsilon_min = epsilon_min
        
        # Reduce exploration overtime
        self.epsilon_decay = epsilon_decay

        # number of episodes for training
        self.num_episodes = num_episodes
        
        # init simulator
        self.simulator = Simulator(num_coins_per_order, init_capital, Coin(coin_name))
        
        # init NN model
        self.model = QValue_NN(self.simulator.get_state_size(), self.simulator.get_action_size(), num_neutron)
        self.target_model = QValue_NN(self.simulator.get_state_size(), self.simulator.get_action_size(), num_neutron)

        
        
    def act(self, state):
        # Choose action by e-greedy
        if np.random.rand() <= self.epsilon:
            #print 'random'
            return self.simulator.get_ran_action()
        
        # Get Q values, choose action by Q values
        act_values = self.model.predict(state)
        return Action(np.argmax(act_values))
    
    
    
    def remember(self, state, action, reward, next_state, isDone):
        self.memory.append((state, action, reward, next_state, isDone))
        
        
    def update_target_model(self):
        # copy weights from model to target_model
        self.target_model.set_weights(self.model.get_weights())
        
        
    def replay(self, batch_size):
        minibatch = random.sample(self.memory, self.batch_size)
        
        for state, action, reward, next_state, isDone in minibatch:
            target = self.model.predict(state)
            if isDone:
                target[0][action.value] = reward
            else:
                a = self.model.predict(next_state)[0]
                t = self.target_model.predict(next_state)[0]
                target[0][action.value] = reward + self.gamma * t[np.argmax(a)]
            self.model.train(state, target)
        
        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay
    
    
        
    def train(self):
        
        for i in range(self.num_episodes):
            
            self.simulator.reset()
            state = self.simulator.get_current_state()
            
            while (True):
                
                action = self.act(state)
                #print action
                
                # step to the next state and reward based on action
                next_state, reward, isDone = self.simulator.act_and_step(action)
                #print next_state
                #print reward
                
                self.remember(state, action, reward, next_state, isDone)
                state = next_state
                
                if isDone:
                    self.update_target_model()
                    print("episode: {}/{}, reward: {}, epsilon: {:.2}"
                          .format(i+1, self.num_episodes, reward, self.epsilon))
                    #print self.simulator.get_current_holdings()
                    break
                    
            if len(self.memory) > self.batch_size:
                self.replay(self.batch_size)
                
        self.epsilon = 0.0 # now that we are done training, make our action deterministic

In [6]:
trader = Crypto_Trader(num_episodes = 1000)

In [7]:
trader.train()

episode: 1/1000, reward: 2414.55667324, epsilon: 1.0
episode: 2/1000, reward: 4247.31506444, epsilon: 0.99
episode: 3/1000, reward: 3201.67729264, epsilon: 0.98
episode: 4/1000, reward: 2754.5856353, epsilon: 0.97
episode: 5/1000, reward: 3118.05127385, epsilon: 0.96
episode: 6/1000, reward: 2905.47488433, epsilon: 0.95
episode: 7/1000, reward: 2626.69140139, epsilon: 0.94
episode: 8/1000, reward: 6893.2645439, epsilon: 0.93
episode: 9/1000, reward: 3033.42422418, epsilon: 0.92
episode: 10/1000, reward: 508.103779441, epsilon: 0.91
episode: 11/1000, reward: 5296.03828463, epsilon: 0.9
episode: 12/1000, reward: 1222.58480338, epsilon: 0.9
episode: 13/1000, reward: 8744.80971638, epsilon: 0.89
episode: 14/1000, reward: 1096.58088646, epsilon: 0.88
episode: 15/1000, reward: 1264.00893576, epsilon: 0.87
episode: 16/1000, reward: 9803.19565723, epsilon: 0.86
episode: 17/1000, reward: 17431.1580759, epsilon: 0.85
episode: 18/1000, reward: 399.808508645, epsilon: 0.84
episode: 19/1000, reward

episode: 150/1000, reward: -4.26007555417, epsilon: 0.22
episode: 151/1000, reward: 7173.61179912, epsilon: 0.22
episode: 152/1000, reward: 227.550569166, epsilon: 0.22
episode: 153/1000, reward: 29.6716724155, epsilon: 0.22
episode: 154/1000, reward: 4969.84266571, epsilon: 0.21
episode: 155/1000, reward: 4554.49306222, epsilon: 0.21
episode: 156/1000, reward: 2844.18532996, epsilon: 0.21
episode: 157/1000, reward: 1638.33798202, epsilon: 0.21
episode: 158/1000, reward: 76.4242469313, epsilon: 0.21
episode: 159/1000, reward: 505.332110553, epsilon: 0.2
episode: 160/1000, reward: 7966.73763601, epsilon: 0.2
episode: 161/1000, reward: 21348.6955903, epsilon: 0.2
episode: 162/1000, reward: 16460.0037354, epsilon: 0.2
episode: 163/1000, reward: 19126.2341649, epsilon: 0.2
episode: 164/1000, reward: 77.3528857677, epsilon: 0.19
episode: 165/1000, reward: 440.775466972, epsilon: 0.19
episode: 166/1000, reward: 1154.59038838, epsilon: 0.19
episode: 167/1000, reward: 22078.3022668, epsilon: 0

episode: 296/1000, reward: 21078.3746652, epsilon: 0.052
episode: 297/1000, reward: 21058.7497081, epsilon: 0.051
episode: 298/1000, reward: 21123.9474348, epsilon: 0.051
episode: 299/1000, reward: -3.76856135569, epsilon: 0.05
episode: 300/1000, reward: 18652.9113229, epsilon: 0.05
episode: 301/1000, reward: 20613.3288424, epsilon: 0.049
episode: 302/1000, reward: 18328.5451657, epsilon: 0.049
episode: 303/1000, reward: 18891.4671471, epsilon: 0.048
episode: 304/1000, reward: 20330.5333182, epsilon: 0.048
episode: 305/1000, reward: 34.6411673511, epsilon: 0.047
episode: 306/1000, reward: 24.9232941725, epsilon: 0.047
episode: 307/1000, reward: 19641.1182231, epsilon: 0.046
episode: 308/1000, reward: 20881.6045978, epsilon: 0.046
episode: 309/1000, reward: 18903.3946062, epsilon: 0.045
episode: 310/1000, reward: 20657.8498216, epsilon: 0.045
episode: 311/1000, reward: 19963.1105311, epsilon: 0.044
episode: 312/1000, reward: 20167.3371613, epsilon: 0.044
episode: 313/1000, reward: -6.50

episode: 441/1000, reward: 20656.0184222, epsilon: 0.012
episode: 442/1000, reward: 20438.3436288, epsilon: 0.012
episode: 443/1000, reward: 21475.4434519, epsilon: 0.012
episode: 444/1000, reward: 22473.1572828, epsilon: 0.012
episode: 445/1000, reward: 20906.5904139, epsilon: 0.012
episode: 446/1000, reward: 19893.1309483, epsilon: 0.011
episode: 447/1000, reward: 20761.4338997, epsilon: 0.011
episode: 448/1000, reward: 20939.6043334, epsilon: 0.011
episode: 449/1000, reward: 20392.3731821, epsilon: 0.011
episode: 450/1000, reward: 21032.0633472, epsilon: 0.011
episode: 451/1000, reward: 20654.8089692, epsilon: 0.011
episode: 452/1000, reward: 20293.5651619, epsilon: 0.011
episode: 453/1000, reward: 20654.8089692, epsilon: 0.011
episode: 454/1000, reward: 20060.5369261, epsilon: 0.011
episode: 455/1000, reward: 19707.546377, epsilon: 0.01
episode: 456/1000, reward: 21086.9130237, epsilon: 0.01
episode: 457/1000, reward: 20327.624395, epsilon: 0.01
episode: 458/1000, reward: 20242.203

episode: 583/1000, reward: 18862.9924393, epsilon: 0.0099
episode: 584/1000, reward: 20526.7611188, epsilon: 0.0099
episode: 585/1000, reward: 21561.616001, epsilon: 0.0099
episode: 586/1000, reward: 20063.0299767, epsilon: 0.0099
episode: 587/1000, reward: 20654.8089692, epsilon: 0.0099
episode: 588/1000, reward: 20647.7372749, epsilon: 0.0099
episode: 589/1000, reward: 20025.1279076, epsilon: 0.0099
episode: 590/1000, reward: 20224.1479811, epsilon: 0.0099
episode: 591/1000, reward: 9.27672256447, epsilon: 0.0099
episode: 592/1000, reward: 8.57152061907, epsilon: 0.0099
episode: 593/1000, reward: -3.32668186089, epsilon: 0.0099
episode: 594/1000, reward: 139.896026124, epsilon: 0.0099
episode: 595/1000, reward: 20855.6445978, epsilon: 0.0099
episode: 596/1000, reward: 6.60896900585, epsilon: 0.0099
episode: 597/1000, reward: 19707.546377, epsilon: 0.0099
episode: 598/1000, reward: 19917.4676772, epsilon: 0.0099
episode: 599/1000, reward: 20783.3228624, epsilon: 0.0099
episode: 600/10

episode: 726/1000, reward: 21030.4319681, epsilon: 0.0099
episode: 727/1000, reward: 20678.5455349, epsilon: 0.0099
episode: 728/1000, reward: 20790.4036929, epsilon: 0.0099
episode: 729/1000, reward: -1.91593662628, epsilon: 0.0099
episode: 730/1000, reward: 20710.7952508, epsilon: 0.0099
episode: 731/1000, reward: 20649.0011383, epsilon: 0.0099
episode: 732/1000, reward: -0.41843811861, epsilon: 0.0099
episode: 733/1000, reward: 87.3623861765, epsilon: 0.0099
episode: 734/1000, reward: 20507.1011819, epsilon: 0.0099
episode: 735/1000, reward: -16.6392320485, epsilon: 0.0099
episode: 736/1000, reward: 16445.5775156, epsilon: 0.0099
episode: 737/1000, reward: 20351.7764338, epsilon: 0.0099
episode: 738/1000, reward: 19025.7087038, epsilon: 0.0099
episode: 739/1000, reward: 20811.8636276, epsilon: 0.0099
episode: 740/1000, reward: 2429.59183673, epsilon: 0.0099
episode: 741/1000, reward: 20339.70602, epsilon: 0.0099
episode: 742/1000, reward: 3881.44889253, epsilon: 0.0099
episode: 743/

episode: 869/1000, reward: 20835.3313785, epsilon: 0.0099
episode: 870/1000, reward: 20791.7683148, epsilon: 0.0099
episode: 871/1000, reward: 21002.9983528, epsilon: 0.0099
episode: 872/1000, reward: 20348.2758475, epsilon: 0.0099
episode: 873/1000, reward: 20635.2727715, epsilon: 0.0099
episode: 874/1000, reward: 20654.8089692, epsilon: 0.0099
episode: 875/1000, reward: 20654.8089692, epsilon: 0.0099
episode: 876/1000, reward: 20708.2246975, epsilon: 0.0099
episode: 877/1000, reward: 20654.8089692, epsilon: 0.0099
episode: 878/1000, reward: 0.0, epsilon: 0.0099
episode: 879/1000, reward: 20654.8089692, epsilon: 0.0099
episode: 880/1000, reward: 20980.7387742, epsilon: 0.0099
episode: 881/1000, reward: 4719.93904711, epsilon: 0.0099
episode: 882/1000, reward: 581.668194317, epsilon: 0.0099
episode: 883/1000, reward: 14090.3119029, epsilon: 0.0099
episode: 884/1000, reward: 2225.87959343, epsilon: 0.0099
episode: 885/1000, reward: 5056.97984444, epsilon: 0.0099
episode: 886/1000, rewar

## Benchmarking Crypto_Trader

In [11]:
print "epsilon is now %.4f" % trader.epsilon

epsilon is now 0.0000


In [21]:
def run_crypto_trader(trader, num_coins_per_order, init_capital, coin_name):
    simulator = Simulator(num_coins_per_order, init_capital, Coin(coin_name))
    state = simulator.get_current_state()

    i = 0
    while True:
        action = trader.act(state)
                
        next_state, reward, isDone = simulator.act_and_step(action)
        print("time: {}, action: {}, reward: {}, state: {}, next_state: {}"
              .format(i+1, action, reward, str(state), str(next_state)))
        state = next_state
        i += 1
        
        if isDone:
            break
    
    return simulator

In [22]:
simulator_crpyto = run_crypto_trader(trader, 100, 1000, "ethereum")
print simulator_crpyto.get_current_holdings()

time: 1, action: Action.BUY, reward: 0.0, state: [1000.0, 0.0, 0.0, 0, 0, 0], next_state: [1000.0, 0.0, 0.0, 0, 0, 0]
time: 2, action: Action.BUY, reward: -0.4, state: [1000.0, 0.0, 0.0, 0, 0, 0], next_state: [996.0, -0.002, 0.002, -15.874507866387544, 0, 0]
time: 3, action: Action.BUY, reward: -42.07728, state: [996.0, -0.002, 0.002, -15.874507866387544, 0, 0], next_state: [579.22720000000004, -0.14081552878179385, 0.19632159516620867, -11.386303261562263, 0, 0]
time: 4, action: Action.BUY, reward: -41.84169, state: [579.22720000000004, -0.14081552878179385, 0.19632159516620867, -11.386303261562263, 0, 0], next_state: [581.58309999999994, -0.10459481761146303, 0.18122485011583575, -9.1620506453460848, 0, 0]
time: 5, action: Action.BUY, reward: -42.07777, state: [581.58309999999994, -0.10459481761146303, 0.18122485011583575, -9.1620506453460848, 0, 0], next_state: [579.2222999999999, -0.084487707115848826, 0.16700640888761442, -8.0308341467670203, 0, 0]
time: 6, action: Action.BUY, rew

time: 83, action: Action.BUY, reward: -39.2357408431, state: [494.5837209278688, -0.0024400451051689366, 0.11060888757038101, -0.35019351579408781, 1, 0], next_state: [607.64259156918024, 0.00034349759572426658, 0.11279303838766294, 0.048343899263258966, 0, 0]
time: 84, action: Action.BUY, reward: -29.5335583607, state: [607.64259156918024, 0.00034349759572426658, 0.11279303838766294, 0.048343899263258966, 0, 0], next_state: [704.66441639344271, 0.0022402325075168966, 0.11344344403868015, 0.3134829770417451, 0, 0]
time: 85, action: Action.BUY, reward: -15.579807541, state: [704.66441639344271, 0.0022402325075168966, 0.11344344403868015, 0.3134829770417451, 0, 0], next_state: [844.20192459016391, 0.0045435215601366723, 0.11473291555474373, 0.6286440852544769, 0, 0]
time: 86, action: Action.BUY, reward: -26.7428081967, state: [844.20192459016391, 0.0045435215601366723, 0.11473291555474373, 0.6286440852544769, 0, 0], next_state: [732.57191803278693, 0.002953115437824878, 0.115002498220464

time: 150, action: Action.BUY, reward: -34.5269080915, state: [660.9898740154099, 0.0016122639921012313, 0.093994766131300811, 0.27229066552016729, 0, 0], next_state: [654.73091908524589, 0.0015383884770983913, 0.093685266366577466, 0.26067236533972965, 0, 0]
time: 151, action: Action.BUY, reward: -32.1816314225, state: [654.73091908524589, 0.0015383884770983913, 0.093685266366577466, 0.26067236533972965, 0, 0], next_state: [678.18368577540991, 0.0017654221037244375, 0.093415926992215501, 0.30000459210133851, 0, 0]
time: 152, action: Action.BUY, reward: -33.5001213374, state: [678.18368577540991, 0.0017654221037244375, 0.093415926992215501, 0.30000459210133851, 0, 0], next_state: [664.99878662590163, 0.0016259029664470116, 0.093123913699142233, 0.27716199207683934, 0, 0]
time: 153, action: Action.BUY, reward: -33.7177300814, state: [664.99878662590163, 0.0016259029664470116, 0.093123913699142233, 0.27716199207683934, 0, 0], next_state: [662.82269918557392, 0.0015938884509075673, 0.0928

time: 217, action: Action.BUY, reward: 728.852798689, state: [6830.3610262295078, 0.013418442075036121, 0.095745794373305854, 2.2247573971166976, 0, 0], next_state: [8288.5279868852467, 0.014340399236291329, 0.096481152548667515, 2.3594948284724713, 1, 0]
time: 218, action: Action.BUY, reward: 682.805420984, state: [8288.5279868852467, 0.014340399236291329, 0.096481152548667515, 2.3594948284724713, 1, 0], next_state: [7828.0542098360665, 0.014019775590457169, 0.096375413019862174, 2.3092719493698382, 0, 0]
time: 219, action: Action.BUY, reward: 676.526233115, state: [7828.0542098360665, 0.014019775590457169, 0.096375413019862174, 2.3092719493698382, 0, 0], next_state: [7765.2623311475418, 0.013919130997010646, 0.096166607848359786, 2.2976723360539419, 0, 0]
time: 220, action: Action.BUY, reward: 842.575867869, state: [7765.2623311475418, 0.013919130997010646, 0.096166607848359786, 2.2976723360539419, 0, 0], next_state: [9425.7586786885258, 0.014827846210603422, 0.096885613988427335, 2.

time: 276, action: Action.BUY, reward: 553.733225902, state: [6502.447881967214, 0.010948287661859879, 0.091935397279650458, 1.8904435479079393, 0, 0], next_state: [6537.3322590163934, 0.01092805765897978, 0.091769309707760427, 1.890365502631008, 0, 0]
time: 277, action: Action.BUY, reward: 562.105476393, state: [6537.3322590163934, 0.01092805765897978, 0.091769309707760427, 1.890365502631008, 0, 0], next_state: [6621.0547639344268, 0.010934840231725335, 0.091603580665076179, 1.894960939473342, 0, 0]
time: 278, action: Action.BUY, reward: 547.454038033, state: [6621.0547639344268, 0.010934840231725335, 0.091603580665076179, 1.894960939473342, 0, 0], next_state: [6474.5403803278687, 0.010815907150405705, 0.0914601003008437, 1.877290781187223, 0, 0]
time: 279, action: Action.BUY, reward: 557.919351148, state: [6474.5403803278687, 0.010815907150405705, 0.0914601003008437, 1.877290781187223, 0, 0], next_state: [6579.1935114754097, 0.01083507520041661, 0.091296605570633849, 1.88398555923134

time: 330, action: Action.BUY, reward: 767.923300984, state: [8790.8630163934431, 0.01060490618978316, 0.089733533890638831, 1.8760842177149237, 0, 0], next_state: [8679.2330098360653, 0.010534290071940143, 0.08960662577745003, 1.866231086852346, 0, 0]
time: 331, action: Action.BUY, reward: 752.574175082, state: [8679.2330098360653, 0.010534290071940143, 0.08960662577745003, 1.866231086852346, 0, 0], next_state: [8525.7417508196722, 0.010449035759155409, 0.089484568917946566, 1.8536525611133674, 0, 0]
time: 332, action: Action.BUY, reward: 745.597299672, state: [8525.7417508196722, 0.010449035759155409, 0.089484568917946566, 1.8536525611133674, 0, 0], next_state: [8455.9729967213116, 0.010392914247665043, 0.089355535139177045, 1.8463590277007365, 0, 0]
time: 333, action: Action.BUY, reward: 716.294422951, state: [8455.9729967213116, 0.010392914247665043, 0.089355535139177045, 1.8463590277007365, 0, 0], next_state: [8162.9442295081963, 0.010257639834469246, 0.089255306683781094, 1.82437

time: 399, action: Action.BUY, reward: 705.829109836, state: [8155.9673540983604, 0.0088448288404099889, 0.08487163585890832, 1.6543489893060936, 0, 0], next_state: [8058.2910983606571, 0.0087926461919272773, 0.08477160621103684, 1.646529272934073, 0, 0]
time: 400, action: Action.BUY, reward: 696.061484262, state: [8058.2910983606571, 0.0087926461919272773, 0.08477160621103684, 1.646529272934073, 0, 0], next_state: [7960.6148426229511, 0.0087403615461444286, 0.08467201660843901, 1.638663441322977, 0, 0]
time: 401, action: Action.BUY, reward: 713.503672787, state: [7960.6148426229511, 0.0087403615461444286, 0.08467201660843901, 1.638663441322977, 0, 0], next_state: [8135.0367278688527, 0.0087732050453627475, 0.084568925708105203, 1.646826081092029, 0, 0]
time: 402, action: Action.BUY, reward: 751.876487541, state: [8135.0367278688527, 0.0087732050453627475, 0.084568925708105203, 1.646826081092029, 0, 0], next_state: [8518.7648754098373, 0.0088687189913214358, 0.084485328193226628, 1.666

time: 472, action: Action.BUY, reward: 573.96616459, state: [6614.0778885245909, 0.007089804760382035, 0.07889879966332243, 1.4264749517115418, 0, 0], next_state: [6739.6616459016395, 0.0071150114065934118, 0.078817074590490713, 1.4330309153218854, 0, 0]
time: 473, action: Action.BUY, reward: 568.384664262, state: [6739.6616459016395, 0.0071150114065934118, 0.078817074590490713, 1.4330309153218854, 0, 0], next_state: [6683.8466426229515, 0.0070824604871313444, 0.07873689027748719, 1.4279275485749297, 0, 0]
time: 474, action: Action.BUY, reward: 571.873101967, state: [6683.8466426229515, 0.0070824604871313444, 0.07873689027748719, 1.4279275485749297, 0, 0], next_state: [6718.7310196721319, 0.0070785295719276589, 0.078653837096074999, 1.4286419775651338, 0, 0]
time: 475, action: Action.BUY, reward: 592.106040656, state: [6718.7310196721319, 0.0070785295719276589, 0.078653837096074999, 1.4286419775651338, 0, 0], next_state: [6921.0604065573771, 0.0071270257757255137, 0.078578093811028357,

time: 549, action: Action.BUY, reward: 698.154546885, state: [7765.2623311475418, 0.0065369572219249146, 0.075255476062174448, 1.3789159842129308, 0, 0], next_state: [7981.5454688524587, 0.0065757836213278928, 0.075192399607595689, 1.3882696837605264, 0, 0]
time: 550, action: Action.BUY, reward: 691.875359016, state: [7981.5454688524587, 0.0065757836213278928, 0.075192399607595689, 1.3882696837605264, 0, 0], next_state: [7918.7535901639349, 0.0065495237731670545, 0.075126531434044311, 1.3839380665339411, 0, 0]
time: 551, action: Action.BUY, reward: 697.456859344, state: [7918.7535901639349, 0.0065495237731670545, 0.075126531434044311, 1.3839380665339411, 0, 0], next_state: [7974.5685934426228, 0.0065504292802026493, 0.075058330582586055, 1.3853870760205438, 0, 0]
time: 552, action: Action.BUY, reward: 705.131422295, state: [7974.5685934426228, 0.0065504292802026493, 0.075058330582586055, 1.3853870760205438, 0, 0], next_state: [8051.3142229508194, 0.006555996975393173, 0.074990426030016

time: 612, action: Action.BUY, reward: 2989.36043148, state: [29163.339213114752, 0.0084170518205008964, 0.076702468776436911, 1.7420111434194181, 0, 0], next_state: [30893.604314754102, 0.0085002431468410421, 0.076667360324030043, 1.7600342066093182, 0, 0]
time: 613, action: Action.BUY, reward: 2919.59167738, state: [30893.604314754102, 0.0085002431468410421, 0.076667360324030043, 1.7600342066093182, 0, 0], next_state: [30195.916773770496, 0.0084495354758528443, 0.076615070560558801, 1.7507288892036528, 0, 0]
time: 614, action: Action.BUY, reward: 2934.94080328, state: [30195.916773770496, 0.0084495354758528443, 0.076615070560558801, 1.7507288892036528, 0, 0], next_state: [30349.408032786887, 0.0084440528110674099, 0.076552775366088024, 1.7510166304546797, 0, 0]
time: 615, action: Action.BUY, reward: 2932.15005311, state: [30349.408032786887, 0.0084440528110674099, 0.076552775366088024, 1.7510166304546797, 0, 0], next_state: [30321.500531147543, 0.008428827456529276, 0.076491442402365

time: 680, action: Action.BUY, reward: 25099.0786052, state: [250127.9603180328, 0.011083116378226114, 0.077480069607464647, 2.2707648434183727, 0, 0], next_state: [251990.78605245904, 0.011077769870323479, 0.077423203353238698, 2.2713364641625207, 0, 0]
time: 681, action: Action.BUY, reward: 25706.0667659, state: [251990.78605245904, 0.011077769870323479, 0.077423203353238698, 2.2713364641625207, 0, 0], next_state: [258060.66765901641, 0.011096874044519662, 0.07736794114556246, 2.2768786619332557, 0, 0]
time: 682, action: Action.BUY, reward: 26378.6375554, state: [258060.66765901641, 0.011096874044519662, 0.07736794114556246, 2.2768786619332557, 0, 0], next_state: [264786.37555409834, 0.011118817787502695, 0.077313319726915367, 2.2829929054410227, 0, 0]
time: 683, action: Action.BUY, reward: 25844.208899, state: [264786.37555409834, 0.011118817787502695, 0.077313319726915367, 2.2829929054410227, 0, 0], next_state: [259442.08899016396, 0.01107298732288748, 0.077265971111456833, 2.27497

time: 741, action: Action.BUY, reward: 20120.3803128, state: [209271.37791803278, 0.010156316025761497, 0.077381698319721873, 2.0835226176908268, 0, 0], next_state: [202203.80312786886, 0.010097033149366996, 0.077346280203795109, 2.0723095116465284, 0, 0]
time: 742, action: Action.BUY, reward: 21026.6764285, state: [202203.80312786886, 0.010097033149366996, 0.077346280203795109, 2.0723095116465284, 0, 0], next_state: [211266.76428524591, 0.01014383084503055, 0.077304639396784822, 2.0830356858431891, 0, 0]
time: 743, action: Action.BUY, reward: 20949.2331115, state: [211266.76428524591, 0.01014383084503055, 0.077304639396784822, 2.0830356858431891, 0, 0], next_state: [210492.33111475411, 0.010125244713424303, 0.077254258869659351, 2.0805749638157356, 0, 0]
time: 744, action: Action.BUY, reward: 20564.1095889, state: [210492.33111475411, 0.010125244713424303, 0.077254258869659351, 2.0805749638157356, 0, 0], next_state: [206641.09588852461, 0.010087043684443188, 0.077209345195163953, 2.07