## Training Notebook
Code to train the agent

In [1]:
import numpy as np
from agent import Agent
from environment import CryptoEnv
from datetime import date, timedelta

In [2]:
agent = Agent()
env = CryptoEnv()

batchSize = 64
startDate = date(2019, 10, 1)
endDate = date(2022, 5, 31)
scoreArr = []

output_dir = 'model_output/btcbusd/'

### Helper functions

In [3]:
def flatten(state):
    """Returns flattened state"""
    state = np.concatenate((state[0].flatten(), [state[1]], [state[2]]))
    return np.reshape(state, (1, -1))

In [4]:
from random import shuffle

lastDayofMonth = {
    1: 31,
    2: 28,
    3: 31,
    4: 30,
    5: 31,
    6: 30,
    7: 31,
    8: 31,
    9: 30,
    10: 31,
    11: 30,
    12: 31
}


def shuffleDates(startDate, endDate, multi=1):
    """Generates an array of shuffle dates, each date is inserted for multiple times"""
    datesList = []
    count = startDate
    delta = timedelta(days=1)
    while (count != endDate + delta):
        # Due to bugs in binance data, we don't add the first and last date of a month
        if (count.day != 1 and not(count.day >= lastDayofMonth[count.month])):
            for i in range(multi):
                datesList.append(count)
        count += delta

    shuffle(datesList)

    return datesList


In [5]:
actionMap = {
    0: 0,
    1: 10,
    2: 50,
    3: 100,
    4: -10,
    5: -50,
    6: -100
}

## Main Training Loop

In [6]:
# We will iterate over the shuffled 1948 training days (see README)

episodes = shuffleDates(startDate, endDate)
print(f"Training for {len(episodes)} episodes")

for i, e in enumerate(episodes):
    print(f"Episode ({e}): ", end="")
    state = env.reset(e)
    state = flatten(state)

    done = False
    while not done:
        # Trains for 1440 timesteps per episode
        action = agent.act(state)
        nextState, done, reward, value = env.step(actionMap[action])
        nextState = flatten(nextState)
        agent.remember(state, action, nextState, done, reward)
        state = nextState
        if done:
            profitPercent = (value - 1000) / 1000 * 100
            print(f"{i}/{len(episodes)}, epsilon:{agent.epsilon:.4f}")
            print(
                f"Final BTC: {nextState[0,1440 * 5 + 0]:.5f}, BUSD: {nextState[0,1440 * 5 + 1]:.5f}, Total Value: ${value:.3f}, profit: {profitPercent:.2f}%")
            scoreArr.append(value)

    if len(agent.memory) > batchSize:
        agent.train(batchSize)

    if i % 50 == 0:
        agent.save(output_dir + f"weights_{i:04d}.hdf5")


Training for 909 episodes
Episode (2021-08-11): 0/909, epsilon:1.0000
Final BTC: 0.01851, BUSD: 154.58799, Total Value: $997.224, profit: -0.28%
Episode (2021-10-26): 1/909, epsilon:0.9685
Final BTC: 0.00958, BUSD: 421.92077, Total Value: $999.545, profit: -0.05%
Episode (2021-04-02): 2/909, epsilon:0.9380
Final BTC: 0.00000, BUSD: 1001.77765, Total Value: $1001.778, profit: 0.18%
Episode (2020-04-13): 3/909, epsilon:0.9084
Final BTC: 0.08035, BUSD: 454.90302, Total Value: $1005.737, profit: 0.57%
Episode (2022-05-23): 4/909, epsilon:0.8798
Final BTC: 0.00481, BUSD: 854.25761, Total Value: $994.208, profit: -0.58%
Episode (2020-08-08): 5/909, epsilon:0.8521
Final BTC: 0.02810, BUSD: 667.44544, Total Value: $998.144, profit: -0.19%
Episode (2019-12-08): 6/909, epsilon:0.8253
Final BTC: 0.07535, BUSD: 440.00000, Total Value: $1006.516, profit: 0.65%
Episode (2021-06-28): 7/909, epsilon:0.7993
Final BTC: 0.01825, BUSD: 360.00000, Total Value: $989.460, profit: -1.05%
Episode (2020-12-03):