## Training Notebook
Code to train the agent

In [1]:
import numpy as np
from agent import Agent
from environment import CryptoEnv
from datetime import date, timedelta
from keras.models import clone_model

In [2]:
agent = Agent(epsilonDecay=0.99)
env = CryptoEnv()

batchSize = 512
startDate = date(2019, 10, 1)
endDate = date(2022, 5, 31)
scoreArr = []

output_dir = 'model_output/btcbusd/'

### Helper functions

In [3]:
def flatten(state):
    """Returns flattened state"""
    state = np.concatenate((state[0].flatten(), [state[1]], [state[2]]))
    return np.reshape(state, (1, -1))

In [4]:
from random import shuffle

lastDayofMonth = {
    1: 31,
    2: 28,
    3: 31,
    4: 30,
    5: 31,
    6: 30,
    7: 31,
    8: 31,
    9: 30,
    10: 31,
    11: 30,
    12: 31
}


def shuffleDates(startDate, endDate, multi=1):
    """Generates an array of shuffle dates, each date is inserted for multiple times"""
    datesList = []
    count = startDate
    delta = timedelta(days=1)
    while (count != endDate + delta):
        # Due to bugs in binance data, we don't add the first and last date of a month
        if (count.day != 1 and not(count.day >= lastDayofMonth[count.month])):
            for i in range(multi):
                datesList.append(count)
        count += delta

    shuffle(datesList)

    return datesList


In [5]:
actionMap = {
    0: 0,
    1: 10,
    2: 50,
    3: 100,
    4: -10,
    5: -50,
    6: -100
}

## Main Training Loop

In [6]:
episodes = shuffleDates(startDate, endDate)
print(f"Training for {len(episodes)} episodes")

targetNetwork = clone_model(agent.model)
targetNetwork.set_weights(agent.model.get_weights())

for i, e in enumerate(episodes):
    print(f"\nEpisode ({e}): ", end="")
    state = env.reset(e)
    state = flatten(state)

    done = False
    while not done:
        # Trains for 1440 timesteps per episode
        action = agent.act(state)
        nextState, done, reward, value = env.step(actionMap[action])
        nextState = flatten(nextState)
        agent.remember(state, action, nextState, done, reward)
        state = nextState
        if done:
            profitPercent = (value - 1000) / 1000 * 100
            print(f"{i}/{len(episodes)}, epsilon:{agent.epsilon:.4f}")
            print(
                f"Final BTC: {nextState[0,CryptoEnv.stateLen * 5 + 0]:.5f}, BUSD: {nextState[0,CryptoEnv.stateLen * 5 + 1]:.5f}, Total Value: ${value:.3f}, profit: {profitPercent:.2f}%")
            scoreArr.append(value)

    if i > 0:
        # Only train once in a while
        # Speed up training process by gathering more data first
        if i % 5 == 0:
            agent.train(batchSize, targetNetwork)

        # Update DQN target
        if i % 25 == 0:
            targetNetwork.set_weights(agent.model.get_weights())

        if i % 150 == 0:
            agent.save(output_dir + f"weights_{i:04d}.hdf5")

agent.save(output_dir + f"weights_final.hdf5")


Training for 909 episodes

Episode (2021-09-17): 0/909, epsilon:1.0000
Final BTC: 0.00587, BUSD: 723.95700, Total Value: $1002.588, profit: 0.26%

Episode (2020-12-16): 1/909, epsilon:1.0000
Final BTC: 0.05182, BUSD: 0.00000, Total Value: $1022.816, profit: 2.28%

Episode (2020-12-09): 2/909, epsilon:1.0000
Final BTC: 0.03555, BUSD: 330.00000, Total Value: $978.651, profit: -2.13%

Episode (2020-12-24): 3/909, epsilon:1.0000
Final BTC: 0.01232, BUSD: 690.00000, Total Value: $975.948, profit: -2.41%

Episode (2021-05-05): 4/909, epsilon:1.0000
Final BTC: 0.00483, BUSD: 740.00000, Total Value: $1006.324, profit: 0.63%

Episode (2022-01-13): 5/909, epsilon:1.0000
Final BTC: 0.01413, BUSD: 388.40902, Total Value: $1005.421, profit: 0.54%
Training 100.0%

Episode (2022-01-03): 6/909, epsilon:0.9900
Final BTC: 0.02121, BUSD: 0.00000, Total Value: $1005.138, profit: 0.51%

Episode (2022-03-04): 7/909, epsilon:0.9900
Final BTC: 0.01630, BUSD: 310.00000, Total Value: $988.918, profit: -1.11%

E