## Training Notebook
Code to train the agent

In [None]:
import numpy as np
from agent import Agent
from environment import CryptoEnv
from datetime import date, timedelta
from keras.models import clone_model

In [None]:
# --- Run configuration -------------------------------------------------
# Size of the batch handed to agent.train().
batchSize = 512
# Inclusive date range from which the training episodes are drawn.
startDate, endDate = date(2019, 10, 1), date(2022, 5, 31)
# Path prefix under which the weight files are saved.
output_dir = 'model_output/btcbusd/'
# Final portfolio value of every finished episode is appended here.
scoreArr = []

# --- Learner and environment -------------------------------------------
agent = Agent(epsilonDecay=0.99)
env = CryptoEnv()

### Helper functions

In [None]:
def flatten(state):
    """Collapse an environment state into a single-row 2-D array.

    The first element of ``state`` is flattened to one dimension, the second
    and third elements are appended as two trailing entries, and the result
    is returned with shape ``(1, N)``.
    """
    pieces = (state[0].flatten(), [state[1]], [state[2]])
    row = np.concatenate(pieces)
    return row.reshape(1, -1)

In [None]:
from random import shuffle

# Day number of the last day of each month, keyed by month number (1-12).
# February is fixed at 28 here, i.e. leap years are not taken into account.
lastDayofMonth = dict(
    zip(range(1, 13), (31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31))
)


def shuffleDates(startDate, endDate, multi=1):
    """Return the usable dates between two dates, in random order.

    Every date from ``startDate`` to ``endDate`` (both inclusive) is
    considered, except the first and the last day of each month, which are
    skipped. Each remaining date appears ``multi`` times in the result.

    Args:
        startDate: First ``datetime.date`` to consider.
        endDate: Last ``datetime.date`` to consider.
        multi: Number of copies of each date to insert.

    Returns:
        A shuffled list of ``datetime.date`` objects. The list is empty when
        ``startDate`` lies after ``endDate``.
    """
    from calendar import monthrange

    datesList = []
    oneDay = timedelta(days=1)
    current = startDate
    # Compare with <= (not !=) so a start date after the end date simply
    # produces an empty list instead of iterating past the end date.
    while current <= endDate:
        # Due to bugs in binance data, we don't add the first and last date
        # of a month. The real month length is looked up so that leap years
        # are handled (Feb 29 is skipped, Feb 28 is kept in such years).
        lastDay = monthrange(current.year, current.month)[1]
        if 1 < current.day < lastDay:
            datesList.extend([current] * multi)
        current += oneDay

    shuffle(datesList)

    return datesList


In [None]:
# Maps the action index returned by agent.act() (0-6) to the value that is
# passed to env.step().
# NOTE(review): presumably positive/negative values are buy/sell sizes — confirm against CryptoEnv.
actionMap = dict(enumerate((0, 10, 50, 100, -10, -50, -100)))

## Main Training Loop

In [1]:
import os

episodes = shuffleDates(startDate, endDate)
print(f"Training for {len(episodes)} episodes")

# Create the checkpoint directory up front so that a missing folder cannot
# abort the run at the first save, many episodes in.
os.makedirs(output_dir, exist_ok=True)

# Separate target network, initialised with the same weights as the model
# that is being trained.
targetNetwork = clone_model(agent.model)
targetNetwork.set_weights(agent.model.get_weights())

# Column of the BTC balance in the flattened state; the BUSD balance is the
# column right after it.
# NOTE(review): presumably 1440 timesteps x 5 features come before the
# balances — confirm against CryptoEnv.
balanceIdx = 1440 * 5

for i, e in enumerate(episodes):
    print(f"\nEpisode ({e}): ", end="")
    state = flatten(env.reset(e))

    done = False
    while not done:
        # Play the episode to its end and store every transition in the
        # agent's memory; no learning happens inside this loop.
        action = agent.act(state)
        nextState, done, reward, value = env.step(actionMap[action])
        nextState = flatten(nextState)
        agent.remember(state, action, nextState, done, reward)
        state = nextState

    # Episode summary, computed from the last step of the episode.
    # NOTE(review): 1000 looks like the starting portfolio value — confirm.
    profitPercent = (value - 1000) / 1000 * 100
    finalBtc = nextState[0, balanceIdx]
    finalBusd = nextState[0, balanceIdx + 1]
    print(f"{i}/{len(episodes)}, epsilon:{agent.epsilon:.4f}")
    print(
        f"Final BTC: {finalBtc:.5f}, BUSD: {finalBusd:.5f}, Total Value: ${value:.3f}, profit: {profitPercent:.2f}%")
    scoreArr.append(value)

    if i > 0:
        # Only train once in a while
        # Speed up training process by gathering more data first
        if i % 5 == 0:
            agent.train(batchSize, targetNetwork)

        # Update DQN target
        if i % 25 == 0:
            targetNetwork.set_weights(agent.model.get_weights())

        # Periodic checkpoint of the weights
        if i % 150 == 0:
            agent.save(os.path.join(output_dir, f"weights_{i:04d}.hdf5"))

agent.save(os.path.join(output_dir, "weights_final.hdf5"))


IndentationError: expected an indented block after 'if' statement on line 32 (3642103725.py, line 34)