## Training Notebook
Code to train the agent

In [1]:
import numpy as np
from agent import Agent
from environment import CryptoEnv
from datetime import date, timedelta

In [2]:
agent = Agent(epsilonDecay=0.9995)
env = CryptoEnv()

batchSize = 64
startDate = date(2019, 10, 1)
endDate = date(2022, 5, 31)
scoreArr = []

### Helper functions

In [3]:
def flatten(state):
    """Returns flattened state"""
    state = np.concatenate((state[0].flatten(), [state[1]], [state[2]]))
    return np.reshape(state, (1, -1))

In [4]:
from random import shuffle

lastDayofMonth = {
    1: 31,
    2: 28,
    3: 31,
    4: 30,
    5: 31,
    6: 30,
    7: 31,
    8: 31,
    9: 30,
    10: 31,
    11: 30,
    12: 31
}


def shuffleDates(startDate, endDate, multi=1):
    """Generates an array of shuffle dates, each date is inserted for multiple times"""
    datesList = []
    count = startDate
    delta = timedelta(days=1)
    while (count != endDate + delta):
        # Due to bugs in binance data, we don't add the first and last date of a month
        if (count.day != 1 and not(count.day >= lastDayofMonth[count.month])):
            for i in range(multi):
                datesList.append(count)
        count += delta

    shuffle(datesList)

    return datesList


IndentationError: expected an indented block after 'if' statement on line 26 (2452581194.py, line 27)

In [None]:
actionMap = {
    0: 0,
    1: 10,
    2: 50,
    3: 100,
    4: -10,
    5: -50,
    6: -100
}

## Main Training Loop

In [None]:
# We will iterate over the shuffled 1948 training days (see README)

episodes = shuffleDates(startDate, endDate)
print(f"Training for {len(episodes)} episodes")

for i, e in enumerate(episodes):
    print(f"Episode: {e} :", end="")
    state = env.reset(e)
    state = flatten(state)

    done = False
    while not done:
        # Trains for 1440 timesteps per episode
        action = agent.act(state)
        nextState, done, reward, value = env.step(actionMap[action])
        nextState = flatten(nextState)
        agent.remember(state, action, nextState, done, reward)
        state = nextState
        if done:
            profitPercent = (value - 1000) / 1000 * 100
            print(f"{i}/{len(episodes)}, epsilon:{agent.epsilon:.2f}")
            print(
                f"final BTC: {nextState[0,1440 * 5 + 0]}, final BUSD: {nextState[0,1440 * 5 + 1]}, final value: {value:.2f}, profit: {profitPercent:.2f}%")
            scoreArr.append(value)

    if len(agent.memory) > batchSize:
        agent.train(batchSize)


Training for 1948 episodes
Episode: 2019-10-08 :0/1948, epsilon:1.00
final BTC: 0.103666523640776, final BUSD: 150.0, final value: 998.57, profit: -0.14%
Episode: 2021-12-19 :1/1948, epsilon:0.97
final BTC: 0.014007862378640355, final BUSD: 340.0, final value: 994.15, profit: -0.58%
Episode: 2022-01-13 :2/1948, epsilon:0.94
final BTC: 0.007967344266369887, final BUSD: 645.2159380532745, final value: 984.39, profit: -1.56%
Episode: 2021-05-15 :3/1948, epsilon:0.91
final BTC: 0.019209474663532258, final BUSD: 50.0, final value: 948.82, profit: -5.12%
Episode: 2022-04-16 :4/1948, epsilon:0.88
final BTC: 0.024691588576638578, final BUSD: 0.0, final value: 997.20, profit: -0.28%
Episode: 2020-09-04 :5/1948, epsilon:0.85
final BTC: 0.029117485140327644, final BUSD: 691.7054755832003, final value: 996.13, profit: -0.39%
Episode: 2020-04-01 :

AssertionError: 