In [1]:
import numpy as np
import time
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' 

import tensorflow as tf
#from Plots import plot_action_profit_percent
from Dataset import BitcoinData
from TradingEnvironment import TradingEnvironment, ACTION
from Models import DQNV1, DQNV2

from keras.models import load_model

# Turn off Keras' interactive logging for the rest of the session.
tf.keras.utils.disable_interactive_logging()

# Episode count used by cryptocurrencies_trading().
NUM_EPISODES = 200
# Presumably the episode count meant for evaluation runs — TODO confirm intended use.
NUM_EPISODES_TEST = 1

# Identifiers accepted by get_agent() as `model_type`.
MODEL_DQNV1 = 0
MODEL_DQNV2 = 1
# NOTE(review): get_agent() has no branch for this value and returns None for it.
MODEL_DUELING_DQN = 2

In [2]:
def get_environment(start_time=0, end_time=-1):
    """Create a TradingEnvironment backed by a freshly constructed BitcoinData.

    Args:
        start_time: Forwarded unchanged to TradingEnvironment (default 0).
        end_time: Forwarded unchanged to TradingEnvironment (default -1).

    Returns:
        The newly created TradingEnvironment instance.
    """
    return TradingEnvironment(
        BitcoinData(),
        start_time=start_time,
        end_time=end_time,
    )

def get_agent(environment, model_type, load_version=-1, load_model_num=-1):
    """Instantiate the agent class selected by ``model_type``.

    Args:
        environment: Object exposing ``get_observation_space()`` and
            ``get_action_space()``; also handed to the agent constructor.
        model_type: ``MODEL_DQNV1`` or ``MODEL_DQNV2``.
        load_version: Forwarded to the agent constructor (default -1).
        load_model_num: Forwarded to the agent constructor (default -1).

    Returns:
        A ``DQNV1`` or ``DQNV2`` instance, or ``None`` when ``model_type``
        matches neither identifier.
    """
    obs_space = environment.get_observation_space()
    act_space = environment.get_action_space()

    # Pick the class first so the (identical) constructor call is written once.
    if model_type == MODEL_DQNV1:
        agent_cls = DQNV1
    elif model_type == MODEL_DQNV2:
        agent_cls = DQNV2
    else:
        return None

    return agent_cls(
        environment=environment,
        observation_space=obs_space,
        action_space=act_space,
        model_path="DQN",
        load_version=load_version,
        load_model_num=load_model_num,
    )

def cryptocurrencies_trading(agent, environment, train=True, num_episodes=None):
    """Run the agent against the environment for a number of episodes.

    Every transition is stored in ``agent.memory``. In training mode the agent
    also learns after each step and its model is saved after each episode.
    A progress report is printed every 100 steps and at the end of an episode.

    Args:
        agent: Object providing ``act``, ``learn``, ``save_model``, ``memory``,
            ``exploration_rate`` and ``actions_taked_by_itself``.
        environment: Object providing ``get_observation_space``, ``reset``,
            ``step`` and the ``*percent_profit`` attributes used in the report.
        train: When True, call ``agent.learn()`` every step and
            ``agent.save_model()`` after every episode.
        num_episodes: Number of episodes to run. When None, defaults to
            ``NUM_EPISODES`` for training and ``NUM_EPISODES_TEST`` for
            evaluation, so an evaluation run no longer repeats the training
            episode count.
    """
    if num_episodes is None:
        num_episodes = NUM_EPISODES if train else NUM_EPISODES_TEST

    observation_space = environment.get_observation_space()

    for episode in range(1, num_episodes + 1):
        state = environment.reset()
        step = 0
        done = False

        while not done:
            step += 1
            action = agent.act(np.reshape(state, [1, observation_space]))
            # The environment is stepped with the agent's action index shifted down by one.
            next_state, reward, done, _ = environment.step(action - 1)

            # The unshifted action index is what gets stored in memory.
            agent.memory.remember(state, action, reward, next_state, done)
            if train:
                agent.learn()

            state = next_state

            if step % 100 == 0 or done:
                print("Step {}, exploration rate {} ====>".format(step, agent.exploration_rate))
                print("    Percent Capital: {:.4f}, interval: [{:.4f}, {:.4f}]".format(
                    environment.percent_profit, environment.min_percent_profit, environment.max_percent_profit
                ))
                print("    Action taked by itseft: SELL: {}, HOLD: {}, BUY: {}".format(
                    agent.actions_taked_by_itself[0], agent.actions_taked_by_itself[1], agent.actions_taked_by_itself[2]
                ))

                # Reset the counters so each report only covers the steps since the previous one.
                agent.actions_taked_by_itself = [0, 0, 0]

        print("Episode {}/{} ended\n".format(episode, num_episodes))

        if train:
            agent.save_model()


In [None]:
# Build an environment with the default start/end times and a DQNV1 agent
# created with the default load_version / load_model_num.
environment = get_environment()
agent006 = get_agent(environment, MODEL_DQNV1)

# Training run, currently disabled.
#cryptocurrencies_trading(agent006, environment)

In [4]:
# Evaluation setup: pick the slice passed to the environment and the saved model to load.
last_episode = 78
# start_time = TradingEnvironment.NUM_OBSERVATIONS * last_episode
# The offset uses a hard-coded 1000 per episode in place of the constant above.
start_time = 1000 * last_episode
end_time = start_time + 5000
load_version = 15
load_model_num = -1

# beta 59, 89

environment = get_environment(start_time=start_time, end_time=end_time)
environment.reset()
agent006 = get_agent(environment, MODEL_DQNV1, load_version=load_version, load_model_num=load_model_num)
# Presumably makes the agent act without random exploration — confirm against Models.
agent006.exploration_rate = 0.0
# Presumably rewinds the memory write position so this run's transitions start at index 0 — TODO confirm.
agent006.memory.current_index = 0

# train=False: transitions are still remembered, but learn() and save_model() are not called.
cryptocurrencies_trading(agent006, environment, train=False)

Reading dataset...
Dataset read!
Dataset length: 1051198


KeyboardInterrupt: 

In [None]:
# NOTE(review): `agent` is only assigned in a later cell of this notebook, so on a
# fresh top-to-bottom run this name is not defined yet.
agent.memory.current_index

In [None]:
# Inspect the memory's current index; the trailing comment records a previously observed value.
agent006.memory.current_index
# 4779

In [None]:
# The top-of-notebook `from Plots import plot_action_profit_percent` is commented out,
# so the function is reached through the module here to avoid a NameError.
import Plots

# Plot a 200-wide window starting at the offset of `last_episode`.
# NOTE(review): the evaluation cell replaced TradingEnvironment.NUM_OBSERVATIONS with a
# literal 1000 — confirm the attribute still exists.
start_time = TradingEnvironment.NUM_OBSERVATIONS * last_episode
end_time = start_time + 200
Plots.plot_action_profit_percent(
    agent006.memory.states,
    agent006.memory.actions,
    agent006.memory.rewards,
    agent006.memory.dones,
    start_time=start_time,
    end_time=end_time,
)

In [None]:
# NOTE(review): DoubleDQN is not among the names imported in the notebook's import cell
# (only DQNV1 and DQNV2 are imported from Models) — confirm where it should come from.
agent = DoubleDQN(environment.get_observation_space(), environment.get_action_space(), model_path="DoubleDQN")

In [None]:
# Reset the environment, then record 100 transitions taken with action 0.
state = environment.reset()
for step_idx in range(100):
    next_state, reward, done, info = environment.step(0)
    agent.memory.remember(state, 0, reward, next_state, done)
    # The observation just received becomes the starting state of the next transition.
    state = next_state



In [None]:
# Path built from the agent's model_path and MEMORY_FILENAME; it is not used again in this cell.
path_to_memory = os.path.join(agent.model_path, agent.MEMORY_FILENAME)
agent.save_model()

In [None]:
# Print small floats in fixed-point notation instead of scientific notation.
np.set_printoptions(suppress=True)

In [None]:
# Manual probe of a single step with action value 3.
# NOTE(review): cryptocurrencies_trading() passes `action - 1` to step() — confirm 3 is a valid action here.
environment.step(3)

In [None]:
# Average of the values stored in the memory's `actions` container.
np.mean(agent006.memory.actions)

In [None]:
# Average of the per-tensor mean weight across all weight tensors of the model.
# A dedicated name is used instead of `sum` so the builtin is not shadowed in the kernel namespace.
per_tensor_means = [np.mean(weight) for weight in agent006.model.weights]
mean_weight = np.mean(per_tensor_means)
print(mean_weight)

In [None]:
# Inspect the `n` attribute of the environment's action_space.
environment.action_space.n

In [None]:
# Inspect the memory's current index again; the trailing comment records a previously observed value.
agent006.memory.current_index
# 6725

In [None]:
import Plots

# Only the module is imported, so the plotting function must be reached through it;
# the unqualified name is not defined in this notebook.
start_time = 0
Plots.plot_action_profit(agent006.memory.states, agent006.memory.dones, start_time=start_time)

In [None]:
# Reset the environment; the returned value is displayed as the cell output.
environment.reset()

In [None]:
# Step the environment 100 times with action 0; only the last transition is kept.
for step_idx in range(100):
    state, reward, done, info = environment.step(0)

# Show the final observation and reward, one per line.
print(state, reward, sep="\n")

In [None]:
# Persist the agent via its own save_model() method.
agent006.save_model()

In [None]:
import importlib
import Plots

# Re-execute the Plots module so edits made to it are picked up without restarting the kernel.
importlib.reload(Plots)

# Plot a 1000-wide window of the recorded memory, starting at index 7400.
start_time = 7400
end_time = start_time + 1000
Plots.plot_value_action(agent006.memory.states, agent006.memory.dones, start_time=start_time, end_time=end_time)

In [None]:
# Replace the agent's model with the one stored in the given .keras file.
agent006.model = load_model("Models/DQN Version alpha.keras")