In [1]:
import os

# Folder that contains the dataset files; replace with your local folder path.
# The path is relative, so it is resolved against the kernel's working directory.
path = "archive (4)"

# Recursively walk `path` and print the full path of every file found,
# as a quick check that the expected CSV files are present.
for dirname, _, filenames in os.walk(path):
    for filename in filenames:
        print(os.path.join(dirname, filename))


archive (4)\BNBUSDT.csv
archive (4)\BNBUSDT_norm.csv
archive (4)\BTCUSDT.csv
archive (4)\BTCUSDT_norm.csv
archive (4)\ETHUSDT.csv
archive (4)\ETHUSDT_norm.csv
archive (4)\XRPUSDT.csv
archive (4)\XRPUSDT_norm.csv


Environment

In [4]:
# NOTE(review): `X` is not referenced anywhere in the visible code — this looks
# like an accidental editor auto-import; confirm and remove.
from re import X
import os
# NOTE(review): PYTHONHASHSEED is normally read when the interpreter starts, so
# assigning it here presumably only affects child processes, not the hashing of
# this already-running kernel — confirm, or set it before launching Jupyter.
os.environ['PYTHONHASHSEED'] = str(1000)

import copy
import time
import json
# os.system('clear')
# Environment values must be strings; the variable is set from Python here
# rather than through a shell `export`.
os.environ['MKL_DEBUG_CPU_TYPE'] = '5'  # use string, not export

import pandas as pd
import numpy as np
import random

from collections import deque
from datetime import datetime

import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import (
    Input, Dense, Flatten, Conv1D, Conv2D,
    MaxPooling1D, Activation, Concatenate, LSTM
)
from tensorflow.keras import backend as K
from tensorflow.keras.optimizers import Adam, RMSprop

from tensorboardX import SummaryWriter  # external library, ok if installed

# Set seeds
# Python's `random`, NumPy's global RNG and TensorFlow each keep their own
# generator, so all three are seeded. `random` in particular drives the random
# episode start chosen in CustomEnv.reset().
random.seed(2002)
np.random.seed(32)
tf.random.set_seed(100)

# Set channels first
# Global Keras setting; it must run before any model is built so that the
# layers are created with the channels-first data format.
K.set_image_data_format("channels_first")


class CustomEnv:
    """A custom multi-asset crypto trading environment.

    The environment replays two aligned arrays that are both indexed as
    ``[step, feature, asset]``:

    * ``df``            -- raw data; feature ``PRICE_FEATURE_INDEX`` is the
      price used to value and trade each asset.
    * ``df_normalized`` -- normalized data; all features of the last
      ``lookback_window_size`` steps form the market part of the observation.

    Asset 0 is treated as the cash leg: the portfolio starts fully allocated
    to it and ``self.cash`` is always the value held in asset 0.

    Observation layout (built by ``_build_state``):

    * ``model == "EIIE"``: per-asset windows stacked to
      ``(assets, lookback, features)``.
    * otherwise: per-asset windows concatenated along the feature axis and
      followed by the orders history, i.e. ``(lookback, features * assets +
      orders_row_length)``.
    """

    # Index, along the feature axis of the raw array, of the trading price.
    PRICE_FEATURE_INDEX = 2
    # Proportional fee rate used by step() when computing `tax`.
    FEE_RATE = 0.001

    def __init__(self, df, df_normalized, initial_balance=1000,
                 stocks=['USDCUSDT', 'BTCUSDT', 'BNBBTC', 'BNBBTC'],
                 lookback_window_size=50, model=''):
        """Store the data and configuration; call reset() before step().

        Args:
            df: raw data indexed as ``[step, feature, asset]``.
            df_normalized: normalized data with the same indexing.
            initial_balance: starting capital of every episode.
            stocks: asset names; only their count and order are used here.
                NOTE(review): the default list repeats 'BNBBTC' -- confirm
                whether that is intentional.
            lookback_window_size: number of past steps in an observation.
            model: ``"EIIE"`` selects the stacked observation layout, any
                other value selects the flat layout.
        """
        self.xarray = df_normalized  # Normalized dataset
        self.df = df                 # Raw dataset
        self.df_total_steps = self.xarray.shape[0]
        self.initial_balance = initial_balance
        self.lookback_window_size = lookback_window_size
        # Divisor used to scale monetary values stored in the orders history.
        self.normalize_value = 40000
        n_assets = self.xarray.shape[2]
        self.weights = [1] + [0] * (n_assets - 1)
        self.quants = [0] * n_assets
        self.quants_ubah = [0] * n_assets
        self.cash = 0
        self.stocks = stocks
        # Kept as in the original: placeholder keys named after the stocks.
        # reset() adds the entries actually used, keyed by str(asset_index).
        self.market_state = dict.fromkeys(self.stocks)
        self.model = model
        self.ubah = initial_balance

        self.orders_history = deque(maxlen=self.lookback_window_size)
        self.market_history = deque(maxlen=self.lookback_window_size)

    def _prices_at(self, step):
        """Return the raw trading price of every asset at ``step`` as a 1-D array."""
        return np.array([self.df[step, self.PRICE_FEATURE_INDEX, x]
                         for x in range(len(self.stocks))])

    def _build_state(self):
        """Assemble the observation from the current market and orders windows."""
        windows = [self.market_state[str(x)] for x in range(len(self.stocks))]
        if self.model == "EIIE":
            return np.stack(windows)
        state = np.concatenate(windows, axis=1)
        return np.concatenate((state, self.orders_history), axis=1)

    def reset(self, env_steps_size=0):
        """Start a new episode and return its first observation.

        Args:
            env_steps_size: if positive, the episode covers a randomly placed
                window of that many steps; otherwise it starts right after the
                first lookback window and ends at the end of the data.

        Returns:
            ``(state, orders_history)`` where ``orders_history`` is the live
            deque owned by the environment (not a copy).

        Raises:
            ValueError: if ``env_steps_size`` leaves no valid start position.
        """
        self.balance = self.initial_balance
        self.net_worth = self.initial_balance
        self.prev_net_worth = self.initial_balance
        n_assets = self.xarray.shape[2]
        self.weights = [1] + [0] * (n_assets - 1)
        self.quants = [0] * n_assets
        self.short_sell = [1, 1, 1]
        self.cash = self.initial_balance
        self.ubah = self.initial_balance

        if env_steps_size > 0:
            last_start = self.df_total_steps - env_steps_size
            if last_start < self.lookback_window_size:
                # random.randint would fail here with an opaque "empty range"
                # ValueError; raise the same exception type with a clear reason.
                raise ValueError(
                    f"env_steps_size={env_steps_size} is too large for "
                    f"{self.df_total_steps} steps of data with a lookback "
                    f"window of {self.lookback_window_size}"
                )
            # randint is inclusive on both ends.
            self.start_step = random.randint(self.lookback_window_size, last_start)
            self.end_step = self.start_step + env_steps_size
        else:
            self.start_step = self.lookback_window_size
            self.end_step = self.df_total_steps

        self.current_step = self.start_step

        # Buy and Hold quantities: the initial balance split equally across
        # all assets at the starting prices.
        # NOTE(review): the result is a one-element list wrapping the array,
        # unlike the flat list set in __init__ -- kept as-is for callers.
        self.quants_ubah = [
            (self.initial_balance / len(self.weights)) /
            self._prices_at(self.current_step)
        ]

        # Init orders history: fill the whole window with the initial
        # portfolio row (a fresh list per entry, so rows are not aliased).
        for _ in range(self.lookback_window_size):
            self.orders_history.append(
                [self.net_worth / self.normalize_value,
                 self.cash / self.normalize_value] +
                list(self.quants) +
                list(self.weights)
            )

        # Init market history: one window per asset holding the normalized
        # features of the lookback steps that end at current_step (inclusive).
        for j in range(len(self.stocks)):
            window = deque(maxlen=self.lookback_window_size)
            for offset in reversed(range(self.lookback_window_size)):
                window.append(self.xarray[self.current_step - offset, :, j])
            self.market_state[str(j)] = window

        return self._build_state(), self.orders_history

    def _next_observation(self):
        """Push the features of the current step and return the new observation."""
        for j in range(len(self.stocks)):
            self.market_state[str(j)].append(self.xarray[self.current_step, :, j])
        return self._build_state()

    def step(self, prediction):
        """Advance one step and rebalance the portfolio to ``prediction``.

        Args:
            prediction: NumPy vector with one target weight per asset (it is
                indexed per asset and must provide ``.tolist()``).

        Returns:
            ``(obs, orders_history, reward, done, prices)`` where ``reward`` is
            the log-return of the net worth, ``done`` is True once the net
            worth is at most half of the initial balance, and ``prices`` are
            the raw prices of the new step.
        """
        prices_ant = self._prices_at(self.current_step)
        self.current_step += 1
        prices = self._prices_at(self.current_step)

        # Portfolio value at the new prices, before rebalancing.
        self.balance = self.cash + np.dot(prices[1:], self.quants[1:])
        quants_ant = self.quants

        # Rebalance: hold `prediction[x]` of the balance in each asset.
        self.quants = [self.balance * prediction[x] / prices[x]
                       for x in range(len(self.stocks))]

        # NOTE(review): both dot products are scalars, so this charges the fee
        # on the change in total portfolio value rather than on per-asset
        # turnover (the np.sum is a no-op). Kept unchanged to preserve results;
        # confirm the intended fee model.
        tax = np.sum(
            abs(np.dot(np.array(self.quants), prices) -
                np.dot(np.array(quants_ant), prices_ant))
        ) * self.FEE_RATE

        self.cash = self.quants[0] * prices[0]
        self.prev_net_worth = self.net_worth
        # The fee lowers the reported net worth only; holdings are not reduced.
        self.net_worth = np.dot(self.quants, prices) - tax

        self.orders_history.append(
            [self.net_worth / self.normalize_value,
             self.cash / self.normalize_value] +
            [number / self.normalize_value for number in self.quants] +
            prediction.tolist()
        )

        reward = np.log(self.net_worth / self.prev_net_worth)

        done = self.net_worth <= self.initial_balance / 2
        obs = self._next_observation()

        return obs, self.orders_history, reward, done, prices

    def render(self):
        """Print the current step index and net worth."""
        print(f'Step: {self.current_step}, Net Worth: {self.net_worth}')


Agent

In [5]:
class CustomAgent:
    """A custom crypto trading agent built around a shared Actor-Critic model.

    ``Actor`` and ``Critic`` refer to the same ``Shared_Model`` instance. The
    agent can log training runs (TensorBoard scalars plus a
    ``Parameters.json`` file), train on collected transitions (``replay``),
    pick actions (``act``) and save or load the model weights.
    """

    def __init__(self, lookback_window_size=50, lr=0.00005, epochs=1, stocks=[], optimizer=Adam, batch_size=32, model="", shape=[], depth=0, comment=""):
        """Configure the agent and build the shared network.

        Args:
            lookback_window_size: number of past steps in one state.
            lr: learning rate passed to the shared model.
            epochs: epochs per ``fit`` call in ``replay``.
            stocks: asset names; the action space has one entry per asset.
            optimizer: optimizer class passed to the shared model.
            batch_size: batch size per ``fit`` call in ``replay``.
            model: ``"EIIE"`` selects the stacked state layout, any other
                value the flat layout.
            shape: shape of the dataset; ``shape[1]`` and ``shape[2]`` are
                read, so the empty default cannot be used as-is.
                NOTE(review): presumably (steps, features, assets) -- confirm.
            depth: stored and written to the parameter log only.
            comment: free text written to the parameter log.
        """
        self.lookback_window_size = lookback_window_size
        self.model = model
        self.comment = comment
        self.depth = depth
        self.stocks = stocks
        self.shape = shape

        # Action space goes from 0 to the number of assets in the portfolio
        self.action_space = np.array(range(0, len(self.stocks)))

        # Folder to save models (named after the creation time, minute resolution)
        self.log_name = datetime.now().strftime("%Y_%m_%d_%H_%M") + "_Crypto_trader"

        # State size contains Market + Orders + Indicators history for the last lookback_window_size steps
        if self.model == "EIIE":
            self.state_size = (len(stocks), lookback_window_size, self.shape[1])
        else:
            # shape[1] * shape[2] market columns, 2 balance columns and
            # 2 * shape[2] per-asset order columns.
            self.state_size = (lookback_window_size, self.shape[1] * self.shape[2] + 2 + 2 * self.shape[2])  # OHLC info + market + indicators

        # Neural Networks configuration
        self.lr = lr
        self.epochs = epochs
        self.optimizer = optimizer
        self.batch_size = batch_size

        # Create shared Actor-Critic network model (one object under two names)
        self.Actor = self.Critic = Shared_Model(
            input_shape=self.state_size,
            action_space=self.action_space.shape[0],
            lr=self.lr,
            optimizer=self.optimizer,
            model=self.model
        )

    # Create TensorBoard writer
    def create_writer(self, initial_balance, normalize_value, train_episodes):
        """Create the TensorBoard writer, the model folder and the parameter log.

        Must be called before ``replay`` or ``save``: it defines
        ``self.writer`` and ``self.replay_count`` and creates the folder that
        ``save`` writes into.
        """
        self.replay_count = 0
        self.writer = SummaryWriter('runs/' + self.log_name)

        # Create folder to save models; exist_ok avoids the check-then-create race
        os.makedirs(self.log_name, exist_ok=True)

        self.start_training_log(initial_balance, normalize_value, train_episodes)

    def start_training_log(self, initial_balance, normalize_value, train_episodes):
        """Write the training parameters to ``<log_name>/Parameters.json``."""
        # Save training parameters to Parameters.json for future use
        current_date = datetime.now().strftime('%Y-%m-%d %H:%M')
        params = {
            "training start": current_date,
            "initial balance": initial_balance,
            "training episodes": train_episodes,
            "lookback window size": self.lookback_window_size,
            "depth": self.depth,
            "lr": self.lr,
            "epochs": self.epochs,
            "batch size": self.batch_size,
            "normalize value": normalize_value,
            "model": self.model,
            "comment": self.comment,
            # Filled in later by save()
            "saving time": "",
            "Actor name": "",
            "Critic name": "",
        }
        with open(self.log_name + "/Parameters.json", "w") as write_file:
            json.dump(params, write_file, indent=4)

    def get_gaes(self, rewards, dones, values, next_values, gamma=0.99, lamda=0.95, normalize=True):
        """Compute Generalized Advantage Estimates and critic targets.

        Args:
            rewards, dones: per-step sequences of equal length.
            values, next_values: critic estimates for each state and its
                successor; scalars (0-d arrays) are accepted for a
                single-transition batch.
            gamma: discount factor.
            lamda: GAE smoothing factor.
            normalize: if True, standardize the returned advantages.

        Returns:
            ``(advantages, targets)``, both stacked as column vectors.
        """
        # np.squeeze in replay() collapses a one-transition batch to 0-d,
        # which cannot be iterated by zip(); promote to 1-D first.
        values = np.atleast_1d(values)
        next_values = np.atleast_1d(next_values)

        # One-step TD errors; (1 - d) drops the bootstrap term on terminal steps.
        deltas = [r + gamma * (1 - d) * nv - v for r, d, nv, v in zip(rewards, dones, next_values, values)]
        deltas = np.stack(deltas)
        gaes = copy.deepcopy(deltas)
        # Accumulate discounted TD errors backwards through the trajectory.
        for t in reversed(range(len(deltas) - 1)):
            gaes[t] = gaes[t] + (1 - dones[t]) * gamma * lamda * gaes[t + 1]

        # Targets use the un-normalized advantages.
        target = gaes + values
        if normalize:
            gaes = (gaes - gaes.mean()) / (gaes.std() + 1e-8)
        return np.vstack(gaes), np.vstack(target)

    def replay(self, states, orders, rewards, predictions, dones, next_states, orders_history):
        """Train the Actor and Critic on one batch of collected transitions.

        Requires ``create_writer`` to have been called (uses ``self.writer``
        and ``self.replay_count``).

        Returns:
            ``(actor_loss, critic_loss)``: each is the sum of the per-epoch
            losses reported by ``fit``.
        """
        # Reshape memory to a format suitable for training
        states = np.vstack(states)
        order = np.vstack(orders)
        next_states = np.vstack(next_states)
        orders_history = np.vstack(orders_history)
        predictions = np.vstack(predictions)

        # Get Critic predictions
        if self.model == "EIIE":
            values = self.Critic.critic_predict(states, np.expand_dims(order, axis=1))
        else:
            values = self.Critic.critic_predict(states, np.expand_dims(np.expand_dims(order, axis=0), axis=0))

        # NOTE(review): unlike `values`, this call does not branch on the model
        # type, so the order tensor has a different rank in the non-EIIE case.
        # Kept as-is; confirm against Shared_Model.critic_predict.
        next_values = self.Critic.critic_predict(next_states, np.expand_dims(orders_history, axis=1))

        # Compute advantages
        advantages, target = self.get_gaes(rewards, dones, np.squeeze(values), np.squeeze(next_values))

        # Stack everything into numpy arrays: advantages first, then the predictions
        y_true = np.hstack([advantages, predictions])

        # Train Actor and Critic networks
        if self.model == "EIIE":
            a_loss = self.Actor.Actor.fit([states, np.expand_dims(order, axis=1)], y_true,
                                          epochs=self.epochs, verbose=0, shuffle=True, batch_size=self.batch_size)
            c_loss = self.Critic.Critic.fit([states, np.expand_dims(order, axis=1)], target,
                                            epochs=self.epochs, verbose=0, shuffle=True, batch_size=self.batch_size)
        else:
            a_loss = self.Actor.Actor.fit(states, y_true,
                                          epochs=self.epochs, verbose=0, shuffle=True, batch_size=self.batch_size)
            c_loss = self.Critic.Critic.fit(states, target,
                                            epochs=self.epochs, verbose=0, shuffle=True, batch_size=self.batch_size)

        # Sum the per-epoch losses once and reuse them for logging and the return value
        actor_loss = np.sum(a_loss.history['loss'])
        critic_loss = np.sum(c_loss.history['loss'])

        self.writer.add_scalar('Data/actor_loss_per_replay', actor_loss, self.replay_count)
        self.writer.add_scalar('Data/critic_loss_per_replay', critic_loss, self.replay_count)
        self.replay_count += 1

        return actor_loss, critic_loss

    def act(self, state, order):
        """Return the Actor's prediction for a single state.

        ``state`` gets a leading batch axis and ``order`` two leading axes
        before being passed to the network; the first (only) row of the
        result is returned.
        """
        # Use the neural network to predict the next action
        prediction = self.Actor.actor_predict(np.expand_dims(state, axis=0),
                                              np.expand_dims(np.expand_dims(order, axis=0), axis=0))[0]
        return prediction

    def save(self, name="Crypto_trader", score="", args=[]):
        """Save the model weights and update the run logs.

        Args:
            name: base name of the weight files.
            score: prefix for the weight files; when non-empty, the saved
                file names and time are also recorded in ``Parameters.json``.
            args: values appended as one comma-separated line to ``log.txt``.
        """
        # Save Actor and Critic model weights
        self.Actor.Actor.save_weights(f"{self.log_name}/{score}_{name}_Actor.h5")
        self.Critic.Critic.save_weights(f"{self.log_name}/{score}_{name}_Critic.h5")

        # Update JSON log with model details
        if score != "":
            with open(self.log_name + "/Parameters.json", "r") as json_file:
                params = json.load(json_file)
            params["saving time"] = datetime.now().strftime('%Y-%m-%d %H:%M')
            params["Actor name"] = f"{score}_{name}_Actor.h5"
            params["Critic name"] = f"{score}_{name}_Critic.h5"
            with open(self.log_name + "/Parameters.json", "w") as write_file:
                json.dump(params, write_file, indent=4)

        # Append run arguments to log file
        if len(args) > 0:
            with open(f"{self.log_name}/log.txt", "a+") as log:
                current_time = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
                # Every argument is prefixed with ", " so the line reads
                # "<time>, arg1, arg2, ...".
                arguments = "".join(f", {arg}" for arg in args)
                log.write(f"{current_time}{arguments}\n")

    def load(self, folder, name):
        """Load Actor and Critic weights from ``<folder>/<name>_{Actor,Critic}.h5``."""
        # Load Actor and Critic model weights
        self.Actor.Actor.load_weights(os.path.join(folder, f"{name}_Actor.h5"))
        self.Critic.Critic.load_weights(os.path.join(folder, f"{name}_Critic.h5"))
