In [1]:
# Import optuna for HPO
import optuna
# Import PPO for algos
from stable_baselines3 import PPO
# Evaluate Policy
from stable_baselines3.common.evaluation import evaluate_policy
# Import wrappers
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.vec_env import DummyVecEnv, VecFrameStack
import os
LOG_DIR = './logs/'
if not os.path.exists(LOG_DIR):
    os.makedirs(LOG_DIR)

OPT_DIR = './opt_modeldata/'
if not os.path.exists(OPT_DIR):
    os.makedirs(OPT_DIR)



  from .autonotebook import tqdm as notebook_tqdm


In [2]:
import argparse
from typing import Optional, Tuple
import gym
from gym import Env
from gym.spaces import Box, MultiBinary, Discrete
import numpy as np
import os
from stable_baselines3 import PPO , SAC
from stable_baselines3.common.callbacks import BaseCallback
import gym
from gym import spaces
from gym.wrappers import FrameStack
from collections import deque
import pandas as pd
import matplotlib.pyplot as plt
import uuid
from torch.utils.tensorboard import SummaryWriter

# Initialize SummaryWriter


# i) can buy (to sell in upper price(buy)) and hold for a time period and then close,
# ii) can buy (to sell in lower price(sell)) and hold for a time period and then close,
# iii) observe the market and do nothing
# 1) buy_open 2) sell_open 3) Close 4) hold 5) Do nothing
class TradingEnv(gym.Env):
    def __init__(self, data):
        super(TradingEnv, self).__init__()
        self.data = data
        #WILL CHANGE max step later
        self.MAXIMUM_AMOUNT_OF_TIME_FOR_HOLDING = 100
        
        log_dir = "./logs"  # Change this to the desired log directory
        self.writer = SummaryWriter(log_dir)

        self.OWN_CURRENCY_AMOUNT = np.random.randint(10, 100)
        self.USED_CURRENCY_AMOUNT = np.random.randint(1000, 100000)
        self.USED_LEVERAGE = self.USED_CURRENCY_AMOUNT / self.OWN_CURRENCY_AMOUNT

        #loss tollaranace is the 5%  of own currency amount
        self.LOSS_TOLLARANCE = self.OWN_CURRENCY_AMOUNT * 0.05
        
        #accumulated loss tollarance is the 10%  of own currency amount
        self.ACCUMULATED_LOSS_TOLLARANCE = self.OWN_CURRENCY_AMOUNT * 0.1
        
        self.MINIMUM_GAINS = self.OWN_CURRENCY_AMOUNT * 0.05
        self.MAXIMUM_DOING_NOTHING_STEPS_TOLLARANCE = 100
        self.WRONG_STEPS_TOLLARANCE = 10
        self.log_interval = 30  # Log every log_interval episodes   
        self.taking_wrong_action_count = 0
        self.auto_terminated_trades = 0
        self.buy_open_uuids = {}
        self.sell_open_uuids = {}        
        self.previous_trade_details = {}
        self.previous_reward = 0
        self.net_gains = 0
        self.current_step = 0
        self.current_price = data['Close'][self.current_step]

        # Action space:
        # 0: buy
        # 1: sel
        # 2: buy close
        # 3: sell close
        # 4: hold
        # 5: do nothing
        self.action_space = spaces.Discrete(6)

        
        # Observation space:
        # 0: current price
        # 1: own currency amount
        # 2: used currency amount
        # 3: used leverage
        # 4: loss tollarance
        # 5: accumulated loss tollarance
        # 6: maximum amount of time for holding
        # 7: current buy open trades
        # 8: current sell open trades
        # 9: auto terminated trades
        # 10: maximum doing nothing steps tollarance
        # 11: current step
        self.observation_space = spaces.Box(low=-np.inf, high=np.inf, shape=(12 ,), dtype=np.float32)
    def calculate_buy_profit_loss(self, opening_price, closing_price, position_size, leverage):
        # Calculate profit or loss based on opening and closing prices, position size, and leverage
        profit_loss = ((closing_price - opening_price) / opening_price) * position_size * leverage
        return profit_loss
    
    def calculate_sell_profit_loss(self, opening_price, closing_price, position_size, leverage):
        # Calculate profit or loss based on opening and closing prices, position size, and leverage
        profit_loss = ((opening_price - closing_price) / opening_price) * position_size * leverage
        return profit_loss

    def Terminate_lossing_buy_open_trades(self):
        trades_to_close = []
        for buy_open_uuid, trade_info in self.buy_open_uuids.items():
            # Use calculate_buy_profit_loss function to calculate the profit or loss
            if self.calculate_buy_profit_loss(trade_info['open_price'], self.current_price, self.OWN_CURRENCY_AMOUNT, self.USED_LEVERAGE) < self.LOSS_TOLLARANCE:
                trades_to_close.append(buy_open_uuid)

        for buy_open_uuid in trades_to_close:
            self.close_trade(buy_open_uuid, self.buy_open_uuids, 'buy_open')
            self.auto_terminated_trades += 1


    def Terminate_lossing_sell_open_trades(self):
        trades_to_close = []
        for sell_open_uuid, trade_info in self.sell_open_uuids.items():
            # Use calculate_sell_profit_loss function to calculate the profit or loss
            if self.calculate_sell_profit_loss(trade_info['open_price'], self.current_price, self.OWN_CURRENCY_AMOUNT, self.USED_LEVERAGE) < self.LOSS_TOLLARANCE:
                trades_to_close.append(sell_open_uuid)

        for sell_open_uuid in trades_to_close:
            self.close_trade(sell_open_uuid, self.sell_open_uuids, 'sell_open')
            self.auto_terminated_trades += 1
            
    def Terminate_after_maximum_step(self):
        self.update_available_time_for_each_trade()
        trades_to_close = []
        for buy_open_uuid, trade_info in self.buy_open_uuids.items():
            # Use calculate_buy_profit_loss function to calculate the profit or loss
            if trade_info['available_time'] == 0:
                trades_to_close.append(buy_open_uuid)

        for buy_open_uuid in trades_to_close:
            self.close_trade(buy_open_uuid, self.buy_open_uuids, 'buy_open')
            self.auto_terminated_trades += 1

        trades_to_close = []
        for sell_open_uuid, trade_info in self.sell_open_uuids.items():
            # Use calculate_sell_profit_loss function to calculate the profit or loss
            if trade_info['available_time'] == 0:
                trades_to_close.append(sell_open_uuid)

        for sell_open_uuid in trades_to_close:
            self.close_trade(sell_open_uuid, self.sell_open_uuids, 'sell_open')
            self.auto_terminated_trades += 1




    def calculate_current_profit_loss(self):
        # Calculate the current profit or loss
        current_profit_loss = 0
        for buy_open_uuid, trade_info in self.buy_open_uuids.items():
            current_profit_loss += self.calculate_buy_profit_loss(trade_info['open_price'], self.current_price, self.OWN_CURRENCY_AMOUNT, self.USED_LEVERAGE)
        for sell_open_uuid, trade_info in self.sell_open_uuids.items():
            current_profit_loss += self.calculate_sell_profit_loss(trade_info['open_price'], self.current_price, self.OWN_CURRENCY_AMOUNT, self.USED_LEVERAGE)
        return current_profit_loss

            

    def update_available_time_for_each_trade(self):
        for buy_open_uuid, trade_info in self.buy_open_uuids.items():
            trade_info['available_time'] -= 1

        for sell_open_uuid, trade_info in self.sell_open_uuids.items():
            trade_info['available_time'] -= 1


    def close_trade(self, trade_uuid, trade_dict, trade_type):
        trade_dict[trade_uuid]['close_price'] = self.current_price
        if trade_type == 'buy_open':
            self.net_gains += trade_dict[trade_uuid]['close_price'] - trade_dict[trade_uuid]['open_price']
            self.current_buy_open_trades -= 1
            #add to previous trade details
            self.previous_trade_details[trade_uuid] = trade_dict[trade_uuid]
            #fix currntt amount of money and leverage keep the used currency amount same
            self.OWN_CURRENCY_AMOUNT += self.calculate_buy_profit_loss(trade_dict[trade_uuid]['open_price'], trade_dict[trade_uuid]['close_price'], self.OWN_CURRENCY_AMOUNT, self.USED_LEVERAGE)
            self.USED_LEVERAGE = self.USED_CURRENCY_AMOUNT / self.OWN_CURRENCY_AMOUNT
            self.LOSS_TOLLARANCE = self.OWN_CURRENCY_AMOUNT * 0.05



        elif trade_type == 'sell_open':
            self.net_gains += trade_dict[trade_uuid]['open_price'] - trade_dict[trade_uuid]['close_price']
            self.current_sell_open_trades -= 1
            #add to previous trade details
            self.previous_trade_details[trade_uuid] = trade_dict[trade_uuid]
            #fix currntt amount of money and leverage keep the used currency amount same
            self.OWN_CURRENCY_AMOUNT += self.calculate_sell_profit_loss(trade_dict[trade_uuid]['open_price'], trade_dict[trade_uuid]['close_price'], self.OWN_CURRENCY_AMOUNT, self.USED_LEVERAGE)
            self.USED_LEVERAGE = self.USED_CURRENCY_AMOUNT / self.OWN_CURRENCY_AMOUNT
            self.LOSS_TOLLARANCE = self.OWN_CURRENCY_AMOUNT * 0.05


        del trade_dict[trade_uuid]

    def calculate_total_profit_loss(self):
        #use previous and current trade details to calculate total profit loss
        current_profit_loss = 0
        for buy_open_uuid, trade_info in self.buy_open_uuids.items():
            current_profit_loss += self.calculate_buy_profit_loss(trade_info['open_price'], self.current_price, self.OWN_CURRENCY_AMOUNT, self.USED_LEVERAGE)
        for sell_open_uuid, trade_info in self.sell_open_uuids.items():
            current_profit_loss += self.calculate_sell_profit_loss(trade_info['open_price'], self.current_price, self.OWN_CURRENCY_AMOUNT, self.USED_LEVERAGE)
        previous_profit_loss = 0
        for buy_open_uuid, trade_info in self.previous_trade_details.items():
            previous_profit_loss += self.calculate_buy_profit_loss(trade_info['open_price'], trade_info['close_price'], self.OWN_CURRENCY_AMOUNT, self.USED_LEVERAGE)
        for sell_open_uuid, trade_info in self.previous_trade_details.items():
            previous_profit_loss += self.calculate_sell_profit_loss(trade_info['open_price'], trade_info['close_price'], self.OWN_CURRENCY_AMOUNT, self.USED_LEVERAGE)
        total_profit_loss = current_profit_loss + previous_profit_loss
        return total_profit_loss



    

    def calculate_number_of_profitable_trades(self):
        profitable_trades = 0
        for buy_open_uuid, trade_info in self.previous_trade_details.items():
            if self.calculate_buy_profit_loss(trade_info['open_price'], trade_info['close_price'], self.OWN_CURRENCY_AMOUNT, self.USED_LEVERAGE) > 0:
                profitable_trades += 1
        for sell_open_uuid, trade_info in self.previous_trade_details.items():
            if self.calculate_sell_profit_loss(trade_info['open_price'], trade_info['close_price'], self.OWN_CURRENCY_AMOUNT, self.USED_LEVERAGE) > 0:
                profitable_trades += 1
        return profitable_trades





    def calculate_number_of_total_trades_profotalbe_and_lossing_trades(self):
        total_trades = len(self.previous_trade_details) + self.current_buy_open_trades + self.current_sell_open_trades
        profitable_trades = self.calculate_number_of_profitable_trades()
        lossing_trades = total_trades - profitable_trades
        return total_trades, profitable_trades, lossing_trades


    

    def reset(self, new_data=None):
        self.current_step = 0
        self.current_price = self.data['Close'][self.current_step]
        self.current_buy_open_trades = 0
        self.current_sell_open_trades = 0
        self.auto_terminated_trades = 0
        self.buy_open_uuids = {}
        self.sell_open_uuids = {}
        self.previous_trade_details = {}
        self.previous_reward = 0
        self.net_gains = 0
        self.MAXIMUM_DOING_NOTHING_STEPS_TOLLARANCE = 100

        # Reset currency amounts
        self.OWN_CURRENCY_AMOUNT = np.random.randint(10, 100)
        self.USED_CURRENCY_AMOUNT = np.random.randint(1000, 100000)
        self.USED_LEVERAGE = self.USED_CURRENCY_AMOUNT / self.OWN_CURRENCY_AMOUNT

        #loss tollaranace is the 5%  of own currency amount
        self.LOSS_TOLLARANCE = self.OWN_CURRENCY_AMOUNT * 0.05

        #accumulated loss tollarance is the 10%  of own currency amount
        self.ACCUMULATED_LOSS_TOLLARANCE = self.OWN_CURRENCY_AMOUNT * 0.1







        # Reset action and observation spaces
        self.action_space = spaces.Discrete(6)
        self.observation_space = spaces.Box(low=-np.inf, high=np.inf, shape=(12 ,), dtype=np.float32)
        
        # Reset the environment and return the initial observation
        obs = self._next_observation()

        
        return obs
    
    def _next_observation(self):
        obs = np.array([
            self.current_price,
            self.OWN_CURRENCY_AMOUNT,
            self.USED_CURRENCY_AMOUNT,
            self.USED_LEVERAGE,
            self.LOSS_TOLLARANCE,
            self.ACCUMULATED_LOSS_TOLLARANCE,
            self.MAXIMUM_AMOUNT_OF_TIME_FOR_HOLDING,
            self.current_buy_open_trades,
            self.current_sell_open_trades,
            self.auto_terminated_trades,
            self.MAXIMUM_DOING_NOTHING_STEPS_TOLLARANCE,
            self.current_step

        ])
        
        return obs

    def step(self, action ):
        self.current_step += 1.
        self.current_price = self.data['Close'][self.current_step]
        self.Terminate_after_maximum_step()
        self.Terminate_lossing_buy_open_trades()
        self.Terminate_lossing_sell_open_trades()
        self.current_buy_open_trades = len(self.buy_open_uuids)
        self.current_sell_open_trades = len(self.sell_open_uuids)
        total_open_trades = self.current_buy_open_trades + self.current_sell_open_trades
        wrong_action = False
        do_nothing = False

        if action == 0:
            if total_open_trades == 0:
                self.buy_open_uuids[str(uuid.uuid4())] = {
                    'open_price': self.current_price,
                    'available_time': self.MAXIMUM_AMOUNT_OF_TIME_FOR_HOLDING
                }
            else:
                action = 4
                self.taking_wrong_action_count += 1
                wrong_action = True
        elif action == 1:
            if total_open_trades == 0:
                self.sell_open_uuids[str(uuid.uuid4())] = {
                    'open_price': self.current_price,
                    'available_time': self.MAXIMUM_AMOUNT_OF_TIME_FOR_HOLDING
                }
            else:
                action = 4
                self.taking_wrong_action_count += 1
                wrong_action = True
        elif action == 2:
            if self.current_buy_open_trades > 0:
                buy_open_uuid = list(self.buy_open_uuids.keys())[0]
                self.close_trade(buy_open_uuid, self.buy_open_uuids, 'buy_open')
            elif self.current_sell_open_trades > 0:
                action = 4 # Hold
                self.taking_wrong_action_count += 1
                wrong_action = True
            else:
                action = 5 # Do nothing
                self.taking_wrong_action_count += 1
                wrong_action = True
                self.MAXIMUM_DOING_NOTHING_STEPS_TOLLARANCE -= 1
                do_nothing = True
            
        elif action == 3:
            if self.current_sell_open_trades > 0:
                sell_open_uuid = list(self.sell_open_uuids.keys())[0]
                self.close_trade(sell_open_uuid, self.sell_open_uuids, 'sell_open')
            elif self.current_buy_open_trades > 0:
                action = 4
                self.taking_wrong_action_count += 1
                wrong_action = True
            else:
                action = 5 # Do nothing
                self.taking_wrong_action_count += 1
                wrong_action = True
                self.MAXIMUM_DOING_NOTHING_STEPS_TOLLARANCE -= 1
                do_nothing = True

        elif action == 4:
            if total_open_trades == 0:
                action = 5
                self.taking_wrong_action_count += 1
                wrong_action = True
            else:
                action = 4
        elif action == 5:
            if total_open_trades > 0:
                action = 4
                self.taking_wrong_action_count += 1
                wrong_action = True
            else:
                action = 5
                self.MAXIMUM_DOING_NOTHING_STEPS_TOLLARANCE -= 1
                do_nothing = True
            
        else:
            action = 5
            self.taking_wrong_action_count += 1
            wrong_action = True
            self.MAXIMUM_DOING_NOTHING_STEPS_TOLLARANCE -= 1
            do_nothing = True



        obs = self._next_observation()
        done = self.done()
        reward = self.reward(wrong_action,do_nothing,done)

        info = {'action':action,'current_price': self.current_price, 'current_step': self.current_step, 'current_profit_loss': self.calculate_current_profit_loss(), 'total_profit_loss': self.calculate_total_profit_loss(), 'net_gains': self.net_gains, 'current_buy_open_trades': self.current_buy_open_trades, 'current_sell_open_trades': self.current_sell_open_trades, 'auto_terminated_trades': self.auto_terminated_trades, 'previous_reward': self.previous_reward, 'buy_open_uuids': self.buy_open_uuids, 'sell_open_uuids': self.sell_open_uuids, 'previous_trade_details': self.previous_trade_details}      

        #if done then add to tensorboard
        if done:
            self.log_to_tensorboard(reward)
        
        return obs, reward, done, info

    def reward(self,wrong_action:bool = False,do_nothing:bool = False,done:bool = False):
        # 1) reward is proportional to the profit or loss
        # 2) reward is proportional to the number of trades
        # 4) reward is inversely proportional to number of auto terminated trades
        # 5)panalty for doing nothing
        # 6) reward for closing profitable trades
        # 7) panalty for closing lossing trades
        # 8) extra reward for each 2% profit
        # 9) i step == 404 then means done and panalty
        # 10)  if current reward is more than previous reward then reward is positive else negative
        # 11) panalty for done
        total_profit_loss = self.calculate_total_profit_loss() 
        current_profit_loss = self.calculate_current_profit_loss()

        total_profit_loss_percentage = (total_profit_loss / self.OWN_CURRENCY_AMOUNT) * 100
        current_profit_loss_percentage = (current_profit_loss / self.OWN_CURRENCY_AMOUNT) * 100

        current_number_of_open_trade = self.current_buy_open_trades + self.current_sell_open_trades
        auto_terminated_trades = self.auto_terminated_trades
        total_trades, profitable_trades, lossing_trades = self.calculate_number_of_total_trades_profotalbe_and_lossing_trades()
        profit_loss_percentage = total_profit_loss / self.OWN_CURRENCY_AMOUNT            
        K = 0.1  # You can adjust this value based on the desired magnitude of the rewards/penalties

        reward = (
            K * total_profit_loss_percentage +                  # Reward proportional to profit or loss
            K * current_profit_loss_percentage +                # Reward proportional to profit or loss
            K *10* total_trades +                       # Reward proportional to the number of trades
            -K * auto_terminated_trades +            # Penalty inversely proportional to the number of auto-terminated trades
            -K * (100-self.MAXIMUM_DOING_NOTHING_STEPS_TOLLARANCE) +  # Penalty for doing nothing
            K *20* profitable_trades +                  # Reward for closing profitable trades
            -K * lossing_trades +               
                 
                 
                      # Penalty for closing losing trades
            K * (
                
                
                
                   profit_loss_percentage // 0.02)     # Extra reward for each 2% profit
        )
        reward= reward + K * (1 if reward > self.previous_reward else -1)  # Reward for improving or penalizing the reward

        if wrong_action:
            reward = -K * 10
            self.WRONG_STEPS_TOLLARANCE -= 1
        if done:
            reward = 0 

        self.previous_reward = reward
        
        return reward
    


    def done(self):
    #1)if current loss is greater than 3% of own currency amount
    #2)if auto terminated trades are greater than 3
    #3)if all comolative loss is greater than 10% of own currency amount
    #4)maximum doing nothing steps tollarance
    #5) IF 3 loss trades over 10 trades and 2 consecutive loss trades
    #6) if maximum doing nothing steps tollarance is 0
    #7) if total rades are 3 or more but cutent totatal profit is less than 10% of own currency amount
        current_profit_loss = self.calculate_current_profit_loss()
        # print("current loss is ", current_profit_loss)
        # print("loss tollarance is ", self.LOSS_TOLLARANCE)
        # print("accumulated loss tollarance is ", self.ACCUMULATED_LOSS_TOLLARANCE)


        autoterminated_trades = self.auto_terminated_trades
        cumulative_profit_loss = self.calculate_total_profit_loss()
        doing_nothing_steps_tollarance = self.MAXIMUM_DOING_NOTHING_STEPS_TOLLARANCE
        total_trades, profitable_trades, lossing_trades = self.calculate_number_of_total_trades_profotalbe_and_lossing_trades()


        wrong_steps_tollarance = self.WRONG_STEPS_TOLLARANCE

        
        if current_profit_loss < -self.LOSS_TOLLARANCE:
            
            return True
        elif autoterminated_trades > 2:
            
            return True
        elif cumulative_profit_loss < -self.ACCUMULATED_LOSS_TOLLARANCE:
            
            return True
        elif doing_nothing_steps_tollarance == 0:
            
            return True
        elif wrong_steps_tollarance == 0:
            
            return True
        elif lossing_trades > 2 and lossing_trades / total_trades > 0.3:
            
            return True
        elif total_trades >= 4 and cumulative_profit_loss < self.MINIMUM_GAINS:
            
            return True
        
        else:
            return False




    def render(self, mode='human'):
        profit_loss = self.calculate_total_profit_loss()
        print(f'Step: {self.current_step}')
        print(f'Price: {self.current_price}')
        print(f'Profit/Loss: {profit_loss}')
        print(f'Net Gains: {self.net_gains}')
        print(f'Current Buy Open Trades: {self.current_buy_open_trades}')
        print(f'Current Sell Open Trades: {self.current_sell_open_trades}')
        print(f'Auto Terminated Trades: {self.auto_terminated_trades}')
        print(f'Previous Reward: {self.previous_reward}')
        print(f'Action Space: {self.action_space}')
        print(f'Observation Space: {self.observation_space}')
        print(f'Buy Open Trades: {self.buy_open_uuids}')
        print(f'Sell Open Trades: {self.sell_open_uuids}')
        print(f'Previous Trade Details: {self.previous_trade_details}')



    def log_to_tensorboard(self, reward):
        # Log relevant metrics to TensorBoard
    
        total_trades, profitable_trades, lossing_trades = self.calculate_number_of_total_trades_profotalbe_and_lossing_trades()

        total_profit_loss = self.calculate_total_profit_loss()
        current_profit_loss = self.calculate_current_profit_loss()

        total_profit_loss_percentage = (total_profit_loss / self.OWN_CURRENCY_AMOUNT) * 100
        current_profit_loss_percentage = (current_profit_loss / self.OWN_CURRENCY_AMOUNT) * 100


        self.writer.add_scalar("Reward", reward, self.current_step)
        self.writer.add_scalar("Profit/Loss", total_profit_loss_percentage, self.current_step)
        self.writer.add_scalar("Auto Terminated Trades", self.auto_terminated_trades, self.current_step)
        self.writer.add_scalar("Previous Reward", self.previous_reward, self.current_step)
        self.writer.add_scalar("Current Step", self.current_step, self.current_step)
        self.writer.add_scalar("current profit loss", current_profit_loss_percentage, self.current_step)
        self.writer.add_scalar("total trades", total_trades, self.current_step)
        self.writer.add_scalar("number of profitable trades", profitable_trades, self.current_step)
        self.writer.add_scalar("number of lossing trades", lossing_trades, self.current_step)
        self.writer.flush()



In [3]:
import pandas as pd
data = pd.read_csv('Demo_4M.csv')

In [4]:
# Define the frame skip frequency
frame_skip_frequency = 1

# Define the number of training steps
total_timesteps = 10000000

# Define the directory paths
LOG_DIR = './logs/'
OPT_DIR = './opt_modeldata/'
CHECKPOINT_DIR = './train_modeldata/'

# Create the directories if they don't exist
os.makedirs(LOG_DIR, exist_ok=True)
os.makedirs(OPT_DIR, exist_ok=True)
os.makedirs(CHECKPOINT_DIR, exist_ok=True)


In [5]:
env = TradingEnv(data)
print("Action space:", env.action_space)
print("Observation space:", env.observation_space)
for i in range(20):
    obs = env.reset()
    done = False
    while not done:
        action = env.action_space.sample()
        obs, reward, done, info = env.step(action)
        env.render()
        print("obs=", obs, "reward=", reward, "done=", done)
        print(info)
        print("")


Action space: Discrete(6)
Observation space: Box([-inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf], [inf inf inf inf inf inf inf inf inf inf inf inf], (12,), float32)
Step: 1.0
Price: 1.0645
Profit/Loss: 0
Net Gains: 0
Current Buy Open Trades: 0
Current Sell Open Trades: 0
Auto Terminated Trades: 0
Previous Reward: -1.0
Action Space: Discrete(6)
Observation Space: Box([-inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf], [inf inf inf inf inf inf inf inf inf inf inf inf], (12,), float32)
Buy Open Trades: {}
Sell Open Trades: {}
Previous Trade Details: {}
obs= [1.0645000e+00 1.6000000e+01 3.5907000e+04 2.2441875e+03 8.0000000e-01
 1.6000000e+00 1.0000000e+02 0.0000000e+00 0.0000000e+00 0.0000000e+00
 9.9000000e+01 1.0000000e+00] reward= -1.0 done= False
{'action': 5, 'current_price': 1.0645, 'current_step': 1.0, 'current_profit_loss': 0, 'total_profit_loss': 0, 'net_gains': 0, 'current_buy_open_trades': 0, 'current_sell_open_trades': 0, 'auto_terminated_trades': 0, '

In [6]:
import os
from stable_baselines3.common.callbacks import BaseCallback
from torch.utils.tensorboard import SummaryWriter
from stable_baselines3.common.results_plotter import load_results, ts2xy
import numpy as np

class TrainAndLoggingCallback(BaseCallback):

    def __init__(self, check_freq, save_path, tensorboard_log, batch_size, verbose=1):
        super(TrainAndLoggingCallback, self).__init__(verbose)
        self.check_freq = check_freq
        self.save_path = save_path
        self.tensorboard_log = tensorboard_log
        self.batch_size = batch_size
        self.writer = SummaryWriter(tensorboard_log)
        self.data_array = None
        self.current_data_index = 0  # Initialize the index to keep track of the current position in the data

    def _init_callback(self):
        if self.save_path is not None:
            os.makedirs(self.save_path, exist_ok=True)
        # Assuming data is a NumPy array or can be converted to one
        self.data_array = np.array(self.model.env.get_attr("data"))

    def _on_step(self):
        if self.n_calls % self.check_freq == 0:
            model_path = os.path.join(self.save_path, 'best_model_{}'.format(self.n_calls))
            self.model.save(model_path)

            # Log the mean reward to TensorBoard using ts2xy
            x, y = ts2xy(load_results(self.tensorboard_log), 'timesteps')
            if len(x) > 0:
                self.writer.add_scalar('reward', y[-1], x[-1])

            # Update your environment with the current batch of data
            self.update_environment_with_current_batch()

        return True

    def update_environment_with_current_batch(self):
        # Ensure the data_array is initialized
        if self.data_array is not None:
            # Calculate the end index of the current batch
            end_idx = min(self.current_data_index + self.batch_size, len(self.data_array))

            # Select the current batch of data
            batch_data = self.data_array[self.current_data_index:end_idx]

            # Update your environment with the current batch of data
            self.model.env.data = batch_data

            # Update the current_data_index for the next batch
            self.current_data_index = (self.current_data_index + self.batch_size) % len(self.data_array)


CHECKPOINT_DIR = './train_modeldata/'
if not os.path.exists(CHECKPOINT_DIR):
    os.makedirs(CHECKPOINT_DIR)

# Define the checkpoint callback function
callback = TrainAndLoggingCallback(check_freq=1000, save_path=CHECKPOINT_DIR, tensorboard_log=LOG_DIR, batch_size=10000)

env = TradingEnv(data) 
env = Monitor(env, LOG_DIR)

env = DummyVecEnv([lambda: env])
env = VecFrameStack(env, n_stack=1)



# Define the hyperparameters
model_params = {
    'n_steps': 1440,
    'ent_coef': 0.0,
    'learning_rate': 0.0025,
    'gamma': 0.99,
    'gae_lambda': 0.95,
    'clip_range': 0.2,
    'vf_coef': 0.5,
    'max_grad_norm': 0.5,
}



#load the model
model = PPO.load("best_model_10000000", env=env, tensorboard_log=LOG_DIR, verbose=1, **model_params)


model.learn(total_timesteps=total_timesteps, callback=callback)



Logging to ./logs/PPO_1
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 393      |
|    ep_rew_mean     | -378     |
| time/              |          |
|    fps             | 245      |
|    iterations      | 1        |
|    time_elapsed    | 5        |
|    total_timesteps | 1440     |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 68.3        |
|    ep_rew_mean          | -59.1       |
| time/                   |             |
|    fps                  | 285         |
|    iterations           | 2           |
|    time_elapsed         | 10          |
|    total_timesteps      | 2880        |
| train/                  |             |
|    approx_kl            | 0.054956995 |
|    clip_fraction        | 0.0308      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.269      |
|    explained_variance   | -1.19e-07   |
|    l

In [None]:
# Import os for file path management
import os
# Import Base Callback for saving models
from stable_baselines3.common.callbacks import BaseCallback
# Import Summary Writer for logging
from torch.utils.tensorboard import SummaryWriter
# Import t2xy for plotting
from stable_baselines3.common.results_plotter import load_results, ts2xy

class TrainAndLoggingCallback(BaseCallback):

    def __init__(self, check_freq, save_path, tensorboard_log, verbose=1):
        super(TrainAndLoggingCallback, self).__init__(verbose)
        self.check_freq = check_freq
        self.save_path = save_path
        self.tensorboard_log = tensorboard_log
        self.writer = SummaryWriter(tensorboard_log)

    def _init_callback(self):
        if self.save_path is not None:
            os.makedirs(self.save_path, exist_ok=True)

    def _on_step(self):
        if self.n_calls % self.check_freq == 0:
            model_path = os.path.join(self.save_path, 'best_model_{}'.format(self.n_calls))
            self.model.save(model_path)

            # Log the mean reward to TensorBoard use t2xy
            x, y = ts2xy(load_results(self.tensorboard_log), 'timesteps')
            if len(x) > 0:
                self.writer.add_scalar('reward', y[-1], x[-1])
        return True            
    

CHECKPOINT_DIR = './train_modeldata/'
if not os.path.exists(CHECKPOINT_DIR):
    os.makedirs(CHECKPOINT_DIR)

callback = TrainAndLoggingCallback(check_freq=10000, save_path=CHECKPOINT_DIR, tensorboard_log=LOG_DIR)

env = TradingEnv(data) 
env = Monitor(env, LOG_DIR)

env = DummyVecEnv([lambda: env])
env = VecFrameStack(env, n_stack=frame_skip_frequency)



# Define the hyperparameters
model_params = {
    'n_steps': 2048,
    'ent_coef': 0.0,
    'learning_rate': 0.0025,
    'gamma': 0.99,
    'gae_lambda': 0.95,
    'clip_range': 0.2,
    'vf_coef': 0.5,
    'max_grad_norm': 0.5,
}



model = PPO('MlpPolicy',env,tensorboard_log= LOG_DIR , batch_size=len(data)-1    , verbose=1, **model_params)

model.learn(total_timesteps=total_timesteps, callback=callback)


