In [1]:
# Import optuna for HPO
import optuna
# Import PPO for algos
from stable_baselines3 import PPO
# Evaluate Policy
from stable_baselines3.common.evaluation import evaluate_policy
# Import wrappers
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.vec_env import DummyVecEnv, VecFrameStack
import os
# Output directories used by the notebook. `exist_ok=True` makes creation
# idempotent and avoids the check-then-create race of `os.path.exists`.
LOG_DIR = './logs/'
os.makedirs(LOG_DIR, exist_ok=True)

# NOTE(review): presumably holds models saved during hyper-parameter search -- confirm.
OPT_DIR = './opt_modeldata/'
os.makedirs(OPT_DIR, exist_ok=True)



  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# import pandas as pd

# # Create a sample DataFrame
# df = pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]})

# # Create a new column named 'Index' containing the original index values
# df['Index'] = df.index

# # Print the DataFrame to see the result
# print(df)


In [3]:
import argparse
from typing import Optional, Tuple
import gym
from gym import Env
from gym.spaces import Box, MultiBinary, Discrete
import numpy as np
import os
from stable_baselines3 import PPO , SAC
from stable_baselines3.common.callbacks import BaseCallback
import gym
from gym import spaces
from gym.wrappers import FrameStack
from collections import deque
import pandas as pd
import matplotlib.pyplot as plt
import uuid
from torch.utils.tensorboard import SummaryWriter

# Initialize SummaryWriter


# The agent can:
# i) open a buy (long) position, hold it for a while, then close it (ideally at a higher price),
# ii) open a sell (short) position, hold it for a while, then close it (ideally at a lower price),
# iii) observe the market and do nothing.
# Actions: 0) buy_open 1) sell_open 2) buy_close 3) sell_close 4) hold 5) do nothing
class TradingEnv(gym.Env):
    def __init__(self, data):
        """Create a trading environment over a random contiguous slice of ``data``.

        Args:
            data: pandas DataFrame with a ``'Close'`` price column. A window of
                ``self.batch_size`` consecutive rows is sampled at random and
                re-indexed from 0; when ``data`` has no more rows than the
                window, all of it is used.
        """
        super(TradingEnv, self).__init__()
        full_data = data

        self.batch_size = 1000

        # np.random.randint(0, high) requires high > 0, so a start offset is
        # only sampled when the frame is strictly longer than one window.
        if len(full_data) > self.batch_size:
            start = np.random.randint(0, len(full_data) - self.batch_size)
        else:
            start = 0
        end = start + self.batch_size
        # Re-index so row labels equal step numbers inside the window.
        self.data = full_data[start:end].reset_index(drop=True)

        #WILL CHANGE max step later
        self.MAXIMUM_AMOUNT_OF_TIME_FOR_HOLDING = 100

        log_dir = "./logs"  # Change this to the desired log directory
        self.writer = SummaryWriter(log_dir)

        self.OWN_CURRENCY_AMOUNT = np.random.randint(10, 100)
        self.USED_CURRENCY_AMOUNT = np.random.randint(1000, 100000)
        self.USED_LEVERAGE = self.USED_CURRENCY_AMOUNT / self.OWN_CURRENCY_AMOUNT

        # Loss tolerance is 5% of the own currency amount.
        self.LOSS_TOLLARANCE = self.OWN_CURRENCY_AMOUNT * 0.05

        # Accumulated loss tolerance is 10% of the own currency amount.
        self.ACCUMULATED_LOSS_TOLLARANCE = self.OWN_CURRENCY_AMOUNT * 0.1

        self.pre_calculated_min_profit = 0.01

        self.MINIMUM_GAINS = self.OWN_CURRENCY_AMOUNT * 0.05
        self.MAXIMUM_DOING_NOTHING_STEPS_TOLLARANCE = 100
        self.WRONG_STEPS_TOLLARANCE = 10
        self.log_interval = 30  # Log every log_interval episodes
        self.taking_wrong_action_count = 0
        self.auto_terminated_trades = 0
        # Counters are also (re)set in reset()/step(); initialising them here
        # keeps _next_observation() and render() usable before the first reset.
        self.current_buy_open_trades = 0
        self.current_sell_open_trades = 0
        self.buy_open_uuids = {}
        self.sell_open_uuids = {}
        self.pre_calculated_buy_open_uuids = {}
        self.pre_calculated_sell_open_uuids = {}
        self.previous_trade_details = {}
        self.step_details = {}
        self.previous_reward = 0
        self.net_gains = 0
        self.current_step = 0
        # Read from the sampled window (as reset() does), not from the full
        # frame, whose row 0 is generally not the first row of the window.
        self.current_price = self.data['Close'][self.current_step]

        # Action space:
        # 0: buy
        # 1: sell
        # 2: buy close
        # 3: sell close
        # 4: hold
        # 5: do nothing
        self.action_space = spaces.Discrete(6)

        # Observation space:
        # 0: current price
        # 1: own currency amount
        # 2: used currency amount
        # 3: used leverage
        # 4: loss tolerance
        # 5: accumulated loss tolerance
        # 6: maximum amount of time for holding
        # 7: current buy open trades
        # 8: current sell open trades
        # 9: auto terminated trades
        # 10: maximum doing nothing steps tolerance
        # 11: current step
        self.observation_space = spaces.Box(low=-np.inf, high=np.inf, shape=(12 ,), dtype=np.float32)

        # Pre-compute candidate trades on the window, filter them, then index
        # the filtered list by opening row (one dict per direction).
        self.pre_calculate_profitable_trades()
        self.pre_calculated_min_trades = self.check_min_loss_profitable_trades()
        self.pre_calculated_min_buy_trades = {}
        self.pre_calculated_min_sell_trades = {}
        self.pre_calculated_min_trades_to_dict()







    #convert pre_calculated_min_trades list to two seperate dict
    def pre_calculated_min_trades_to_dict(self):
        for trade in self.pre_calculated_min_trades:
            if trade['type'] == 'buy':
                self.pre_calculated_min_buy_trades[trade['open_index']] = trade
            else:
                self.pre_calculated_min_sell_trades[trade['open_index']] = trade




















    # def calculate_divergence(self):
    def calculate_current_profit_loss(self):
        """Return the summed unrealised profit/loss of all open buy and sell trades.

        Each open trade is valued at ``self.current_price`` against its stored
        ``'open_price'`` using the current own-currency amount and leverage.
        Returns ``0`` when no trade is open.
        """
        # NOTE(review): a method with this exact name is defined again further
        # down in this class; Python keeps the later definition, so this copy
        # is never the one that runs. One of the two should be deleted.
        # Calculate the current profit or loss
        current_profit_loss = 0
        for buy_open_uuid, trade_info in self.buy_open_uuids.items():
            current_profit_loss += self.calculate_buy_profit_loss(trade_info['open_price'], self.current_price, self.OWN_CURRENCY_AMOUNT, self.USED_LEVERAGE)
        for sell_open_uuid, trade_info in self.sell_open_uuids.items():
            current_profit_loss += self.calculate_sell_profit_loss(trade_info['open_price'], self.current_price, self.OWN_CURRENCY_AMOUNT, self.USED_LEVERAGE)
        return current_profit_loss
        


    def calculate_buy_profit_loss(self, opening_price, closing_price, position_size, leverage):
        # Calculate profit or loss based on opening and closing prices, position size, and leverage
        profit_loss = ((closing_price - opening_price) / opening_price) * position_size * leverage
        return profit_loss
    
    def calculate_sell_profit_loss(self, opening_price, closing_price, position_size, leverage):
        # Calculate profit or loss based on opening and closing prices, position size, and leverage
        profit_loss = ((opening_price - closing_price) / opening_price) * position_size * leverage
        return profit_loss

    def pre_calculate_profitable_trades(self):
        #calculate frofitabal trades in the data
        data = self.data
        print('length of data is ', len(data))
        # for in a loop use calculate_buy_profit_loss and calculate_sell_profit_loss function to calculate the profit or loss
        # if profit is minimum 1% of own currency amount maximizing profit is goal  steps shoud stay between 10 to 100
        # if loss is minimum 1% of own currency amount minimizing loss is goal  steps shoud stay between 10 to 100
        # here want to pre deterine the profitable trades  to teach the model
        # for min interval 10 and max interval 100 in range of i and j calculate the profitable trades details with index rangeand store in a dict
        for i in range(5, 20):
            for j in range(i, 20):
                for k in range(len(data)):
                    if k + i < len(data):
                        open_price = data['Close'][k]
                        close_price = data['Close'][k + i]
                        position_size = self.OWN_CURRENCY_AMOUNT
                        leverage = self.USED_LEVERAGE

                        buy_profit_loss = self.calculate_buy_profit_loss(open_price, close_price, position_size, leverage)
                        buy_profit_loss_percentage = (buy_profit_loss / self.OWN_CURRENCY_AMOUNT) * 100
                        sell_profit_loss = self.calculate_sell_profit_loss(open_price, close_price, position_size, leverage)
                        sell_profit_loss_percentage = (sell_profit_loss / self.OWN_CURRENCY_AMOUNT) * 100


                        

                        if buy_profit_loss > self.pre_calculated_min_profit:
                            # Store the details of profitable buy trade
                            self.pre_calculated_buy_open_uuids[k] = {
                                'open_index': k,
                                'close_index': k + i,
                                'profit_loss': buy_profit_loss_percentage
                            }

                        if sell_profit_loss > self.pre_calculated_min_profit:
                            # Store the details of profitable sell trade
                            self.pre_calculated_sell_open_uuids[k] = {
                                'open_index': k,
                                'close_index': k + i,
                                'profit_loss': sell_profit_loss_percentage
                            }

        print("pre calculated buy open uuids are ", self.pre_calculated_buy_open_uuids)
        print("pre calculated sell open uuids are ", self.pre_calculated_sell_open_uuids)



    def check_min_loss_profitable_trades(self):

        start_index = 0
        end_index = len(self.data)




        profitable_trades = []

        for index in range(start_index, end_index):
            if index in self.pre_calculated_buy_open_uuids:
                trade_info = self.pre_calculated_buy_open_uuids[index]
                open_price = self.data['Close'][trade_info['open_index']]
                close_price = self.data['Close'][trade_info['close_index']]
                position_size = self.OWN_CURRENCY_AMOUNT
                leverage = self.USED_LEVERAGE

                buy_profit_loss = self.calculate_buy_profit_loss(open_price, close_price, position_size, leverage)
                buy_profit_loss_percentage = (buy_profit_loss / self.OWN_CURRENCY_AMOUNT) * 100

                # Check if the current loss never went more than 4%
                if buy_profit_loss_percentage < 4.0:
                    profitable_trades.append({
                        'type': 'buy',
                        'open_index': trade_info['open_index'],
                        'close_index': trade_info['close_index'],
                        'profit_loss_percentage': buy_profit_loss_percentage
                    })

            if index in self.pre_calculated_sell_open_uuids:
                trade_info = self.pre_calculated_sell_open_uuids[index]
                open_price = self.data['Close'][trade_info['open_index']]
                close_price = self.data['Close'][trade_info['close_index']]
                position_size = self.OWN_CURRENCY_AMOUNT
                leverage = self.USED_LEVERAGE

                sell_profit_loss = self.calculate_sell_profit_loss(open_price, close_price, position_size, leverage)
                sell_profit_loss_percentage = (sell_profit_loss / self.OWN_CURRENCY_AMOUNT) * 100

                # Check if the current loss never went more than 4%
                if sell_profit_loss_percentage < 4.0:
                    profitable_trades.append({
                        'type': 'sell',
                        'open_index': trade_info['open_index'],
                        'close_index': trade_info['close_index'],
                        'profit_loss_percentage': sell_profit_loss_percentage
                    })

        return profitable_trades




    def record_step_details(self, action):
        #record current action ,step ,price ,total trdes
        self.step_details[self.current_step] = {
            'action': action,
            'step': self.current_step,
            'price': self.current_price,
            'total_trades': len(self.buy_open_uuids) + len(self.sell_open_uuids)
        } 




    def Terminate_lossing_buy_open_trades(self):
        trades_to_close = []
        for buy_open_uuid, trade_info in self.buy_open_uuids.items():
            # Use calculate_buy_profit_loss function to calculate the profit or loss
            if self.calculate_buy_profit_loss(trade_info['open_price'], self.current_price, self.OWN_CURRENCY_AMOUNT, self.USED_LEVERAGE) < self.LOSS_TOLLARANCE:
                trades_to_close.append(buy_open_uuid)

        for buy_open_uuid in trades_to_close:
            self.close_trade(buy_open_uuid, self.buy_open_uuids, 'buy_open')
            self.auto_terminated_trades += 1


    def Terminate_lossing_sell_open_trades(self):
        trades_to_close = []
        for sell_open_uuid, trade_info in self.sell_open_uuids.items():
            # Use calculate_sell_profit_loss function to calculate the profit or loss
            if self.calculate_sell_profit_loss(trade_info['open_price'], self.current_price, self.OWN_CURRENCY_AMOUNT, self.USED_LEVERAGE) < self.LOSS_TOLLARANCE:
                trades_to_close.append(sell_open_uuid)

        for sell_open_uuid in trades_to_close:
            self.close_trade(sell_open_uuid, self.sell_open_uuids, 'sell_open')
            self.auto_terminated_trades += 1
            
    def Terminate_after_maximum_step(self):
        self.update_available_time_for_each_trade()
        trades_to_close = []
        for buy_open_uuid, trade_info in self.buy_open_uuids.items():
            # Use calculate_buy_profit_loss function to calculate the profit or loss
            if trade_info['available_time'] == 0:
                trades_to_close.append(buy_open_uuid)

        for buy_open_uuid in trades_to_close:
            self.close_trade(buy_open_uuid, self.buy_open_uuids, 'buy_open')
            self.auto_terminated_trades += 1

        trades_to_close = []
        for sell_open_uuid, trade_info in self.sell_open_uuids.items():
            # Use calculate_sell_profit_loss function to calculate the profit or loss
            if trade_info['available_time'] == 0:
                trades_to_close.append(sell_open_uuid)

        for sell_open_uuid in trades_to_close:
            self.close_trade(sell_open_uuid, self.sell_open_uuids, 'sell_open')
            self.auto_terminated_trades += 1




    def calculate_current_profit_loss(self):
        # Calculate the current profit or loss
        current_profit_loss = 0
        for buy_open_uuid, trade_info in self.buy_open_uuids.items():
            current_profit_loss += self.calculate_buy_profit_loss(trade_info['open_price'], self.current_price, self.OWN_CURRENCY_AMOUNT, self.USED_LEVERAGE)
        for sell_open_uuid, trade_info in self.sell_open_uuids.items():
            current_profit_loss += self.calculate_sell_profit_loss(trade_info['open_price'], self.current_price, self.OWN_CURRENCY_AMOUNT, self.USED_LEVERAGE)
        return current_profit_loss

            

    def update_available_time_for_each_trade(self):
        for buy_open_uuid, trade_info in self.buy_open_uuids.items():
            trade_info['available_time'] -= 1

        for sell_open_uuid, trade_info in self.sell_open_uuids.items():
            trade_info['available_time'] -= 1


    def close_trade(self, trade_uuid, trade_dict, trade_type):
        trade_dict[trade_uuid]['close_price'] = self.current_price
        if trade_type == 'buy_open':
            self.net_gains += trade_dict[trade_uuid]['close_price'] - trade_dict[trade_uuid]['open_price']
            self.current_buy_open_trades -= 1
            #add to previous trade details
            self.previous_trade_details[trade_uuid] = trade_dict[trade_uuid]
            #fix currntt amount of money and leverage keep the used currency amount same
            self.OWN_CURRENCY_AMOUNT += self.calculate_buy_profit_loss(trade_dict[trade_uuid]['open_price'], trade_dict[trade_uuid]['close_price'], self.OWN_CURRENCY_AMOUNT, self.USED_LEVERAGE)
            self.USED_LEVERAGE = self.USED_CURRENCY_AMOUNT / self.OWN_CURRENCY_AMOUNT
            self.LOSS_TOLLARANCE = self.OWN_CURRENCY_AMOUNT * 0.05



        elif trade_type == 'sell_open':
            self.net_gains += trade_dict[trade_uuid]['open_price'] - trade_dict[trade_uuid]['close_price']
            self.current_sell_open_trades -= 1
            #add to previous trade details
            self.previous_trade_details[trade_uuid] = trade_dict[trade_uuid]
            #fix currntt amount of money and leverage keep the used currency amount same
            self.OWN_CURRENCY_AMOUNT += self.calculate_sell_profit_loss(trade_dict[trade_uuid]['open_price'], trade_dict[trade_uuid]['close_price'], self.OWN_CURRENCY_AMOUNT, self.USED_LEVERAGE)
            self.USED_LEVERAGE = self.USED_CURRENCY_AMOUNT / self.OWN_CURRENCY_AMOUNT
            self.LOSS_TOLLARANCE = self.OWN_CURRENCY_AMOUNT * 0.05


        del trade_dict[trade_uuid]

    def calculate_total_profit_loss(self):
        #use previous and current trade details to calculate total profit loss
        current_profit_loss = 0
        for buy_open_uuid, trade_info in self.buy_open_uuids.items():
            current_profit_loss += self.calculate_buy_profit_loss(trade_info['open_price'], self.current_price, self.OWN_CURRENCY_AMOUNT, self.USED_LEVERAGE)
        for sell_open_uuid, trade_info in self.sell_open_uuids.items():
            current_profit_loss += self.calculate_sell_profit_loss(trade_info['open_price'], self.current_price, self.OWN_CURRENCY_AMOUNT, self.USED_LEVERAGE)
        previous_profit_loss = 0
        for buy_open_uuid, trade_info in self.previous_trade_details.items():
            previous_profit_loss += self.calculate_buy_profit_loss(trade_info['open_price'], trade_info['close_price'], self.OWN_CURRENCY_AMOUNT, self.USED_LEVERAGE)
        for sell_open_uuid, trade_info in self.previous_trade_details.items():
            previous_profit_loss += self.calculate_sell_profit_loss(trade_info['open_price'], trade_info['close_price'], self.OWN_CURRENCY_AMOUNT, self.USED_LEVERAGE)
        total_profit_loss = current_profit_loss + previous_profit_loss
        return total_profit_loss



    

    def calculate_number_of_profitable_trades(self):
        profitable_trades = 0
        for buy_open_uuid, trade_info in self.previous_trade_details.items():
            if self.calculate_buy_profit_loss(trade_info['open_price'], trade_info['close_price'], self.OWN_CURRENCY_AMOUNT, self.USED_LEVERAGE) > 0:
                profitable_trades += 1
        for sell_open_uuid, trade_info in self.previous_trade_details.items():
            if self.calculate_sell_profit_loss(trade_info['open_price'], trade_info['close_price'], self.OWN_CURRENCY_AMOUNT, self.USED_LEVERAGE) > 0:
                profitable_trades += 1
        return profitable_trades





    def calculate_number_of_total_trades_profotalbe_and_lossing_trades(self):
        total_trades = len(self.previous_trade_details) + self.current_buy_open_trades + self.current_sell_open_trades
        profitable_trades = self.calculate_number_of_profitable_trades()
        lossing_trades = total_trades - profitable_trades
        return total_trades, profitable_trades, lossing_trades


    

    def reset(self, new_data=None):
        self.current_step = 0
        self.current_price = self.data['Close'][self.current_step]
        self.current_buy_open_trades = 0
        self.current_sell_open_trades = 0
        self.auto_terminated_trades = 0
        self.buy_open_uuids = {}
        self.sell_open_uuids = {}
        self.previous_trade_details = {}
        self.previous_reward = 0
        self.net_gains = 0
        self.MAXIMUM_DOING_NOTHING_STEPS_TOLLARANCE = 100

        # Reset currency amounts
        self.OWN_CURRENCY_AMOUNT = np.random.randint(10, 100)
        self.USED_CURRENCY_AMOUNT = np.random.randint(1000, 100000)
        self.USED_LEVERAGE = self.USED_CURRENCY_AMOUNT / self.OWN_CURRENCY_AMOUNT

        #loss tollaranace is the 5%  of own currency amount
        self.LOSS_TOLLARANCE = self.OWN_CURRENCY_AMOUNT * 0.05

        #accumulated loss tollarance is the 10%  of own currency amount
        self.ACCUMULATED_LOSS_TOLLARANCE = self.OWN_CURRENCY_AMOUNT * 0.1







        # Reset action and observation spaces
        self.action_space = spaces.Discrete(6)
        self.observation_space = spaces.Box(low=-np.inf, high=np.inf, shape=(12 ,), dtype=np.float32)
        
        # Reset the environment and return the initial observation
        obs = self._next_observation()

        
        return obs
    
    def _next_observation(self):
        obs = np.array([
            self.current_price,
            self.OWN_CURRENCY_AMOUNT,
            self.USED_CURRENCY_AMOUNT,
            self.USED_LEVERAGE,
            self.LOSS_TOLLARANCE,
            self.ACCUMULATED_LOSS_TOLLARANCE,
            self.MAXIMUM_AMOUNT_OF_TIME_FOR_HOLDING,
            self.current_buy_open_trades,
            self.current_sell_open_trades,
            self.auto_terminated_trades,
            self.MAXIMUM_DOING_NOTHING_STEPS_TOLLARANCE,
            self.current_step

        ])
        
        return obs

    def step(self, action ):
        self.current_step += 1.
        self.current_price = self.data['Close'][self.current_step]
        self.Terminate_after_maximum_step()
        self.Terminate_lossing_buy_open_trades()
        self.Terminate_lossing_sell_open_trades()
        self.current_buy_open_trades = len(self.buy_open_uuids)
        self.current_sell_open_trades = len(self.sell_open_uuids)
        total_open_trades = self.current_buy_open_trades + self.current_sell_open_trades
        wrong_action = False
        do_nothing = False

        if action == 0:
            if total_open_trades == 0:
                self.buy_open_uuids[str(uuid.uuid4())] = {
                    'open_price': self.current_price,
                    'available_time': self.MAXIMUM_AMOUNT_OF_TIME_FOR_HOLDING
                }
            else:
                action = 4

                self.taking_wrong_action_count += 1
                wrong_action = True
        elif action == 1:
            if total_open_trades == 0:
                self.sell_open_uuids[str(uuid.uuid4())] = {
                    'open_price': self.current_price,
                    'available_time': self.MAXIMUM_AMOUNT_OF_TIME_FOR_HOLDING
                }
            else:
                action = 4
                self.taking_wrong_action_count += 1
                wrong_action = True
        elif action == 2:
            if self.current_buy_open_trades > 0:
                buy_open_uuid = list(self.buy_open_uuids.keys())[0]
                self.close_trade(buy_open_uuid, self.buy_open_uuids, 'buy_open')
            elif self.current_sell_open_trades > 0:
                action = 4 # Hold
                self.taking_wrong_action_count += 1
                wrong_action = True
            else:
                action = 5 # Do nothing
                self.taking_wrong_action_count += 1
                wrong_action = True
                self.MAXIMUM_DOING_NOTHING_STEPS_TOLLARANCE -= 1
                do_nothing = True
            
        elif action == 3:
            if self.current_sell_open_trades > 0:
                sell_open_uuid = list(self.sell_open_uuids.keys())[0]
                self.close_trade(sell_open_uuid, self.sell_open_uuids, 'sell_open')
            elif self.current_buy_open_trades > 0:
                action = 4
                self.taking_wrong_action_count += 1
                wrong_action = True
            else:
                action = 5 # Do nothing
                self.taking_wrong_action_count += 1
                wrong_action = True
                self.MAXIMUM_DOING_NOTHING_STEPS_TOLLARANCE -= 1
                do_nothing = True

        elif action == 4:
            if total_open_trades == 0:
                action = 5
                self.taking_wrong_action_count += 1
                wrong_action = True
            else:
                action = 4
        elif action == 5:
            if total_open_trades > 0:
                action = 4
                self.taking_wrong_action_count += 1
                wrong_action = True
            else:
                action = 5
                self.MAXIMUM_DOING_NOTHING_STEPS_TOLLARANCE -= 1
                do_nothing = True
            
        else:
            action = 5
            self.taking_wrong_action_count += 1
            wrong_action = True
            self.MAXIMUM_DOING_NOTHING_STEPS_TOLLARANCE -= 1
            do_nothing = True



        obs = self._next_observation()
        done = self.done()
        reward = self.reward(wrong_action,do_nothing,done)

        info = {'action':action,'current_price': self.current_price, 'current_step': self.current_step, 'current_profit_loss': self.calculate_current_profit_loss(), 'total_profit_loss': self.calculate_total_profit_loss(), 'net_gains': self.net_gains, 'current_buy_open_trades': self.current_buy_open_trades, 'current_sell_open_trades': self.current_sell_open_trades, 'auto_terminated_trades': self.auto_terminated_trades, 'previous_reward': self.previous_reward, 'buy_open_uuids': self.buy_open_uuids, 'sell_open_uuids': self.sell_open_uuids, 'previous_trade_details': self.previous_trade_details}      

        #if done then add to tensorboard
        if done:
            self.log_to_tensorboard(reward)
        
        return obs, reward, done, info

    def reward(self,wrong_action:bool = False,do_nothing:bool = False,done:bool = False):
        # 1) reward is proportional to the profit or loss
        # 2) reward is proportional to the number of trades
        # 4) reward is inversely proportional to number of auto terminated trades
        # 5)panalty for doing nothing
        # 6) reward for closing profitable trades
        # 7) panalty for closing lossing trades
        # 8) extra reward for each 2% profit
        # 9) i step == 404 then means done and panalty
        # 10)  if current reward is more than previous reward then reward is positive else negative
        # 11) panalty for done
        total_profit_loss = self.calculate_total_profit_loss() 
        current_profit_loss = self.calculate_current_profit_loss()

        total_profit_loss_percentage = (total_profit_loss / self.OWN_CURRENCY_AMOUNT) * 100
        current_profit_loss_percentage = (current_profit_loss / self.OWN_CURRENCY_AMOUNT) * 100

        current_number_of_open_trade = self.current_buy_open_trades + self.current_sell_open_trades
        auto_terminated_trades = self.auto_terminated_trades
        total_trades, profitable_trades, lossing_trades = self.calculate_number_of_total_trades_profotalbe_and_lossing_trades()
        profit_loss_percentage = total_profit_loss / self.OWN_CURRENCY_AMOUNT            
        K = 0.1  # You can adjust this value based on the desired magnitude of the rewards/penalties

        reward = (
            K * total_profit_loss_percentage +                  # Reward proportional to profit or loss
            K * current_profit_loss_percentage +                # Reward proportional to profit or loss
            K *10* total_trades +                       # Reward proportional to the number of trades
            -K * auto_terminated_trades +            # Penalty inversely proportional to the number of auto-terminated trades
            -K * (100-self.MAXIMUM_DOING_NOTHING_STEPS_TOLLARANCE) +  # Penalty for doing nothing
            K *20* profitable_trades +                  # Reward for closing profitable trades
            -K * lossing_trades +               
                 
                 
                      # Penalty for closing losing trades
            K * (
                
                
                
                   profit_loss_percentage // 0.02)     # Extra reward for each 2% profit
        )
        reward= reward + K * (1 if reward > self.previous_reward else -1)  # Reward for improving or penalizing the reward

        if wrong_action:
            reward = -K * 10
            self.WRONG_STEPS_TOLLARANCE -= 1
        if done:
            reward = 0 

        self.previous_reward = reward
        
        return reward
    


    def done(self):
    #1)if current loss is greater than 3% of own currency amount
    #2)if auto terminated trades are greater than 3
    #3)if all comolative loss is greater than 10% of own currency amount
    #4)maximum doing nothing steps tollarance
    #5) IF 3 loss trades over 10 trades and 2 consecutive loss trades
    #6) if maximum doing nothing steps tollarance is 0
    #7) if total rades are 3 or more but cutent totatal profit is less than 10% of own currency amount
        current_profit_loss = self.calculate_current_profit_loss()
        # print("current loss is ", current_profit_loss)
        # print("loss tollarance is ", self.LOSS_TOLLARANCE)
        # print("accumulated loss tollarance is ", self.ACCUMULATED_LOSS_TOLLARANCE)


        autoterminated_trades = self.auto_terminated_trades
        cumulative_profit_loss = self.calculate_total_profit_loss()
        doing_nothing_steps_tollarance = self.MAXIMUM_DOING_NOTHING_STEPS_TOLLARANCE
        total_trades, profitable_trades, lossing_trades = self.calculate_number_of_total_trades_profotalbe_and_lossing_trades()


        wrong_steps_tollarance = self.WRONG_STEPS_TOLLARANCE

        
        if current_profit_loss < -self.LOSS_TOLLARANCE:
            
            return True
        elif autoterminated_trades > 2:
            
            return True
        elif cumulative_profit_loss < -self.ACCUMULATED_LOSS_TOLLARANCE:
            
            return True
        elif doing_nothing_steps_tollarance == 0:
            
            return True
        elif wrong_steps_tollarance == 0:
            
            return True
        elif lossing_trades > 2 and lossing_trades / total_trades > 0.3:
            
            return True
        elif total_trades >= 4 and cumulative_profit_loss < self.MINIMUM_GAINS:
            
            return True
        
        else:
            return False




    def render(self, mode='human'):
        profit_loss = self.calculate_total_profit_loss()
        print(f'Step: {self.current_step}')
        print(f'Price: {self.current_price}')
        print(f'Profit/Loss: {profit_loss}')
        print(f'Net Gains: {self.net_gains}')
        print(f'Current Buy Open Trades: {self.current_buy_open_trades}')
        print(f'Current Sell Open Trades: {self.current_sell_open_trades}')
        print(f'Auto Terminated Trades: {self.auto_terminated_trades}')
        print(f'Previous Reward: {self.previous_reward}')
        print(f'Action Space: {self.action_space}')
        print(f'Observation Space: {self.observation_space}')
        print(f'Buy Open Trades: {self.buy_open_uuids}')
        print(f'Sell Open Trades: {self.sell_open_uuids}')
        print(f'Previous Trade Details: {self.previous_trade_details}')


                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                     
    def log_to_tensorboard(self, reward):
        # Log relevant metrics to TensorBoard
    
        total_trades, profitable_trades, lossing_trades = self.calculate_number_of_total_trades_profotalbe_and_lossing_trades()

        total_profit_loss = self.calculate_total_profit_loss()
        current_profit_loss = self.calculate_current_profit_loss()

        total_profit_loss_percentage = (total_profit_loss / self.OWN_CURRENCY_AMOUNT) * 100
        current_profit_loss_percentage = (current_profit_loss / self.OWN_CURRENCY_AMOUNT) * 100


        self.writer.add_scalar("Reward", reward, self.current_step)
        self.writer.add_scalar("Profit/Loss", total_profit_loss_percentage, self.current_step)
        self.writer.add_scalar("Auto Terminated Trades", self.auto_terminated_trades, self.current_step)
        self.writer.add_scalar("Previous Reward", self.previous_reward, self.current_step)
        self.writer.add_scalar("Current Step", self.current_step, self.current_step)
        self.writer.add_scalar("current profit loss", current_profit_loss_percentage, self.current_step)
        self.writer.add_scalar("total trades", total_trades, self.current_step)
        self.writer.add_scalar("number of profitable trades", profitable_trades, self.current_step)
        self.writer.add_scalar("number of lossing trades", lossing_trades, self.current_step)
        self.writer.flush()                                                                                                                                                                                                                                                                                                





In [4]:
# Load the raw price data into a DataFrame. The `data.tail()` output further down
# shows the columns: Gmt time, Open, High, Low, Close, Volume.
# NOTE(review): pandas is already imported in the cell that defines TradingEnv.
import pandas as pd
data = pd.read_csv('Demo_4M.csv')

In [5]:
# Build the trading environment. TradingEnv.__init__ picks a random contiguous
# window of `batch_size` rows from `data`, so every construction sees different prices.
env = TradingEnv(data)


length of data is  1000
pre calculated buy open uuids are  {8: {'open_index': 8, 'close_index': 17, 'profit_loss': 4.104251169272702}, 9: {'open_index': 9, 'close_index': 17, 'profit_loss': 1.7100032928739899}, 10: {'open_index': 10, 'close_index': 24, 'profit_loss': 1.71033638124629}, 11: {'open_index': 11, 'close_index': 17, 'profit_loss': 4.446309750965041}, 12: {'open_index': 12, 'close_index': 17, 'profit_loss': 3.4201513847134364}, 19: {'open_index': 19, 'close_index': 24, 'profit_loss': 0.684117171029708}, 20: {'open_index': 20, 'close_index': 37, 'profit_loss': 0.3420875563406922}, 21: {'open_index': 21, 'close_index': 38, 'profit_loss': 1.368512495013152}, 27: {'open_index': 27, 'close_index': 37, 'profit_loss': 2.7368627107700845}, 30: {'open_index': 30, 'close_index': 37, 'profit_loss': 1.026280053301744}, 31: {'open_index': 31, 'close_index': 37, 'profit_loss': 0.3420875563406922}, 32: {'open_index': 32, 'close_index': 37, 'profit_loss': 1.710495730256113}, 40: {'open_index

In [9]:
# Inspect the environment's pre-calculated trades. The recorded output is a list of
# dicts with the keys 'type', 'open_index', 'close_index' and 'profit_loss_percentage'.
env.pre_calculated_min_trades

[{'type': 'sell',
  'open_index': 7,
  'close_index': 31,
  'profit_loss_percentage': 0.5299382460013432},
 {'type': 'sell',
  'open_index': 9,
  'close_index': 26,
  'profit_loss_percentage': 0.5299968755839327},
 {'type': 'sell',
  'open_index': 10,
  'close_index': 26,
  'profit_loss_percentage': 1.0599839785895948},
 {'type': 'sell',
  'open_index': 11,
  'close_index': 26,
  'profit_loss_percentage': 1.0599839785895948},
 {'type': 'sell',
  'open_index': 12,
  'close_index': 29,
  'profit_loss_percentage': 2.1198897800744394},
 {'type': 'sell',
  'open_index': 13,
  'close_index': 26,
  'profit_loss_percentage': 1.0599839785895948},
 {'type': 'sell',
  'open_index': 15,
  'close_index': 29,
  'profit_loss_percentage': 2.649837795873202},
 {'type': 'sell',
  'open_index': 16,
  'close_index': 29,
  'profit_loss_percentage': 3.70970451428338},
 {'type': 'sell',
  'open_index': 18,
  'close_index': 30,
  'profit_loss_percentage': 1.0598862625990908},
 {'type': 'sell',
  'open_index':

In [6]:
import csv

def save_profitable_trades_to_csv(profitable_trades, file_prefix='profitable_trades'):
    """Split trades by side and write each side to its own CSV file.

    Args:
        profitable_trades: Trade dicts. Each must have a 'type' key; only trades
            whose type is 'buy' or 'sell' are written. The remaining keys must be
            among the CSV columns ('open_index', 'close_index',
            'profit_loss_percentage').
        file_prefix: Path prefix of the two output files; "_buy.csv" and
            "_sell.csv" are appended to it.

    Existing files are overwritten. The two target paths are printed when done.
    """
    columns = ['type', 'open_index', 'close_index', 'profit_loss_percentage']

    def _dump(rows, csv_path):
        # newline='' lets the csv module control the line endings itself.
        with open(csv_path, mode='w', newline='') as handle:
            sheet = csv.DictWriter(handle, fieldnames=columns)
            sheet.writeheader()
            sheet.writerows(rows)

    # Partition before touching the file system so a trade without 'type'
    # fails before any file is created.
    rows_by_side = {
        side: [trade for trade in profitable_trades if trade['type'] == side]
        for side in ('buy', 'sell')
    }

    buy_csv_path = f"{file_prefix}_buy.csv"
    sell_csv_path = f"{file_prefix}_sell.csv"

    _dump(rows_by_side['buy'], buy_csv_path)
    _dump(rows_by_side['sell'], sell_csv_path)

    print(f"Saved 'buy' trades to {buy_csv_path}")
    print(f"Saved 'sell' trades to {sell_csv_path}")

# Example usage: export the environment's pre-calculated trades with the default
# prefix, which writes profitable_trades_buy.csv and profitable_trades_sell.csv
# into the current working directory.
profitable_trades = env.pre_calculated_min_trades
save_profitable_trades_to_csv(profitable_trades)


Saved 'buy' trades to profitable_trades_buy.csv
Saved 'sell' trades to profitable_trades_sell.csv


In [7]:
# Re-run the environment's trade pre-calculation; the recorded output shows it
# printing the data length and the pre-calculated buy-open trades.
# NOTE(review): execution counts in this notebook are out of order and this output
# reports 10000 rows while the earlier construction reported 1000, so it presumably
# comes from a different env / batch size. Confirm with a fresh Restart & Run All.
env.pre_calculate_profitable_trades()


length of data is  10000
pre calculated buy open uuids are  {0: {'open_index': 0, 'close_index': 99, 'profit_loss': 3.248779745037175}, 3: {'open_index': 3, 'close_index': 102, 'profit_loss': 8.353086461520896}, 4: {'open_index': 4, 'close_index': 103, 'profit_loss': 6.496845025625072}, 5: {'open_index': 5, 'close_index': 104, 'profit_loss': 6.497394599711966}, 6: {'open_index': 6, 'close_index': 105, 'profit_loss': 7.426222019183965}, 7: {'open_index': 7, 'close_index': 105, 'profit_loss': 5.1050957569169135}, 10: {'open_index': 10, 'close_index': 108, 'profit_loss': 2.320458813378418}, 11: {'open_index': 11, 'close_index': 110, 'profit_loss': 5.104836663224187}, 12: {'open_index': 12, 'close_index': 111, 'profit_loss': 2.784315052269514}, 33: {'open_index': 33, 'close_index': 132, 'profit_loss': 20.893142755589363}, 34: {'open_index': 34, 'close_index': 133, 'profit_loss': 25.076439967840596}, 35: {'open_index': 35, 'close_index': 134, 'profit_loss': 20.89349638217516}, 36: {'open_in

In [9]:
# Export the environment's pre-calculated buy/sell trade tables to CSV files.
# The buy table maps an open index to a dict of trade details (see the printed
# output of the environment); the sell table is presumably shaped the same way.
pre_calculated_buy_open_uuids = env.pre_calculated_buy_open_uuids
pre_calculated_sell_open_uuids = env.pre_calculated_sell_open_uuids

# orient='index': one row per mapping key, the inner dict keys become the columns.
pre_calculated_buy_open_uuids_df, pre_calculated_sell_open_uuids_df = (
    pd.DataFrame.from_dict(trade_table, orient='index')
    for trade_table in (pre_calculated_buy_open_uuids, pre_calculated_sell_open_uuids)
)

# The index (the mapping keys) is written as the first CSV column.
pre_calculated_buy_open_uuids_df.to_csv('pre_calculated_buy30_open_uuids.csv')
pre_calculated_sell_open_uuids_df.to_csv('pre_calculated_sell30_open_uuids.csv')

In [21]:
# Show the price the environment is currently positioned at.
env.current_price

1.0645

In [5]:
# Show the last five rows of the raw price data.
data.tail()

Unnamed: 0,Gmt time,Open,High,Low,Close,Volume
4213434,26.11.2023 23:55:00.000,1.09394,1.09394,1.09392,1.09392,22.05
4213435,26.11.2023 23:56:00.000,1.09392,1.09393,1.0939,1.09392,199.8
4213436,26.11.2023 23:57:00.000,1.09393,1.09399,1.09392,1.09399,70.65
4213437,26.11.2023 23:58:00.000,1.09398,1.09399,1.09393,1.09396,140.37
4213438,26.11.2023 23:59:00.000,1.09399,1.09401,1.09395,1.09396,77.98


In [6]:
# Relative change between consecutive closing prices (the very first value is NaN).
divergence = data['Close'].pct_change()

# NOTE(review): despite the "first_100" in the variable name, `.tail(100)` selects
# the LAST 100 rows (the printed index runs from 4213339 to 4213438). Use
# `.head(100)` if the first 100 rows were intended. Values are rounded to 4 decimals.
divergence_first_100_rows = divergence.tail(100).round(4)

# Print the rounded values
print(divergence_first_100_rows)
# Save to a CSV file without the index column. The file name also says "first_100"
# although the data are the last 100 rows.

divergence_first_100_rows.to_csv('divergence_first_100_rows.csv', index=False)




4213339    0.0000
4213340    0.0000
4213341    0.0000
4213342    0.0000
4213343    0.0000
            ...  
4213434   -0.0000
4213435    0.0000
4213436    0.0001
4213437   -0.0000
4213438    0.0000
Name: Close, Length: 100, dtype: float64


In [7]:
# NOTE(review): this cell repeats the computation of the preceding cell without the
# print / CSV export, so it appears redundant.
divergence = data['Close'].pct_change()

# NOTE(review): `.tail(100)` yields the LAST 100 rows despite the "first_100" name.
divergence_first_100_rows = divergence.tail(100).round(4)

# 


In [8]:
# Display the rounded closing-price changes (last 100 rows of the data set).
divergence_first_100_rows

4213339    0.0000
4213340    0.0000
4213341    0.0000
4213342    0.0000
4213343    0.0000
            ...  
4213434   -0.0000
4213435    0.0000
4213436    0.0001
4213437   -0.0000
4213438    0.0000
Name: Close, Length: 100, dtype: float64

In [9]:
# Passed as `n_stack` to VecFrameStack in the training cell, i.e. it controls how
# many observations are stacked rather than any frame skipping.
frame_skip_frequency = 1

# Total number of timesteps requested from `model.learn`
total_timesteps = 10000000

# Define the directory paths. LOG_DIR and OPT_DIR repeat the values already set in
# the first cell; CHECKPOINT_DIR receives the periodic model checkpoints.
LOG_DIR = './logs/'
OPT_DIR = './opt_modeldata/'
CHECKPOINT_DIR = './train_modeldata/'

# Create the directories if they don't exist
os.makedirs(LOG_DIR, exist_ok=True)
os.makedirs(OPT_DIR, exist_ok=True)
os.makedirs(CHECKPOINT_DIR, exist_ok=True)


In [34]:
# Smoke test: drive a fresh environment with randomly sampled actions for 20
# episodes, rendering and printing every transition.
# NOTE(review): this prints several lines per step; clear the output before sharing.
env = TradingEnv(data)
print("Action space:", env.action_space)
print("Observation space:", env.observation_space)
for i in range(20):
    # reset() is used with a single return value (the initial observation)
    obs = env.reset()
    done = False
    while not done:
        # Sample a random action from the action space
        action = env.action_space.sample()
        # step() is unpacked as the 4-tuple (obs, reward, done, info)
        obs, reward, done, info = env.step(action)
        env.render()
        print("obs=", obs, "reward=", reward, "done=", done)
        print(info)
        print("")


Action space: Discrete(6)
Observation space: Box([-inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf], [inf inf inf inf inf inf inf inf inf inf inf inf], (12,), float32)
Step: 1.0
Price: 1.10235
Profit/Loss: 0.0
Net Gains: 0
Current Buy Open Trades: 0
Current Sell Open Trades: 0
Auto Terminated Trades: 0
Previous Reward: -0.1
Action Space: Discrete(6)
Observation Space: Box([-inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf], [inf inf inf inf inf inf inf inf inf inf inf inf], (12,), float32)
Buy Open Trades: {}
Sell Open Trades: {'d8d32b93-e407-4892-9d3c-92a40b34895e': {'open_price': 1.10235, 'available_time': 100}}
Previous Trade Details: {}
obs= [1.10235000e+00 8.40000000e+01 1.92020000e+04 2.28595238e+02
 4.20000000e+00 8.40000000e+00 1.00000000e+02 0.00000000e+00
 0.00000000e+00 0.00000000e+00 1.00000000e+02 1.00000000e+00] reward= -0.1 done= False
{'action': 1, 'current_price': 1.10235, 'current_step': 1.0, 'current_profit_loss': 0.0, 'total_profit_loss': 0.0, '

In [11]:
# Import os for file path management
import os
# Import Base Callback for saving models
from stable_baselines3.common.callbacks import BaseCallback
# Import Summary Writer for logging
from torch.utils.tensorboard import SummaryWriter
# Import t2xy for plotting
from stable_baselines3.common.results_plotter import load_results, ts2xy

class TrainAndLoggingCallback(BaseCallback):
    """Periodically checkpoint the model and log the latest episode reward.

    Every ``check_freq`` calls of ``_on_step`` the callback

    * saves the current model to ``<save_path>/best_model_<n_calls>`` (skipped when
      ``save_path`` is ``None``), and
    * reads the Monitor results found in ``tensorboard_log`` and writes the reward
      of the most recent finished episode to TensorBoard under the tag ``reward``.

    Args:
        check_freq: Number of ``_on_step`` calls between two checkpoints.
        save_path: Directory that receives the checkpoints, or ``None`` to
            disable saving.
        tensorboard_log: Directory used for the SummaryWriter event files. It is
            also passed to ``load_results``, so the Monitor wrapper has to write
            its files into the same directory.
        verbose: Verbosity level forwarded to ``BaseCallback``.
    """

    def __init__(self, check_freq, save_path, tensorboard_log, verbose=1):
        super().__init__(verbose)
        self.check_freq = check_freq
        self.save_path = save_path
        self.tensorboard_log = tensorboard_log
        self.writer = SummaryWriter(tensorboard_log)

    def _init_callback(self):
        """Create the checkpoint directory (if one is configured)."""
        if self.save_path is not None:
            os.makedirs(self.save_path, exist_ok=True)

    def _on_step(self):
        """Checkpoint and log every ``check_freq`` calls; always continue training.

        Returns:
            ``True``, so training is never stopped by this callback.
        """
        if self.n_calls % self.check_freq == 0:
            # Mirror the guard in _init_callback: a missing save_path disables
            # checkpointing instead of crashing in os.path.join(None, ...).
            if self.save_path is not None:
                # The 'best_model_' prefix is kept for compatibility with existing
                # files; every checkpoint is saved, not only the best one.
                model_path = os.path.join(self.save_path, 'best_model_{}'.format(self.n_calls))
                self.model.save(model_path)

            # x: cumulative timesteps, y: per-episode rewards from the Monitor files.
            # load_results raises if the directory holds no Monitor files.
            x, y = ts2xy(load_results(self.tensorboard_log), 'timesteps')
            if len(x) > 0:
                # Log the reward of the most recent episode (not a mean).
                self.writer.add_scalar('reward', y[-1], x[-1])
                # Flush right away so the events survive an aborted training run.
                self.writer.flush()
        return True
    

# Directory for the periodic model checkpoints (same path as in the configuration cell).
CHECKPOINT_DIR = './train_modeldata/'
if not os.path.exists(CHECKPOINT_DIR):
    os.makedirs(CHECKPOINT_DIR)

# Checkpoint and log every 10000 callback steps. LOG_DIR serves both as the
# TensorBoard event directory and as the directory the callback reads Monitor
# results from.
callback = TrainAndLoggingCallback(check_freq=10000, save_path=CHECKPOINT_DIR, tensorboard_log=LOG_DIR)

# Build the environment and wrap it step by step; `env` is rebound on every line.
env = TradingEnv(data) 
# Monitor is pointed at LOG_DIR, the directory the callback passes to load_results.
env = Monitor(env, LOG_DIR)

# NOTE(review): the lambda closes over the global name `env`. This relies on
# DummyVecEnv calling it inside its constructor, before `env` is rebound by this
# assignment. Confirm before reordering these lines.
env = DummyVecEnv([lambda: env])
# frame_skip_frequency is used as the number of stacked observations.
env = VecFrameStack(env, n_stack=frame_skip_frequency)



# Define the hyperparameters
# NOTE(review): no batch_size is set, and the recorded training output warns that
# batch_size should be a factor of n_steps * n_envs (n_steps=1440, n_envs=1).
# Consider adding a batch_size that divides 1440.
model_params = {
    'n_steps': 1440,
    'ent_coef': 0.0,
    'learning_rate': 0.0025,
    'gamma': 0.99,
    'gae_lambda': 0.95,
    'clip_range': 0.2,
    'vf_coef': 0.5,
    'max_grad_norm': 0.5,
}



# Create a new PPO model (nothing is loaded from disk here)
model = PPO('MlpPolicy',env,tensorboard_log= LOG_DIR , verbose=1, **model_params)


# Train the model.
# NOTE(review): the recorded run ended with `KeyError: 10000.0`. Presumably the
# environment indexes past its 10000-row data window with a float step; confirm
# in TradingEnv.
model.learn(total_timesteps=total_timesteps, callback=callback)





current data is                       Gmt time     Open     High      Low    Close  Volume
0     20.06.2016 14:10:00.000  1.13444  1.13444  1.13395  1.13414  418.33
1     20.06.2016 14:11:00.000  1.13415  1.13428  1.13401  1.13425  228.94
2     20.06.2016 14:12:00.000  1.13424  1.13425  1.13400  1.13418  189.67
3     20.06.2016 14:13:00.000  1.13420  1.13435  1.13412  1.13435  160.42
4     20.06.2016 14:14:00.000  1.13435  1.13455  1.13408  1.13451  336.63
...                       ...      ...      ...      ...      ...     ...
9995  27.06.2016 12:45:00.000  1.09770  1.09782  1.09723  1.09724  295.18
9996  27.06.2016 12:46:00.000  1.09722  1.09759  1.09709  1.09742  241.54
9997  27.06.2016 12:47:00.000  1.09742  1.09764  1.09741  1.09764  253.52
9998  27.06.2016 12:48:00.000  1.09764  1.09821  1.09764  1.09805  243.87
9999  27.06.2016 12:49:00.000  1.09805  1.09810  1.09768  1.09786  256.75

[10000 rows x 6 columns]
divergence is  0            NaN
1       0.000097
2      -0.000062
3  

We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=1440 and n_envs=1)


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 15.4     |
|    ep_rew_mean     | 111      |
| time/              |          |
|    fps             | 300      |
|    iterations      | 1        |
|    time_elapsed    | 4        |
|    total_timesteps | 1440     |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 13.7        |
|    ep_rew_mean          | 25.2        |
| time/                   |             |
|    fps                  | 297         |
|    iterations           | 2           |
|    time_elapsed         | 9           |
|    total_timesteps      | 2880        |
| train/                  |             |
|    approx_kl            | 0.007890106 |
|    clip_fraction        | 0.251       |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.78       |
|    explained_variance   | -6.88e-05   |
|    learning_rate        | 0.

KeyError: 10000.0