# Simple Hierarchical RL Based Model

## Imports

In [283]:
import os
import pandas as pd
import gym
from gym import spaces
import numpy as np
import random
import pickle

## Data Loaders

In [274]:
def load_data(tickers, daily_folder='data/processed', intraday_folder='data/processed_intra_day', synthetic_data_dir = 'data/synthetic_data/'):
    intraday_data = {}

    data_dir = 'data/processed/'
    intra_data_dir = 'data/processed_intra_day/'
    synthetic_data_dir = 'data/synthetic_data/'
    processed_data = {}
    synthetic_data = {}
 
    for file in os.listdir(data_dir):
        if file.endswith('_final.csv'):
            ticker = file.replace('_final.csv', '')
            df = pd.read_csv(os.path.join(data_dir, file))
            df.dropna(inplace=True)  # Drop any rows with NaN values
            processed_data[ticker] = df
            
    for file in os.listdir(intra_data_dir):
        if file.endswith('_intraday_processed.csv'):
            ticker = file.replace('_intraday_processed.csv', '')
            df = pd.read_csv(os.path.join(intra_data_dir, file))
            df.dropna(inplace=True)  # Drop any rows with NaN values
            intraday_data[ticker] = df
            
    for ticker_folder in os.listdir(synthetic_data_dir):
        ticker_folder_path = os.path.join(synthetic_data_dir, ticker_folder)
        
        if os.path.isdir(ticker_folder_path):
            for file in os.listdir(ticker_folder_path):
                if file.endswith('_synthetic.csv'):
                    ticker = file.split('_')[0]
                    file_path = os.path.join(ticker_folder_path, file)
                    df.dropna(inplace=True)  # Drop any rows with NaN values
                    synthetic_data[ticker] = pd.read_csv(file_path)

    return processed_data, intraday_data, synthetic_data


In [275]:
midcap_stocks = [
    "MSUMI.NS", "TORNTPOWER.NS", "GODREJPROP.NS", "SRF.NS",
    "APLAPOLLO.NS", "TVSMOTOR.NS", "LTIM.NS", "PAGEIND.NS",
    "AUROPHARMA.NS", "JINDALSTEL.NS", "BAJAJHLDNG.NS", "BATAINDIA.NS",
    "BHEL.NS", "CANBK.NS", "CHOLAFIN.NS", "CUB.NS", "DALMIASUG.NS",
    "ESCORTS.NS", "FEDERALBNK.NS", "FORTIS.NS", "GICRE.NS",
    "GMRINFRA.NS", "GNFC.NS", "GODREJAGRO.NS", "GRASIM.NS", "HAVELLS.NS",
    "HINDPETRO.NS", "INDHOTEL.NS", "JUBLFOOD.NS", "LICHSGFIN.NS",
    "M&MFIN.NS", "MANAPPURAM.NS", "MRF.NS", "NATCOPHARM.NS",
    "NCC.NS", "NMDC.NS", "OBEROIRLTY.NS", "PERSISTENT.NS", "PETRONET.NS",
    "RAMCOCEM.NS", "RBLBANK.NS", "SAIL.NS", "SUNTV.NS", "TATACOMM.NS",
    "TATAPOWER.NS", "THYROCARE.NS", "TORNTPHARM.NS", "TRENT.NS", "VOLTAS.NS",
    "WHIRLPOOL.NS", "YESBANK.NS", "ZEEL.NS", "ZYDUSWELL.NS",
    "ABBOTINDIA.NS", "ASHOKLEY.NS", "BALKRISIND.NS", "BEL.NS", "CONCOR.NS",
    "CROMPTON.NS", "DEEPAKNTR.NS", "DIXON.NS", "EMAMILTD.NS",
    "INDIAMART.NS", "IRCTC.NS", "JUBLPHARMA.NS", "LTTS.NS", "MFSL.NS",
    "METROPOLIS.NS", "OBEROIRLTY.NS", "PIIND.NS", "POLYCAB.NS", "RECLTD.NS",
    "SUPREMEIND.NS", "TATACONSUM.NS", "TV18BRDCST.NS", "VGUARD.NS",
    "VBL.NS", "VINATIORGA.NS", "ZENSARTECH.NS", "IDFCFIRSTB.NS",
    "SONACOMS.NS", "AMBUJACEM.NS", "GAIL.NS", "TATAELXSI.NS", "MAXHEALTH.NS",
    "LALPATHLAB.NS", "JSWENERGY.NS", "AARTIIND.NS", "ADANIGREEN.NS",
    "ABFRL.NS", "BANDHANBNK.NS", "BANKINDIA.NS", "BERGEPAINT.NS", "BOSCHLTD.NS",
    "CUMMINSIND.NS", "DMART.NS", "GLENMARK.NS", "GUJGASLTD.NS",
    "HAL.NS", "IIFLWAM.NS", "LICI.NS", "LUXIND.NS", "M&MFIN.NS",
    "NAUKRI.NS", "PHOENIXLTD.NS", "RAJESHEXPO.NS", "SHREECEM.NS",
    "TATACHEM.NS", "THERMAX.NS", "TTKPRESTIG.NS", "UJJIVANSFB.NS", "VAKRANGEE.NS"
]


In [276]:
processed_data, intraday_data, synthetic_data = load_data(midcap_stocks)

In [289]:
processed_data

{'AARTIIND.NS':             Open        High         Low       Close   Adj Close     Volume  \
 0      65.365448   67.552399   65.256104   67.200058   63.204884    69334.0   
 1      67.989784   71.197311   67.066406   70.820671   66.610252   256001.0   
 2      71.926292   74.769325   70.954315   74.125389   69.718498  1166533.0   
 3      74.356232   77.952553   74.356232   75.704849   71.204056   494431.0   
 4      75.583359   76.227287   72.910416   73.384254   69.021431   233540.0   
 ...          ...         ...         ...         ...         ...        ...   
 2259  599.000000  602.349976  593.200012  598.549988  597.683655  1586627.0   
 2260  600.000000  635.000000  599.400024  633.099976  632.183655  4809101.0   
 2261  636.000000  649.650024  626.250000  645.750000  644.815369  6230784.0   
 2262  645.000000  650.750000  638.099976  639.650024  638.724182  2551640.0   
 2263  648.000000  661.500000  643.049988  649.599976  648.659729  3941092.0   
 
           SMA_50      

## Defining the Environment

In [277]:
class TradingEnv(gym.Env):
    def __init__(self, data, ticker_list):
        super(TradingEnv, self).__init__()
        self.data = data
        self.ticker_list = ticker_list
        self.current_step = 0
        self.current_ticker = ticker_list[0]
        self.initial_portfolio_value = 1000000  # Initial portfolio value (e.g., $1,000,000)
        self.portfolio_value = self.initial_portfolio_value  # Start with the initial value
        self.position = 0  # Number of shares currently held

        # State space size depends on the number of features (e.g., OHLCV, indicators)
        self.observation_space = spaces.Box(low=-np.inf, high=np.inf, shape=(len(data[self.current_ticker].columns),), dtype=np.float32)
        self.action_space = spaces.Discrete(3)  # 0 = Hold, 1 = Buy, 2 = Sell

    def reset(self):
        self.current_step = 0
        self.current_ticker = np.random.choice(self.ticker_list)  # Randomly select a ticker
        self.position = 0  # Reset position
        self.portfolio_value = self.initial_portfolio_value  # Reset portfolio value
        return self.data[self.current_ticker].iloc[self.current_step].values

    def step(self, action):
        done = False
        reward = 0

        # Market data for the current step
        current_price = self.data[self.current_ticker].iloc[self.current_step]['Close']

        # Implement trade logic and update the portfolio
        if action == 1:  # Buy
            reward = self._execute_buy(current_price)
        elif action == 2:  # Sell
            reward = self._execute_sell(current_price)
        else:
            reward = self._hold_position(current_price)

        self.current_step += 1
        if self.current_step >= len(self.data[self.current_ticker]):
            done = True
            self.current_step = len(self.data[self.current_ticker]) - 1  # Adjust to the last valid index

        next_state = self.data[self.current_ticker].iloc[self.current_step].values
        return next_state, reward, done, {}

    def _execute_buy(self, current_price):
        if self.position == 0:  # Buy only if not already holding
            self.position = self.portfolio_value // current_price  # Buy as many shares as possible
            self.portfolio_value -= self.position * current_price  # Update cash
        reward = self._calculate_profit(current_price)
        return reward

    def _execute_sell(self, current_price):
        if self.position > 0:  # Sell only if holding shares
            self.portfolio_value += self.position * current_price  # Sell all shares
            self.position = 0  # Reset position
        reward = self._calculate_profit(current_price)
        return reward

    def _hold_position(self, current_price):
        reward = self._calculate_profit(current_price)
        return reward

    def _calculate_profit(self, current_price):
        # Calculate unrealized profit if still holding
        if self.position > 0:
            total_value = self.portfolio_value + self.position * current_price
        else:
            total_value = self.portfolio_value
        return total_value - self.initial_portfolio_value  # Reward based on portfolio value increase


## High Level Agent

In [278]:
class HighLevelAgent:
    def __init__(self, env, total_budget=100000):
        self.env = env
        self.tickers = env.ticker_list
        self.total_budget = total_budget
        self.initial_portfolio_value = 1000000  # Initial portfolio value
        self.portfolio_value = self.initial_portfolio_value
        self.profit_target = 0.05  # 5% profit target
        self.take_profit_threshold = 0.03  # Take profit at 3% gain

    def choose_equities_and_allocate_budget(self):
        # Select multiple equities
        selected_tickers = self.select_tickers()

        # Allocate budget based on Sharpe ratio
        allocated_budgets = self.allocate_budget(selected_tickers)

        # Create and manage mid- and low-level agents for each selected equity
        agents = []
        for ticker, budget in zip(selected_tickers, allocated_budgets):
            mid_agent = MidLevelAgent()
            low_agent = LowLevelAgent()
            agents.append((ticker, budget, mid_agent, low_agent))

        return agents

    def select_tickers(self, num_tickers=3):
        # Select `num_tickers` tickers based on the evaluation score
        scores = {ticker: self.evaluate_ticker(ticker) for ticker in self.tickers}
        sorted_tickers = sorted(scores, key=scores.get, reverse=True)
        selected_tickers = sorted_tickers[:num_tickers]
        return selected_tickers

    def evaluate_ticker(self, ticker):
        data = self.env.data[ticker]
        returns = data['Close'].pct_change().dropna()
        avg_return = returns.mean()
        volatility = returns.std()
        sharpe_ratio = avg_return / volatility
        max_drawdown = (data['Close'].max() - data['Close'].min()) / data['Close'].max()

        # Combine metrics into a single score
        score = sharpe_ratio - max_drawdown
        return score

    def allocate_budget(self, selected_tickers):
        # Calculate the Sharpe ratio for each selected ticker
        sharpe_ratios = {ticker: self.calculate_sharpe_ratio(ticker) for ticker in selected_tickers}

        # Total sum of all Sharpe ratios
        total_sharpe = sum(sharpe_ratios.values())

        # Allocate budget proportionally to the Sharpe ratio of each equity
        allocated_budgets = [(sharpe_ratios[ticker] / total_sharpe) * self.total_budget for ticker in selected_tickers]

        return allocated_budgets

    def calculate_sharpe_ratio(self, ticker):
        data = self.env.data[ticker]
        returns = data['Close'].pct_change().dropna()
        avg_return = returns.mean()
        volatility = returns.std()

        # Calculate Sharpe ratio (assuming a risk-free rate of 0 for simplicity)
        sharpe_ratio = avg_return / volatility
        return sharpe_ratio

    def calculate_reward(self, current_ticker):
        # Calculate portfolio return
        self.portfolio_value = self.get_portfolio_value()
        portfolio_return = (self.portfolio_value - self.initial_portfolio_value) / self.initial_portfolio_value

        # Calculate drawdown
        rolling_max = self.env.data[current_ticker]['Close'].cummax()
        drawdown = (rolling_max - self.env.data[current_ticker]['Close']) / rolling_max
        max_drawdown = drawdown.max()

        # Calculate risk-adjusted returns (Sharpe and Sortino ratios)
        returns = self.env.data[current_ticker]['Close'].pct_change().dropna()
        sharpe_ratio = returns.mean() / returns.std()
        sortino_ratio = returns.mean() / returns[returns < 0].std()

        # Consistency factor (standard deviation of returns)
        consistency = -returns.std()  # Negative because lower volatility (std) is preferred

        # Profit target bonus/penalty
        profit_target = 0.05  # 5% profit target
        profit_bonus = 1 if portfolio_return >= profit_target else -0.5

        # Calculate final reward with stronger penalties for drawdowns and rewards for consistency
        reward = (portfolio_return 
                  - 0.7 * max_drawdown 
                  + 0.4 * sharpe_ratio 
                  + 0.3 * sortino_ratio 
                  + 0.3 * consistency 
                  + profit_bonus)

        return reward

    
    def take_profit(self, current_price, entry_price):
        # Implementing a simple take profit mechanism
        gain = (current_price - entry_price) / entry_price
        if gain >= self.take_profit_threshold:
            return True  # Signal to take profit
        return False

    
    def get_portfolio_value(self):
        # This method should retrieve the most recent portfolio value from the environment or relevant agent
        return self.env.portfolio_value


## Mid Level Agent

In [279]:
class MidLevelAgent:
    def __init__(self):
        self.strategies = {
            'moving_average_crossover': self.moving_average_crossover,
            'rsi_reversion': self.rsi_reversion,
            'macd_trend_following': self.macd_trend_following,
            'bollinger_bands': self.bollinger_bands,
            'adx_trend_strength': self.adx_trend_strength,
            'stochastic_oscillator': self.stochastic_oscillator,
            'volume_price_trend': self.volume_price_trend,
            'cci_correction': self.cci_correction,
            'ema_rsi_combo': self.ema_rsi_combo,
            'ichimoku_cloud': self.ichimoku_cloud,
            'parabolic_sar': self.parabolic_sar,
            'momentum': self.momentum,
            'roc_trend': self.roc_trend,
            'williams_percent_r': self.williams_percent_r,
            'keltner_channel': self.keltner_channel,
            'atr_volatility': self.atr_volatility,
            'vwap_mean_reversion': self.vwap_mean_reversion,
            'trix_trend_following': self.trix_trend_following,
            'donchian_channel': self.donchian_channel,
            'pivot_point_support_resistance': self.pivot_point_support_resistance
        }
        
        self.strategy_performance = {name: 0 for name in self.strategies}
        self.epsilon = 0.5
        self.min_epsilon = 0.1
        self.decay_rate = 0.995
        self.recent_rewards = []
        
    
    def update_strategy_performance(self, strategy_name, reward, decay=0.99):
        # Update the performance score of the strategy based on the received reward
        self.strategy_performance[strategy_name] = (
            self.strategy_performance[strategy_name] * decay + reward
        )

    def choose_strategy(self, state):
        # Adjust epsilon based on recent performance
        if len(self.recent_rewards) > 50:
            avg_recent_reward = sum(self.recent_rewards[-50:]) / 50
            if avg_recent_reward < 0:
                self.epsilon = min(1.0, self.epsilon * 1.05)  # Increase exploration
            else:
                self.epsilon = max(self.min_epsilon, self.epsilon * 0.95)  # Increase exploitation

        if random.random() < self.epsilon:
            chosen_strategy = random.choice(list(self.strategies.keys()))
        else:
            chosen_strategy = max(self.strategy_performance, key=self.strategy_performance.get)

        self.epsilon = max(self.min_epsilon, self.epsilon * self.decay_rate)
        return chosen_strategy


    def moving_average_crossover(self, state):
        if len(state) < 200:  # Ensure we have enough data points
            return 0  # Hold if not enough data

        short_ma = state['SMA_50']
        long_ma = state['SMA_200']
        if short_ma.iloc[-1] > long_ma.iloc[-1]:
            return 1  # Signal to buy
        elif short_ma.iloc[-1] < long_ma.iloc[-1]:
            return -1  # Signal to sell
        return 0  # Signal to hold

    def rsi_reversion(self, state):
        if len(state) < 14:  # Ensure enough data points for RSI calculation
            return 0  # Hold if not enough data

        rsi = state['RSI']
        if rsi.iloc[-1] < 30:
            return 1  # Signal to buy
        elif rsi.iloc[-1] > 70:
            return -1  # Signal to sell
        return 0  # Signal to hold

    def macd_trend_following(self, state):
        if len(state) < 26:  # Ensure enough data points for MACD calculation
            return 0  # Hold if not enough data

        macd = state['MACD']
        signal = state['MACD_Signal']
        if macd.iloc[-1] > signal.iloc[-1]:
            return 1  # Signal to buy
        elif macd.iloc[-1] < signal.iloc[-1]:
            return -1  # Signal to sell
        return 0  # Signal to hold

    def bollinger_bands(self, state):
        if len(state) < 20:  # Ensure enough data points for Bollinger Bands
            return 0  # Hold if not enough data

        upper_band = state['Bollinger_Upper']
        lower_band = state['Bollinger_Lower']
        close = state['Close']
        if close.iloc[-1] < lower_band.iloc[-1]:
            return 1  # Signal to buy
        elif close.iloc[-1] > upper_band.iloc[-1]:
            return -1  # Signal to sell
        return 0  # Signal to hold

    def adx_trend_strength(self, state):
        if len(state) < 14:  # Ensure enough data points for ADX calculation
            return 0  # Hold if not enough data

        adx = state['ADX']
        if adx.iloc[-1] > 25:
            return 1  # Signal to trade based on trend
        return 0  # Signal to hold

    def stochastic_oscillator(self, state):
        if len(state) < 14:  # Ensure enough data points for Stochastic Oscillator
            return 0  # Hold if not enough data

        stoch_rsi = state['Stoch_RSI']
        if stoch_rsi.iloc[-1] < 0.2:
            return 1  # Signal to buy
        elif stoch_rsi.iloc[-1] > 0.8:
            return -1  # Signal to sell
        return 0  # Signal to hold

    def volume_price_trend(self, state):
        if len(state) < 14:  # Ensure enough data points
            return 0  # Hold if not enough data

        vpt = state['Volume_Price_Trend']
        if vpt.iloc[-1] > 0:
            return 1  # Signal to buy
        elif vpt.iloc[-1] < 0:
            return -1  # Signal to sell
        return 0  # Signal to hold

    def cci_correction(self, state):
        if len(state) < 20:  # Ensure enough data points for CCI calculation
            return 0  # Hold if not enough data

        cci = state['CCI']
        if cci.iloc[-1] < -100:
            return 1  # Signal to buy
        elif cci.iloc[-1] > 100:
            return -1  # Signal to sell
        return 0  # Signal to hold

    def ema_rsi_combo(self, state):
        if len(state) < 50:  # Ensure enough data points for EMA and RSI
            return 0  # Hold if not enough data

        ema = state['EMA_50']
        rsi = state['RSI']
        if rsi.iloc[-1] < 30 and state['Close'].iloc[-1] > ema.iloc[-1]:
            return 1  # Signal to buy
        elif rsi.iloc[-1] > 70 and state['Close'].iloc[-1] < ema.iloc[-1]:
            return -1  # Signal to sell
        return 0  # Signal to hold

    def ichimoku_cloud(self, state):
        if len(state) < 52:  # Ensure enough data points for Ichimoku Cloud
            return 0  # Hold if not enough data

        span_a = state['Ichimoku_A']
        span_b = state['Ichimoku_B']
        close = state['Close']
        if close.iloc[-1] > span_a.iloc[-1] and close.iloc[-1] > span_b.iloc[-1]:
            return 1  # Signal to buy
        elif close.iloc[-1] < span_a.iloc[-1] and close.iloc[-1] < span_b.iloc[-1]:
            return -1  # Signal to sell
        return 0  # Signal to hold

    def parabolic_sar(self, state):
        if len(state) < 14:  # Ensure enough data points
            return 0  # Hold if not enough data

        sar = state['KAMA']  # Assume KAMA is used for parabolic SAR substitute
        close = state['Close']
        if close.iloc[-1] > sar.iloc[-1]:
            return 1  # Signal to buy
        elif close.iloc[-1] < sar.iloc[-1]:
            return -1  # Signal to sell
        return 0  # Signal to hold

    def momentum(self, state):
        if len(state) < 14:  # Ensure enough data points
            return 0  # Hold if not enough data

        momentum = state['ROC']
        if momentum.iloc[-1] > 0:
            return 1  # Signal to buy
        elif momentum.iloc[-1] < 0:
            return -1  # Signal to sell
        return 0  # Signal to hold

    def roc_trend(self, state):
        if len(state) < 14:  # Ensure enough data points
            return 0  # Hold if not enough data

        roc = state['ROC']
        if roc.iloc[-1] > 0:
            return 1  # Signal to buy
        elif roc.iloc[-1] < 0:
            return -1  # Signal to sell
        return 0  # Signal to hold

    def williams_percent_r(self, state):
        if len(state) < 14:  # Ensure enough data points for Williams %R
            return 0  # Hold if not enough data

        will_r = state['Williams_R']
        if will_r.iloc[-1] < -80:
            return 1  # Signal to buy
        elif will_r.iloc[-1] > -20:
            return -1  # Signal to sell
        return 0  # Signal to hold

    def keltner_channel(self, state):
        if len(state) < 20:  # Ensure enough data points for Keltner Channel
            return 0  # Hold if not enough data

        upper_band = state['Keltner_Channel_Upper']
        lower_band = state['Keltner_Channel_Lower']
        close = state['Close']
        if close.iloc[-1] < lower_band.iloc[-1]:
            return 1  # Signal to buy
        elif close.iloc[-1] > upper_band.iloc[-1]:
            return -1  # Signal to sell
        return 0  # Signal to hold

    def atr_volatility(self, state):
        if len(state) < 14:  # Ensure enough data points for ATR
            return 0  # Hold if not enough data

        atr = state['ATR']
        if atr.iloc[-1] > atr.mean():
            return 1  # Signal to buy (volatility breakout)
        return 0  # Signal to hold

    def vwap_mean_reversion(self, state):
        if len(state) < 14:  # Ensure enough data points for VWAP
            return 0  # Hold if not enough data

        vwap = state['VWAP']
        close = state['Close']
        if close.iloc[-1] < vwap.iloc[-1]:
            return 1  # Signal to buy
        elif close.iloc[-1] > vwap.iloc[-1]:
            return -1  # Signal to sell
        return 0  # Signal to hold

    def trix_trend_following(self, state):
        if len(state) < 15:  # Ensure enough data points for TRIX
            return 0  # Hold if not enough data

        trix = state['TRIX']
        if trix.iloc[-1] > 0:
            return 1  # Signal to buy
        elif trix.iloc[-1] < 0:
            return -1  # Signal to sell
        return 0  # Signal to hold

    def donchian_channel(self, state):
        if len(state) < 20:  # Ensure enough data points for Donchian Channel
            return 0  # Hold if not enough data

        upper_band = state['Donchian_Channel_Upper']
        lower_band = state['Donchian_Channel_Lower']
        close = state['Close']
        if close.iloc[-1] > upper_band.iloc[-1]:
            return 1  # Signal to buy
        elif close.iloc[-1] < lower_band.iloc[-1]:
            return -1  # Signal to sell
        return 0  # Signal to hold

    def pivot_point_support_resistance(self, state):
        if len(state) < 20:  # Ensure enough data points
            return 0  # Hold if not enough data

        pivot = state['Bollinger_Mid']  # Using Bollinger Mid as a pivot substitute
        close = state['Close']
        if close.iloc[-1] > pivot.iloc[-1]:
            return 1  # Signal to buy
        elif close.iloc[-1] < pivot.iloc[-1]:
            return -1  # Signal to sell
        return 0  # Signal to hold

    def calculate_reward(self, profit, transaction_costs):
        # Profit per trade minus transaction costs
        net_profit = profit - transaction_costs
        
        # Factor in a time penalty to encourage quick execution of profitable trades
        time_penalty = 0.01  # Arbitrary small penalty, increase if trades take too long

        # Final reward combines net profit with a penalty for trade duration
        reward = net_profit - time_penalty
        return reward


## Low Level Agent

In [280]:
class LowLevelAgent:
    def __init__(self, strategy=None):
        self.strategy = strategy

    def decide_action(self, state):
        if self.strategy:
            signal = self.strategy(state)
            if signal == 1:
                return 1  # Buy
            elif signal == -1:
                return 2  # Sell
        return 0  # Hold
    
    def position_sizing(self, budget, volatility):
        max_risk = 0.02  # Risk 2% of the portfolio on a single trade
        position_size = (budget * max_risk) / volatility
        return min(position_size, budget)  # Ensure position size doesn't exceed the budget


    def calculate_reward(self, realized_profit, slippage, spread):
        # Reward for minimizing loss per trade and reducing slippage and spread
        trade_costs = slippage + spread

        # Net profit after accounting for trade costs
        net_profit = realized_profit - trade_costs

        # If net profit is negative, emphasize the loss to encourage better trade timing
        if net_profit < 0:
            loss_penalty = abs(net_profit) * 1.5  # Penalize losses more heavily
            reward = net_profit - loss_penalty
        else:
            reward = net_profit

        return reward

## Training

In [281]:
# Initialize the environment
env = TradingEnv(data=processed_data, ticker_list=list(processed_data.keys()))

# Initialize the high-level agent with a total budget (e.g., $1,000,000)
total_budget = 1000000
high_agent = HighLevelAgent(env, total_budget)

# Training loop
for episode in range(1000):  # Number of episodes
    env.reset()
    agents = high_agent.choose_equities_and_allocate_budget()

    for ticker, budget, mid_agent, low_agent in agents:
        done = False
        entry_price = None  # Track entry price for profit-taking

        while not done:
            # Ensure we have enough data points
            if env.current_step < 50:  # or another suitable number based on your rolling window size
                env.current_step += 1
                continue

            state = env.data[ticker].iloc[max(0, env.current_step - 50):env.current_step + 1]

            if state.empty or len(state['Close']) < 1:
                # Skip the rest of the loop if the state is not valid
                done = True
                continue

            # Mid-Level Agent: Choose strategy using epsilon-greedy
            strategy_name = mid_agent.choose_strategy(state)
            strategy_function = mid_agent.strategies[strategy_name]
            low_agent.strategy = strategy_function

            # Determine position size based on volatility
            volatility = state['Close'].std()
            position_size = low_agent.position_sizing(budget, volatility)

            action = low_agent.decide_action(state)
            current_price = state['Close'].iloc[-1]

            if entry_price is None and action == 'buy':
                entry_price = current_price

            if entry_price and high_agent.take_profit(current_price, entry_price):
                action = 'sell'  # Override action to sell and take profit
                entry_price = None  # Reset entry price after taking profit

            next_state, trade_profit, done, _ = env.step(action)

            slippage = 0.001 * env.portfolio_value
            spread = 0.001
            low_reward = low_agent.calculate_reward(trade_profit, slippage, spread)

            transaction_costs = 0.001 * budget
            mid_reward = mid_agent.calculate_reward(trade_profit, transaction_costs)

            mid_agent.update_strategy_performance(strategy_name, mid_reward)

            state = next_state

        high_reward = high_agent.calculate_reward(ticker)
        print(f"Episode {episode} - High-Level Reward: {high_reward}")




Episode 0 - High-Level Reward: -1.4835068492484864
Episode 0 - High-Level Reward: -1.5521542112808988
Episode 0 - High-Level Reward: 2.2068822060173505
Episode 1 - High-Level Reward: -0.6610039568572816
Episode 1 - High-Level Reward: -0.7856188968239592
Episode 1 - High-Level Reward: -1.777775762255812
Episode 2 - High-Level Reward: -1.483526673642712
Episode 2 - High-Level Reward: -1.5525738195496361
Episode 2 - High-Level Reward: -1.7778092937919139
Episode 3 - High-Level Reward: -0.4678620769096189
Episode 3 - High-Level Reward: -0.5336125924454493
Episode 3 - High-Level Reward: -0.8348239969855784
Episode 4 - High-Level Reward: -1.483506724076825
Episode 4 - High-Level Reward: 1.2345941144217683
Episode 4 - High-Level Reward: 1.3679189069412705
Episode 5 - High-Level Reward: -1.4834481713233765
Episode 5 - High-Level Reward: -1.5524953172303007
Episode 5 - High-Level Reward: 1.0070853860405438
Episode 6 - High-Level Reward: -1.4834499565025148
Episode 6 - High-Level Reward: -1.5525

Episode 57 - High-Level Reward: -1.4835293340564546
Episode 57 - High-Level Reward: -1.5525709436531827
Episode 57 - High-Level Reward: -1.7778174106465823
Episode 58 - High-Level Reward: -1.4763244876762207
Episode 58 - High-Level Reward: -1.5509166833878325
Episode 58 - High-Level Reward: -0.9027854535285476
Episode 59 - High-Level Reward: -0.7009676234412965
Episode 59 - High-Level Reward: -1.5525398306313323
Episode 59 - High-Level Reward: -0.903668706156874
Episode 60 - High-Level Reward: -0.4406335717826661
Episode 60 - High-Level Reward: -1.5525836845475005
Episode 60 - High-Level Reward: 1.0985966114738939
Episode 61 - High-Level Reward: -1.4834453232475098
Episode 61 - High-Level Reward: -1.5525300827408597
Episode 61 - High-Level Reward: 1.083142076531267
Episode 62 - High-Level Reward: -1.4833385476127445
Episode 62 - High-Level Reward: 1.9070139607772063
Episode 62 - High-Level Reward: 1.1218955620354172
Episode 63 - High-Level Reward: 1.0695170194763362
Episode 63 - High-L

Episode 113 - High-Level Reward: -1.483509051007825
Episode 113 - High-Level Reward: -1.552539823886948
Episode 113 - High-Level Reward: -1.7777603588820932
Episode 114 - High-Level Reward: -1.4830756054130367
Episode 114 - High-Level Reward: -1.552118651344375
Episode 114 - High-Level Reward: 0.7985794964226248
Episode 115 - High-Level Reward: -1.4834637573142828
Episode 115 - High-Level Reward: -1.5525423191376497
Episode 115 - High-Level Reward: -0.7596076037627704
Episode 116 - High-Level Reward: -0.9274991780143547
Episode 116 - High-Level Reward: -1.5524897810440816
Episode 116 - High-Level Reward: -1.1701503393622872
Episode 117 - High-Level Reward: 1.5453989522728153
Episode 117 - High-Level Reward: -1.5524990586090843
Episode 117 - High-Level Reward: 1.4892410344780447
Episode 118 - High-Level Reward: 1.1855406227096736
Episode 118 - High-Level Reward: -1.5525194584999846
Episode 118 - High-Level Reward: -1.777773579844618
Episode 119 - High-Level Reward: -1.4829585688529785
E

Episode 167 - High-Level Reward: 1.4095388314155763
Episode 167 - High-Level Reward: -1.5525771233750147
Episode 167 - High-Level Reward: 1.0219036392067429
Episode 168 - High-Level Reward: -0.4706207068229493
Episode 168 - High-Level Reward: -1.552320178718643
Episode 168 - High-Level Reward: -1.777633085181379
Episode 169 - High-Level Reward: -1.4834451361366092
Episode 169 - High-Level Reward: -1.5524922820435334
Episode 169 - High-Level Reward: 0.8503048699348924
Episode 170 - High-Level Reward: -1.4832116783500489
Episode 170 - High-Level Reward: -1.552258824256973
Episode 170 - High-Level Reward: -1.7772116147223953
Episode 171 - High-Level Reward: -1.4833420105949218
Episode 171 - High-Level Reward: -1.55247463654335
Episode 171 - High-Level Reward: -1.7776436462928293
Episode 172 - High-Level Reward: 1.2332368613762081
Episode 172 - High-Level Reward: -1.552567784530716
Episode 172 - High-Level Reward: -1.7777563779479508
Episode 173 - High-Level Reward: 1.4197800250190917
Epis

Episode 222 - High-Level Reward: -1.4692394837699707
Episode 222 - High-Level Reward: -1.542629446083145
Episode 222 - High-Level Reward: -1.7719175785285475
Episode 223 - High-Level Reward: -1.483497006219464
Episode 223 - High-Level Reward: -1.5524965916939542
Episode 223 - High-Level Reward: -1.777780535628462
Episode 224 - High-Level Reward: -1.4830150632987789
Episode 224 - High-Level Reward: -1.5519857234879297
Episode 224 - High-Level Reward: -1.7770438646003246
Episode 225 - High-Level Reward: -1.4834466715675176
Episode 225 - High-Level Reward: -1.552541484497147
Episode 225 - High-Level Reward: -0.8228747105247032
Episode 226 - High-Level Reward: -1.483517607030469
Episode 226 - High-Level Reward: -1.5524275749588778
Episode 226 - High-Level Reward: -1.7777443968574063
Episode 227 - High-Level Reward: -0.44395406583173697
Episode 227 - High-Level Reward: 1.0004207882613387
Episode 227 - High-Level Reward: 0.8645100524528989
Episode 228 - High-Level Reward: -1.4834303855033693

Episode 277 - High-Level Reward: -1.483524744985181
Episode 277 - High-Level Reward: -1.552547013145523
Episode 277 - High-Level Reward: -1.7778041969825271
Episode 278 - High-Level Reward: -1.4833396948295408
Episode 278 - High-Level Reward: -0.8621348938217921
Episode 278 - High-Level Reward: -1.7775532154914373
Episode 279 - High-Level Reward: -0.5453182256217775
Episode 279 - High-Level Reward: -1.5524560126419829
Episode 279 - High-Level Reward: -1.7775621525794503
Episode 280 - High-Level Reward: -0.5069792625022709
Episode 280 - High-Level Reward: -1.552582390869217
Episode 280 - High-Level Reward: 0.876422595604753
Episode 281 - High-Level Reward: -1.4834870121093569
Episode 281 - High-Level Reward: -0.5433921016045379
Episode 281 - High-Level Reward: -1.7777604533187392
Episode 282 - High-Level Reward: -1.4834394213004891
Episode 282 - High-Level Reward: -1.5524865672074133
Episode 282 - High-Level Reward: -1.7777340695334922
Episode 283 - High-Level Reward: -1.483367494695264

Episode 329 - High-Level Reward: -1.4834173165336426
Episode 329 - High-Level Reward: -0.5590923315201568
Episode 329 - High-Level Reward: 0.9178048214348311
Episode 330 - High-Level Reward: -1.483502534215528
Episode 330 - High-Level Reward: -1.5525363722611238
Episode 330 - High-Level Reward: -1.7776221553260334
Episode 331 - High-Level Reward: -1.4834868589835937
Episode 331 - High-Level Reward: -1.552541123878555
Episode 331 - High-Level Reward: -1.3247894797126298
Episode 332 - High-Level Reward: -1.4835327132011231
Episode 332 - High-Level Reward: -1.5521154794083403
Episode 332 - High-Level Reward: 0.7961176863640306
Episode 333 - High-Level Reward: -1.483527489537793
Episode 333 - High-Level Reward: -1.5525328845597075
Episode 333 - High-Level Reward: -1.777797271971846
Episode 334 - High-Level Reward: -0.8215285665794192
Episode 334 - High-Level Reward: -1.5524045030747222
Episode 334 - High-Level Reward: -1.777773233229781
Episode 335 - High-Level Reward: -1.483475406850415
E

Episode 382 - High-Level Reward: 1.3052655408882323
Episode 382 - High-Level Reward: 1.2289333760604098
Episode 382 - High-Level Reward: -1.777331443579817
Episode 383 - High-Level Reward: -1.4719648685355957
Episode 383 - High-Level Reward: -1.53479507303627
Episode 383 - High-Level Reward: -1.7638449867316726
Episode 384 - High-Level Reward: -1.4834942006851013
Episode 384 - High-Level Reward: -1.5525288867035674
Episode 384 - High-Level Reward: -1.7777974283782487
Episode 385 - High-Level Reward: -1.4834383371787845
Episode 385 - High-Level Reward: -1.5525421651230622
Episode 385 - High-Level Reward: -1.777765356238509
Episode 386 - High-Level Reward: -0.649981806249219
Episode 386 - High-Level Reward: -1.5525009713516997
Episode 386 - High-Level Reward: -1.7776870918265826
Episode 387 - High-Level Reward: -1.4830261767020996
Episode 387 - High-Level Reward: -1.552018405860977
Episode 387 - High-Level Reward: -1.7773834070807937
Episode 388 - High-Level Reward: -1.4833801362968262
E

Episode 437 - High-Level Reward: 1.1984741646675292
Episode 437 - High-Level Reward: -1.551654332801895
Episode 437 - High-Level Reward: -1.7577118578254227
Episode 438 - High-Level Reward: -1.4834953167777833
Episode 438 - High-Level Reward: -1.5525205001297757
Episode 438 - High-Level Reward: -1.7777592390281205
Episode 439 - High-Level Reward: -1.4834819429221926
Episode 439 - High-Level Reward: -1.5525412391587068
Episode 439 - High-Level Reward: -0.8026800823585039
Episode 440 - High-Level Reward: -1.4835340761619384
Episode 440 - High-Level Reward: -1.5525063145829008
Episode 440 - High-Level Reward: -1.7777693899184706
Episode 441 - High-Level Reward: -1.4833228302055177
Episode 441 - High-Level Reward: -1.552279273841934
Episode 441 - High-Level Reward: -1.7774206346808914
Episode 442 - High-Level Reward: -0.5340426850181396
Episode 442 - High-Level Reward: -0.6505929162827295
Episode 442 - High-Level Reward: -0.7813615044471263
Episode 443 - High-Level Reward: -1.4833101395011

Episode 492 - High-Level Reward: -1.483374684911328
Episode 492 - High-Level Reward: -1.5524563308182522
Episode 492 - High-Level Reward: -1.7776412389518264
Episode 493 - High-Level Reward: -1.483410988042432
Episode 493 - High-Level Reward: 1.0956810801619719
Episode 493 - High-Level Reward: -1.7771783718635095
Episode 494 - High-Level Reward: 1.1439375402397338
Episode 494 - High-Level Reward: -1.552534657058792
Episode 494 - High-Level Reward: -1.7778112451125625
Episode 495 - High-Level Reward: 1.1053353252128777
Episode 495 - High-Level Reward: -1.5524878444367223
Episode 495 - High-Level Reward: -1.7777364835883622
Episode 496 - High-Level Reward: -1.4832713034721186
Episode 496 - High-Level Reward: -1.552441899269179
Episode 496 - High-Level Reward: -1.7775555268622871
Episode 497 - High-Level Reward: -1.4834862856498536
Episode 497 - High-Level Reward: -1.5522393141861723
Episode 497 - High-Level Reward: -0.9174938638068678
Episode 498 - High-Level Reward: -1.4834172128654297


Episode 547 - High-Level Reward: -1.4834769250236328
Episode 547 - High-Level Reward: -1.5525041715546415
Episode 547 - High-Level Reward: -0.9347929817534923
Episode 548 - High-Level Reward: -1.48334145606001
Episode 548 - High-Level Reward: -1.5523886019669342
Episode 548 - High-Level Reward: 1.1192377932976234
Episode 549 - High-Level Reward: -1.4831456368461426
Episode 549 - High-Level Reward: -1.5521927827530668
Episode 549 - High-Level Reward: -1.7775139000312334
Episode 550 - High-Level Reward: 1.161919066255969
Episode 550 - High-Level Reward: 1.100560861236496
Episode 550 - High-Level Reward: -1.777731005690718
Episode 551 - High-Level Reward: -1.4827970133720219
Episode 551 - High-Level Reward: -0.531827746742325
Episode 551 - High-Level Reward: 0.7724797235954757
Episode 552 - High-Level Reward: 1.0680123540291018
Episode 552 - High-Level Reward: -0.5561695418778225
Episode 552 - High-Level Reward: -1.777535925254011
Episode 553 - High-Level Reward: -1.4832704184013183
Episo

Episode 600 - High-Level Reward: 1.5038685966915313
Episode 600 - High-Level Reward: 1.4348214507846069
Episode 600 - High-Level Reward: 2.008379164160586
Episode 601 - High-Level Reward: -1.483480653173047
Episode 601 - High-Level Reward: -1.552527799079971
Episode 601 - High-Level Reward: -1.77765259085765
Episode 602 - High-Level Reward: -1.4835152757659729
Episode 602 - High-Level Reward: -1.5525352904778287
Episode 602 - High-Level Reward: -1.7777360722075952
Episode 603 - High-Level Reward: -1.483511907102185
Episode 603 - High-Level Reward: -1.5525590530091091
Episode 603 - High-Level Reward: -1.7777770439902785
Episode 604 - High-Level Reward: -1.4834088698783692
Episode 604 - High-Level Reward: -1.552137427406387
Episode 604 - High-Level Reward: 1.1939487813347336
Episode 605 - High-Level Reward: 1.1376202155479611
Episode 605 - High-Level Reward: -1.5525371559296417
Episode 605 - High-Level Reward: 1.0364297208031172
Episode 606 - High-Level Reward: -1.483536383779126
Episode

Episode 652 - High-Level Reward: -1.4832316408897217
Episode 652 - High-Level Reward: 1.302118037635727
Episode 652 - High-Level Reward: -1.7776973622352128
Episode 653 - High-Level Reward: -0.5983181265464599
Episode 653 - High-Level Reward: -1.5524164515915677
Episode 653 - High-Level Reward: -1.777801365271712
Episode 654 - High-Level Reward: -1.4835025608535586
Episode 654 - High-Level Reward: -1.552570557083206
Episode 654 - High-Level Reward: -1.7778135031806477
Episode 655 - High-Level Reward: -1.483506850110608
Episode 655 - High-Level Reward: -1.5525629082451629
Episode 655 - High-Level Reward: -1.7777868716498855
Episode 656 - High-Level Reward: -1.4834757691627323
Episode 656 - High-Level Reward: -0.8944883651734163
Episode 656 - High-Level Reward: -1.1613909507438187
Episode 657 - High-Level Reward: 1.1668359054512205
Episode 657 - High-Level Reward: 1.0887933879317475
Episode 657 - High-Level Reward: -1.777768503516341
Episode 658 - High-Level Reward: -1.4783000228324708
E

Episode 706 - High-Level Reward: -1.777780689475203
Episode 707 - High-Level Reward: -1.4835114625907715
Episode 707 - High-Level Reward: -1.5525544400406646
Episode 707 - High-Level Reward: -0.9471666731330397
Episode 708 - High-Level Reward: -1.4835292822776607
Episode 708 - High-Level Reward: -1.5525622054444115
Episode 708 - High-Level Reward: -1.7777478503028392
Episode 709 - High-Level Reward: -1.4833254044242683
Episode 709 - High-Level Reward: -1.5525449642716223
Episode 709 - High-Level Reward: -1.7776241834479822
Episode 710 - High-Level Reward: 1.5095836442207515
Episode 710 - High-Level Reward: 1.7995354983138272
Episode 710 - High-Level Reward: -1.7764109034186846
Episode 711 - High-Level Reward: -1.4821803953910646
Episode 711 - High-Level Reward: -1.5512275412979888
Episode 711 - High-Level Reward: -1.7773252670051103
Episode 712 - High-Level Reward: -0.6133238196311767
Episode 712 - High-Level Reward: -0.6163826850052639
Episode 712 - High-Level Reward: -1.7776219933387

Episode 761 - High-Level Reward: -1.4835091837211427
Episode 761 - High-Level Reward: -1.5519897837296293
Episode 761 - High-Level Reward: 0.9508335887688157
Episode 762 - High-Level Reward: -1.4795684661918458
Episode 762 - High-Level Reward: -1.54401485428627
Episode 762 - High-Level Reward: -1.766869515051985
Episode 763 - High-Level Reward: -1.4835242611442385
Episode 763 - High-Level Reward: -1.5525714070511627
Episode 763 - High-Level Reward: -1.777672879355574
Episode 764 - High-Level Reward: -1.4834878820243653
Episode 764 - High-Level Reward: -0.7172120763321684
Episode 764 - High-Level Reward: -0.7720062237006666
Episode 765 - High-Level Reward: -1.4835323114066894
Episode 765 - High-Level Reward: -1.5525794573136136
Episode 765 - High-Level Reward: -1.777755230506087
Episode 766 - High-Level Reward: -1.4830948639274417
Episode 766 - High-Level Reward: 1.025579238944931
Episode 766 - High-Level Reward: -1.7776650444404134
Episode 767 - High-Level Reward: -0.4663222174125489
E

Episode 814 - High-Level Reward: -1.4826680418754394
Episode 814 - High-Level Reward: -1.551250871131973
Episode 814 - High-Level Reward: -1.7765277534553054
Episode 815 - High-Level Reward: -1.483232107732373
Episode 815 - High-Level Reward: -1.5525087856217192
Episode 815 - High-Level Reward: -1.7777877272407054
Episode 816 - High-Level Reward: 3.393069787729664
Episode 816 - High-Level Reward: -1.5519626596298974
Episode 816 - High-Level Reward: -1.7777858831092348
Episode 817 - High-Level Reward: -1.4833172687431158
Episode 817 - High-Level Reward: -0.6977077831192783
Episode 817 - High-Level Reward: -0.7699767302619465
Episode 818 - High-Level Reward: -1.4833424905906494
Episode 818 - High-Level Reward: -1.552456223564224
Episode 818 - High-Level Reward: -0.8343752012549872
Episode 819 - High-Level Reward: -1.4588664310355959
Episode 819 - High-Level Reward: 1.12082445040123
Episode 819 - High-Level Reward: -1.7487232504035477
Episode 820 - High-Level Reward: -1.4826738584647947
E

Episode 868 - High-Level Reward: -1.4828756350761232
Episode 868 - High-Level Reward: 1.706363705833359
Episode 868 - High-Level Reward: 1.7092932949455735
Episode 869 - High-Level Reward: -1.4835340286460688
Episode 869 - High-Level Reward: -1.552492212165908
Episode 869 - High-Level Reward: -1.777639360465191
Episode 870 - High-Level Reward: -1.4824453659721193
Episode 870 - High-Level Reward: -1.551179641761856
Episode 870 - High-Level Reward: -1.7763632861701488
Episode 871 - High-Level Reward: -1.4835251877914244
Episode 871 - High-Level Reward: 4.05146645589058
Episode 871 - High-Level Reward: 3.823598944096421
Episode 872 - High-Level Reward: -1.4834976274467282
Episode 872 - High-Level Reward: -1.5512910438614649
Episode 872 - High-Level Reward: -1.7763326827765944
Episode 873 - High-Level Reward: 1.3921137176689347
Episode 873 - High-Level Reward: -1.5525455168686657
Episode 873 - High-Level Reward: 1.134581895333148
Episode 874 - High-Level Reward: 1.222802346125049
Episode 8

Episode 921 - High-Level Reward: -1.4834770802436645
Episode 921 - High-Level Reward: -1.5525622912369534
Episode 921 - High-Level Reward: 1.155350667488909
Episode 922 - High-Level Reward: 1.1620797447456541
Episode 922 - High-Level Reward: -1.542838114051895
Episode 922 - High-Level Reward: 0.8844982710808275
Episode 923 - High-Level Reward: -0.4926803322349366
Episode 923 - High-Level Reward: -1.5524831851731107
Episode 923 - High-Level Reward: 0.8466588130577557
Episode 924 - High-Level Reward: -1.4834817982230954
Episode 924 - High-Level Reward: -1.5525734606400294
Episode 924 - High-Level Reward: -1.777806461249495
Episode 925 - High-Level Reward: -1.483444439183789
Episode 925 - High-Level Reward: -1.5524668888321684
Episode 925 - High-Level Reward: -1.7778036498939043
Episode 926 - High-Level Reward: -1.4834810785728272
Episode 926 - High-Level Reward: 1.0624287703322601
Episode 926 - High-Level Reward: -1.777656502463484
Episode 927 - High-Level Reward: -1.4835045406852536
Epi

Episode 974 - High-Level Reward: -1.4757427835746584
Episode 974 - High-Level Reward: -1.5465935505753325
Episode 974 - High-Level Reward: 1.0540276324089524
Episode 975 - High-Level Reward: -1.4766105775199707
Episode 975 - High-Level Reward: -1.513859539833145
Episode 975 - High-Level Reward: -0.9363203773566725
Episode 976 - High-Level Reward: -1.4830621054435547
Episode 976 - High-Level Reward: -0.5507159441605375
Episode 976 - High-Level Reward: 1.1992398782433022
Episode 977 - High-Level Reward: -0.9769652971816833
Episode 977 - High-Level Reward: -1.552500696483688
Episode 977 - High-Level Reward: -0.9618423136588576
Episode 978 - High-Level Reward: 1.1299543413032713
Episode 978 - High-Level Reward: 1.0013612493514252
Episode 978 - High-Level Reward: 0.9794318039787767
Episode 979 - High-Level Reward: -1.4827912329154787
Episode 979 - High-Level Reward: -1.5511785130997464
Episode 979 - High-Level Reward: -1.7764808000861647
Episode 980 - High-Level Reward: -0.6657309398246583


## Saving the Models

In [284]:
with open('high_level_agent.pkl', 'wb') as f:
    pickle.dump(high_agent, f)

# Save the mid-level agent(s) and low-level agent(s) if needed
with open('mid_level_agent.pkl', 'wb') as f:
    pickle.dump(mid_agent, f)

with open('low_level_agent.pkl', 'wb') as f:
    pickle.dump(low_agent, f)

## Loading the Models

In [285]:
with open('high_level_agent.pkl', 'rb') as f:
    high_agent = pickle.load(f)

# Load the mid-level agent(s) and low-level agent(s) if needed
with open('mid_level_agent.pkl', 'rb') as f:
    mid_agent = pickle.load(f)

with open('low_level_agent.pkl', 'rb') as f:
    low_agent = pickle.load(f)