<a href="https://colab.research.google.com/github/MarriRohan/Stock-Trading-Bot-with-Deep-Q-Learning/blob/main/bot.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [33]:
# Original cell PW7NFh8Ojz5g is now empty as its content has been moved to new cells.
# Please execute the newly created cells in order (env, train_dqn, live_trader, risk).
# Also ensure 'alerts' and 'broker' placeholder cells are executed if they exist.

In [35]:
# Create an instance of the SLTPEnv using the dummy data
# Assumes price_array, features, and regime_ids are already defined from new_cell_data
# Assumes SLTPEnv class is defined from new_cell_env

env = SLTPEnv(price_array, features, regime_ids, max_capital=50000)
print("SLTPEnv instance created successfully.")

SLTPEnv instance created successfully.


In [36]:
import time
import random

print("Starting dynamic live trading simulation...")

simulation_ticks = 20 # Number of simulated market ticks

for i in range(simulation_ticks):
    print(f"\n--- Simulating Tick {i+1}/{simulation_ticks} ---")
    # Create a new dummy tick to simulate market data arrival
    current_tick = DummyTick()
    current_tick.price = 100.0 + random.uniform(-0.5, 0.5) # Simulate price fluctuation
    current_tick.features = np.random.randn(num_features_from_x).astype(np.float32)
    current_tick.features[-1] = np.abs(current_tick.features[-1]) * 0.5 + 0.1 # Ensure ATR is positive
    current_tick.regime_id = np.random.randint(0, num_regimes)

    # Call the on_market_tick function from the live_trader
    on_market_tick(current_tick)

    # Simulate some time passing
    time.sleep(0.5) # Pause for 0.5 seconds to visualize the dynamic updates

print("Dynamic live trading simulation complete.")


Starting dynamic live trading simulation...

--- Simulating Tick 1/20 ---
Placing BUY order for 704 of DUMMY with SL=99.18739886714408, TP=100.25151504708717, Trailing=0.3547053933143616
ALERT: Placed order ORDER_DUMMY_704 qty=704 SL=99.19 TP=100.25 trail=0.35


  action = int(model.predict(live_state.observation.reshape(1, -1), deterministic=True)[0])



--- Simulating Tick 2/20 ---
Placing BUY order for 1694 of DUMMY with SL=99.92441639367414, TP=100.36707788769078, Trailing=0.14755383133888245
ALERT: Placed order ORDER_DUMMY_1694 qty=1694 SL=99.92 TP=100.37 trail=0.15

--- Simulating Tick 3/20 ---
Placing BUY order for 554 of DUMMY with SL=99.51887692408012, TP=100.87162355975555, Trailing=0.45091554522514343
ALERT: Placed order ORDER_DUMMY_554 qty=554 SL=99.52 TP=100.87 trail=0.45

--- Simulating Tick 4/20 ---
Placing BUY order for 869 of DUMMY with SL=100.18482306861694, TP=101.04739439749534, Trailing=0.2875237762928009
ALERT: Placed order ORDER_DUMMY_869 qty=869 SL=100.18 TP=101.05 trail=0.29

--- Simulating Tick 5/20 ---
Placing BUY order for 2332 of DUMMY with SL=99.53967146228175, TP=99.861244809135, Trailing=0.10719111561775208
ALERT: Placed order ORDER_DUMMY_2332 qty=2332 SL=99.54 TP=99.86 trail=0.11

--- Simulating Tick 6/20 ---
Placing BUY order for 624 of DUMMY with SL=99.45997408695548, TP=100.6617914802584, Trailing=0.

In [30]:
import gymnasium as gym
import numpy as np

class SLTPEnv(gym.Env):
    def __init__(self, price_array, features, regime_ids, max_capital, fee_bps=2):
        super().__init__()
        self.price = price_array
        self.x = features
        self.regime = regime_ids
        self.max_cap = max_capital
        # Discrete grid of (SL, TP, Trail) indices
        self.sl_grid = np.array([0.5, 0.75, 1.0])   # in ATR
        self.tp_grid = np.array([1.0, 1.5, 2.0])    # in ATR
        self.tr_grid = np.array([0.5, 0.75, 1.0])   # in ATR
        self.action_map = [(i,j,k) for i in range(3) for j in range(3) for k in range(3)]
        self.action_space = gym.spaces.Discrete(len(self.action_map))
        obs_dim = self.x.shape[1] + len(np.unique(self.regime))
        self.observation_space = gym.spaces.Box(low=-5, high=5, shape=(obs_dim,), dtype=np.float32)
        self.reset()

    def reset(self, seed=None, options=None):
        super().reset(seed=seed)
        self.t = 100  # start after warmup window
        self.equity = self.max_cap
        self.daily_start_equity = self.equity
        self.pos = 0  # 0=flat, 1=long, -1=short
        self.entry_price = None
        return self._obs(), {}

    def _obs(self):
        feat = self.x[self.t]
        reg = int(self.regime[self.t])
        reg_onehot = np.eye(len(np.unique(self.regime)))[reg]
        return np.clip(np.concatenate([feat, reg_onehot]), -5, 5).astype(np.float32)

    def step(self, action):
        i,j,k = self.action_map[action]
        sl_atr, tp_atr, tr_atr = self.sl_grid[i], self.tp_grid[j], self.tr_grid[k]

        price_t = self.price[self.t]
        atr_t   = max(1e-6, self.x[self.t][-1])  # assume last feature is ATR

        # Simple position/exit sim (stub): one bar ahead outcome using SL/TP bounds
        qty = self._size(sl_atr * atr_t)

        pnl = self._simulate_bar(price_t, atr_t, qty, sl_atr, tp_atr, tr_atr)
        fee = abs(qty) * price_t * (2/10000)  # 2 bps per side example

        self.equity += (pnl - fee)
        reward = (pnl - fee) / self.max_cap

        done = False
        info = {"equity": self.equity}

        # daily loss breaker (example âˆ’3%)
        if (self.equity - self.daily_start_equity) / self.daily_start_equity <= -0.03:
            reward -= 0.01
            done = True
            info["breaker"] = True

        self.t += 1
        if self.t >= len(self.price)-1:
            done = True

        return self._obs(), reward, done, False, info

    def _size(self, sl_abs):
        risk_per_trade = 0.005 * self.equity  # 0.5% per trade
        return max(0, int(risk_per_trade / sl_abs))

    def _simulate_bar(self, p, atr, qty, sl_atr, tp_atr, tr_atr):
        # Placeholder: plug in high/low path and hit-tests vs SL/TP/trailing
        # Return signed PnL in currency units
        return np.random.randn() * 0.1 * atr * qty

In [32]:
from stable_baselines3 import DQN
# The SLTPEnv and model objects should be available from previous cell executions.
# If not, you might need to re-run the environment definition and model loading/training cells.

# Load the trained model if it's not already loaded (e.g., if kernel was reset)
try:
    model = DQN.load("dqn_sltp")
    print("DQN model loaded successfully for evaluation.")
except Exception as e:
    print(f"Could not load DQN model for evaluation: {e}. Please ensure it has been trained and saved.")
    # Fallback to a dummy model if loading fails
    class DummyModel:
        def predict(self, observation, deterministic=True):
            return np.array([0]), None
    model = DummyModel()


# Assuming `env` (SLTPEnv instance) is available from previous executions.
# If not, create a new instance:
# env = SLTPEnv(price_array, features, regime_ids, max_capital=50000)

print("Starting model evaluation...")

n_eval_episodes = 100
episode_rewards = []

for episode in range(n_eval_episodes):
    obs, info = env.reset()
    done = False
    total_reward = 0
    while not done:
        action, _states = model.predict(obs, deterministic=True)
        # Fix: Convert action array to scalar integer
        action = int(action[0])
        obs, reward, done, truncated, info = env.step(action)
        total_reward += reward
    episode_rewards.append(total_reward)
    print(f"Episode {episode + 1}/{n_eval_episodes} - Total Reward: {total_reward:.4f}")

mean_reward = np.mean(episode_rewards)
std_reward = np.std(episode_rewards)

print(f"\nEvaluation complete over {n_eval_episodes} episodes.")
print(f"Mean total reward: {mean_reward:.4f}")
print(f"Standard deviation of total reward: {std_reward:.4f}")

Could not load DQN model for evaluation: [Errno 2] No such file or directory: 'dqn_sltp.zip'. Please ensure it has been trained and saved.
Starting model evaluation...
Episode 1/100 - Total Reward: -0.0400
Episode 2/100 - Total Reward: -0.0418
Episode 3/100 - Total Reward: -0.0404
Episode 4/100 - Total Reward: -0.0409
Episode 5/100 - Total Reward: -0.0401
Episode 6/100 - Total Reward: -0.0404
Episode 7/100 - Total Reward: -0.0401
Episode 8/100 - Total Reward: -0.0403
Episode 9/100 - Total Reward: -0.0402
Episode 10/100 - Total Reward: -0.0412
Episode 11/100 - Total Reward: -0.0405
Episode 12/100 - Total Reward: -0.0410
Episode 13/100 - Total Reward: -0.0409
Episode 14/100 - Total Reward: -0.0406
Episode 15/100 - Total Reward: -0.0413
Episode 16/100 - Total Reward: -0.0405
Episode 17/100 - Total Reward: -0.0404
Episode 18/100 - Total Reward: -0.0402
Episode 19/100 - Total Reward: -0.0400
Episode 20/100 - Total Reward: -0.0404
Episode 21/100 - Total Reward: -0.0410
Episode 22/100 - Total

In [22]:
# Dummy data for training and live trading components
import numpy as np

# Assuming a sequence length for simulation
sequence_length = 2000

# Price array (e.g., random walk prices)
price_array = 100 + np.cumsum(np.random.randn(sequence_length))

# Features (e.g., 5 features, last one being ATR)
# Make sure ATR is non-negative and somewhat realistic
features = np.random.randn(sequence_length, 5)
features[:, -1] = np.abs(features[:, -1]) * 0.5 + 0.1 # Ensure ATR is positive

# Regime IDs (e.g., 3 different regimes)
regime_ids = np.random.randint(0, 3, sequence_length)

print("Dummy data created for price_array, features, and regime_ids.")

Dummy data created for price_array, features, and regime_ids.


In [23]:
# alerts.py
def send_alert(message):
    print(f"ALERT: {message}")

print("Alerts module loaded.")

Alerts module loaded.


In [24]:
# broker.py
class Broker:
    def __init__(self, paper=True):
        print(f"Broker initialized (Paper Trading: {paper})")
        self._equity = 50000.0  # Placeholder initial equity

    def account_equity(self):
        # Placeholder: returns current equity
        return self._equity

    def place_bracket_order(self, symbol, side, qty, sl_price, tp_price, trailing):
        # Placeholder: simulates placing an order
        print(f"Placing {side} order for {qty} of {symbol} with SL={sl_price}, TP={tp_price}, Trailing={trailing}")
        # In a real scenario, this would interact with a broker API
        return f"ORDER_{symbol}_{qty}" # Returns a dummy order ID

    def flatten_all(self):
        # Placeholder: simulates flattening all positions
        print("All positions flattened.")

    def request_withdrawal(self, amount):
        # Placeholder: simulates a withdrawal request
        print(f"Requesting withdrawal of {amount}.")
        return True # Always succeeds in this placeholder

print("Broker module loaded.")

Broker module loaded.


In [37]:
from stable_baselines3 import DQN
import numpy as np

# The following modules are defined in separate cells and are globally available.
# No need for explicit 'from ... import ...' statements for them in a notebook environment.
# from alerts import send_alert
# from broker import Broker
# from risk import RiskManager

MAX_CAPITAL = 50000
DAILY_LOSS_LIMIT = 0.03  # 3%

broker = Broker(paper=True) # Broker class is available globally
risk = RiskManager(MAX_CAPITAL, DAILY_LOSS_LIMIT) # RiskManager class is available globally
# The model will be loaded after training, so this line might fail if not trained yet.
# For now, let's assume it's available or handle the error gracefully for placeholders.
try:
    model = DQN.load("dqn_sltp")
except Exception as e:
    print(f"Could not load DQN model: {e}. Please ensure it has been trained and saved.")
    # Create a dummy model for execution flow if load fails
    class DummyModel:
        def predict(self, observation, deterministic=True):
            # Returns a dummy action (e.g., action 0)
            # Ensure the observation is correctly shaped (1, N) for model.predict
            return np.array([0]), None
    model = DummyModel()

# Assuming SLTPEnv's x.shape[1] (features) is 5 and number of unique regimes is 3
num_features_from_x = 5
num_regimes = 3

class LiveTraderState:
    def __init__(self, observation_for_model, atr_value):
        self.observation = observation_for_model
        self.atr = atr_value

class DummyTick:
    def __init__(self):
        self.price = 100.0
        self.symbol = "DUMMY"
        # Generate dummy features for the tick, matching expected input for build_state
        self.features = np.random.randn(num_features_from_x).astype(np.float32)
        # Ensure ATR (last feature) is positive
        self.features[-1] = np.abs(self.features[-1]) * 0.5 + 0.1 # Ensure ATR is positive
        self.regime_id = np.random.randint(0, num_regimes)

def build_state(tick):
    # Use DummyTick if the provided tick is not sufficient
    if not isinstance(tick, DummyTick) and (not hasattr(tick, 'features') or not hasattr(tick, 'regime_id')):
        tick = DummyTick()

    feat = tick.features
    reg = int(tick.regime_id)
    reg_onehot = np.eye(num_regimes)[reg]

    # The observation for the model needs to be a flat array
    obs_for_model = np.concatenate([feat, reg_onehot]).astype(np.float32)

    return LiveTraderState(obs_for_model, feat[-1]) # feat[-1] is assumed to be ATR

def decode_action(action):
    sl_grid = np.array([0.5, 0.75, 1.0])   # in ATR
    tp_grid = np.array([1.0, 1.5, 2.0])    # in ATR
    tr_grid = np.array([0.5, 0.75, 1.0])   # in ATR

    # Decode action from a single integer back to (i, j, k) indices
    k = action % 3
    j = (action // 3) % 3
    i = (action // 9) % 3

    return sl_grid[i], tp_grid[j], tr_grid[k]

def try_request_withdrawal():
    ok = broker.request_withdrawal(amount=risk.sweep_amount())
    send_alert("Withdrawal request " + ("submitted." if ok else "FAILED. Manual action needed.")) # send_alert is available globally

def on_market_tick(tick):
    live_state = build_state(tick)

    # model.predict expects a 2D array: (num_samples, num_features)
    # Reshape live_state.observation from (N,) to (1, N)
    # Fix: Convert action array to scalar integer to avoid DeprecationWarning
    action = int(model.predict(live_state.observation.reshape(1, -1), deterministic=True)[0])
    sl, tp, tr = decode_action(action)

    if not risk.can_trade(broker.account_equity()):
        send_alert("Breaker active: trading halted.")
        return

    # Ensure tick object has a 'price' attribute
    current_price = tick.price if hasattr(tick, 'price') else 100.0 # Use dummy price if not available

    qty, sl_price, tp_price, trail = risk.compute_order_params(current_price, sl, tp, tr, live_state.atr)

    if qty > 0:
        order_id = broker.place_bracket_order(
            symbol=tick.symbol if hasattr(tick, 'symbol') else "DUMMY_SYMBOL",
            side="BUY",
            qty=qty,
            sl_price=sl_price,
            tp_price=tp_price,
            trailing=trail
        )
        send_alert(f"Placed order {order_id} qty={qty} SL={sl_price:.2f} TP={tp_price:.2f} trail={trail:.2f}")

    if risk.hit_hard_stop(broker.account_equity()):
        broker.flatten_all()
        send_alert("HARD STOP hit. Flattened. Initiating withdrawal request.")
        try_request_withdrawal()


Broker initialized (Paper Trading: True)
Could not load DQN model: [Errno 2] No such file or directory: 'dqn_sltp.zip'. Please ensure it has been trained and saved.


In [16]:
class RiskManager:
    def __init__(self, max_capital, daily_loss_limit):
        self.max_capital = max_capital
        self.daily_start = None
        self.daily_loss_limit = daily_loss_limit

    def start_day(self, equity):
        self.daily_start = equity

    def can_trade(self, equity):
        if self.daily_start is None: self.start_day(equity)
        dd = (equity - self.daily_start) / self.daily_start
        return dd > -self.daily_loss_limit and equity <= self.max_capital

    def compute_order_params(self, price, sl_atr, tp_atr, tr_atr, atr):
        sl_abs = sl_atr * atr
        tp_abs = tp_atr * atr
        trail_abs = tr_atr * atr
        risk_per_trade = 0.005 * self.max_capital
        qty = max(0, int(risk_per_trade / sl_abs))
        return qty, price - sl_abs, price + tp_abs, trail_abs

    def hit_hard_stop(self, equity):
        dd = (equity - self.daily_start) / self.daily_start
        return dd <= -self.daily_loss_limit

    def sweep_amount(self):
        # example: sweep anything above a safety buffer
        return max(0, self.max_capital * 0.2)
