<a href="https://colab.research.google.com/github/Dmoore628/AUTOMATICAI/blob/main/rl_spotCFD_TraderV4.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Cell 1
## Install Dependencies

In [None]:
# Install required packages.
#
# gym==0.21.0 (pinned by stable-baselines3==1.6.2) is only published as an sdist, and
# its setup.py metadata is rejected by recent setuptools / wheel / pip releases -- that
# is the "python setup.py egg_info did not run successfully" failure this cell produced.
# Pin the build tooling to versions that still accept it *before* installing anything
# else, and do not upgrade pip to the latest release.
# NOTE(review): confirm these pins against the runtime's Python version.
!pip install "pip<24.1" "setuptools==65.5.0" "wheel<0.40.0"
!pip install gym==0.21.0
!pip install torch==2.0.1 torchvision
!pip install numpy==1.23.5 pandas==1.5.3 matplotlib==3.7.1
!pip install SQLAlchemy==2.0.19
!pip install tqdm==4.66.1
# Quote the requirement so the shell never tries to glob-expand "[extra]"
!pip install "stable-baselines3[extra]==1.6.2"
!pip install tensorboard==2.9.1
!pip install cloudpickle==1.4.1


Collecting stable-baselines3==1.6.2 (from stable-baselines3[extra]==1.6.2)
  Using cached stable_baselines3-1.6.2-py3-none-any.whl.metadata (4.1 kB)
Collecting gym==0.21 (from stable-baselines3==1.6.2->stable-baselines3[extra]==1.6.2)
  Using cached gym-0.21.0.tar.gz (1.5 MB)
  [1;31merror[0m: [1msubprocess-exited-with-error[0m
  
  [31m×[0m [32mpython setup.py egg_info[0m did not run successfully.
  [31m│[0m exit code: [1;36m1[0m
  [31m╰─>[0m See above for output.
  
  [1;35mnote[0m: This error originates from a subprocess, and is likely not a problem with pip.
  Preparing metadata (setup.py) ... [?25l[?25herror
[1;31merror[0m: [1mmetadata-generation-failed[0m

[31m×[0m Encountered error while generating package metadata.
[31m╰─>[0m See above for output.

[1;35mnote[0m: This is an issue with the package mentioned above, not pip.
[1;36mhint[0m: See above for details.
Collecting gym==0.21.0
  Using cached gym-0.21.0.tar.gz (1.5 MB)
  [1;31merror[0m: [1ms

# Cell 2
## Import Libraries

In [None]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import sqlite3
from datetime import datetime, timedelta
import torch
import random
import gym
from gym import spaces
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.callbacks import BaseCallback, CheckpointCallback
from tensorboard import notebook
from IPython.display import clear_output


ModuleNotFoundError: No module named 'stable_baselines3'

# Cell 3


In [None]:
# Load the tick history from SQLite. The connection is released even if the query fails.
query = "SELECT PRICE, DATETIME FROM trades"
conn = sqlite3.connect('/content/Historical_tick_price_data.db')
try:
    data = pd.read_sql_query(query, conn)
finally:
    conn.close()

# Parse DATETIME column
data['DATETIME'] = pd.to_datetime(data['DATETIME'], format='%Y-%m-%d %H:%M:%S')

# The query has no ORDER BY, so row order is whatever SQLite returns. The rolling
# indicators below are only meaningful on chronologically ordered rows; a stable
# sort keeps the original order of ticks that share a timestamp.
data = data.sort_values('DATETIME', kind='mergesort')

# Remove weekend data (Forex market is closed on weekends): keep Monday(0)..Friday(4)
data = data[data['DATETIME'].dt.weekday <= 4]

# Reset index after filtering so positional and label indexing agree
data = data.reset_index(drop=True)

# Simple moving averages
data['SMA_5'] = data['PRICE'].rolling(window=5).mean()
data['SMA_13'] = data['PRICE'].rolling(window=13).mean()
data['SMA_21'] = data['PRICE'].rolling(window=21).mean()

# MACD (12/26 EMA difference) and its 9-period signal line
ema_12 = data['PRICE'].ewm(span=12, adjust=False).mean()
ema_26 = data['PRICE'].ewm(span=26, adjust=False).mean()
data['MACD'] = ema_12 - ema_26
data['MACD_Signal'] = data['MACD'].ewm(span=9, adjust=False).mean()

# Bollinger Bands (20-period mean +/- 2 standard deviations)
data['BB_Middle'] = data['PRICE'].rolling(window=20).mean()
data['BB_Std'] = data['PRICE'].rolling(window=20).std()
data['BB_Upper'] = data['BB_Middle'] + (data['BB_Std'] * 2)
data['BB_Lower'] = data['BB_Middle'] - (data['BB_Std'] * 2)

# Back-fill the NaN warm-up rows produced by the rolling windows.
# (`fillna(method=...)` is deprecated in newer pandas; `bfill()` is the equivalent call.)
data = data.bfill()

# Normalize input features
# NOTE(review): the scaler is fit on the full history, so every row's features carry
# information from later rows, and PRICE itself is replaced by its z-score. The
# TradingEnvironment cell reads this same PRICE column for fills and PnL -- confirm
# that trading on standardised prices is intended.
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
features = ['PRICE', 'SMA_5', 'SMA_13', 'SMA_21', 'MACD', 'MACD_Signal', 'BB_Middle', 'BB_Upper', 'BB_Lower']
data[features] = scaler.fit_transform(data[features])

print(f"Total data points after preprocessing: {len(data)}")


# Cell 4

In [None]:
class TradingEnvironment(gym.Env):
    """Leveraged single-instrument trading environment (gym 0.21 API).

    Each step the agent chooses a target exposure in [-1, 1], interpreted as a
    fraction of ``equity * leverage`` (negative values are short). The order needed
    to reach that target fills at ``PRICE + spread`` for buys and ``PRICE - spread``
    for sells, and pays ``commission * |order size|``.

    Accounting model:
      * ``balance`` is realised cash: it changes only by realised PnL and fees.
      * Margin is a constraint, not a cost: an order that adds exposure is rejected
        when the resulting position's margin (``|position| / leverage``) exceeds the
        post-trade equity. Orders that only reduce exposure are always accepted.
      * ``position_price`` is the size-weighted average entry price of the open
        position; it is unchanged by partial closes.
      * ``equity = balance + unrealised PnL``, where PnL uses the convention
        ``(price - entry) * side * |position_size|``.

    An episode ends when equity drops to zero or below, equity reaches
    ``profit_target``, ``EPISODE_DAYS`` have elapsed since the episode's first row,
    or the data runs out.

    Args:
        data: DataFrame with the columns DATETIME, PRICE, SMA_5, SMA_13, SMA_21,
            MACD, MACD_Signal, BB_Middle, BB_Upper and BB_Lower.
        initial_balance: Starting cash of every episode.
        profit_target: Equity level that ends the episode with a bonus.
        leverage: Maximum notional exposure per unit of equity.
        spread: Half-spread applied around PRICE to obtain bid/ask.
        commission: Fee rate charged on the absolute order size.

    Raises:
        ValueError: If ``data`` has no rows.
    """
    metadata = {'render.modes': ['human']}

    # Maximum episode duration, and the row headroom (ROWS_PER_DAY * EPISODE_DAYS) kept
    # after a random start. The headroom assumes roughly one row per minute.
    EPISODE_DAYS = 7
    ROWS_PER_DAY = 1440

    def __init__(self, data, initial_balance=500, profit_target=25000, leverage=100, spread=0.0002, commission=0.0001):
        super(TradingEnvironment, self).__init__()

        self.data = data.reset_index(drop=True)
        if self.data.empty:
            raise ValueError("TradingEnvironment requires at least one row of data")

        self.initial_balance = initial_balance
        self.profit_target = profit_target
        self.leverage = leverage
        self.spread = spread
        self.commission = commission
        self.max_steps = len(self.data) - 1  # last valid row index

        # Action space: target exposure as a fraction of equity * leverage, in [-1, 1]
        self.action_space = spaces.Box(low=-1, high=1, shape=(1,), dtype=np.float32)

        # Observation space: the 12 values assembled in _next_observation
        self.observation_space = spaces.Box(low=-np.inf, high=np.inf, shape=(12,), dtype=np.float32)

        # Initialize episode state
        self.reset()

    def reset(self, start_idx=None):
        """Start a new episode and return its first observation.

        Args:
            start_idx: Row index to start from. When ``None`` a random start is drawn
                that leaves ``ROWS_PER_DAY * EPISODE_DAYS`` rows of headroom; if the
                data is shorter than that, the episode starts at row 0.
        """
        if start_idx is None:
            # max(0, ...) keeps randint valid when the dataset is shorter than the headroom
            latest_start = max(0, self.max_steps - self.ROWS_PER_DAY * self.EPISODE_DAYS)
            self.current_step = random.randint(0, latest_start)
        else:
            self.current_step = start_idx
        self.balance = self.initial_balance
        self.equity = self.initial_balance
        self.net_worth = self.initial_balance
        self.position_size = 0
        self.position_price = 0
        self.position_type = 0  # 1 for long, -1 for short, 0 for flat
        self.trade_history = []
        self.done = False
        self.start_time = self.data.loc[self.current_step, 'DATETIME']
        self.end_time = self.start_time + timedelta(days=self.EPISODE_DAYS)
        self.highest_equity = self.initial_balance
        self.total_reward = 0
        self.episode_start_step = self.current_step
        return self._next_observation()

    def _next_observation(self):
        """Build the 12-element float32 observation for the current row."""
        current_row = self.data.loc[self.current_step]
        obs = np.array([
            current_row['PRICE'],
            self.balance / self.initial_balance,
            (self.equity - self.initial_balance) / self.initial_balance,
            self.position_type,
            current_row['SMA_5'],
            current_row['SMA_13'],
            current_row['SMA_21'],
            current_row['MACD'],
            current_row['MACD_Signal'],
            current_row['BB_Middle'],
            current_row['BB_Upper'],
            current_row['BB_Lower']
        ], dtype=np.float32)
        return obs

    def step(self, action):
        """Apply ``action`` at the current row and advance one row.

        Returns:
            ``(observation, reward, done, info)``; ``info`` carries the current
            equity, balance and signed position size.
        """
        # Accept scalars as well as 1-element arrays and keep the value inside the action space
        action = float(np.clip(np.asarray(action, dtype=np.float64).ravel()[0], -1.0, 1.0))
        current_price = self.data.loc[self.current_step, 'PRICE']
        timestamp = self.data.loc[self.current_step, 'DATETIME']

        # Apply spread
        bid_price = current_price - self.spread
        ask_price = current_price + self.spread

        # Update position
        previous_position_size = self.position_size
        previous_position_type = self.position_type
        self._execute_trade(action, current_price, bid_price, ask_price)

        # Mark the account to market at the mid price
        self.equity = self.balance + self._get_unrealized_pnl(current_price)

        # Update highest equity for drawdown calculation
        if self.equity > self.highest_equity:
            self.highest_equity = self.equity

        # Calculate reward
        reward = self._calculate_reward(previous_position_size, previous_position_type, current_price)

        # Check for termination conditions
        self.done = False
        if self.equity <= 0:
            self.done = True
            reward -= 50  # Severe penalty for bankruptcy
        elif self.equity >= self.profit_target:
            self.done = True
            reward += 100  # Large reward for hitting profit target
        elif timestamp >= self.end_time:
            self.done = True
            if self.equity >= self.initial_balance:
                reward += 10  # Reward for ending the week at or above the starting balance

        # Accumulate after the terminal adjustments so total_reward matches what the agent received
        self.total_reward += reward

        # Move to the next row; clamp so the observation lookup below always hits a valid row
        self.current_step = min(self.current_step + 1, self.max_steps)
        if self.current_step >= self.max_steps:
            self.done = True

        obs = self._next_observation()
        info = {
            'equity': self.equity,
            'balance': self.balance,
            'position_size': self.position_size,
        }
        return obs, reward, self.done, info

    def _execute_trade(self, action, current_price, bid_price, ask_price):
        """Trade from the current position to the target exposure implied by ``action``.

        The order is split into the part that closes existing exposure (its PnL is
        realised into ``balance``) and the part that opens new exposure (subject to
        the margin check). A rejected order leaves all state untouched.
        """
        # Target position as a fraction of the maximum leveraged size; equity is used for compounding
        max_position_size = max(self.equity, 0) * self.leverage
        desired_position_size = action * max_position_size
        position_change = desired_position_size - self.position_size

        if position_change > 0:
            # Increase long position or decrease short position
            trade_price = ask_price
            trade_type = 1  # Long
        elif position_change < 0:
            # Increase short position or decrease long position
            trade_price = bid_price
            trade_type = -1  # Short
        else:
            return  # Hold (also reached when the change is NaN)

        order_size = abs(position_change)
        held_size = abs(self.position_size)
        held_side = np.sign(self.position_size)
        fee = self.commission * order_size

        # Portion of the order that offsets the existing position
        if held_size > 0 and trade_type != held_side:
            closed_size = min(order_size, held_size)
        else:
            closed_size = 0
        opened_size = order_size - closed_size

        realized_pnl = (trade_price - self.position_price) * held_side * closed_size
        balance_after = self.balance + realized_pnl - fee
        new_position_size = self.position_size + position_change

        if opened_size > 0:
            # Margin applies only when exposure is added; pure reductions always go through
            kept_size = held_size - closed_size
            unrealized_kept = (current_price - self.position_price) * held_side * kept_size
            required_margin = abs(new_position_size) / self.leverage
            if balance_after + unrealized_kept < required_margin:
                return  # Not enough equity to carry the resulting position

        # Entry price: reset on a fresh or reversed position, averaged when adding,
        # untouched on a partial close, cleared when flat
        if new_position_size == 0:
            self.position_price = 0
        elif opened_size > 0:
            if closed_size > 0 or held_size == 0:
                self.position_price = trade_price
            else:
                self.position_price = (
                    self.position_price * held_size + trade_price * opened_size
                ) / (held_size + opened_size)

        self.balance = balance_after
        self.position_size = new_position_size
        self.position_type = np.sign(self.position_size)
        self.trade_history.append({
            'step': self.current_step,
            'type': 'buy' if trade_type == 1 else 'sell',
            'price': trade_price,
            'size': position_change
        })

    def _get_unrealized_pnl(self, current_price):
        """Return the open position's mark-to-market PnL at ``current_price`` (0 when flat)."""
        if self.position_size == 0:
            return 0
        else:
            price_diff = (current_price - self.position_price) * self.position_type
            return price_diff * abs(self.position_size)

    def _calculate_reward(self, previous_position_size, previous_position_type, current_price):
        """Combine profit, compounding and penalty terms into the per-step reward.

        ``previous_position_type`` is accepted for interface compatibility and is not used.
        """
        # Immediate profit incentive
        profit = self.equity - self.initial_balance
        profit_reward = profit / self.initial_balance

        # Compounding incentive (squared equity ratio, so it is never negative)
        compounding_reward = (self.equity / self.initial_balance) ** 2

        # Penalty for unrealized losses
        unrealized_pnl = self._get_unrealized_pnl(current_price)
        if unrealized_pnl < 0:
            unrealized_loss_penalty = 2 * (abs(unrealized_pnl) / self.initial_balance)
        else:
            unrealized_loss_penalty = 0

        # Penalty for holding a losing position while the account is still in profit
        if previous_position_size != 0 and unrealized_pnl < 0 and profit > 0:
            profit_to_loss_penalty = abs(unrealized_pnl) / self.initial_balance
        else:
            profit_to_loss_penalty = 0

        # Tiered drawdown penalty relative to the episode's equity high-water mark
        drawdown = (self.highest_equity - self.equity) / self.highest_equity
        if drawdown > 0.2:
            drawdown_penalty = 5 * drawdown
        elif drawdown > 0.1:
            drawdown_penalty = 2 * drawdown
        else:
            drawdown_penalty = drawdown

        # Total reward
        reward = profit_reward + compounding_reward - unrealized_loss_penalty - profit_to_loss_penalty - drawdown_penalty
        return reward

    def render(self, mode='human'):
        """No-op; episodes are visualised by the training callback instead."""
        pass


# Cell 5

In [None]:
# Build the raw environment, then wrap it in Monitor and in the vectorised
# interface (DummyVecEnv) that the PPO agent is given.
base_env = TradingEnvironment(data)
monitored_env = Monitor(base_env)

# The factory must not close over the name `env`: a lambda looks its variable up at
# call time, and once this cell has run `env` refers to the DummyVecEnv itself.
env = DummyVecEnv([lambda: monitored_env])


# Cell 6



In [None]:
from stable_baselines3.common.callbacks import BaseCallback

class VisualizationCallback(BaseCallback):
    """Plot the price path and executed trades of the in-progress episode after each rollout."""

    def __init__(self, verbose=0):
        super(VisualizationCallback, self).__init__(verbose)
        self.episode = 0  # number of plots produced so far; used in the figure title

    def _on_step(self) -> bool:
        # Never interrupts training
        return True

    def _on_rollout_end(self):
        # training_env is the vectorised env; its first entry is the Monitor wrapper.
        # Monitor has no `.envs` attribute, so reach the TradingEnvironment via `.unwrapped`.
        env = self.training_env.envs[0].unwrapped

        # Plot the episode
        self.plot_episode(env)
        self.episode += 1

    def plot_episode(self, env):
        """Draw the price between the episode start and the current step, with trade markers.

        Args:
            env: Environment exposing ``episode_start_step``, ``current_step``,
                ``data['PRICE']`` and ``trade_history`` (dicts with 'step' and 'type').
        """
        start = env.episode_start_step
        end = env.current_step
        price = env.data['PRICE'].iloc[start:end].reset_index(drop=True)

        fig, ax = plt.subplots(figsize=(12, 6))
        ax.plot(price, label='Price')

        # One plot call per side: a single legend entry each, and no per-trade legend scan
        for side, marker, label in (('buy', 'g^', 'Buy'), ('sell', 'rv', 'Sell')):
            steps = [
                trade['step'] - start
                for trade in env.trade_history
                if trade['type'] == side and 0 <= trade['step'] - start < len(price)
            ]
            if steps:
                ax.plot(steps, price.iloc[steps].to_numpy(), marker, label=label)

        ax.set_title(f'Episode {self.episode}')
        ax.set_xlabel('Time Steps')
        ax.set_ylabel('Normalized Price')
        ax.legend()
        plt.show()
        plt.close(fig)  # release the figure; one is created per rollout

# Checkpointing: write the model to /content/checkpoints/ every 10,000 callback steps
checkpoint_callback = CheckpointCallback(
    save_freq=10000,
    save_path='/content/checkpoints/',
    name_prefix='ppo_trading_model',
)


# Cell 7


In [None]:
# Fixed seed so that training runs can be repeated.
# NOTE(review): passing `seed` to PPO is expected to seed Python's `random` (used by
# the environment for episode starts), NumPy and PyTorch -- confirm against the
# Stable-Baselines3 documentation.
SEED = 42

# Initialize the agent
model = PPO('MlpPolicy', env, verbose=1, tensorboard_log="/content/logs", seed=SEED)

# Set up the visualization callback
visualization_callback = VisualizationCallback()

# Start training with checkpointing and visualization
model.learn(total_timesteps=100000, callback=[checkpoint_callback, visualization_callback])


# Cell 8

In [None]:
# Start TensorBoard
# Load the TensorBoard notebook extension, then point the dashboard at
# /content/logs, the directory given to PPO as `tensorboard_log`.
%load_ext tensorboard
%tensorboard --logdir /content/logs


# Cell 9

In [None]:
# Persist the trained agent under the name 'ppo_trading_agent'
model_path = 'ppo_trading_agent'
model.save(model_path)
print("Model saved successfully.")


# Cell 10

In [None]:
# Locate the newest checkpoint instead of hard-coding a file name.
# Checkpoints are named "<prefix>_<steps>_steps.zip" by the checkpoint callback.
CHECKPOINT_DIR = '/content/checkpoints/'
CHECKPOINT_PREFIX = 'ppo_trading_model_'
CHECKPOINT_SUFFIX = '_steps.zip'


def _checkpoint_step_count(filename):
    """Return the step count encoded in a checkpoint file name, or -1 if it is not one."""
    if not (filename.startswith(CHECKPOINT_PREFIX) and filename.endswith(CHECKPOINT_SUFFIX)):
        return -1
    digits = filename[len(CHECKPOINT_PREFIX):-len(CHECKPOINT_SUFFIX)]
    return int(digits) if digits.isdigit() else -1


checkpoint_files = []
if os.path.isdir(CHECKPOINT_DIR):
    checkpoint_files = [name for name in os.listdir(CHECKPOINT_DIR) if _checkpoint_step_count(name) >= 0]
if not checkpoint_files:
    raise FileNotFoundError(f"No '{CHECKPOINT_PREFIX}*{CHECKPOINT_SUFFIX}' checkpoint found in {CHECKPOINT_DIR}")

# Load the latest checkpoint (highest step count, compared numerically)
latest_checkpoint = os.path.join(CHECKPOINT_DIR, max(checkpoint_files, key=_checkpoint_step_count))
print(f"Resuming from {latest_checkpoint}")
model = PPO.load(latest_checkpoint, env=env)

# Continue training. reset_num_timesteps=False keeps the global step counter running,
# so new checkpoints do not overwrite the earlier "*_N_steps.zip" files and the
# TensorBoard curve continues instead of restarting at step 0.
model.learn(
    total_timesteps=100000,
    callback=[checkpoint_callback, visualization_callback],
    reset_num_timesteps=False,
)


# Cell 11

# Cell 12

# Cell 13

# Bankruptcy Rate and Success Rate Calculation

**Bankruptcy Rate:**

- **Definition:** The proportion of episodes where the agent's equity or balance falls to zero or below, resulting in bankruptcy.
- **Calculation in Parallel Processing:**
  - Each environment in the vectorized environments runs an episode independently.
  - When an episode ends due to bankruptcy in any environment, it increments the bankruptcy count.
  - **Bankruptcy Rate = Total Bankruptcy Episodes / Total Episodes Completed**

**Success Rate:**

- **Definition:** The proportion of episodes where the agent's equity reaches or exceeds the profit target (\$25,000) within the 7-day period.
- **Calculation in Parallel Processing:**
  - Similar to bankruptcy rate, success is tracked individually in each environment.
  - When an episode ends with the agent reaching the profit target, it increments the success count.
  - **Success Rate = Total Successful Episodes / Total Episodes Completed**

**Notes:**

- **Total Episodes Completed** includes all episodes across all environments.
- Since episodes are running in parallel, counts are aggregated across all environments.
- **Careful synchronization** is required to ensure counts are accurate.

**Ensuring Correctness:**

- This notebook trains with Stable-Baselines3, whose `Monitor` wrapper records per-episode reward and length but does not count bankruptcies or successes on its own.
- Custom callbacks can be used to track and log these metrics accurately.


In [None]:
import pandas as pd
import time
import sqlite3

# Connect to the database
conn = sqlite3.connect('/content/BTCUSD_data_SMALL.db')
try:
    # Read the 'btc_usd_data' table in chronological order.
    # The column names must be double-quoted: in SQLite a single-quoted '<DATE>' is a
    # string literal, so ORDER BY '<DATE>' sorts by a constant and leaves rows unordered.
    df = pd.read_sql_query(
        'SELECT * FROM btc_usd_data ORDER BY "<DATE>" ASC, "<TIME>" ASC',
        conn,
    )
finally:
    # All rows are in memory now; release the connection before the slow replay loop
    conn.close()

# Iterate through the rows, simulating a live tick stream
for index, row in df.iterrows():
    print(f"DATE: {row['<DATE>']}, TIME: {row['<TIME>']}, BID: {row['<BID>']}, ASK: {row['<ASK>']}, LAST: {row['<LAST>']}")
    time.sleep(1)  # Adjust the sleep time to control the printing speed

DATE: 2024.11.14, TIME: 22:40:00.121, BID: nan, ASK: nan, LAST: 87968.63
DATE: 2024.11.14, TIME: 22:40:00.121, BID: 87944.63, ASK: nan, LAST: nan
DATE: 2024.11.14, TIME: 22:40:00.221, BID: nan, ASK: nan, LAST: 87965.52
DATE: 2024.11.14, TIME: 22:40:00.222, BID: 87943.39, ASK: 87987.64, LAST: nan
DATE: 2024.11.14, TIME: 22:40:00.321, BID: nan, ASK: nan, LAST: 87964.71
DATE: 2024.11.14, TIME: 22:40:00.322, BID: 87941.77, ASK: nan, LAST: nan
DATE: 2024.11.14, TIME: 22:40:00.423, BID: nan, ASK: nan, LAST: 87964.0
DATE: 2024.11.14, TIME: 22:40:00.423, BID: nan, ASK: 87986.22, LAST: nan
DATE: 2024.11.14, TIME: 22:40:00.724, BID: nan, ASK: nan, LAST: 87966.94
DATE: 2024.11.14, TIME: 22:40:00.724, BID: nan, ASK: 87992.11, LAST: nan
DATE: 2024.11.14, TIME: 22:40:00.825, BID: nan, ASK: nan, LAST: 87945.11
DATE: 2024.11.14, TIME: 22:40:00.825, BID: 87898.11, ASK: nan, LAST: nan
DATE: 2024.11.14, TIME: 22:40:00.925, BID: nan, ASK: nan, LAST: 87931.27
DATE: 2024.11.14, TIME: 22:40:00.925, BID: nan,