<a href="https://colab.research.google.com/github/Sreerag-Pillai/Data_Science/blob/main/Trading%20bot%20using%20Reinforcement%20Learning%20(Q%20Learning)%20SMA%20Cross%20Over%20strategy_.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# ChatGPT RL Model in Backtrader

In [None]:
pip install backtrader gym matplotlib pandas mplfinance plotly seaborn numpy scipy mplfinance yfinance


In [None]:
pip install backtrader --upgrade


# Processing Data

In [None]:
import pandas as pd

def load_and_preprocess_data(filename):
    """Read a price CSV and return it indexed by timestamp, oldest row first.

    The CSV must provide 'Date' and 'Time' columns. They are joined into a
    single timestamp that becomes the index (named 'datetime'), and the two
    source columns are removed from the result. The 'Open', 'High', 'Low',
    'Close' and 'Volume' columns, where present, are renamed to lower case.

    Parameters
    ----------
    filename : path or file-like accepted by ``pd.read_csv``.

    Returns
    -------
    pandas.DataFrame sorted by its 'datetime' index in ascending order.
    """
    raw = pd.read_csv(filename)

    # One timestamp per row, built from the separate date and time columns.
    stamps = pd.to_datetime(raw['Date'].astype(str) + ' ' + raw['Time'])

    lowercase_names = {
        'Open': 'open',
        'High': 'high',
        'Low': 'low',
        'Close': 'close',
        'Volume': 'volume',
    }

    prepared = (
        raw.assign(datetime=stamps)
        .set_index('datetime')
        .rename(columns=lowercase_names)
        .sort_index()
        .drop(['Date', 'Time'], axis=1)
    )
    return prepared

def compute_sma(data, window_size, column_name):
    """Return the simple moving average of one column.

    The average is taken over a trailing window of `window_size` rows of
    `data[column_name]`; the first `window_size - 1` entries are NaN because
    the window is not yet full there.
    """
    series = data[column_name]
    trailing_window = series.rolling(window=window_size)
    return trailing_window.mean()

# Window lengths (in rows) for the fast and slow moving averages.
fast_window, slow_window = 1, 3

data = load_and_preprocess_data('/content/Dataset 2021 - 5 Minute Data.csv')

# Attach both SMAs of the close price, then drop every row that still holds a
# NaN (this includes the leading rows where the slow window is not yet full).
data = data.assign(
    fast_sma=compute_sma(data, fast_window, 'close'),
    slow_sma=compute_sma(data, slow_window, 'close'),
).dropna()
data.head()


# Integrating a Q-learning agent into the SMA crossover strategy

# Training the agent

Step 1: Define the Q-learning Agent

In [None]:
import numpy as np

class QLearningAgent:
    """Tabular Q-learning agent with an epsilon-greedy action policy.

    Q-values live in the dict `Q`, keyed by `(state, action)`; any pair that
    has never been updated is treated as having value 0.0.

    Parameters
    ----------
    alpha : step size applied to the temporal-difference error in `learn`.
    gamma : discount applied to the best next-state Q-value.
    epsilon : probability of picking a random action in `choose_action`.
    actions : sequence of the actions the agent may take.
    """

    def __init__(self, alpha, gamma, epsilon, actions):
        self.alpha, self.gamma, self.epsilon = alpha, gamma, epsilon
        self.actions = actions
        self.Q = {}  # (state, action) -> learned value

    def choose_action(self, state):
        """Pick a random action with probability epsilon, else the greedy one.

        On ties the greedy choice is the first tied action in `self.actions`
        (the behaviour of `np.argmax`).
        """
        explore = np.random.uniform(0, 1) < self.epsilon
        if explore:
            return np.random.choice(self.actions)

        scores = [self.get_Q(state, candidate) for candidate in self.actions]
        best_index = np.argmax(scores)
        return self.actions[best_index]

    def learn(self, state, action, reward, next_state):
        """Move Q(state, action) toward reward + gamma * max Q(next_state, .)."""
        current = self.get_Q(state, action)
        best_next = max([self.get_Q(next_state, a) for a in self.actions])
        target = reward + self.gamma * best_next
        self.update_Q(state, action, current + self.alpha * (target - current))

    def get_Q(self, state, action):
        """Return the stored value for (state, action), or 0.0 if unseen."""
        key = (state, action)
        return self.Q.get(key, 0.0)

    def update_Q(self, state, action, value):
        """Store `value` as the Q-value of (state, action)."""
        key = (state, action)
        self.Q[key] = value


Step 2: Define the Training Loop

In [None]:
def _classify_trend(current, reference, above, below, equal):
    """Return `above`, `below` or `equal` according to how `current` compares with `reference`."""
    if current > reference:
        return above
    if current < reference:
        return below
    return equal


def train_agent_on_data(data, agent, epochs=1):
    """Run Q-learning updates over a price history.

    For every row i from 1 to len(data) - 2 the agent observes the state
    `(price_trend, sma_trend)`, picks an action, receives a reward based on
    the move from close[i] to close[i+1], and is updated with the state
    observed at row i+1.

    Parameters
    ----------
    data : DataFrame with 'close', 'fast_sma' and 'slow_sma' columns. Rows are
        addressed by position, so any index type is accepted.
    agent : object exposing `choose_action(state)` and
        `learn(state, action, reward, next_state)`.
    epochs : number of full passes over `data`.

    Rewards
    -------
    action 1 (Buy)  -> close[i+1] - close[i]
    action 2 (Sell) -> close[i] - close[i+1]
    anything else   -> 0
    """
    # Pull the columns out once as arrays: positional access is explicit
    # (integer keys on a non-integer-indexed Series are deprecated in pandas)
    # and the per-step Series lookups are avoided.
    close = data['close'].to_numpy()
    fast = data['fast_sma'].to_numpy()
    slow = data['slow_sma'].to_numpy()

    def state_at(i):
        # Same labelling for the current and the next state.
        return (
            _classify_trend(close[i], close[i - 1], 'rising', 'falling', 'stable'),
            _classify_trend(fast[i], slow[i], 'bullish', 'bearish', 'neutral'),
        )

    for _ in range(epochs):
        # Start at 1 (a previous close is needed) and stop one row early
        # (the reward and next state need row i+1).
        for i in range(1, len(close) - 1):
            state = state_at(i)

            action = agent.choose_action(state)

            if action == 1:  # Buy
                reward = close[i + 1] - close[i]
            elif action == 2:  # Sell
                reward = close[i] - close[i + 1]
            else:  # Hold
                reward = 0

            agent.learn(state, action, reward, state_at(i + 1))


Step 3: Splitting Data & Training

In [None]:
# Seed NumPy's global RNG so the agent's epsilon-greedy exploration, and
# therefore the learned Q-table, is the same on every fresh run.
np.random.seed(42)

# Chronological 80/20 split: the first 80% of rows train the agent and the
# remaining rows are held out for the backtest.
train_size = int(0.8 * len(data))
train_data = data.iloc[:train_size]
test_data = data.iloc[train_size:]

# Train the agent
agent = QLearningAgent(alpha=0.1, gamma=0.9, epsilon=0.1, actions=[0, 1, 2])  # 0:Hold, 1:Buy, 2:Sell
train_agent_on_data(train_data, agent, epochs=10)

# Keep a handle on the learned Q-table for the backtest strategy.
trained_Q = agent.Q

## 1. Backtesting the Q-learning-based Trading Strategy



In [None]:
import backtrader as bt
import datetime as dt

class QLearningStrategy(bt.Strategy):
    """Backtrader strategy that trades on the actions of a trained Q-learning agent.

    On each bar the strategy builds the state `(price_trend, sma_trend)`,
    asks the agent for an action (0: Hold, 1: Buy, 2: Sell) and, when the
    action is compatible with the current position, submits a 3-unit order.
    The agent uses the module-level `trained_Q` table.

    Params
    ------
    stop_loss : fraction of the starting portfolio value that may be lost
        before the position is closed and no new trades are opened.
    fast_period, slow_period : SMA periods. They must match the windows used
        to build the training features for the learned Q-table to apply.
    epsilon : exploration rate of the agent during the backtest. Defaults to
        0.0 so the backtest follows the learned policy instead of taking
        random actions; pass a positive value to re-enable exploration.
    """

    params = (
        ('stop_loss', 0.20),  # 20% Stop Loss
        ('fast_period', 1),
        ('slow_period', 3),
        ('epsilon', 0.0),
    )

    def __init__(self):
        self.fast_sma = bt.indicators.SimpleMovingAverage(
            self.data.close, period=self.params.fast_period)
        self.slow_sma = bt.indicators.SimpleMovingAverage(
            self.data.close, period=self.params.slow_period)

        self.agent = QLearningAgent(
            alpha=0.1, gamma=0.9, epsilon=self.params.epsilon, actions=[0, 1, 2])
        self.agent.Q = trained_Q

        self.order = None
        self.start_cash = self.broker.getvalue()

    @staticmethod
    def _classify_trend(current, reference, above, below, equal):
        """Return `above`, `below` or `equal` according to how `current` compares with `reference`."""
        if current > reference:
            return above
        if current < reference:
            return below
        return equal

    def notify_order(self, order):
        """Clear the pending-order marker once the broker has finished with an order."""
        if order.status in (order.Submitted, order.Accepted):
            # Still working at the broker; keep blocking new orders.
            return
        # Completed, canceled, rejected, margin or expired: trading may resume.
        self.order = None

    def next(self):
        # Ensure there's enough data for the slow SMA
        if len(self.data) < self.params.slow_period:
            return

        # Only trade during regular market hours: 9 am to 2:45 pm
        bar_time = self.data.datetime.time()
        if bar_time < dt.time(9) or bar_time > dt.time(14, 45):
            return

        if self.order:  # Check if an order is pending
            return

        # Stop loss: once the portfolio has lost more than the allowed
        # fraction, flatten any open position and open no new trades.
        if self.broker.getvalue() < (1 - self.params.stop_loss) * self.start_cash:
            if self.position:
                self.order = self.close()
            return

        # Build the state with the same three-way labels used in training, so
        # equal values map to 'stable' / 'neutral' rather than being folded
        # into 'falling' / 'bearish'.
        price_trend = self._classify_trend(
            self.data.close[0], self.data.close[-1], 'rising', 'falling', 'stable')
        sma_trend = self._classify_trend(
            self.fast_sma[0], self.slow_sma[0], 'bullish', 'bearish', 'neutral')
        state = (price_trend, sma_trend)

        action = self.agent.choose_action(state)

        if action == 1 and not self.position:
            self.order = self.buy(size=3)

        elif action == 2 and self.position:
            self.order = self.sell(size=3)

In [None]:
class BuyAndHoldStrategy(bt.Strategy):
    """Baseline strategy: submit a 3-unit buy whenever no position is held."""

    def next(self):
        # Already holding: nothing more to do on this bar.
        if self.position:
            return
        self.buy(size=3)


In [None]:
# Load test data into a data feed; the same feed object is used by both runs.
data_feed = bt.feeds.PandasData(dataname=test_data)


def _run_backtest(strategy_cls, start_msg, end_msg):
    """Build a Cerebro for `strategy_cls`, run it, and return the Cerebro.

    The broker starts with 100000.0 cash and a 0.001 commission. `start_msg`
    and `end_msg` are %-format strings that receive the portfolio value
    before and after the run.
    """
    engine = bt.Cerebro()
    engine.adddata(data_feed)
    engine.addstrategy(strategy_cls)

    # Add constraints and set initial cash
    engine.broker.set_cash(100000.0)
    engine.broker.setcommission(commission=0.001)

    print(start_msg % engine.broker.getvalue())
    engine.run()
    print(end_msg % engine.broker.getvalue())
    return engine


# Q-learning strategy
cerebro = _run_backtest(
    QLearningStrategy,
    'Starting Portfolio Value (QLearning): %.2f',
    'Ending Portfolio Value (QLearning): %.2f',
)

# Plot the performance
cerebro.plot()

# Now for the Buy and Hold Strategy
cerebro = _run_backtest(
    BuyAndHoldStrategy,
    '\nStarting Portfolio Value (Buy and Hold): %.2f',
    'Ending Portfolio Value (Buy and Hold): %.2f',
)

cerebro.plot()
