Use of reinforcement Learning along with Harmonic retracement models combined with technical indicators.

Reward is based on whether the prices move along with the predicition.


In [8]:
import pandas as pd
import numpy as np
from scipy.signal import argrelextrema
import matplotlib.pyplot as plt
from untrade.client import Client
import plotly.graph_objects as go


data = pd.read_csv(
    '/Users/tejasmacipad/Desktop/Final_inter_IIT_submission/BTC/BTC_2019_2023_30m.csv',
    # parse_dates=['datetime'],  # Parse the 'datetime' column as datetime
    # index_col='datetime'       # Use 'datetime' as the index
)

print(data.head())

   Unnamed: 0             datetime      open      high       low     close  \
0           0  2019-09-08 17:30:00  10000.00  10000.00  10000.00  10000.00   
1           1  2019-09-08 18:00:00  10000.00  10000.00  10000.00  10000.00   
2           2  2019-09-08 18:30:00  10000.00  10000.00  10000.00  10000.00   
3           3  2019-09-08 19:00:00  10344.77  10357.53  10342.90  10354.62   
4           4  2019-09-08 19:30:00  10354.62  10357.35  10337.43  10340.12   

    volume  
0    0.002  
1    0.000  
2    0.000  
3  136.177  
4  335.482  


In [None]:

def extract_extremes(price_values, i, order_dec):
    max_idx = list(argrelextrema(price_values[:i], np.greater, order=order_dec)[0])
    min_idx = list(argrelextrema(price_values[:i], np.less, order=order_dec)[0])
    idx = max_idx + min_idx + [len(price_values[:i]) - 1]
    idx.sort()
    current_idx = idx[-5:]
    current_pat = price_values[current_idx]
    return current_idx, current_pat

def identify_pattern(current_idx, current_pat, err_allowed, ab, bc, cd):
    XA, AB, BC, CD = np.diff(current_pat)
    
    AB_range = np.array([ab[0] - err_allowed, ab[1] + err_allowed]) * abs(XA)
    BC_range = np.array([bc[0] - err_allowed, bc[1] + err_allowed]) * abs(AB)
    CD_range = np.array([cd[0] - err_allowed, cd[1] + err_allowed]) * abs(BC)

    abs_AB, abs_BC, abs_CD = map(abs, (AB, BC, CD))

    if XA > 0 and AB < 0 and BC > 0 and CD < 0:
        if AB_range[0] < abs_AB < AB_range[1] and BC_range[0] < abs_BC < BC_range[1] and CD_range[0] < abs_CD < CD_range[1]:
            return 1, current_idx
    elif XA < 0 and AB > 0 and BC < 0 and CD > 0:
        if AB_range[0] < abs_AB < AB_range[1] and BC_range[0] < abs_BC < BC_range[1] and CD_range[0] < abs_CD < CD_range[1]:
            return -1, current_idx
    return np.nan, []

# Define all patterns
harmonic_patterns = {
    "Butterfly": ([0.786, 0.786], [0.382, 0.886], [1.618, 2.618]),
    "Gartley": ([0.618, 0.618], [0.382, 0.886], [1.272, 1.618]),
    "Cypher": ([0.382, 0.618], [1.272, 1.414], [0.786, 1.786]),
    "Bat": ([0.382, 0.5], [0.382, 0.886], [1.618, 2.618]),
    "AltBat": ([0.382, 0.382], [0.382, 0.886], [2.0, 3.618]),
    "Crab": ([0.382, 0.618], [0.382, 0.886], [2.24, 3.618]),
    "DeepCrab": ([0.886, 0.886], [0.382, 0.886], [2.618, 3.618]),
    "Shark": ([0.382, 0.886], [1.13, 1.618], [1.618, 2.24]),
    "WhiteSwan": ([1.382, 2.618], [0.236, 0.5], [1.128, 2.0]),
    "BlackSwan": ([0.382, 0.724], [2.0, 4.237], [0.5, 0.886])
}

def detect_harmonic_pattern(price, order_dec=5, err_allowed=0.1):
    signals = {pattern: [] for pattern in harmonic_patterns}
    patterns = {pattern: [] for pattern in harmonic_patterns}
    
    for i in range(100, len(price)):
        current_idx, current_pat = extract_extremes(price.values, i, order_dec)

        for pattern_name, ratios in harmonic_patterns.items():
            pattern_signal, pattern_idx = identify_pattern(current_idx, current_pat, err_allowed, *ratios)
            signals[pattern_name].append(pattern_signal)
            if not np.isnan(pattern_signal):
                patterns[pattern_name].append((pattern_signal, pattern_idx))
    
    return signals, patterns

def plot_harmonic_patterns(df, patterns):
    fig = go.Figure()
    fig.add_trace(go.Candlestick(x=df.index, 
                                 open=df['open'], 
                                 high=df['high'], 
                                 low=df['low'], 
                                 close=df['close'], 
                                 name='Candlesticks'))
    
    colors = {
        "Butterfly": "blue", "Bat": "green", "AltBat": "purple",
        "Crab": "red", "DeepCrab": "pink", "Cypher": "orange",
        "Gartley": "cyan", "Shark": "yellow", "WhiteSwan": "black", "BlackSwan": "brown"
    }
    
    for pattern_name, pattern_list in patterns.items():
        for signal, indices in pattern_list:
            x_vals = df.index[indices]
            y_vals = df.close.iloc[indices]
            fig.add_trace(go.Scatter(
                x=x_vals, y=y_vals, mode='lines+markers',
                line=dict(color=colors[pattern_name], width=2),
                marker=dict(size=8, color=colors[pattern_name]),
                name=f"{pattern_name} {'Bullish' if signal == 1 else 'Bearish'}"
            ))

    fig.update_layout(title='Harmonic Patterns',
                      xaxis_title='Date',
                      yaxis_title='Price',
                      xaxis_rangeslider_visible=False)
    fig.show()

# Example Usage
# Assuming df is a Pandas DataFrame with 'close' prices and OHLC data
signals, patterns = detect_harmonic_pattern(data['close'])
# plot_harmonic_patterns(data, patterns)

In [4]:
pat, sig = signals, patterns
signals, patterns = pat, sig

In [11]:
import torch
import torch.nn as nn
import torch.optim as optim
import gym
import numpy as np
import pandas as pd
from gym import spaces
import random
from torch.cuda.amp import autocast, GradScaler
from scipy.signal import argrelextrema
from untrade.client import Client

# Harmonic Pattern Detection Functions
def extract_extremes(price_values, i, order_dec):
    max_idx = list(argrelextrema(price_values[:i], np.greater, order=order_dec)[0])
    min_idx = list(argrelextrema(price_values[:i], np.less, order=order_dec)[0])
    idx = max_idx + min_idx + [len(price_values[:i]) - 1]
    idx.sort()
    current_idx = idx[-5:]
    current_pat = price_values[current_idx]
    return current_idx, current_pat

def identify_pattern(current_idx, current_pat, err_allowed, ab, bc, cd):
    XA, AB, BC, CD = np.diff(current_pat)
    
    AB_range = np.array([ab[0] - err_allowed, ab[1] + err_allowed]) * abs(XA)
    BC_range = np.array([bc[0] - err_allowed, bc[1] + err_allowed]) * abs(AB)
    CD_range = np.array([cd[0] - err_allowed, cd[1] + err_allowed]) * abs(BC)

    abs_AB, abs_BC, abs_CD = map(abs, (AB, BC, CD))

    if XA > 0 and AB < 0 and BC > 0 and CD < 0:
        if AB_range[0] < abs_AB < AB_range[1] and BC_range[0] < abs_BC < BC_range[1] and CD_range[0] < abs_CD < CD_range[1]:
            return 1, current_idx
    elif XA < 0 and AB > 0 and BC < 0 and CD > 0:
        if AB_range[0] < abs_AB < AB_range[1] and BC_range[0] < abs_BC < BC_range[1] and CD_range[0] < abs_CD < CD_range[1]:
            return -1, current_idx
    return np.nan, []

harmonic_patterns = {
    "Butterfly": ([0.786, 0.786], [0.382, 0.886], [1.618, 2.618]),
    "Gartley": ([0.618, 0.618], [0.382, 0.886], [1.272, 1.618]),
    "Cypher": ([0.382, 0.618], [1.272, 1.414], [0.786, 1.786]),
    "Bat": ([0.382, 0.5], [0.382, 0.886], [1.618, 2.618]),
    "AltBat": ([0.382, 0.382], [0.382, 0.886], [2.0, 3.618]),
    "Crab": ([0.382, 0.618], [0.382, 0.886], [2.24, 3.618]),
    "DeepCrab": ([0.886, 0.886], [0.382, 0.886], [2.618, 3.618]),
    "Shark": ([0.382, 0.886], [1.13, 1.618], [1.618, 2.24]),
    "WhiteSwan": ([1.382, 2.618], [0.236, 0.5], [1.128, 2.0]),
    "BlackSwan": ([0.382, 0.724], [2.0, 4.237], [0.5, 0.886])
}

def detect_harmonic_pattern(price, order_dec=5, err_allowed=0.1):
    signals = {pattern: [] for pattern in harmonic_patterns}
    patterns = {pattern: [] for pattern in harmonic_patterns}
    
    for i in range(100, len(price)):  # 100-period offset
        current_idx, current_pat = extract_extremes(price.values, i, order_dec)

        for pattern_name, ratios in harmonic_patterns.items():
            pattern_signal, pattern_idx = identify_pattern(current_idx, current_pat, err_allowed, *ratios)
            signals[pattern_name].append(pattern_signal)
            if not np.isnan(pattern_signal):
                patterns[pattern_name].append((pattern_signal, pattern_idx))
    
    return signals, patterns

signals, patterns = detect_harmonic_pattern(data['close'])

In [14]:
##############################
# TECHNICAL INDICATOR HELPERS#
##############################
def calculate_rsi(series, period=14):
    delta = series.diff()
    gain = delta.copy()
    loss = delta.copy()
    gain[gain < 0] = 0
    loss[loss > 0] = 0
    avg_gain = gain.rolling(window=period).mean()
    avg_loss = loss.abs().rolling(window=period).mean()
    rs = avg_gain / avg_loss
    return 100 - (100 / (1 + rs))

def calculate_atr(df, period=14):
    df['TR'] = pd.DataFrame({
        'hl': df['high'] - df['low'],
        'hc': abs(df['high'] - df['close'].shift(1)),
        'lc': abs(df['low'] - df['close'].shift(1))
    }).max(axis=1)
    return df['TR'].rolling(window=period).mean()

##############################
# TRADING ENVIRONMENT        #
##############################
class EnhancedTradingEnv(gym.Env):
    def __init__(self, df):
        super().__init__()
        if len(df) < 100:
            raise ValueError("Input data must have at least 100 rows for the 100-row offset.")
        self.df = df.copy()
        self.current_step = 0
        self.cash = 100000
        self.position = 0
        self.entry_price = 0

        # Add harmonic pattern columns – each pattern gets its own column.
        global signals, patterns
        signals, _ = signals, patterns
        for pattern in harmonic_patterns:
            arr = np.full(len(self.df), np.nan)
            valid = np.array(signals[pattern])
            # Preserve the 100-row offset.
            if len(valid) > 0 and len(arr) >= 100 + len(valid):
                arr[100:100+len(valid)] = valid
            self.df[f'pattern_{pattern}'] = arr

        # Add technical indicators: RSI and ATR.
        self.df['RSI'] = calculate_rsi(self.df['close'])
        self.df['ATR'] = calculate_atr(self.df)
        # Instead of dropping rows (which can lead to an empty DataFrame), fill forward.
        self.df = self.df.fillna(method='ffill').reset_index(drop=True)

        # Use only numeric columns for observations.
        self.numeric_features = self.df.select_dtypes(include=[np.number]).columns
        self.observation_space = spaces.Box(
            low=-np.inf, high=np.inf,
            shape=(len(self.numeric_features),), dtype=np.float32
        )
        self.action_space = spaces.Discrete(3)

    def reset(self):
        self.current_step = 0
        self.cash = 100000
        self.position = 0
        self.entry_price = 0
        return self._next_observation()

    def _next_observation(self):
        if self.current_step >= len(self.df):
            self.current_step = len(self.df) - 1
        return self.df[self.numeric_features].iloc[self.current_step].values.astype(np.float32)

    def step(self, action):
        current_price = self.df.iloc[self.current_step]['close']
        reward = 0
        done = self.current_step >= len(self.df) - 1
        if action == 1 and self.position == 0:  # Buy
            self.position = 1
            self.entry_price = current_price * 1.0002  # include transaction cost
        elif action == 2 and self.position == 1:  # Sell
            pnl = (current_price * 0.9998 - self.entry_price)
            reward = np.log1p(pnl / self.cash)
            self.cash += pnl
            self.position = 0
        self.current_step += 1
        done = done or (self.cash < 10000)
        return self._next_observation(), reward, done, {}

##############################
# DQN AGENT WITH BATCHNORM   #
##############################
class TurboDQNAgent:
    def __init__(self, state_size, action_size):
        self.device = torch.device("mps" if torch.backends.mps.is_available() else "cuda")
        self.model = nn.Sequential(
            nn.Linear(state_size, 128),
            nn.BatchNorm1d(128),
            nn.ReLU(),
            nn.Linear(128, 256),
            nn.BatchNorm1d(256),
            nn.ReLU(),
            nn.Linear(256, action_size)
        ).to(self.device)
        self.optimizer = optim.AdamW(self.model.parameters(), lr=0.0001)
        self.scaler = GradScaler()
        self.memory = []
        self.gamma = 0.99
        self.epsilon = 1.0
        self.epsilon_min = 0.01
        self.epsilon_decay = 0.97

    def act(self, state):
        if np.random.rand() <= self.epsilon:
            return random.randrange(3)
        # Switch temporarily to eval mode to avoid batchnorm errors on batch-size=1.
        self.model.eval()
        state_tensor = torch.FloatTensor(state).unsqueeze(0).to(self.device)
        with torch.no_grad():
            q_values = self.model(state_tensor)
        self.model.train()
        return q_values.argmax().item()

    def replay(self, batch_size):
        if len(self.memory) < batch_size:
            return
        batch = random.sample(self.memory, batch_size)
        states, actions, rewards, next_states, dones = zip(*batch)
        states = torch.FloatTensor(np.array(states)).to(self.device)
        actions = torch.LongTensor(actions).to(self.device)
        rewards = torch.FloatTensor(rewards).to(self.device)
        next_states = torch.FloatTensor(np.array(next_states)).to(self.device)
        dones = torch.FloatTensor(dones).to(self.device)
        with autocast():
            current_q = self.model(states).gather(1, actions.unsqueeze(1))
            next_q = self.model(next_states).max(1)[0].detach()
            target_q = rewards + (1 - dones) * self.gamma * next_q
            loss = nn.SmoothL1Loss()(current_q.squeeze(), target_q)
        self.optimizer.zero_grad()
        self.scaler.scale(loss).backward()
        torch.nn.utils.clip_grad_norm_(self.model.parameters(), 1.0)
        self.scaler.step(self.optimizer)
        self.scaler.update()
        self.epsilon = max(self.epsilon_min, self.epsilon * self.epsilon_decay)

##############################
# PROCESS DATA FUNCTION      #
##############################
def process_data(df, agent):
    df = df.copy()
    df['Signal'] = 0
    df['trade'] = 0
    df['SL'] = np.nan
    df['TP'] = np.nan
    df['trade_type'] = "square-off"
    # Recompute harmonic patterns (with offset preserved)
    global signals, patterns
    signals, _ = signals, patterns
    for pattern in harmonic_patterns:
        arr = np.full(len(df), np.nan)
        valid = np.array(signals[pattern])
        if len(valid) > 0 and len(arr) >= 100 + len(valid):
            arr[100:100+len(valid)] = valid
        df[f'pattern_{pattern}'] = arr
    df['ATR'] = calculate_atr(df)
    df['RSI'] = calculate_rsi(df['close'])
    # Build an environment from this processed DataFrame.
    env = EnhancedTradingEnv(df)
    state = env.reset()
    # Use the agent to simulate through the environment; update trade signals based on each step.
    while True:
        action = agent.act(state)
        idx = env.current_step  # index in env.df (post fillna)
        if idx < len(env.df):
            if action == 1:
                df.loc[df.index[idx], 'Signal'] = 1
                df.loc[df.index[idx], 'trade'] = 1
                df.loc[df.index[idx], 'trade_type'] = "long"
                df.loc[df.index[idx], 'SL'] = df['close'].iloc[idx] - 1.5 * df['ATR'].iloc[idx]
                df.loc[df.index[idx], 'TP'] = df['close'].iloc[idx] + 3 * df['ATR'].iloc[idx]
            elif action == 2:
                df.loc[df.index[idx], 'Signal'] = -1
                df.loc[df.index[idx], 'trade'] = -1
                df.loc[df.index[idx], 'trade_type'] = "short"
                df.loc[df.index[idx], 'SL'] = df['close'].iloc[idx] + 1.5 * df['ATR'].iloc[idx]
                df.loc[df.index[idx], 'TP'] = df['close'].iloc[idx] - 3 * df['ATR'].iloc[idx]
        next_state, reward, done, _ = env.step(action)
        if done:
            break
        state = next_state
    return df

##############################
# POST-PROCESS & BACKTEST    #
##############################
def strat(data):
    signal = []
    prev = None
    for value in data["Signal"]:
        if value == prev:
            signal.append(0)
        else:
            signal.append(value)
        prev = value
    data["signals"] = signal
    data = data[['datetime', 'open', 'high', 'low', 'close', 'volume', 'signals', 'trade_type']]
    return data

def perform_backtest(csv_file_path):
    client = Client()
    result = client.backtest(
        jupyter_id="vraj2811",  # your Jupyter ID
        file_path=csv_file_path,
        leverage=1,  # Adjust leverage if needed
    )
    return result

##############################
# MAIN EXECUTION             #
##############################
if __name__ == "__main__":
    data = pd.read_csv("/Users/tejasmacipad/Desktop/Final_inter_IIT_submission/BTC/BTC_2019_2023_30m.csv", parse_dates=['datetime'])
    # Precompute ATR for consistency (OPTIONAL)
    data['ATR'] = calculate_atr(data)
    
    # Initialize environment and agent (note: environment processing uses a 100-row offset)
    env = EnhancedTradingEnv(data)
    agent = TurboDQNAgent(env.observation_space.shape[0], env.action_space.n)
    
    episodes = 10
    batch_size = 512
    for episode in range(episodes):
        state = env.reset()
        total_reward = 0
        while True:
            action = agent.act(state)
            next_state, reward, done, _ = env.step(action)
            agent.memory.append((state, action, reward, next_state, done))
            state = next_state
            total_reward += reward
            if len(agent.memory) >= batch_size:
                agent.replay(batch_size)
            if done:
                break
        print(f"Episode {episode+1}/{episodes} | Total Return: {total_reward:.2f}")
    
    processed_df = process_data(data, agent)
    filtered_df = strat(processed_df)
    filtered_df.to_csv("trading_signals.csv", index=False)
    backtest_results = perform_backtest("trading_signals.csv")
    for result in backtest_results:
        print(f"{result['metric']}: {result['value']}")


  self.df = self.df.fillna(method='ffill').reset_index(drop=True)
  self.scaler = GradScaler()
  dones = torch.FloatTensor(dones).to(self.device)
  with autocast():


Episode 1/10 | Total Return: -0.12
Episode 2/10 | Total Return: 0.27
Episode 3/10 | Total Return: 0.06
Episode 4/10 | Total Return: 0.28


KeyboardInterrupt: 

In [None]:
filtered_data = strat(processed_data)
filtered_data.to_csv("final_signals.csv", index=False)

# Backtest
backtest_results = perform_backtest("final_signals.csv")
for result in backtest_results:
    print(result)