In [15]:
# This is necessary to recognize the modules
import os
import sys
from decimal import Decimal
import warnings
import pandas as pd
import joblib


warnings.filterwarnings("ignore")

root_path = os.path.abspath(os.path.join(os.getcwd(), '../..'))
sys.path.append(root_path)

from core.data_sources.clob import CLOBDataSource

# Get trading rules and candles
clob = CLOBDataSource()

# Constants
CONNECTOR_NAME = "binance"
INTERVAL = "1s"
TRADING_PAIR = "BTC-USDT"
DAYS = 2

await clob.get_candles_last_days(CONNECTOR_NAME, TRADING_PAIR, INTERVAL, DAYS)
clob.dump_candles_cache(root_path)
candles = clob.candles_cache[("binance", "BTC-USDT", "1s")]
df = candles.data

2025-05-15 05:21:49,717 - asyncio - ERROR - Unclosed client session
client_session: <aiohttp.client.ClientSession object at 0x7fadabff9580>
2025-05-15 05:21:58,083 - asyncio - ERROR - Task was destroyed but it is pending!
task: <Task pending name='Task-7' coro=<safe_wrapper() running at /home/dominhnhat/miniconda3/envs/quants-lab/lib/python3.12/site-packages/hummingbot/core/utils/async_utils.py:9> wait_for=<Future pending cb=[Task.task_wakeup()]>>


In [16]:
from typing import Optional

import numpy as np
import pandas as pd


def triple_barrier_method(df, tp=1.0, sl=1.0, tl=5, std_span: Optional[int] = 100, trade_cost=0.0006,  max_executors: int = 1):
    """Label candles with the triple-barrier method.

    The input frame is copied first, so the caller's DataFrame keeps its
    original index, columns and rows.

    Args:
        df: DataFrame with at least ``timestamp`` (parsed as epoch seconds),
            ``close`` and ``side`` columns.
        tp: take-profit distance, as a multiple of ``target``.
        sl: stop-loss distance, as a multiple of ``target``.
        tl: time limit in seconds added to each row's timestamp.
        std_span: rolling window (in rows) for the volatility target
            ``rolling_std(close) / close``; a falsy value uses a constant
            target of 0.01 instead.
        trade_cost: value subtracted from the return before taking its sign
            for ``real_class``.
        max_executors: accepted for interface compatibility; not used here.

    Returns:
        A new DataFrame indexed by timestamp with the added columns
        ``target``, ``tl``, ``stop_loss_time``, ``take_profit_time``,
        ``close_time``, ``close_type``, ``ret``, ``real_class``, ``tp`` and
        ``sl``. Rows whose ``target`` is NaN are removed.
    """
    # Everything below re-indexes, extends and truncates the frame; do that on
    # a private copy rather than on the object the caller handed in.
    df = df.copy()
    df.index = pd.to_datetime(df.timestamp, unit="s")

    if std_span:
        df["target"] = df["close"].rolling(std_span).std() / df["close"]
    else:
        df["target"] = 1 / 100
    df["tl"] = df.index + pd.Timedelta(seconds=tl)

    # The first rows of the rolling window have no volatility estimate.
    df.dropna(subset=["target"], inplace=True)

    df = apply_tp_sl_on_tl(df, tp=tp, sl=sl)

    df = get_bins(df, trade_cost)

    # Absolute barrier price levels implied by the relative target.
    df['tp'] = df['close'] * (1 + df['target'] * tp * df["side"])
    df['sl'] = df['close'] * (1 - df['target'] * sl * df["side"])

    return df

def get_bins(df, trade_cost):
    """Attach the realised return and its cost-adjusted sign to ``df``.

    Reads ``close``, ``tl``, ``close_time`` and ``side``; writes ``ret`` and
    ``real_class`` on the same frame and returns it.
    """
    # Price series over every entry time and every time-limit stamp,
    # forward-filling the close where a time-limit stamp has no candle.
    timeline = df.index.union(df['tl'].values).drop_duplicates()
    prices = df.close.reindex(timeline, method='ffill')

    # Side-adjusted return between the entry price and the exit price.
    exit_prices = prices.loc[df['close_time'].values].values
    entry_prices = prices.loc[df.index]
    df['ret'] = (exit_prices / entry_prices - 1) * df['side']

    # +1 / 0 / -1 depending on whether the return beats the trade cost.
    df['real_class'] = np.sign(df['ret'] - trade_cost)
    return df


def apply_tp_sl_on_tl(df: pd.DataFrame, tp: float, sl: float):
    """Find which barrier (take profit, stop loss or time limit) is hit first.

    For every row with ``side != 0`` the close prices from the row's own
    timestamp up to its ``tl`` stamp are converted to side-adjusted returns.
    The first time that return exceeds ``tp * target`` is the take-profit
    time; the first time it falls below ``-sl * target`` is the stop-loss time.

    Args:
        df: frame with ``close``, ``side``, ``target`` and ``tl`` columns.
            It is modified in place and also returned.
        tp: take-profit multiple of ``target``; a value <= 0 disables it.
        sl: stop-loss multiple of ``target``; a value <= 0 disables it.

    Returns:
        ``df`` with ``stop_loss_time``, ``take_profit_time``, ``close_time``
        (earliest of the three barriers) and ``close_type`` (1 take profit,
        -1 stop loss, 0 time limit).
    """
    events = df[df["side"] != 0].copy()
    if tp > 0:
        take_profit = tp * events['target']
    else:
        take_profit = pd.Series(index=df.index, dtype="float64")  # NaNs: barrier disabled
    if sl > 0:
        stop_loss = - sl * events['target']
    else:
        stop_loss = pd.Series(index=df.index, dtype="float64")  # NaNs: barrier disabled

    close = df["close"]
    stop_loss_times = []
    take_profit_times = []
    for loc, tl in events['tl'].fillna(df.index[-1]).items():
        # Side-adjusted return path from the entry row up to the time limit.
        path = (close.loc[loc:tl] / close.loc[loc] - 1) * events.at[loc, 'side']
        # .min() of an empty index is NaT, i.e. "barrier never touched".
        stop_loss_times.append(path[path < stop_loss.loc[loc]].index.min())
        take_profit_times.append(path[path > take_profit.loc[loc]].index.min())

    # Assign the columns once, after the loop. They then exist (all NaT) even
    # when there are no events, and the slow row-by-row writes are avoided.
    df['stop_loss_time'] = pd.Series(
        pd.DatetimeIndex(stop_loss_times), index=events.index
    ).reindex(df.index)
    df['take_profit_time'] = pd.Series(
        pd.DatetimeIndex(take_profit_times), index=events.index
    ).reindex(df.index)

    df["close_time"] = df[["tl", "take_profit_time", "stop_loss_time"]].dropna(how='all').min(axis=1)
    first_barrier = df[['take_profit_time', 'stop_loss_time', 'tl']].dropna(how='all').idxmin(axis=1)
    # Assign the mapped labels back explicitly: an in-place replace on
    # df['close_type'] is chained assignment and does not update df under
    # pandas Copy-on-Write.
    df['close_type'] = first_barrier.map({'take_profit_time': 1, 'stop_loss_time': -1, 'tl': 0})
    return df


In [17]:
import numpy as np
from sklearn.preprocessing import StandardScaler

# Add technical indicators using pandas_ta
import pandas_ta as ta

# Treat every candle as a long entry (side = +1) on a fresh copy of the raw
# candles, then label it with the triple-barrier method.
df_copy = df.assign(side=1)
df_with_tbm = triple_barrier_method(
    df_copy,
    tp=3.5,
    sl=3.5,
    tl=300,
    std_span=200,
    trade_cost=0.0000,
)

In [None]:
import numpy as np
import pandas as pd
from typing import Optional
from sklearn.metrics import accuracy_score
import torch

def process_data(df, loaded_scaler):
    """Build the model's feature frame from triple-barrier-labelled candles.

    Steps: append pandas_ta indicators, drop label/bookkeeping columns, add
    percentage returns of the OHLC prices, add the taker-buy volume ratio,
    drop rows containing NaN, then scale every numeric column except the
    ``close_type`` label with ``loaded_scaler``.

    Args:
        df: labelled candles. Must contain the OHLC columns, ``volume``,
            ``quote_asset_volume``, ``taker_buy_base_volume``,
            ``taker_buy_quote_volume``, ``close_type`` and the label columns
            listed in ``columns_to_drop``. Requires the ``.ta`` DataFrame
            accessor to be registered (``import pandas_ta``).
        loaded_scaler: an already-fitted scaler exposing ``transform``. It is
            only applied here, never refitted, so the statistics it was
            fitted with are the ones used.

    Returns:
        A new DataFrame holding scaled features plus the unscaled
        ``close_type`` column. ``df`` itself is not modified.
    """
    # Create a copy to work with
    df_with_indicators = df.copy()

    # Bollinger Bands with different lengths
    df_with_indicators.ta.bbands(length=20, std=2, append=True)  # Standard BB
    df_with_indicators.ta.bbands(length=50, std=2, append=True)  # Longer term BB

    # MACD with different parameters
    df_with_indicators.ta.macd(fast=12, slow=26, signal=9, append=True)  # Standard MACD
    df_with_indicators.ta.macd(fast=8, slow=21, signal=5, append=True)  # Faster MACD

    # RSI with different lengths
    df_with_indicators.ta.rsi(length=14, append=True)  # Standard RSI
    df_with_indicators.ta.rsi(length=21, append=True)  # Longer RSI

    # Moving averages
    df_with_indicators.ta.sma(length=20, append=True)  # Short MA
    df_with_indicators.ta.sma(length=50, append=True)  # Medium MA
    df_with_indicators.ta.ema(length=20, append=True)  # Short EMA
    df_with_indicators.ta.ema(length=50, append=True)  # Medium EMA

    # Volatility and momentum indicators
    df_with_indicators.ta.atr(length=14, append=True)  # ATR
    df_with_indicators.ta.stoch(k=14, d=3, append=True)  # Stochastic
    df_with_indicators.ta.adx(length=14, append=True)  # ADX

    # 1. Remove columns that are labels or bookkeeping, not features
    columns_to_drop = ['timestamp', 'taker_buy_base_volume', 'volume',
                    'close_time', 'real_class', 'ret', 'tp', 'sl', 'take_profit_time', 'stop_loss_time', 'tl', 'side']
    df_processed = df_with_indicators.drop(columns=columns_to_drop)

    # 2. Add percentage returns of the prices (the raw price columns are kept)
    price_columns = ['open', 'high', 'low', 'close']
    for col in price_columns:
        df_processed[f'{col}_ret'] = df_processed[col].pct_change()

    # 3. Create buy/sell volume ratio
    df_processed['buy_volume_ratio'] = df_processed['taker_buy_quote_volume'] / df_processed['quote_asset_volume']
    df_processed = df_processed.drop(columns=['taker_buy_quote_volume'])

    # 4. Drop any rows with NaN values (indicator warm-up and the first return)
    df_processed = df_processed.dropna()

    # 5. Numeric columns to scale; the target 'close_type' is left untouched.
    #    Filtering (instead of list.remove) does not raise when the label
    #    column happens not to be float64/int64.
    numeric_columns = [
        col
        for col in df_processed.select_dtypes(include=['float64', 'int64']).columns
        if col != 'close_type'
    ]

    # Nothing to scale: return early rather than handing the scaler an empty
    # frame or an empty column selection.
    if df_processed.empty or not numeric_columns:
        return df_processed

    # 6. Apply the *fitted* scaler. Using fit_transform here would refit it on
    #    the evaluation data and throw away the training-time statistics.
    df_processed[numeric_columns] = loaded_scaler.transform(df_processed[numeric_columns])
    return df_processed

def test(df, model, scaler, tp=1.0, sl=1.0, tl=5, std_span: Optional[int] = 100, trade_cost=0.0000, verbose: bool = False):
    """
    Backtest a trained PyTorch classifier on triple-barrier-labelled candles.

    Args:
        df: DataFrame already labelled with the triple-barrier method. It is
            passed through ``process_data`` for the features and also supplies
            the raw ``close`` price used for the simulated fills.
        model: callable taking a float32 tensor of shape (rows, features) and
            returning per-row class scores. argmax 0 means sell (-1),
            1 means hold (0), anything else means buy (+1).
        scaler: scaler object forwarded to ``process_data``.
        tp, sl, tl, std_span: kept for signature compatibility; not used here.
        trade_cost: amount added to the cost of each buy and subtracted from
            the proceeds of each sell.
        verbose: when True, print the portfolio state for every row.

    Returns:
        pnl: final cash minus the 1000.0 starting cash.
        num_trade: number of executed buys and sells.
        accuracy: share of evaluated rows whose signal equals ``close_type``
            (0.0 when no row was evaluated).
    """
    df_test = process_data(df.copy(), scaler)
    non_feature_columns = {'open', 'high', 'low', 'close', 'timestamp', 'tl', 'stop_loss_time',
                           'take_profit_time', 'close_time', 'close_type',
                           'real_class', 'ret', 'target_pct'}
    feature_columns = [col for col in df_test.columns if col not in non_feature_columns]
    print(len(feature_columns))

    X_test = df_test[feature_columns]
    labels = df_test['close_type'].to_numpy()

    X_test_tensor = torch.tensor(X_test.values, dtype=torch.float32)
    print(X_test_tensor.shape)

    # Run inference in eval mode (dropout off) and without autograd tracking,
    # then restore whatever mode the caller had the module in.
    is_module = isinstance(model, torch.nn.Module)
    was_training = model.training if is_module else False
    if is_module:
        model.eval()
    try:
        with torch.no_grad():
            entry_signals = model(X_test_tensor).detach().numpy()
    finally:
        if is_module:
            model.train(was_training)

    # Class index -> trading signal: 0 -> sell (-1), 1 -> hold (0), else buy (+1).
    predicted = entry_signals.argmax(axis=1)
    signals = np.where(predicted == 0, -1, np.where(predicted == 1, 0, 1))

    # process_data drops warm-up rows and scales 'close', so the raw fill price
    # must be looked up by index label, not by position in the original frame.
    close_prices = df.loc[df_test.index, "close"].to_numpy()

    correct = 0
    evaluated = 0
    bitcoin = 0.0
    pnl = Quota = 1000.0
    num_trade = 0
    buy_amount = 0.0001

    for signal, label, close_price in zip(signals, labels, close_prices):
        evaluated += 1
        if signal == label:
            correct += 1

        if verbose:
            print("pnl:", pnl, "bitcoin:", bitcoin, "close_price:", close_price, "signal:", signal)

        # Stop when there is neither cash for one more lot nor a position to sell.
        if pnl <= close_price * buy_amount and bitcoin == 0:
            print("Suffered from liquidation lmao", pnl, bitcoin)
            break

        if signal == 1:  # buy
            # NOTE(review): trade_cost is applied as an absolute amount here,
            # whereas the labelling code subtracts it from a fractional
            # return; confirm which unit is intended.
            cost = close_price * buy_amount + trade_cost
            if pnl >= cost:
                bitcoin += buy_amount
                pnl -= cost
                num_trade += 1

        elif signal == -1 and bitcoin > 0:  # sell
            proceeds = close_price * bitcoin - trade_cost
            pnl += proceeds
            bitcoin = 0.0
            num_trade += 1

    # NOTE(review): any bitcoin still held at the end is not marked to market,
    # so the returned pnl only reflects cash; confirm this is intended.
    # Accuracy is measured over evaluated rows, not trades; no trades no longer
    # causes a ZeroDivisionError.
    accuracy = correct / evaluated if evaluated else 0.0
    return pnl - Quota, num_trade, accuracy


# Usage example:
# assumes a trained model whose output encodes the entry signal.

df_test = df_with_tbm.copy()

# Locate the saved artifacts relative to the repository root instead of a
# user-specific absolute path, so the notebook runs on any checkout.
root_path = os.path.abspath(os.path.join(os.getcwd(), '../..'))
models_dir = os.path.join(root_path, 'research_notebooks', 'bitcoinenaitor', 'models')
scaler_path = os.path.join(models_dir, 'scaler.pkl')
model_path = os.path.join(models_dir, 'mlp.pth')
# NOTE: joblib.load unpickles the file and can execute arbitrary code; only
# load scaler files that come from a trusted source.
scaler = joblib.load(scaler_path)

import torch.nn as nn
class MLP(nn.Module):
    """Feed-forward classifier: input_dim -> 512 -> 256 -> 128 -> 64 -> 3.

    Each of the four hidden layers is a Linear layer followed by ReLU and
    Dropout(0.2); the final Linear layer outputs three raw scores per row.
    Attribute names (fc1..fc5, relu1..relu4, drop1..drop4) are part of the
    saved state-dict layout and must not be renamed.
    """

    def __init__(self, input_dim):
        super().__init__()

        # Hidden stage 1
        self.fc1 = nn.Linear(input_dim, 512)
        self.relu1 = nn.ReLU()
        self.drop1 = nn.Dropout(0.2)

        # Hidden stage 2
        self.fc2 = nn.Linear(512, 256)
        self.relu2 = nn.ReLU()
        self.drop2 = nn.Dropout(0.2)

        # Hidden stage 3
        self.fc3 = nn.Linear(256, 128)
        self.relu3 = nn.ReLU()
        self.drop3 = nn.Dropout(0.2)

        # Hidden stage 4
        self.fc4 = nn.Linear(128, 64)
        self.relu4 = nn.ReLU()
        self.drop4 = nn.Dropout(0.2)

        # Output head: three class scores
        self.fc5 = nn.Linear(64, 3)

    def forward(self, x):
        """Return the three raw class scores for each row of ``x``."""
        hidden_stages = (
            (self.fc1, self.relu1, self.drop1),
            (self.fc2, self.relu2, self.drop2),
            (self.fc3, self.relu3, self.drop3),
            (self.fc4, self.relu4, self.drop4),
        )
        for linear, activation, dropout in hidden_stages:
            x = dropout(activation(linear(x)))
        return self.fc5(x)

# Initialize the model
input_dim = 36  # number of feature columns
model = MLP(input_dim)
# Load the weights onto the CPU: the backtest converts the model output with
# .numpy(), which requires CPU tensors, and this also works when the
# checkpoint was saved from a GPU.
model.load_state_dict(torch.load(model_path, map_location="cpu"))
# Switch to inference mode so the Dropout layers are disabled; otherwise the
# predictions are randomly perturbed on every run.
model.eval()
pnl, num_trade, accuracy = test(df_test, model, scaler)

print(f"Total PNL: {pnl}, Number of Trades: {num_trade}, Accuracy: {accuracy * 100:.2f}%")



36
torch.Size([163368, 36])
