# (4) Labelling 

The labelling process is applied to the SMA (21, 9) entry points. It uses the limit barriers that were not used in labelling V1, and the vertical barrier is determined from a simple Bayesian waiting-time model for trade durations based on an exponential distribution. 

## (4.1) Triple Barrier Method

Uses the SMA, EMA and RSI market signals for entering and exiting the market. This returns all the data that was recorded during the labelling process.

In [11]:
class triple_barrier_method():

    """
    Book-keeping object for one position tracked with the triple-barrier method.

    The instance stores the three barrier values it is given and collects the
    ticks recorded while the position is open. It never tests the barriers
    itself: the caller compares prices / timestamps against the getters and
    then calls ``end_barrier`` to summarise the position.

    Parameters
    ----------
    trade : first tick of the position. The trade list is later converted with
        ``pd.DataFrame`` and read through the "timestamp" and "price" columns.
    ptSl : sequence of three values, stored unchanged:
        ptSl[0]: upper barrier.
        ptSl[1]: lower barrier.
        ptSl[2]: time barrier.
        No multiplication by a target and no special handling of 0 is done
        here; the labelling cell of this notebook passes absolute price levels
        and a deadline in epoch seconds.
    signal : dict of indicator values at entry; merged into the "entry" record.
    """
    
    def __init__(self,trade,ptSl,signal):
        self.trades = []
        self.trades.append(trade)

        self.entry_signal = signal
        # Defined up front so the attribute exists before end_barrier() runs,
        # matching the set of attributes that remove_all_data() resets.
        self.exit_signal = None
        self.upper_barrier = ptSl[0]
        self.lower_barrier = ptSl[1]
        self.time_barrier = ptSl[2]
        
    def add_trade(self,tickdata):
        """Append one more tick to the position."""
        self.trades.append(tickdata)
    
    def get_trade_list(self):
        """Return the list of ticks recorded so far (the live list, not a copy)."""
        return self.trades

    def get_upper_barrier(self):
        """Return the stored upper barrier (None after remove_all_data)."""
        return self.upper_barrier

    def get_lower_barrier(self):
        """Return the stored lower barrier (None after remove_all_data)."""
        return self.lower_barrier

    def get_time_barrier(self):
        """Return the stored time barrier (None after remove_all_data)."""
        return self.time_barrier
    
    def end_barrier(self,signal):
        """
        Close the position and summarise it.

        Parameters
        ----------
        signal : dict of indicator values at exit; merged into the "exit" record.

        Returns
        -------
        0.0 when fewer than two ticks were recorded (the trade list is left
        untouched in that case). Otherwise a one-row DataFrame with the columns
        "entry", "exit", "change", "upper barrier", "lower barrier" and
        "trade duration", after which the trade list is emptied.
        """
        tickdata = pd.DataFrame(self.trades)
        self.exit_signal = signal
        
        # A relative change needs a first and a last tick.
        if len(tickdata) < 2:
            return 0.0

        first =  (tickdata.iloc[0]["price"])
        last = (tickdata.iloc[-1]["price"])
        # Relative price change between the first and the last recorded tick.
        change = (last - first) / first
        
        entry_dict = {"timestamp":tickdata.iloc[0]["timestamp"],"price":first}
        # On duplicate keys the signal values win over timestamp/price.
        out_entry = entry_dict | self.entry_signal
    
        exit_dict = {"timestamp":tickdata.iloc[-1]["timestamp"],"price":last}
        out_exit = exit_dict | self.exit_signal

        # Difference of two epoch values, i.e. a duration in seconds.
        waiting_time = (tickdata.iloc[-1]["timestamp"]).timestamp() - (tickdata.iloc[0]["timestamp"]).timestamp() 
        
        out = {"entry": out_entry, "exit": out_exit, "change":change,"upper barrier": self.upper_barrier,"lower barrier":self.lower_barrier,"trade duration":waiting_time}
        
        self.trades = []
        return(pd.DataFrame([out]))

    def remove_all_data(self):
        """Reset every attribute; the barrier getters return None afterwards."""
        self.trades = []
        self.entry_signal = None
        self.upper_barrier = None
        self.lower_barrier = None
        self.time_barrier = None
        self.exit_signal = None
        

## (4.2) Bayesian Modeling of Trade Waiting Times

In [12]:
from scipy.stats import gamma

class calculate_trade_wait_time():
    """
    Conjugate Gamma model updated from observed trade durations.

    The model is a gamma posterior on an exponential likelihood with a gamma
    prior: each observed duration adds 1 to ``alpha`` and the duration itself
    to ``beta``.

    Parameters
    ----------
    priors : sequence whose first two items are the prior alpha and beta.
    """

    def __init__(self,priors):
        # keep the original priors for reference
        self.priors = priors
        # running posterior parameters, updated by update_parameters()
        self.alpha = priors[0] 
        self.beta = priors[1]

    def update_parameters(self,trade_time):
        """Fold one observed trade duration into the posterior parameters."""
        self.alpha += 1
        self.beta += trade_time
        
    def get_posteriors(self):
        """Return the current parameters as {"alpha": ..., "beta": ...}."""
        return {"alpha":self.alpha,"beta":self.beta}

    def sample_time(self, random_state=None):
        """
        Draw one value from Gamma(a=alpha, scale=1/beta).

        Parameters
        ----------
        random_state : optional seed or generator forwarded to
            ``scipy.stats.gamma.rvs`` so a run can be reproduced. The default
            (None) keeps the previous behaviour of using the global RNG.

        NOTE(review): with an exponential likelihood the Gamma distribution is
        over the exponential rate, so this draw looks like a rate rather than
        a duration - confirm whether a posterior-predictive waiting time was
        intended before relying on the value as a time.
        """
        return gamma.rvs(a=self.alpha, scale=1/self.beta, size=1, random_state=random_state)[0]
        

## (4.3) Labelling process 

In [13]:
import json
import pandas as pd
import numpy as np
import os
from bson import json_util
from datetime import datetime
import asyncio
import nest_asyncio
import ipaddress
import pymongo
from datetime import datetime, timedelta
from statistics import mean, stdev

# Tick-data files: everything in "Data" except the Jupyter checkpoint folder.
# NOTE(review): os.listdir returns entries in arbitrary order, so the
# positional slices below pick files by whatever order the filesystem reports
# - confirm the selection against the printed listing.
tick_listing = os.listdir("Data")
files_tickdata = [name for name in tick_listing if name != '.ipynb_checkpoints']
print(files_tickdata)
files_tickdata = files_tickdata[2:]   # keep everything from the third entry on

# Signal files: same filtering, then keep only the first entry.
signal_listing = os.listdir("Signals")
Signals = [name for name in signal_listing if name != '.ipynb_checkpoints']
print(Signals)
Signals = Signals[:1]

['MatchTrades 2025-08-21 13:36:44 to 2025-08-21 18:08:04.json', 'MatchTrades 2025-08-12 19:16:36 to 2025-08-13 21:05:00.json', 'MatchTrades 2025-08-18 23:37:27 to 2025-08-21 13:36:43.json']
['Signals_df 2025-08-18 23:39:48 to 2025-08-21 13:36:28.json', 'Signals_df 2025-08-12 19:18:27 to 2025-08-13 21:04:40.json']


In [9]:
# Preview the first selected signal file and print the rows flagged as SMA entries.
signal_file = Signals[0]

# Column dtypes applied right after reading the JSON file.
signal_dtypes = {"RSI":"float32","ATR":"float32","+DI":"float32","-DI":"float32","Signal SMA":"int16",
                 "Signal EMA":"int16","Signal RSI":"int16"}

with open("Signals/"+signal_file) as f:
    signal_df = pd.read_json(f)
signal_df = signal_df.astype(signal_dtypes)

sma_entries = signal_df["Signal SMA"]==1
print(signal_df[sma_entries])

                    timestamp      SMA 9     SMA 21      EMA 9     EMA 21  \
45    2025-08-18 22:43:32.693  116746.80  116746.61  116747.58  116749.80   
107   2025-08-18 22:49:39.437  116644.25  116643.52  116646.06  116649.63   
135   2025-08-18 22:52:44.736  116621.30  116620.59  116623.61  116623.73   
207   2025-08-18 23:01:45.221  116414.24  116412.72  116417.15  116425.11   
289   2025-08-18 23:14:39.700  116427.11  116426.85  116422.73  116429.09   
...                       ...        ...        ...        ...        ...   
19261 2025-08-21 12:03:39.742  113137.15  113126.46  113152.70  113137.05   
19317 2025-08-21 12:14:53.480  113154.17  113150.99  113157.69  113154.32   
19348 2025-08-21 12:20:24.953  113160.47  113154.26  113151.01  113154.41   
19402 2025-08-21 12:30:22.037  113263.84  113259.39  113286.46  113271.40   
19421 2025-08-21 12:33:53.066  113373.62  113372.44  113379.64  113359.93   

             RSI        ATR        +DI        -DI    ADX  Signal SMA  \
45 

In [18]:
# --------------------------------------------------------------------- #
#  Triple-barrier labelling of the SMA entry signals                     #
#                                                                        #
#  For every (tick file, signal file) pair the tick stream is replayed.  #
#  A position opens on a signal row with "Signal SMA" == 1 and closes    #
#  when the price reaches the upper/lower barrier or the time barrier    #
#  expires. All state is initialised explicitly, so the cell does not    #
#  rely on variables left over from earlier runs or on swallowed         #
#  exceptions.                                                           #
# --------------------------------------------------------------------- #

# Indicator columns copied into the entry / exit records.
SIGNAL_FEATURES = ["SMA 9","SMA 21","RSI","ATR","+DI","-DI","ADX"]
# Half-width of the price barriers, in multiples of the ATR at entry.
ATR_BARRIER_FACTOR = 1.5

# trading fees
trading_fee = 0.001  # 0.1% per side
round_trip_fee = 2 * trading_fee  # buy + sell


def _load_tick_data(tick_file):
    """Read one match-trades file from "Data/" with typed columns and datetime timestamps."""
    with open("Data/"+tick_file) as f:
        tickdata = (pd.read_json(f)[["timestamp","amount","price"]]).astype({"amount":"float32","price":"float64"})
    tickdata["timestamp"] = pd.to_datetime(tickdata["timestamp"], unit='ms')
    return tickdata


def _load_signals(signal_file):
    """Read one signal file from "Signals/" with typed columns and datetime timestamps."""
    with open("Signals/"+signal_file) as f:
        signals = pd.read_json(f).astype({"RSI":"float32","ATR":"float32","+DI":"float32","-DI":"float32","Signal SMA":"int16",
                                          "Signal EMA":"int16","Signal RSI":"int16"})
    # "ADX" cells may be dicts holding the value under the key "ADX"; unwrap them.
    signals["ADX"] = signals["ADX"].apply(
        lambda x: x.get("ADX") if isinstance(x, dict) else x
    ).astype("float32")
    signals["timestamp"] = pd.to_datetime(signals['timestamp'], unit='ms')
    return signals


total_positions_SMA = pd.DataFrame()
total_positions_EMA = pd.DataFrame()

SMA_time_barrier = calculate_trade_wait_time(priors=[10,1]) # alpha = 10, beta = 1
EMA_time_barrier = calculate_trade_wait_time(priors=[10,1]) # alpha = 10, beta = 1

for tick_file,signal_file in zip(files_tickdata,Signals):

    # ----------------------------------------------- #
    #                  load data                      #
    # ----------------------------------------------- #

    MatchTrades_tickdata = _load_tick_data(tick_file)
    signal_df = _load_signals(signal_file)

    # Signals are consumed in order through a position pointer instead of
    # dropping row 0 and re-indexing the frame after every match.
    # NOTE(review): a signal is consumed only when a tick carries exactly the
    # same timestamp; if a signal timestamp never appears in the tick stream,
    # no later signal is consumed - confirm the two files are aligned.
    signal_times = signal_df["timestamp"].tolist()
    next_signal = 0

    # ----------------------------------------------- #
    #             per-file replay state               #
    # ----------------------------------------------- #

    signal_x = None            # most recent signal row (dict); None until the first match
    SMA_tbm = None             # barrier object of the tracked position; None when idle
    SMA_open_position = False

    stats_records = []         # one indicator snapshot per tick -> MatchTrades_stats
    closed_positions = []      # one-row result frames -> positions_SMA

    for Matchtrade in MatchTrades_tickdata.itertuples(index=True, name="Trade"):

        timestamp = Matchtrade.timestamp

        # Indicator values (first column skipped) in force when this tick
        # arrived. Collected in a list and converted once after the loop
        # instead of concatenating a growing DataFrame on every tick.
        if signal_x is not None:
            stats_records.append(dict(list(signal_x.items())[1:]))

        # Every tick after the entry tick belongs to the tracked position.
        if SMA_tbm is not None:
            SMA_tbm.add_trade(Matchtrade)

        # ----------------------------- #
        # Signal Simple moving averages #
        if next_signal < len(signal_times) and timestamp == signal_times[next_signal]:

            signal_x = signal_df.iloc[next_signal].to_dict()
            next_signal += 1

            # start position
            if (signal_x["Signal SMA"] == 1) and not SMA_open_position:

                SMA_open_position = True

                # profit-taking, stop-loss and time limits
                atr_offset = ATR_BARRIER_FACTOR * signal_x["ATR"]
                # NOTE(review): the sampled value is multiplied by 60 here, while the
                # posterior is updated with "trade duration", which end_barrier computes
                # as a difference of epoch seconds - confirm the intended time unit.
                barriers = [
                    round(Matchtrade.price + atr_offset, 2),
                    round(Matchtrade.price - atr_offset, 2),
                    round(timestamp.timestamp(), 3) + round(SMA_time_barrier.sample_time(), 3) * 60,
                ]
                signal = {k: signal_x[k] for k in SIGNAL_FEATURES}

                SMA_tbm = triple_barrier_method(Matchtrade, ptSl=barriers, signal=signal)
        # ----------------------------- #

        # ----------------------------------------------------- #
        #                   barrier check                       #
        # ----------------------------------------------------- #
        if SMA_tbm is None:
            continue

        barrier_hit = (
            (SMA_tbm.get_upper_barrier() <= Matchtrade.price)
            or (SMA_tbm.get_lower_barrier() >= Matchtrade.price)
            or (round(SMA_tbm.get_time_barrier(), 3) <= timestamp.timestamp())
        )
        if not barrier_hit:
            continue

        SMA_open_position = False

        signal = {k: signal_x[k] for k in SIGNAL_FEATURES}
        result = SMA_tbm.end_barrier(signal=signal)

        # end_barrier returns 0.0 when only the entry tick was recorded. As
        # before, nothing is stored in that case and the barrier object stays
        # in place, so the next tick is added and the barriers are re-checked.
        if not isinstance(result, pd.DataFrame):
            continue

        result = result.rename(columns={"change":"SMA change"})
        result["SMA change"] = result["SMA change"] - round_trip_fee

        # Only trades that are profitable after fees update the waiting-time model.
        if result.iloc[0]["SMA change"] > 0:
            SMA_time_barrier.update_parameters(result.iloc[0]["trade duration"])
            print(SMA_time_barrier.get_posteriors())

        closed_positions.append(result)
        SMA_tbm.remove_all_data()
        SMA_tbm = None

    # A position still open at the end of the file is discarded.
    if SMA_tbm is not None:
        SMA_tbm.remove_all_data()

    MatchTrades_stats = pd.DataFrame(stats_records) # Tick data for the Tick Bar (TB)
    if closed_positions:
        positions_SMA = pd.concat(closed_positions, ignore_index=True)
    else:
        positions_SMA = pd.DataFrame()

    # save the positions from all the different files
    total_positions_SMA =  pd.concat([total_positions_SMA, positions_SMA], ignore_index=True)

    ##########################################
    ##          Save positions data         ##
    
    raw_data = total_positions_SMA.to_dict(orient="records")
    json_raw_data = json_util.dumps(raw_data)
    
    # Rewritten after every file, so the file on disk always holds the running total.
    with open("Positions/"+"Positions SMA.json", "w") as f:
        f.write(json_raw_data)
       
    ##########################################
    ##########################################


In [20]:
# Show only the positions whose change is still positive after fees.
profitable_mask = positions_SMA["SMA change"] > 0
positions_SMA[profitable_mask]

Unnamed: 0,entry,exit,SMA change,upper barrier,lower barrier,trade duration
