In [1]:
import numpy as np
from sklearn.metrics import f1_score, make_scorer, precision_score, recall_score
from sklearn.linear_model import LogisticRegression
from xgboost import XGBClassifier
from backbone.probability_transformer import ProbabilityTransformer 
import pandas as pd
from sklearn.pipeline import Pipeline
from sklearn.model_selection import GridSearchCV, StratifiedKFold
from sklearn.metrics import make_scorer, f1_score, roc_auc_score
from sklearn.preprocessing import StandardScaler
from backbone.utils import load_function
from typing import Tuple
import yaml
from sklearn.metrics import classification_report
from datetime import datetime, timedelta

In [2]:
import numpy as np
import pandas as pd

# Exponentially weighted volatility of simple returns
def get_daily_volatility(close_prices, span=100):
    """Return the exponentially weighted standard deviation of simple returns.

    Parameters
    ----------
    close_prices : pandas.Series
        Series of closing prices.
    span : int, default 100
        Span forwarded to ``Series.ewm``.

    Returns
    -------
    pandas.Series
        EWM standard deviation of ``close_prices.pct_change()``; it carries
        the same index as ``close_prices``.
    """
    return close_prices.pct_change().ewm(span=span).std()

# Apply the triple-barrier rule to every bar
def apply_triple_barrier(close_prices, daily_volatility, upper_barrier=0.02, lower_barrier=0.02, max_holding_period=50):
    """Label each bar by the first barrier touched by the closes that follow it.

    Parameters
    ----------
    close_prices : sequence of float (list, numpy array or pandas Series)
        Closing prices. Rows are addressed by position, so the index of a
        pandas Series is irrelevant.
    daily_volatility : any
        Not used by the computation; kept so existing callers keep working.
    upper_barrier : float, default 0.02
        Take-profit distance as a fraction of the bar's own close.
    lower_barrier : float, default 0.02
        Stop-loss distance as a fraction of the bar's own close.
    max_holding_period : int, default 50
        Bounds the look-ahead window (see the note in the loop).

    Returns
    -------
    list of (int, int)
        One ``(position, label)`` tuple per bar: ``2`` when a later close
        reaches the upper level first, ``0`` when a later close reaches the
        lower level first, ``1`` when neither is reached inside the window
        (this includes the last bars, whose window is cut short or empty).
    """
    # Integer ``[]`` on a pandas Series is label-based. After sort_values or
    # filtering the labels no longer match positions, which would read the
    # wrong rows or raise KeyError. A plain array is always positional.
    prices = np.asarray(close_prices, dtype=float)
    n_bars = len(prices)

    barriers = []
    for index in range(n_bars):
        # Take-profit and stop-loss levels relative to the current close
        upper_barrier_level = prices[index] * (1 + upper_barrier)
        lower_barrier_level = prices[index] * (1 - lower_barrier)
        # NOTE(review): this range inspects at most max_holding_period - 1
        # future bars; confirm whether the last bar should be included.
        window_end = min(index + max_holding_period, n_bars)
        for j in range(index + 1, window_end):
            if prices[j] >= upper_barrier_level:
                barriers.append((index, 2))
                break
            elif prices[j] <= lower_barrier_level:
                barriers.append((index, 0))
                break
        else:
            # No barrier touched inside the window
            barriers.append((index, 1))
    return barriers

# Entry point that labels a price frame
def triple_barrier_labeling(data, upper_barrier=0.02, lower_barrier=0.02, max_holding_period=50, span=100):
    """Compute triple-barrier labels for ``data`` and store them in ``data['target']``.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame with a ``'Close'`` column. It is modified in place: a
        ``'target'`` column is added or overwritten.
    upper_barrier, lower_barrier, max_holding_period
        Forwarded unchanged to ``apply_triple_barrier``.
    span : int, default 100
        Forwarded to ``get_daily_volatility``.

    Returns
    -------
    pandas.Series
        The freshly written ``data['target']`` column.
    """
    closes = data['Close']
    volatility = get_daily_volatility(closes, span=span)
    outcomes = apply_triple_barrier(
        closes,
        volatility,
        upper_barrier,
        lower_barrier,
        max_holding_period,
    )
    # Keep only the label of each (position, label) pair, in row order
    data['target'] = [outcome for _position, outcome in outcomes]
    return data['target']


In [3]:
import pandas as pd
import os
pd.set_option('display.max_columns', None)

# Look-ahead (in hours) also used by the train/test split cell to end the
# training window before the reference date.
periods_forward = 5

tickers = ['EURUSD', 'GBPUSD', 'USDJPY', 'USDCAD', 'AUDUSD', 'USDCHF']
symbols_path = './backbone/data/backtest/symbols'
instruments = {}
labelled_frames = []

for ticker in tickers:
    frame = pd.read_csv(os.path.join(symbols_path, f'{ticker}.csv'))
    frame['ticker'] = ticker

    print('Creando target')
    # Sort by date and rebuild a 0..n-1 index so positional and label-based
    # access agree while the labels are computed.
    frame = frame.sort_values(by='Date').reset_index(drop=True)

    # Triple-barrier labels. An earlier approach (fixed-horizon percentage
    # return over `periods_forward` bars, bucketed at mean +/- 1 std with
    # pd.cut) was replaced by this call.
    frame['target'] = triple_barrier_labeling(frame, upper_barrier=0.015, lower_barrier=0.015, max_holding_period=48, span=100)

    instruments[ticker] = frame
    labelled_frames.append(frame)

# Concatenate once instead of growing `df` inside the loop, then parse and
# sort the dates a single time over the combined frame.
df = pd.concat(labelled_frames)
df['Date'] = pd.to_datetime(df['Date'], format='%Y-%m-%d %H:00:00')
# Stable sort keeps rows sharing a timestamp in ticker order, so the row
# order is deterministic across runs.
df = df.sort_values(by='Date', kind='stable')

Creando target
Creando target
Creando target
Creando target
Creando target
Creando target


In [13]:
date_format = '%Y-%m-%d %H:00:00'
window = 960  # length of the training window, in hours

# Reference timestamp: training data ends before it, test data starts after it.
actual_date = datetime(2023,5,1,0,0,0)

# Training window: ends periods_forward + 1 hours before the reference date.
# NOTE(review): `target` is built with a triple-barrier look-ahead
# (max_holding_period=48 in the loading cell), so labels close to `date_to`
# may depend on prices later than `actual_date`; confirm this gap is enough.
date_to = actual_date - timedelta(hours=periods_forward+1)
date_from = date_to - timedelta(hours=window)

# Test window: the 48 hours that follow the reference date.
date_from_test = actual_date
date_to_test = date_from_test + timedelta(hours=48)

date_from_str = date_from.strftime(date_format)
date_to_str = date_to.strftime(date_format)
date_from_test_str = date_from_test.strftime(date_format)
date_to_test_str = date_to_test.strftime(date_format)

# Both windows exclude their end points (strict comparisons).
train = df[(df['Date']>date_from_str) & (df['Date']<date_to_str)]
test = df[(df['Date']>date_from_test_str) & (df['Date']<date_to_test_str)]

# Undersampling start: keep every class-0 and class-2 row and cap class 1 at
# the mean size of those two classes, taking its last rows of the window.
class_0 = train[train['target']==0]
class_2 = train[train['target']==2]
avg_examples = (class_0.shape[0] + class_2.shape[0]) / 2
# Seeded shuffle so the training row order is reproducible between runs.
class_1 = train[train['target']==1].tail(int(avg_examples)).sample(frac=1, random_state=42)

train = pd.concat([class_0, class_1, class_2])
# Undersampling end

print(train.Date.head(1))
print(train.Date.tail(1))
print('------------------------')
print(test.Date.head(1))
print(test.Date.tail(1))

train.target.value_counts()

1851   2023-03-22 03:00:00
Name: Date, dtype: datetime64[ns]
2512   2023-04-28 17:00:00
Name: Date, dtype: datetime64[ns]
------------------------
2520   2023-05-01 01:00:00
Name: Date, dtype: datetime64[ns]
2566   2023-05-02 23:00:00
Name: Date, dtype: datetime64[ns]


target
2    165
1    124
0     83
Name: count, dtype: int64

In [14]:
# Count rows per target label in `df`
df['target'].value_counts()

target
1    36463
0     1962
2     1750
Name: count, dtype: int64

In [15]:
# Count rows per target label in the test window
test['target'].value_counts()

target
1    243
0     38
2      1
Name: count, dtype: int64

In [16]:
scaler = StandardScaler()
# `multi_class` is no longer passed: it is deprecated since scikit-learn 1.5
# and lbfgs already fits a multinomial model for multi-class targets.
log_reg = LogisticRegression(solver='lbfgs', max_iter=1000)
model = XGBClassifier()

# Pipeline: scale features -> ProbabilityTransformer wrapping XGBoost ->
# logistic regression.
# NOTE(review): ProbabilityTransformer presumably emits the wrapped model's
# class probabilities as features; confirm in backbone.probability_transformer.
pipe = Pipeline([
    ('scaler', scaler),
    ('prob_transf', ProbabilityTransformer(model)),
    ('log_reg', log_reg)
])

# Hyper-parameter grid is read from the project's YAML configuration.
with open('configs/model_config.yml', 'r') as file:
    model_configs = yaml.safe_load(file)

# NOTE(review): XGBoost reports "scale_pos_weight" as unused when this cell
# runs; presumably it comes from this grid and can be dropped there.
param_grid = model_configs['gradient_boosting']['param_grid']

# Shuffled, seeded stratified folds so every fold keeps the class proportions.
n_splits = 5
stratified_kfold = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=42)

# Model selection by weighted precision across the folds.
search = GridSearchCV(
    pipe,
    param_grid,
    n_jobs=-1,
    cv=stratified_kfold,
    scoring=make_scorer(precision_score, average='weighted')
)

# Fit on every column except the label and the two identifier columns.
search.fit(train.drop(columns=['target', 'Date', 'ticker']), train.target)

Parameters: { "scale_pos_weight" } are not used.



In [17]:
# Label vector and feature matrix of the training window
y = train['target']
x = train.drop(columns=['target', 'Date', 'ticker'])

In [18]:
# Number of training rows per target label
y.value_counts()

target
2    165
1    124
0     83
Name: count, dtype: int64

# Train performance

In [19]:
# Best pipeline found by the grid search
pipeline = search.best_estimator_

# Class-probability matrix for the training rows (one column per fitted class)
predictions = pipeline.predict_proba(train.drop(columns=['target', 'Date', 'ticker']))
max_probabilities = np.max(predictions, axis=1)
max_indices = np.argmax(predictions, axis=1)
# argmax yields column positions, not labels: map them through classes_ so
# the metrics stay correct even if the fitted classes are not exactly 0..k-1.
predicted_labels = pipeline.classes_[max_indices]

# zero_division=0 keeps the same numbers as the default but silences the
# undefined-metric warnings when a class is never predicted.
precision = precision_score(train.target, predicted_labels, average='weighted', zero_division=0)
recall = recall_score(train.target, predicted_labels, average='weighted', zero_division=0)
f1 = f1_score(train.target, predicted_labels, average='weighted', zero_division=0)

print(precision)
print(recall)
print(f1) 

# Explicit labels keep the report aligned with target_names even when one of
# the classes is absent from both the truth and the predictions.
class_labels = [0, 1, 2]
target_names = ['class 0', 'class 1', 'class 2']
print(classification_report(train.target, predicted_labels, labels=class_labels, target_names=target_names, zero_division=0))

0.9973438300051203
0.9973118279569892
0.9973157910084333
              precision    recall  f1-score   support

     class 0       0.99      1.00      0.99        83
     class 1       1.00      1.00      1.00       124
     class 2       1.00      0.99      1.00       165

    accuracy                           1.00       372
   macro avg       1.00      1.00      1.00       372
weighted avg       1.00      1.00      1.00       372



# Test performance

In [20]:
# Class-probability matrix for the test rows (one column per fitted class)
predictions = pipeline.predict_proba(test.drop(columns=['target', 'Date', 'ticker']))
max_probabilities = np.max(predictions, axis=1)
max_indices = np.argmax(predictions, axis=1)
# argmax yields column positions, not labels: map them through classes_ so
# the metrics stay correct even if the fitted classes are not exactly 0..k-1.
predicted_labels = pipeline.classes_[max_indices]

# zero_division=0 keeps the same numbers as the default but silences the
# undefined-metric warnings when a class is never predicted.
precision = precision_score(test.target, predicted_labels, average='weighted', zero_division=0)
recall = recall_score(test.target, predicted_labels, average='weighted', zero_division=0)
f1 = f1_score(test.target, predicted_labels, average='weighted', zero_division=0)

print(precision)
print(recall)
print(f1)

# Explicit labels keep the report aligned with target_names even when one of
# the classes is absent from both the truth and the predictions.
class_labels = [0, 1, 2]
target_names = ['class 0', 'class 1', 'class 2']
print(classification_report(test.target, predicted_labels, labels=class_labels, target_names=target_names, zero_division=0))

0.07749692280640055
0.0851063829787234
0.07949818036500463
              precision    recall  f1-score   support

     class 0       0.57      0.61      0.59        38
     class 1       0.00      0.00      0.00       243
     class 2       0.00      1.00      0.01         1

    accuracy                           0.09       282
   macro avg       0.19      0.54      0.20       282
weighted avg       0.08      0.09      0.08       282



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [34]:
# Path of the combined dataset CSV.
# NOTE(review): this rebinds `symbols_path`, which the loading cell uses as the
# per-ticker directory; re-running that cell after this one would build paths
# under 'dataset.csv/'. A distinct name would avoid the clash.
symbols_path = './backbone/data/backtest/symbols/dataset.csv'

In [35]:
import pandas as pd

In [38]:
# Load the CSV at `symbols_path`; this replaces the `df` built by the earlier cells.
# NOTE(review): the displayed frame has an 'Unnamed: 0' column, presumably an
# index written when the file was saved — confirm whether it should be dropped.
df = pd.read_csv(symbols_path)
# Preview the first 50 rows
df.head(50)

Unnamed: 0,Date,Open,High,Low,Close,Volume,ema_12,ema_26,ema_50,ema_200,rsi,upper_bband,middle_bband,lower_bband,atr,mfi,adx,macd,macdsignal,macdhist,macdhist_yesterday,macd_flag,change_percent_ch,change_percent_co,change_percent_cl,change_percent_1_day,change_percent_2_day,change_percent_3_day,change_percent_h,change_percent_o,change_percent_l,hour,day,three_stars,closing_marubozu,doji,doji_star,dragon_fly,engulfing,evening_doji_star,hammer,hanging_man,marubozu,morning_star,shooting_star,ticker,side,target
0,2022-11-15 10:00:00,1.04073,1.04171,1.03937,1.04075,11386,1.034996,1.033352,1.030025,1.007551,74.173967,1.039205,1.030028,1.020852,0.002287,73.397358,18.664835,0.001643,0.000992,0.000652,0.00036,0,-0.09,0.0,0.13,0.0,0.65,0.72,0.79,0.83,0.71,10,15,0,0,100,-100,0,0,0,0,0,0,0,0,EURUSD,-1.0,1.0
1,2022-11-15 11:00:00,1.04073,1.04092,1.03885,1.04005,8473,1.035773,1.033849,1.030418,1.007875,70.975731,1.039628,1.030421,1.021215,0.002271,62.034459,20.739289,0.001925,0.001178,0.000746,0.000652,0,-0.08,-0.07,0.12,-0.07,-0.06,0.58,0.63,0.73,0.58,11,15,0,0,0,0,0,0,0,0,0,0,0,0,EURUSD,-1.0,1.0
2,2022-11-15 12:00:00,1.0406,1.04373,1.03954,1.04279,8821,1.036853,1.034511,1.030903,1.008222,75.439776,1.040251,1.030906,1.021562,0.002408,71.011589,23.26683,0.002342,0.001411,0.000931,0.000746,0,-0.09,0.21,0.31,0.26,0.2,0.2,0.18,0.63,0.54,12,15,0,0,0,0,0,0,0,0,0,0,0,0,EURUSD,-1.0,1.0
3,2022-11-15 13:00:00,1.04279,1.04366,1.04093,1.0411,7499,1.037506,1.034999,1.031303,1.008549,68.447103,1.040681,1.031306,1.021931,0.002431,65.571497,25.613833,0.002507,0.00163,0.000877,0.000931,0,-0.25,-0.16,0.02,-0.16,0.1,0.03,0.19,0.2,0.15,13,15,0,-100,0,0,0,0,0,0,0,0,0,0,EURUSD,-1.0,1.0
4,2022-11-15 14:00:00,1.04113,1.04153,1.03973,1.04074,7138,1.038004,1.035424,1.031673,1.008869,67.021954,1.04105,1.031676,1.022302,0.002386,60.647914,26.969764,0.00258,0.00182,0.000759,0.000877,0,-0.08,-0.04,0.1,-0.03,-0.2,0.07,0.06,0.04,0.08,14,15,0,0,0,0,0,0,0,0,0,0,0,0,EURUSD,-1.0,1.0
5,2022-11-15 16:00:00,1.04393,1.04447,1.03809,1.04004,17782,1.039079,1.036345,1.03246,1.009524,59.600981,1.041897,1.032463,1.02303,0.003108,59.664285,31.123446,0.002735,0.002169,0.000566,0.000828,0,-0.43,-0.37,0.19,-0.37,-0.07,-0.1,0.08,0.11,-0.27,16,15,0,0,0,0,0,-100,0,0,0,0,0,0,EURUSD,-1.0,1.0
6,2022-11-16 12:00:00,1.042,1.04387,1.04154,1.04281,7235,1.038848,1.037441,1.035126,1.0145,62.92279,1.042261,1.035127,1.027993,0.003105,67.885314,21.267875,0.001407,0.000741,0.000666,0.000542,0,-0.1,0.08,0.12,0.08,0.21,0.25,0.23,0.39,0.39,12,16,0,0,0,0,0,0,0,0,0,0,0,0,EURUSD,-1.0,1.0
7,2022-11-16 13:00:00,1.04281,1.04319,1.04114,1.04167,6889,1.039282,1.037754,1.035382,1.01477,59.69545,1.042493,1.035384,1.028274,0.00303,64.965675,22.216728,0.001528,0.000898,0.00063,0.000666,0,-0.15,-0.11,0.05,-0.11,-0.03,0.1,0.11,0.24,0.23,13,16,0,0,0,0,0,0,0,0,0,0,0,0,EURUSD,-1.0,1.0
8,2022-11-17 16:00:00,1.03124,1.03484,1.03053,1.0335,11218,1.035323,1.036644,1.036489,1.020154,40.262495,1.041229,1.03649,1.03175,0.002631,37.768789,19.765775,-0.001321,-0.000647,-0.000674,-0.000762,0,-0.13,0.22,0.29,0.22,0.02,-0.11,-0.03,-0.25,-0.21,16,17,0,0,0,0,0,0,0,0,0,0,0,0,EURUSD,1.0,0.0
9,2022-11-18 21:00:00,1.03309,1.0334,1.03201,1.03279,3913,1.034857,1.035648,1.035995,1.02412,39.985525,1.039011,1.035995,1.032979,0.002339,45.092924,15.755701,-0.000791,-0.000363,-0.000429,-0.000389,0,-0.06,-0.03,0.08,-0.03,-0.05,-0.2,-0.37,-0.32,-0.21,21,18,0,0,0,0,0,0,0,0,0,0,0,0,EURUSD,1.0,0.0


In [33]:
# Rows whose target label equals 2
df.loc[df['target'] == 2]

Unnamed: 0,Date,Open,High,Low,Close,Volume,ema_12,ema_26,ema_50,ema_200,rsi,upper_bband,middle_bband,lower_bband,atr,mfi,adx,macd,macdsignal,macdhist,macdhist_yesterday,macd_flag,change_percent_ch,change_percent_co,change_percent_cl,change_percent_1_day,change_percent_2_day,change_percent_3_day,change_percent_h,change_percent_o,change_percent_l,hour,day,three_stars,closing_marubozu,doji,doji_star,dragon_fly,engulfing,evening_doji_star,hammer,hanging_man,marubozu,morning_star,shooting_star,ticker,target
