In [1]:
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, auc, roc_curve
import itertools
import yaml
import os
import numpy as np
from sklearn.metrics import auc
from statistics import mean
from sklearn.metrics import f1_score, make_scorer, precision_score, recall_score
# Lift pandas' column display limit so wide result tables are shown in full.
pd.set_option('display.max_columns', None)


In [2]:
def calculate_auc_avg(tickers, predictions, true_values):
    """Average the weighted precision of the predictions across ``tickers``.

    Despite its name, this function does not compute an AUC: the value
    accumulated per ticker is ``precision_score(..., average='weighted')``.
    The name is kept so existing callers continue to work.

    Args:
        tickers: iterable of ticker names; for each one a ``<ticker>`` column
            is expected in both ``predictions`` and ``true_values``.
        predictions: DataFrame with a 'fecha' column plus one predicted-label
            column per ticker.
        true_values: DataFrame with a 'fecha' column plus one true-label
            column per ticker.

    Returns:
        The arithmetic mean of the per-ticker weighted precision.

    Raises:
        statistics.StatisticsError: if ``tickers`` is empty.
        KeyError: if a ticker column is missing from either frame.
    """
    predictions = predictions.sort_values(by='fecha')
    true_values = true_values.sort_values(by='fecha')

    # Suffix every non-date column so both frames can live side by side.
    predictions = predictions.rename(
        columns={column: f'{column}_pred' for column in predictions.columns if column != 'fecha'}
    )
    true_values = true_values.rename(
        columns={column: f'{column}_true' for column in true_values.columns if column != 'fecha'}
    )

    # NOTE: concat(axis=1) pairs rows by index label, not by 'fecha'; the sort
    # above only affects row order. Both frames are assumed to share an index.
    performance = pd.concat([predictions, true_values], axis=1, join="inner")

    precision_per_ticker = []
    for ticker in tickers:
        true_col = f'{ticker}_true'
        pred_col = f'{ticker}_pred'

        # Keep only rows where BOTH label and prediction exist. Filtering the
        # two columns independently (as before) could pair a label with the
        # prediction of a different row, or yield vectors of different length.
        paired = performance[[true_col, pred_col]].dropna()

        precision_per_ticker.append(
            precision_score(paired[true_col], paired[pred_col], average='weighted')
        )

    return mean(precision_per_ticker)

In [3]:
def max_drawdown(serie):
    """Return the largest peak-to-trough decline of ``serie`` as a fraction of the peak.

    Args:
        serie: ordered sequence of numeric values (list, tuple, pandas Series,
            ...). Values are read positionally, so a pandas Series with any
            index (not only one starting at 0) is handled.

    Returns:
        The maximum of ``(running_peak - value) / running_peak`` over the
        sequence, or 0 when the sequence is empty or never declines.
        Points observed while the running peak is not positive are ignored,
        because a relative drawdown is undefined there.
    """
    # list() iterates values in order; this avoids label-based `serie[0]`,
    # which raises KeyError on a Series whose index does not contain 0.
    values = list(serie)
    if not values:
        return 0

    running_peak = values[0]
    worst_drawdown = 0

    for value in values[1:]:
        if value > running_peak:
            running_peak = value
        elif running_peak > 0:
            drawdown = (running_peak - value) / running_peak
            if drawdown > worst_drawdown:
                worst_drawdown = drawdown

    return worst_drawdown

In [4]:
def calculate_avg_returns_perc(orders, initial_value=None):
    """Mean and standard deviation of the month-over-month percentage return.

    Profits are summed per (year, month) of ``close_time``, accumulated on top
    of the starting balance, and the relative change of that running balance
    between consecutive months is computed.

    Args:
        orders: DataFrame with at least a 'close_time' column (anything
            ``pd.to_datetime`` accepts) and a numeric 'profit' column.
            Rows whose close time is missing are left out of the monthly sums.
        initial_value: starting balance. When omitted, the module-level
            ``initial_wallet_value`` is used, which keeps the original
            one-argument call working.

    Returns:
        Tuple ``(mean, std)`` of the monthly relative changes (fractions, not
        percent). Both are NaN when there are no closed orders.

    Raises:
        NameError: if ``initial_value`` is omitted and ``initial_wallet_value``
            has not been defined yet.
    """
    if initial_value is None:
        # Backward-compatible fallback to the notebook-level variable.
        initial_value = initial_wallet_value

    close_time = pd.to_datetime(orders['close_time'])
    monthly_profit = orders['profit'].groupby(
        [close_time.dt.year.rename('year'), close_time.dt.month.rename('month')]
    ).sum()

    # Running balance: the starting balance first, then one entry per month in
    # chronological order (groupby sorts by year, month).
    balance = pd.concat(
        [pd.Series([initial_value], dtype=float), monthly_profit.astype(float)],
        ignore_index=True,
    ).cumsum()

    previous_balance = balance.shift(1)
    perc_change = (balance - previous_balance) / previous_balance

    # The first entry is NaN (no previous month) and is skipped by mean/std.
    return perc_change.mean(), perc_change.std()

In [5]:
# Read the project settings (used below for the ticker list and the starting balance).
with open('configs/project_config.yml', 'r') as config_file:
    config = yaml.safe_load(config_file)

In [6]:
# Ticker symbols listed in the project config; displayed for reference.
tickers = config["tickers"] 
tickers

['EURUSD']

In [7]:
# Show the entries of the experiments directory that the next cell iterates over.
os.listdir('./backbone/data/backtest/experiments')

['Mode_train-Model_gradient_boosting-TrainWw_1440-TrainPd_24-TradingStgy_ml_strategy-PeriodsFwTg_12-SL_10-RR_5-UseDaysClose_True',
 'Mode_train-Model_gradient_boosting-TrainWw_1440-TrainPd_24-TradingStgy_ml_strategy-PeriodsFwTg_12-SL_10-RR_7-UseDaysClose_True',
 'Mode_train-Model_gradient_boosting-TrainWw_1440-TrainPd_24-TradingStgy_ml_strategy-PeriodsFwTg_24-SL_10-RR_5-UseDaysClose_True',
 'Mode_train-Model_gradient_boosting-TrainWw_1440-TrainPd_24-TradingStgy_ml_strategy-PeriodsFwTg_24-SL_10-RR_7-UseDaysClose_True',
 'Mode_train-Model_gradient_boosting-TrainWw_1440-TrainPd_24-TradingStgy_ml_strategy-PeriodsFwTg_48-SL_10-RR_5-UseDaysClose_True',
 'Mode_train-Model_gradient_boosting-TrainWw_1440-TrainPd_24-TradingStgy_ml_strategy-PeriodsFwTg_48-SL_10-RR_7-UseDaysClose_True',
 'Mode_train-Model_gradient_boosting-TrainWw_1440-TrainPd_24-TradingStgy_ml_strategy-PeriodsFwTg_6-SL_10-RR_5-UseDaysClose_True',
 'Mode_train-Model_gradient_boosting-TrainWw_1440-TrainPd_24-TradingStgy_ml_strategy

In [8]:
# Build one row of backtest and model metrics per experiment folder.
EXPERIMENTS_DIR = './backbone/data/backtest/experiments'
# NOTE(review): subtracted from the mean profit per order in 'sharpe_ratio';
# presumably a risk-free baseline — confirm its meaning and units.
SHARPE_BASELINE = 0.04

results_dict = {}
initial_wallet_value = config['start_money']

# sorted() makes the processing order reproducible across machines.
for path in sorted(os.listdir(EXPERIMENTS_DIR)):
    if path.endswith('.csv') or not path.startswith('Mode_train'):
        continue

    print(path)
    experiment_dir = os.path.join(EXPERIMENTS_DIR, path)
    metrics = results_dict[path] = {}

    # --- Wallet curve: total return (in percent) and maximum drawdown ---
    try:
        wallet = pd.read_csv(os.path.join(experiment_dir, 'wallet.csv'))

        final_wallet_value = wallet.tail(1).iloc[0]['wallet']
        metrics['wallet'] = ((final_wallet_value - initial_wallet_value) / initial_wallet_value) * 100
        metrics['max_drawdown'] = max_drawdown(wallet['wallet'])
    except Exception as error:
        # Best effort: keep going, but say why this experiment has no wallet metrics.
        print(f'  wallet metrics unavailable: {error!r}')
        metrics['wallet'] = 0
        # Explicit NaN so the column exists even if every experiment fails here.
        metrics['max_drawdown'] = np.nan

    # --- Orders: trade counts and profit-based ratios ---
    try:
        orders = pd.read_csv(os.path.join(experiment_dir, 'orders.csv'))

        mean_returns, std_returns = calculate_avg_returns_perc(orders)
        metrics['mean_perc_returns'] = mean_returns
        metrics['std_perc_returns'] = std_returns

        metrics['buys'] = orders[orders['open_time'].notna()].shape[0]
        metrics['sells'] = orders[orders['close_time'].notna()].shape[0]

        profit = orders['profit']
        winning_profit = profit[profit >= 0]
        losing_profit = profit[profit < 0]

        metrics['good_operations'] = winning_profit.shape[0]
        metrics['bad_operations'] = losing_profit.shape[0]

        # Raises ZeroDivisionError when there are no orders; handled below.
        metrics['operation_ratio'] = metrics['good_operations'] / (
            metrics['good_operations'] + metrics['bad_operations']
        )

        metrics['sharpe_ratio'] = (profit.mean() - SHARPE_BASELINE) / profit.std()

        # Average win divided by average loss (loss taken as a positive number).
        metrics['winning_rate'] = winning_profit.mean() / (-1 * losing_profit.mean())

        # Gross profit divided by gross loss.
        metrics['recover_factor'] = winning_profit.sum() / (-1 * losing_profit.sum())

        metrics['stability_index'] = profit.std() / profit.mean()

        for ticker in tickers:
            metrics[f'profits_in_{ticker}'] = orders[orders['ticker'] == ticker]['profit'].sum()

    except Exception as error:
        print(f'  order metrics unavailable: {error!r}')
        metrics['buys'] = 0
        metrics['sells'] = 0
        metrics['avg_incomes'] = 0
        metrics['good_operations'] = 0
        metrics['bad_operations'] = 0
        metrics['operation_ratio'] = 0

    # --- Model quality on the training windows (not guarded: a missing file stops the cell) ---
    train_results = pd.read_csv(os.path.join(experiment_dir, 'trainres.csv'))
    metrics['avg_train_precision'] = train_results['precision'].mean()
    metrics['avg_train_recall'] = train_results['recall'].mean()
    metrics['avg_train_f1'] = train_results['f1'].mean()

    # --- Model quality on the test predictions (rows with missing values dropped) ---
    predictions = pd.read_csv(os.path.join(experiment_dir, 'test_res.csv')).dropna()

    metrics['avg_test_precision'] = precision_score(predictions.y_true, predictions.y_pred, average='weighted')
    metrics['avg_test_recall'] = recall_score(predictions.y_true, predictions.y_pred, average='weighted')
    metrics['avg_test_f1'] = f1_score(predictions.y_true, predictions.y_pred, average='weighted')

results = pd.DataFrame.from_dict(results_dict, orient='index')

Mode_train-Model_gradient_boosting-TrainWw_1440-TrainPd_24-TradingStgy_ml_strategy-PeriodsFwTg_12-SL_10-RR_5-UseDaysClose_True
Mode_train-Model_gradient_boosting-TrainWw_1440-TrainPd_24-TradingStgy_ml_strategy-PeriodsFwTg_12-SL_10-RR_7-UseDaysClose_True
Mode_train-Model_gradient_boosting-TrainWw_1440-TrainPd_24-TradingStgy_ml_strategy-PeriodsFwTg_24-SL_10-RR_5-UseDaysClose_True
Mode_train-Model_gradient_boosting-TrainWw_1440-TrainPd_24-TradingStgy_ml_strategy-PeriodsFwTg_24-SL_10-RR_7-UseDaysClose_True
Mode_train-Model_gradient_boosting-TrainWw_1440-TrainPd_24-TradingStgy_ml_strategy-PeriodsFwTg_48-SL_10-RR_5-UseDaysClose_True
Mode_train-Model_gradient_boosting-TrainWw_1440-TrainPd_24-TradingStgy_ml_strategy-PeriodsFwTg_48-SL_10-RR_7-UseDaysClose_True
Mode_train-Model_gradient_boosting-TrainWw_1440-TrainPd_24-TradingStgy_ml_strategy-PeriodsFwTg_6-SL_10-RR_5-UseDaysClose_True
Mode_train-Model_gradient_boosting-TrainWw_1440-TrainPd_24-TradingStgy_ml_strategy-PeriodsFwTg_6-SL_10-RR_7-UseD

In [9]:
# Combine the share of winning trades (operation_ratio) with how often the
# strategy trades (buy_ratio) into one F-beta-style score.
# NOTE(review): the textbook F-beta denominator uses beta**2; this expression
# uses beta, exactly as originally written — confirm which one is intended.
beta = 0.05
eurusd = pd.read_csv('./backbone/data/backtest/symbols/EURUSD.csv')

# Number of distinct dates in the EURUSD file times the number of tickers.
trading_slots = eurusd.Date.nunique() * len(tickers)
results['buy_ratio'] = results['buys'] / trading_slots

hit_rate = results['operation_ratio']
trade_frequency = results['buy_ratio']
results['operation_ratio_2'] = ((1 + beta**2) * hit_rate * trade_frequency) / (beta * hit_rate + trade_frequency)

In [16]:
# Shortlist experiments with a high total return and a bounded drawdown,
# ordered from best to worst total return.
MIN_WALLET_RETURN_PCT = 80    # 'wallet' holds the total return in percent
MAX_ALLOWED_DRAWDOWN = 0.25   # 'max_drawdown' is a fraction of the running peak

summary_columns = [
    'operation_ratio_2',
    'operation_ratio',
    'buy_ratio',
    'mean_perc_returns',
    'std_perc_returns',
    'winning_rate',
    'recover_factor',
    'sharpe_ratio',
    'stability_index',
    'max_drawdown',
    'good_operations',
    'bad_operations',
    'wallet',
    # One profit column per configured ticker, instead of a hand-edited list.
    *[f'profits_in_{ticker}' for ticker in tickers],
    'avg_train_precision',
    'avg_train_recall',
    'avg_train_f1',
    'avg_test_precision',
    'avg_test_recall',
    'avg_test_f1',
]

positive_results = results.loc[
    (results['wallet'] > MIN_WALLET_RETURN_PCT)
    & (results['max_drawdown'] <= MAX_ALLOWED_DRAWDOWN),
    summary_columns,
].sort_values(by='wallet', ascending=False)


positive_results

Unnamed: 0,operation_ratio_2,operation_ratio,buy_ratio,mean_perc_returns,std_perc_returns,winning_rate,recover_factor,sharpe_ratio,stability_index,max_drawdown,good_operations,bad_operations,wallet,profits_in_EURUSD,avg_train_precision,avg_train_recall,avg_train_f1,avg_test_precision,avg_test_recall,avg_test_f1
Mode_train-Model_gradient_boosting-TrainWw_1440-TrainPd_24-TradingStgy_ml_strategy-PeriodsFwTg_24-SL_10-RR_5-UseDaysClose_True,0.167406,0.226006,0.031974,0.098923,0.141417,4.617077,1.348186,0.107367,9.249019,0.21101,73,250,185.5058,1855.058,0.264762,0.262464,0.263039,0.618643,0.460245,0.491833
Mode_train-Model_gradient_boosting-TrainWw_960-TrainPd_24-TradingStgy_ml_strategy-PeriodsFwTg_6-SL_10-RR_7-UseDaysClose_True,0.257356,0.42042,0.032964,0.092316,0.090964,2.171688,1.575318,0.149628,6.633135,0.128923,140,193,177.599897,1775.99897,0.485866,0.446749,0.453021,0.617645,0.515291,0.538276
Mode_train-Model_gradient_boosting-TrainWw_1920-TrainPd_24-TradingStgy_ml_strategy-PeriodsFwTg_6-SL_10-RR_7-UseDaysClose_True,0.256337,0.408696,0.034152,0.090516,0.065399,2.196522,1.518184,0.139841,7.095326,0.120845,141,204,177.400536,1774.00536,0.425201,0.378393,0.383844,0.594965,0.485474,0.509133
Mode_train-Model_random_forest-TrainWw_1920-TrainPd_24-TradingStgy_ml_strategy-PeriodsFwTg_6-SL_10-RR_7-UseDaysClose_True,0.250932,0.400593,0.03336,0.086945,0.075618,2.241804,1.498235,0.135444,7.322732,0.098028,135,202,164.788234,1647.88234,0.457034,0.374799,0.397328,0.597455,0.488532,0.512126
Mode_train-Model_random_forest-TrainWw_1440-TrainPd_24-TradingStgy_ml_strategy-PeriodsFwTg_24-SL_10-RR_5-UseDaysClose_True,0.160791,0.228782,0.026826,0.087832,0.136508,4.819391,1.429676,0.129114,7.689886,0.184479,62,209,152.0515,1520.515,0.41247,0.318413,0.342895,0.642795,0.512997,0.544763
Mode_train-Model_random_forest-TrainWw_960-TrainPd_24-TradingStgy_ml_strategy-PeriodsFwTg_6-SL_10-RR_7-UseDaysClose_True,0.237777,0.391447,0.030093,0.083712,0.098128,2.29472,1.476063,0.13527,7.333023,0.124839,119,185,150.821425,1508.21425,0.562292,0.495803,0.515272,0.586188,0.506881,0.530607
Mode_train-Model_gradient_boosting-TrainWw_5760-TrainPd_24-TradingStgy_ml_strategy-PeriodsFwTg_12-SL_10-RR_5-UseDaysClose_True,0.202693,0.337255,0.025243,0.080007,0.105959,2.811703,1.430807,0.133923,7.4122,0.169613,86,169,139.065808,1390.65808,0.530978,0.454104,0.429639,0.634387,0.522936,0.552196
Mode_train-Model_gradient_boosting-TrainWw_960-TrainPd_24-TradingStgy_ml_strategy-PeriodsFwTg_24-SL_10-RR_5-UseDaysClose_True,0.159107,0.223022,0.027519,0.079254,0.115653,5.066083,1.454153,0.131022,7.569416,0.23556,62,216,134.931,1349.31,0.510502,0.450062,0.453114,0.623343,0.479358,0.512016
Mode_train-Model_random_forest-TrainWw_480-TrainPd_24-TradingStgy_ml_strategy-PeriodsFwTg_24-SL_10-RR_7-UseDaysClose_True,0.127141,0.176991,0.022372,0.078276,0.119884,6.483507,1.394303,0.110519,8.985767,0.16997,40,186,130.9376,1309.376,0.77663,0.697965,0.718193,0.686799,0.550459,0.5918
Mode_train-Model_random_forest-TrainWw_1440-TrainPd_24-TradingStgy_ml_strategy-PeriodsFwTg_6-SL_10-RR_7-UseDaysClose_True,0.249785,0.389685,0.034548,0.077629,0.115746,2.186852,1.3963,0.116358,8.502071,0.150033,136,213,130.239087,1302.39087,0.412222,0.336135,0.358136,0.579391,0.474006,0.498593


In [11]:
# columns = [
#     'profits_in_EURUSD',
#     'profits_in_GBPUSD', 
#     'profits_in_USDJPY', 
#     'profits_in_USDCAD',
#     'profits_in_AUDUSD', 
#     'profits_in_USDCHF'
# ]

# positive_results[columns].describe()

In [12]:
# NOTE(review): `filter_results` is only assigned in later cells, so this cell
# raises NameError when the notebook is run top to bottom on a fresh kernel.
filter_results

NameError: name 'filter_results' is not defined

In [None]:
# filter_results = results[results['wallet']>0]
# filter_results.shape

In [None]:
import time
import matplotlib.pyplot as plt
import numpy as np
from IPython.display import clear_output

# Overlay the wallet curve of every shortlisted experiment on a single figure.
filter_results = positive_results
fig, ax = plt.subplots(figsize=(20, 10))

for path in filter_results.index:
    wallet = pd.read_csv(
        os.path.join('./backbone/data/backtest/experiments', path, 'wallet.csv')
    )
    # One line per experiment: dates on the x axis, wallet value on the y axis.
    ax.plot(pd.to_datetime(wallet['date']), wallet['wallet'], label=f'{path}')

# Axis labels and legend.
ax.set_xlabel('Fechas')
ax.set_ylabel('Precios')
ax.legend()

# Render the figure.
plt.show()

In [None]:
import re

In [None]:
# Keep only the experiments whose 'wallet' value is positive.
filter_results = results.loc[results['wallet'] > 0]


In [None]:
# Move the experiment folder name from the index into an 'experiment' column.
# NOTE(review): not idempotent — running this cell a second time resets the new
# integer index as well and appears to create a duplicate 'experiment' column.
filter_results = filter_results.reset_index().rename(columns={'index':'experiment'})

In [None]:
# Split the hyper-parameters encoded in each experiment folder name
# ('...-Model_<x>-TrainWw_<y>-...') into one column per field.
# Extracted values are strings. A name lacking a field yields NaN for that
# column instead of aborting the cell with an AttributeError.
# The trailing 'UseDaysClose_' field is not extracted.
experiment_fields = {
    'model': 'Model',
    'window': 'TrainWw',
    'train_period': 'TrainPd',
    'strategy': 'TradingStgy',
    'periods_forward_target': 'PeriodsFwTg',
    'sl': 'SL',
    'rr': 'RR',
}

for column, tag in experiment_fields.items():
    filter_results[column] = filter_results['experiment'].str.extract(
        rf'-{tag}_(.*?)-', expand=False
    )

In [None]:
# Mean and standard deviation of max drawdown per hyper-parameter combination,
# highest mean drawdown first; combinations with a NaN statistic are dropped.
group_keys = ['strategy', 'model', 'window', 'rr', 'periods_forward_target']
gb = filter_results.groupby(by=group_keys)[['max_drawdown']].agg(['mean', 'std'])


gb.sort_values(by=('max_drawdown', 'mean'), ascending=False).dropna().round(4)

In [None]:
# Mean and standard deviation of the 'wallet' metric for each `rr` value.
gb = filter_results.groupby(by=['rr'])[['wallet']].agg(['mean', 'std'])


gb

In [None]:
# Shape of the subset with rr == '8' (`rr` holds strings captured from the folder name).
filter_results[filter_results['rr']=='8'].shape

In [None]:
# Mean and standard deviation of 'winning_rate' per `window` value, lowest mean first.
gb = filter_results.groupby(by=['window'])[['winning_rate']].agg(['mean', 'std'])


gb.sort_values(by=('winning_rate', 'mean'))

In [None]:
# Mean and standard deviation of 'winning_rate' per `periods_forward_target`
# value, lowest mean first.
gb = filter_results.groupby(by=['periods_forward_target'])[['winning_rate']].agg(['mean', 'std'])


gb.sort_values(by=('winning_rate', 'mean'))

In [None]:
# NOTE(review): this raises ZeroDivisionError — presumably a deliberate stop so
# that "Run All" does not continue into the cell below; confirm before removing.
0/0

In [None]:
# Build one row of metrics per './data/test_*' folder.
# Each section is best effort: on failure the reason is printed and the
# affected metrics are set to 0, so one broken folder does not stop the cell.
DATA_DIR = './data'

results_dict = {}

for path in sorted(os.listdir(DATA_DIR)):
    if path.endswith('.csv') or not path.startswith('test_'):
        continue

    print(path)
    run_dir = os.path.join(DATA_DIR, path)
    metrics = results_dict[path] = {}

    # --- Final wallet value ---
    try:
        wallet = pd.read_csv(os.path.join(run_dir, 'wallet.csv'))
        metrics['wallet'] = wallet.tail(1).iloc[0]['wallet']
    except Exception as error:
        print(f'  wallet unavailable: {error!r}')
        metrics['wallet'] = 0

    # --- Orders: counts, average profit per ticker, share of winning trades ---
    try:
        orders = pd.read_csv(os.path.join(run_dir, 'orders.csv'))
        metrics['buys'] = orders[orders['open_date'].notna()].shape[0]
        metrics['sells'] = orders[orders['close_date'].notna()].shape[0]

        metrics['avg_incomes'] = orders.groupby('ticker')['profit'].sum().mean()
        metrics['good_operations'] = orders[orders['profit'] > 0].shape[0]
        metrics['bad_operations'] = orders[orders['profit'] <= 0].shape[0]
        # Raises ZeroDivisionError when there are no orders; handled below.
        metrics['operation_ratio'] = metrics['good_operations'] / (
            metrics['good_operations'] + metrics['bad_operations']
        )
    except Exception as error:
        print(f'  order metrics unavailable: {error!r}')
        metrics['buys'] = 0
        metrics['sells'] = 0
        metrics['avg_incomes'] = 0
        metrics['good_operations'] = 0
        metrics['bad_operations'] = 0
        metrics['operation_ratio'] = 0

    # --- Training score ---
    try:
        train_results = pd.read_csv(os.path.join(run_dir, 'train_results.csv'))
        metrics['avg_train_auc'] = train_results['auc'].mean()
    except Exception as error:
        print(f'  train results unavailable: {error!r}')
        metrics['avg_train_auc'] = 0

    # --- Test score (value returned by calculate_auc_avg) ---
    try:
        stock_predictions = pd.read_csv(os.path.join(run_dir, 'stock_predictions.csv'))
        true_values = pd.read_csv(os.path.join(run_dir, 'stock_true_values.csv'))

        metrics['avg_test_auc'] = calculate_auc_avg(tickers, stock_predictions, true_values)
    except Exception as error:
        print(f'  test score unavailable: {error!r}')
        metrics['avg_test_auc'] = 0

results = pd.DataFrame.from_dict(results_dict, orient='index')


Mode_train-Model_random_forest-TrainWw_720-TrainPd_24-TradingStgy_macd_strategy-PeriodsFwTg_8-SL_15-RR_3-UseDaysClose_False

Mode_train-Model_random_forest-TrainWw_1440-TrainPd_24-TradingStgy_macd_strategy-PeriodsFwTg_8-SL_15-RR_3-UseDaysClose_False

Mode_train-Model_random_forest-TrainWw_1440-TrainPd_24-TradingStgy_macd_strategy-PeriodsFwTg_8-SL_15-RR_2-UseDaysClose_False

Mode_train-Model_random_forest-TrainWw_720-TrainPd_24-TradingStgy_macd_strategy-PeriodsFwTg_8-SL_15-RR_2-UseDaysClose_False

Mode_train-Model_random_forest-TrainWw_960-TrainPd_24-TradingStgy_macd_strategy-PeriodsFwTg_5-SL_15-RR_3-UseDaysClose_False







Mode_train-Model_gradient_boosting-TrainWw_1440-TrainPd_24-TradingStgy_macd_strategy-PeriodsFwTg_8-SL_15-RR_3-UseDaysClose_False

Mode_train-Model_random_forest-TrainWw_960-TrainPd_24-TradingStgy_macd_strategy-PeriodsFwTg_8-SL_15-RR_3-UseDaysClose_False