In [1]:
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, auc, roc_curve
import itertools
import yaml
import os
import numpy as np
from sklearn.metrics import auc
from statistics import mean 

In [2]:
def calculate_auc_avg(tickers, predictions, true_values):
    """Return the mean ROC AUC over ``tickers``.

    Both frames must contain a ``fecha`` column plus one column per ticker.
    Ticker columns are suffixed (``_proba`` for predictions, ``_true`` for the
    true values) and the two frames are placed side by side.

    A row is scored for a ticker only when BOTH its prediction and its true
    value are present. Filtering the two columns independently would either
    yield arrays of different lengths or, when the NaN counts happen to match,
    silently pair each label with the score of a different row.

    Args:
        tickers: iterable of ticker names to score.
        predictions: DataFrame with ``fecha`` and one score column per ticker.
        true_values: DataFrame with ``fecha`` and one label column per ticker.

    Returns:
        The arithmetic mean of the per-ticker AUC values.

    Raises:
        KeyError: if a ticker has no column in one of the frames.
        statistics.StatisticsError: if ``tickers`` is empty.
    """
    predictions = predictions.sort_values(by='fecha')
    true_values = true_values.sort_values(by='fecha')

    # Rename the ticker columns so both frames can live in a single table.
    predictions = predictions.rename(
        columns={col: f'{col}_proba' for col in predictions.columns if col != 'fecha'}
    )
    true_values = true_values.rename(
        columns={col: f'{col}_true' for col in true_values.columns if col != 'fecha'}
    )

    # NOTE(review): concat(axis=1) aligns on the row index, not on 'fecha', so
    # both frames are assumed to share the same row labels -- confirm upstream.
    performance = pd.concat([predictions, true_values], axis=1, join="inner")

    auc_list = []
    for ticker in tickers:
        true_col = f'{ticker}_true'
        proba_col = f'{ticker}_proba'

        # One joint mask keeps labels and scores aligned row by row.
        scored = performance[true_col].notna() & performance[proba_col].notna()
        y_true = performance.loc[scored, true_col]
        y_pred = performance.loc[scored, proba_col]

        fpr, tpr, thresholds = roc_curve(y_true, y_pred)
        auc_list.append(auc(fpr, tpr))

    return mean(auc_list)

In [3]:
def max_drawdown(serie):
    """Return the largest relative drop from a running peak in ``serie``.

    The series is walked in order while tracking the highest value seen so
    far; each later value below that peak contributes a drawdown of
    ``(peak - value) / peak`` and the largest one is returned.

    Args:
        serie: any iterable of numbers (list, tuple, pandas Series, ...).
            Values are consumed in iteration order, so a pandas Series works
            regardless of its index labels.

    Returns:
        The maximum drawdown as a fraction of the peak, or ``0`` when the
        input is empty or never falls below a positive running peak.
    """
    values = iter(serie)
    try:
        running_peak = next(values)
    except StopIteration:
        # Empty input: there is nothing to draw down from.
        return 0

    max_dd = 0
    for current in values:
        if current > running_peak:
            running_peak = current
        elif running_peak > 0:
            # A non-positive peak cannot yield a positive drawdown, and a
            # zero peak would divide by zero, so those are skipped.
            dd = (running_peak - current) / running_peak
            if dd > max_dd:
                max_dd = dd

    return max_dd

In [4]:
# Read the project configuration and pull out the tickers to evaluate.
with open('configs/project_config.yml', 'r') as config_file:
    config = yaml.safe_load(config_file)

tickers = config["tickers"]
tickers  # last expression of the cell: echoes the ticker list

['EURUSD', 'GBPUSD', 'USDJPY', 'USDCAD', 'AUDUSD', 'USDCHF']

In [5]:
# Folder holding one sub-directory per experiment run.
EXPERIMENTS_DIR = './backbone/data/experiments'
# Starting balance used to express the final wallet value as a percentage return.
INITIAL_WALLET_VALUE = 100
# Constant subtracted from the mean profit in the Sharpe-style ratio below.
SHARPE_BASELINE = 0.04

results_dict = {}

for path in os.listdir(EXPERIMENTS_DIR):
    # Keep only training-run folders; loose CSV files in the directory are skipped.
    if path.endswith('.csv') or not path.startswith('Mode_train'):
        continue

    print(path)
    experiment_dir = os.path.join(EXPERIMENTS_DIR, path)
    metrics = results_dict[path] = {}

    # Each section is best-effort: a missing or malformed file falls back to
    # defaults instead of aborting the scan. `except Exception` (rather than a
    # bare `except`) lets KeyboardInterrupt / SystemExit propagate, and the
    # printed warning shows why a metric fell back.

    # Wallet: percentage return and maximum drawdown of the balance curve.
    try:
        wallet = pd.read_csv(os.path.join(experiment_dir, 'wallet.csv'))

        final_wallet_value = wallet.tail(1).iloc[0]['wallet']
        metrics['wallet'] = ((final_wallet_value - INITIAL_WALLET_VALUE) / INITIAL_WALLET_VALUE) * 100
        metrics['max_drawdown'] = max_drawdown(wallet['wallet'])
    except Exception as exc:
        print(f'  [warn] wallet metrics unavailable: {exc!r}')
        metrics['wallet'] = 0
        # Explicit NaN so the column always exists for the summary cells.
        metrics['max_drawdown'] = np.nan

    # Orders: operation counts and profit-based ratios.
    try:
        orders = pd.read_csv(os.path.join(experiment_dir, 'orders.csv'))
        profit = orders['profit']
        wins = profit[profit >= 0]
        losses = profit[profit < 0]

        metrics['buys'] = orders[orders['open_date'].notna()].shape[0]
        metrics['sells'] = orders[orders['close_date'].notna()].shape[0]

        avg_incomes = orders.groupby('ticker')['profit'].sum().mean()
        metrics['avg_incomes'] = avg_incomes
        metrics['good_operations'] = wins.shape[0]
        metrics['bad_operations'] = losses.shape[0]

        # Raises ZeroDivisionError when there are no orders; handled below.
        metrics['operation_ratio'] = metrics['good_operations'] / (metrics['good_operations'] + metrics['bad_operations'])

        metrics['sharpe_ratio'] = (profit.mean() - SHARPE_BASELINE) / profit.std()

        # Average winning profit divided by the average losing profit (sign flipped).
        metrics['winning_rate'] = wins.mean() / (-1 * losses.mean())

        # Total winning profit divided by the total losing profit (sign flipped).
        metrics['recover_factor'] = wins.sum() / (-1 * losses.sum())
    except Exception as exc:
        print(f'  [warn] order metrics unavailable: {exc!r}')
        metrics['buys'] = 0
        metrics['sells'] = 0
        avg_incomes = 0
        metrics['avg_incomes'] = 0
        metrics['good_operations'] = 0
        metrics['bad_operations'] = 0
        metrics['operation_ratio'] = 0
        # Explicit NaN so these columns always exist for the summary cells.
        metrics['sharpe_ratio'] = np.nan
        metrics['winning_rate'] = np.nan
        metrics['recover_factor'] = np.nan

    # Training AUC: mean of the 'auc' column in trainres.csv.
    try:
        train_results = pd.read_csv(os.path.join(experiment_dir, 'trainres.csv'))
        avg_train_auc = train_results['auc'].mean()
        metrics['avg_train_auc'] = avg_train_auc
    except Exception as exc:
        print(f'  [warn] train AUC unavailable: {exc!r}')
        metrics['avg_train_auc'] = 0

    # Test AUC: averaged over the configured tickers.
    try:
        stock_predictions = pd.read_csv(os.path.join(experiment_dir, 'preds.csv'))
        true_values = pd.read_csv(os.path.join(experiment_dir, 'truevals.csv'))

        avg_auc_score = calculate_auc_avg(tickers, stock_predictions, true_values)
        metrics['avg_test_auc'] = avg_auc_score
    except Exception as exc:
        print(f'  [warn] test AUC unavailable: {exc!r}')
        metrics['avg_test_auc'] = 0

# One row per experiment folder, one column per metric.
results = pd.DataFrame.from_dict(results_dict, orient='index')

Mode_train-Model_gradient_boosting-TrainWindow_24-TrainPeriod_1-TradingStrategy_bband_strategy-PeriodsForwardTarget_1-SL_15-TP_30-UseDaysInClosePos_False
Mode_train-Model_gradient_boosting-TrainWindow_24-TrainPeriod_1-TradingStrategy_bband_strategy-PeriodsForwardTarget_2-SL_15-TP_30-UseDaysInClosePos_False
Mode_train-Model_gradient_boosting-TrainWindow_24-TrainPeriod_1-TradingStrategy_bband_strategy-PeriodsForwardTarget_4-SL_15-TP_30-UseDaysInClosePos_False
Mode_train-Model_gradient_boosting-TrainWindow_24-TrainPeriod_1-TradingStrategy_bband_strategy-PeriodsForwardTarget_8-SL_15-TP_30-UseDaysInClosePos_False
Mode_train-Model_gradient_boosting-TrainWindow_24-TrainPeriod_1-TradingStrategy_macd_strategy-PeriodsForwardTarget_1-SL_15-TP_30-UseDaysInClosePos_False
Mode_train-Model_gradient_boosting-TrainWindow_24-TrainPeriod_1-TradingStrategy_macd_strategy-PeriodsForwardTarget_2-SL_15-TP_30-UseDaysInClosePos_False
Mode_train-Model_gradient_boosting-TrainWindow_24-TrainPeriod_1-TradingStrateg

In [6]:
# Weight used when combining operation_ratio and buy_ratio into one score.
beta = 0.05

# Number of buys scaled by a fixed divisor of 30.
buy_ratio = results['buys'] / 30
operation_ratio = results['operation_ratio']
results['buy_ratio'] = buy_ratio

# NOTE(review): this resembles an F-beta score, but the standard F-beta
# denominator uses beta**2 where this one uses beta -- confirm the intent.
combined_numerator = (1 + beta**2) * operation_ratio * buy_ratio
combined_denominator = beta * operation_ratio + buy_ratio
results['operation_ratio_2'] = combined_numerator / combined_denominator

In [7]:
# Columns shown in the summary table, in display order.
summary_columns = [
    'operation_ratio_2',
    'operation_ratio',
    'buy_ratio',
    'winning_rate',
    'recover_factor',
    'sharpe_ratio',
    'max_drawdown',
    'good_operations',
    'bad_operations',
    'avg_train_auc',
    'avg_test_auc',
    'wallet',
]

# Take the 15 experiments with the highest wallet value...
top_by_wallet = results.sort_values(by=['wallet'], ascending=[False])[summary_columns].head(15)

# ...and display them ordered by operation_ratio, best first.
top_by_wallet.sort_values(by=['operation_ratio'], ascending=[False])

Unnamed: 0,operation_ratio_2,operation_ratio,buy_ratio,winning_rate,recover_factor,sharpe_ratio,max_drawdown,good_operations,bad_operations,avg_train_auc,avg_test_auc,wallet
Mode_train-Model_random_forest-TrainWindow_48-TrainPeriod_1-TradingStrategy_macd_strategy-PeriodsForwardTarget_2-SL_15-TP_45-UseDaysInClosePos_False,0.475763,0.5,0.466667,2.897787,2.897787,0.42815,0.028418,7,7,0.900266,0.52331,22.27287
Mode_train-Model_random_forest-TrainWindow_72-TrainPeriod_1-TradingStrategy_macd_strategy-PeriodsForwardTarget_4-SL_15-TP_30-UseDaysInClosePos_False,0.482551,0.484848,3.333333,1.525298,1.435575,0.154623,0.138231,48,51,0.994612,0.52556,42.34412
Mode_train-Model_random_forest-TrainWindow_72-TrainPeriod_1-TradingStrategy_bband_strategy-PeriodsForwardTarget_2-SL_15-TP_30-UseDaysInClosePos_False,0.445682,0.445455,11.2,1.589593,1.276886,0.098671,0.227326,147,183,0.846902,0.510132,196.60399
Mode_train-Model_gradient_boosting-TrainWindow_48-TrainPeriod_1-TradingStrategy_macd_strategy-PeriodsForwardTarget_2-SL_15-TP_30-UseDaysInClosePos_False,0.441918,0.444444,2.7,1.655697,1.324558,0.113769,0.071947,36,45,0.895848,0.527495,25.95434
Mode_train-Model_gradient_boosting-TrainWindow_48-TrainPeriod_1-TradingStrategy_bband_strategy-PeriodsForwardTarget_2-SL_15-TP_30-UseDaysInClosePos_False,0.426644,0.426724,7.933333,1.564523,1.164569,0.056198,0.342859,99,133,0.895848,0.527495,59.33657
Mode_train-Model_gradient_boosting-TrainWindow_48-TrainPeriod_1-TradingStrategy_bband_strategy-PeriodsForwardTarget_4-SL_15-TP_30-UseDaysInClosePos_False,0.41226,0.412245,8.366667,1.575741,1.105207,0.033276,0.35163,101,144,0.900256,0.525664,38.257
Mode_train-Model_gradient_boosting-TrainWindow_24-TrainPeriod_1-TradingStrategy_bband_strategy-PeriodsForwardTarget_4-SL_15-TP_30-UseDaysInClosePos_False,0.412059,0.411765,11.533333,1.523449,1.066414,0.017412,0.439419,140,200,0.947379,0.52203,37.29035
Mode_train-Model_gradient_boosting-TrainWindow_72-TrainPeriod_1-TradingStrategy_bband_strategy-PeriodsForwardTarget_8-SL_15-TP_30-UseDaysInClosePos_False,0.408362,0.408,12.666667,1.669326,1.150482,0.049599,0.305369,153,222,0.976463,0.543608,82.84681
Mode_train-Model_gradient_boosting-TrainWindow_24-TrainPeriod_1-TradingStrategy_bband_strategy-PeriodsForwardTarget_2-SL_15-TP_30-UseDaysInClosePos_False,0.406999,0.406728,11.1,1.529175,1.048352,0.008849,0.453643,133,194,0.944477,0.527083,22.42946
Mode_train-Model_random_forest-TrainWindow_72-TrainPeriod_1-TradingStrategy_macd_strategy-PeriodsForwardTarget_8-SL_15-TP_30-UseDaysInClosePos_False,0.40599,0.40625,6.466667,1.678874,1.148703,0.048193,0.184605,78,114,0.984165,0.536403,31.23499


In [8]:
# NOTE(review): presumably a deliberate "stop here" guard -- the
# ZeroDivisionError halts "Run All" before the cells below; confirm intent.
0/0

ZeroDivisionError: division by zero

In [None]:
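# NOTE: commented-out lookup kept for reference. Its labels follow a
# 'train_<model>_train_window_<n>_...' naming scheme, unlike the
# 'Mode_train-Model_...' folder names printed by the experiments loop above.
# results.loc[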
#     [
#         'train_gradient_boosting_train_window_38_train_period_1_trading_strategy_strategies.bband_strategy',
#         'train_gradient_boosting_train_window_38_train_period_1_trading_strategy_strategies.macd_strategy',
#         'train_gradient_boosting_train_window_461_train_period_1_trading_strategy_strategies.bband_strategy',
#         'train_gradient_boosting_train_window_461_train_period_1_trading_strategy_strategies.macd_strategy',
#         'train_logistic_regression_train_window_38_train_period_1_trading_strategy_strategies.bband_strategy'
#     ]
# ]

In [None]:
# Folder holding one sub-directory per 'test_*' run.
TEST_RUNS_DIR = './data'

results_dict = {}

for path in os.listdir(TEST_RUNS_DIR):
    # Keep only 'test_' run folders; loose CSV files in the directory are skipped.
    if path.endswith('.csv') or not path.startswith('test_'):
        continue

    print(path)
    run_dir = os.path.join(TEST_RUNS_DIR, path)
    metrics = results_dict[path] = {}

    # Each section is best-effort: a missing or malformed file falls back to
    # zeros instead of aborting the scan. `except Exception` (rather than a
    # bare `except`) lets KeyboardInterrupt / SystemExit propagate, and the
    # printed warning shows why a metric fell back.

    # Wallet: the last recorded wallet value, as-is.
    try:
        wallet = pd.read_csv(os.path.join(run_dir, 'wallet.csv'))
        metrics['wallet'] = wallet.tail(1).iloc[0]['wallet']
    except Exception as exc:
        print(f'  [warn] wallet metrics unavailable: {exc!r}')
        metrics['wallet'] = 0

    # Orders: operation counts and the share of profitable operations.
    try:
        orders = pd.read_csv(os.path.join(run_dir, 'orders.csv'))
        metrics['buys'] = orders[orders['open_date'].notna()].shape[0]
        metrics['sells'] = orders[orders['close_date'].notna()].shape[0]

        avg_incomes = orders.groupby('ticker')['profit'].sum().mean()
        metrics['avg_incomes'] = avg_incomes

        # NOTE(review): zero-profit orders count as bad here (> 0 / <= 0),
        # whereas the training-experiments loop earlier in this notebook
        # counts them as good (>= 0 / < 0) -- confirm which is intended.
        metrics['good_operations'] = orders[orders['profit'] > 0].shape[0]
        metrics['bad_operations'] = orders[orders['profit'] <= 0].shape[0]

        # Raises ZeroDivisionError when there are no orders; handled below.
        metrics['operation_ratio'] = metrics['good_operations'] / (metrics['good_operations'] + metrics['bad_operations'])
    except Exception as exc:
        print(f'  [warn] order metrics unavailable: {exc!r}')
        metrics['buys'] = 0
        metrics['sells'] = 0
        avg_incomes = 0
        metrics['avg_incomes'] = 0
        metrics['good_operations'] = 0
        metrics['bad_operations'] = 0
        metrics['operation_ratio'] = 0

    # Training AUC: mean of the 'auc' column in train_results.csv.
    try:
        train_results = pd.read_csv(os.path.join(run_dir, 'train_results.csv'))
        avg_train_auc = train_results['auc'].mean()
        metrics['avg_train_auc'] = avg_train_auc
    except Exception as exc:
        print(f'  [warn] train AUC unavailable: {exc!r}')
        metrics['avg_train_auc'] = 0

    # Test AUC: averaged over the configured tickers.
    try:
        stock_predictions = pd.read_csv(os.path.join(run_dir, 'stock_predictions.csv'))
        true_values = pd.read_csv(os.path.join(run_dir, 'stock_true_values.csv'))

        avg_auc_score = calculate_auc_avg(tickers, stock_predictions, true_values)
        metrics['avg_test_auc'] = avg_auc_score
    except Exception as exc:
        print(f'  [warn] test AUC unavailable: {exc!r}')
        metrics['avg_test_auc'] = 0

# One row per run folder, one column per metric.
results = pd.DataFrame.from_dict(results_dict, orient='index')

In [None]:
# Display the per-run metrics table held in `results`.
results

In [None]:
# Look up a hand-picked set of test runs by folder name.
# The frame is read from `results`: `results_test` is never assigned anywhere
# in this notebook, so the original lookup raised NameError.
# .loc with a list of labels raises KeyError if any listed run is missing.
results.loc[
    [
        'test_gradient_boosting_train_window_38_train_period_1_trading_strategy_strategies.ml_bband_strategy',
        'test_gradient_boosting_train_window_38_train_period_1_trading_strategy_strategies.ml_macd_strategy',
        'test_gradient_boosting_train_window_461_train_period_1_trading_strategy_strategies.ml_bband_strategy',
        'test_gradient_boosting_train_window_461_train_period_1_trading_strategy_strategies.ml_macd_strategy',
        'test_logistic_regression_train_window_38_train_period_1_trading_strategy_strategies.ml_bband_strategy'
    ]
]