In [1]:
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, auc, roc_curve
import itertools
import yaml
import os
import numpy as np
from sklearn.metrics import auc
from statistics import mean 

In [2]:
def calculate_auc_avg(tickers, predictions, true_values):
    predictions = predictions.sort_values(by='fecha')
    true_values = true_values.sort_values(by='fecha')
    
    
    # renombro las columnas
    column_names = {}
    for column in predictions.columns:
        if column != 'fecha':
            column_names[column] = f'{column}_proba'
    
    predictions = predictions.rename(columns=column_names)
    
    column_names = {}
    for column in true_values.columns:
        if column != 'fecha':
            column_names[column] = f'{column}_true'
    
    true_values = true_values.rename(columns=column_names)
    performance = pd.concat(
        [
            predictions,
            true_values
        ], axis=1, join="inner"
    )
    
    auc_list = []
    for ticker in tickers:
        y_true = performance[performance[f'{ticker}_true'].notna()][f'{ticker}_true']
        y_pred = performance[performance[f'{ticker}_proba'].notna()][f'{ticker}_proba']
        
        fpr, tpr, thresholds = roc_curve(y_true, y_pred)
        auc_score = auc(fpr, tpr)
        
        auc_list.append(auc_score)

    return mean(auc_list)

In [3]:
def max_drawdown(serie):
    max_valor_acumulado = serie[0]
    max_dd = 0

    for valor_actual in serie[1:]:
        if valor_actual > max_valor_acumulado:
            max_valor_acumulado = valor_actual
        else:
            dd = (max_valor_acumulado - valor_actual) / max_valor_acumulado
            if dd > max_dd:
                max_dd = dd

    return max_dd

In [4]:
with open('configs/project_config.yml', 'r') as archivo:
    config = yaml.safe_load(archivo)

tickers = config["tickers"] 
tickers

['EURUSD', 'GBPUSD', 'USDJPY']

In [5]:
results_dict = {}

for path in os.listdir('./data'):
    if not path.endswith('.csv') and path.startswith('mode_train'):
        print(path)
        results_dict[path] = {}

        try:
            wallet = pd.read_csv(os.path.join('./data', path, 'wallet.csv'))
            results_dict[path]['wallet'] = wallet.tail(1).iloc[0]['wallet']
            results_dict[path]['max_drawdown'] = max_drawdown(wallet['wallet'])
        except:
            results_dict[path]['wallet'] = 0

        try:
            orders = pd.read_csv(os.path.join('./data', path, 'orders.csv'))
            results_dict[path]['buys'] = orders[orders['open_date'].notna()].shape[0]
            results_dict[path]['sells'] = orders[orders['close_date'].notna()].shape[0]
           
            avg_incomes = orders.groupby('ticker')['profit'].sum().mean()
            results_dict[path]['avg_incomes'] = avg_incomes
            results_dict[path]['good_operations'] = orders[orders['profit'] >= 0].shape[0]
            results_dict[path]['bad_operations'] = orders[orders['profit'] < 0].shape[0]

            results_dict[path]['operation_ratio'] = results_dict[path]['good_operations'] / (results_dict[path]['good_operations'] + results_dict[path]['bad_operations']) 

            results_dict[path]['sharpe_ratio'] = (orders['profit'].mean() - 0.04) / orders['profit'].std()
        
            results_dict[path]['winning_rate'] = orders[orders['profit'] >= 0]['profit'].mean() / (-1 * orders[orders['profit'] < 0]['profit'].mean())
            
            results_dict[path]['recover_factor'] = orders[orders['profit'] >= 0]['profit'].sum() / (-1 * orders[orders['profit'] < 0]['profit'].sum())
        
        except:
            results_dict[path]['buys'] = 0
            results_dict[path]['sells'] = 0
            avg_incomes = 0
            results_dict[path]['avg_incomes'] = 0
            results_dict[path]['good_operations'] = 0
            results_dict[path]['bad_operations'] = 0
            results_dict[path]['operation_ratio'] = 0

        try:
            train_results = pd.read_csv(os.path.join('./data', path, 'train_results.csv'))
            avg_train_auc = train_results['auc'].mean()
            results_dict[path]['avg_train_auc'] = avg_train_auc
        except:
            results_dict[path]['avg_train_auc'] = 0
            
        try:
            stock_predictions = pd.read_csv(os.path.join('./data', path, 'stock_predictions.csv'))
            true_values = pd.read_csv(os.path.join('./data', path, 'stock_true_values.csv'))

            avg_auc_score = calculate_auc_avg(tickers, stock_predictions, true_values)
            results_dict[path]['avg_test_auc'] = avg_auc_score
        except:
            results_dict[path]['avg_test_auc'] = 0

results = pd.DataFrame.from_dict(results_dict, orient='index')

mode_train-model_gradient_boosting-trainwindow_38-trainperiod_1-tradingstrategy_strategies.bband_strategy-stop_loss_in_pips_15-periods_forward_target_1
mode_train-model_gradient_boosting-trainwindow_38-trainperiod_1-tradingstrategy_strategies.bband_strategy-stop_loss_in_pips_15-periods_forward_target_10
mode_train-model_gradient_boosting-trainwindow_38-trainperiod_1-tradingstrategy_strategies.bband_strategy-stop_loss_in_pips_15-periods_forward_target_5
mode_train-model_gradient_boosting-trainwindow_38-trainperiod_1-tradingstrategy_strategies.macd_strategy-stop_loss_in_pips_15-periods_forward_target_1
mode_train-model_gradient_boosting-trainwindow_38-trainperiod_1-tradingstrategy_strategies.macd_strategy-stop_loss_in_pips_15-periods_forward_target_10
mode_train-model_gradient_boosting-trainwindow_38-trainperiod_1-tradingstrategy_strategies.macd_strategy-stop_loss_in_pips_15-periods_forward_target_5
mode_train-model_gradient_boosting-trainwindow_38-trainperiod_14-tradingstrategy_strategi

  results_dict[path]['recover_factor'] = orders[orders['profit'] >= 0]['profit'].sum() / (-1 * orders[orders['profit'] < 0]['profit'].sum())


In [6]:
beta = 0.1

results['buy_ratio'] = results['buys'] / 30 
results['operation_ratio_2'] = ((1 + beta**2) * results['operation_ratio'] * results['buy_ratio']) / (beta * results['operation_ratio'] + results['buy_ratio'])

In [7]:
results.sort_values(by=['operation_ratio_2'], ascending=[False])[
    [
        'operation_ratio_2',
        'operation_ratio', 
        'buy_ratio',
        'winning_rate',
        'recover_factor',
        'sharpe_ratio',
        'max_drawdown',
        'good_operations',
        'bad_operations',
        'avg_train_auc',
        'avg_test_auc',
        'wallet',
    ]
]

Unnamed: 0,operation_ratio_2,operation_ratio,buy_ratio,winning_rate,recover_factor,sharpe_ratio,max_drawdown,good_operations,bad_operations,avg_train_auc,avg_test_auc,wallet
mode_train-model_random_forest-trainwindow_76-trainperiod_1-tradingstrategy_strategies.bband_strategy-stop_loss_in_pips_15-periods_forward_target_5,0.950428,0.964912,3.800000,1.584610,43.576763,0.537206,0.010895,110,4,0.989325,0.746934,240.872245
mode_train-model_random_forest-trainwindow_38-trainperiod_1-tradingstrategy_strategies.bband_strategy-stop_loss_in_pips_15-periods_forward_target_5,0.938604,0.953704,3.633333,1.154504,23.782774,0.595406,0.012966,103,5,1.000000,0.762893,197.739240
mode_train-model_random_forest-trainwindow_38-trainperiod_7-tradingstrategy_strategies.bband_strategy-stop_loss_in_pips_15-periods_forward_target_10,0.872910,0.887755,3.266667,0.383601,3.033936,0.241660,0.101683,87,11,1.000000,0.672588,151.656940
mode_train-model_random_forest-trainwindow_38-trainperiod_1-tradingstrategy_strategies.bband_strategy-stop_loss_in_pips_15-periods_forward_target_10,0.821886,0.827586,4.866667,0.827564,3.972308,0.358332,0.027369,120,25,0.999605,0.832566,175.209815
mode_train-model_gradient_boosting-trainwindow_38-trainperiod_1-tradingstrategy_strategies.bband_strategy-stop_loss_in_pips_15-periods_forward_target_5,0.815573,0.817708,6.466667,0.592523,2.657887,0.255337,0.147637,157,35,0.916807,0.671539,214.062785
...,...,...,...,...,...,...,...,...,...,...,...,...
mode_train-model_gradient_boosting-trainwindow_76-trainperiod_14-tradingstrategy_strategies.bband_strategy-stop_loss_in_pips_15-periods_forward_target_1,,0.000000,0.000000,,,,,0,0,0.716881,0.498490,0.000000
mode_train-model_gradient_boosting-trainwindow_76-trainperiod_14-tradingstrategy_strategies.macd_strategy-stop_loss_in_pips_15-periods_forward_target_1,,0.000000,0.000000,,,,,0,0,0.716881,0.498490,0.000000
mode_train-model_gradient_boosting-trainwindow_76-trainperiod_7-tradingstrategy_strategies.bband_strategy-stop_loss_in_pips_15-periods_forward_target_1,,0.000000,0.000000,,,,,0,0,0.713123,0.492644,0.000000
mode_train-model_gradient_boosting-trainwindow_76-trainperiod_7-tradingstrategy_strategies.macd_strategy-stop_loss_in_pips_15-periods_forward_target_1,,0.000000,0.000000,,,,,0,0,0.713123,0.492644,0.000000


In [8]:
0/0

ZeroDivisionError: division by zero

In [None]:
# results.loc[
#     [
#         'train_gradient_boosting_train_window_38_train_period_1_trading_strategy_strategies.bband_strategy',
#         'train_gradient_boosting_train_window_38_train_period_1_trading_strategy_strategies.macd_strategy',
#         'train_gradient_boosting_train_window_461_train_period_1_trading_strategy_strategies.bband_strategy',
#         'train_gradient_boosting_train_window_461_train_period_1_trading_strategy_strategies.macd_strategy',
#         'train_logistic_regression_train_window_38_train_period_1_trading_strategy_strategies.bband_strategy'
#     ]
# ]

In [None]:
results_dict = {}

for path in os.listdir('./data'):
    if not path.endswith('.csv') and path.startswith('test_'):
        print(path)
        results_dict[path] = {}

        try:
            wallet = pd.read_csv(os.path.join('./data', path, 'wallet.csv'))
            results_dict[path]['wallet'] = wallet.tail(1).iloc[0]['wallet']
        except:
            results_dict[path]['wallet'] = 0

        try:
            orders = pd.read_csv(os.path.join('./data', path, 'orders.csv'))
            results_dict[path]['buys'] = orders[orders['open_date'].notna()].shape[0]
            results_dict[path]['sells'] = orders[orders['close_date'].notna()].shape[0]
           
            avg_incomes = orders.groupby('ticker')['profit'].sum().mean()
            results_dict[path]['avg_incomes'] = avg_incomes
            results_dict[path]['good_operations'] = orders[orders['profit'] > 0].shape[0]
            results_dict[path]['bad_operations'] = orders[orders['profit'] <= 0].shape[0]
            results_dict[path]['operation_ratio'] = results_dict[path]['good_operations'] / (results_dict[path]['good_operations'] + results_dict[path]['bad_operations']) 

        except:
            results_dict[path]['buys'] = 0
            results_dict[path]['sells'] = 0
            avg_incomes = 0
            results_dict[path]['avg_incomes'] = 0
            results_dict[path]['good_operations'] = 0
            results_dict[path]['bad_operations'] = 0
            results_dict[path]['operation_ratio'] = 0

        try:
            train_results = pd.read_csv(os.path.join('./data', path, 'train_results.csv'))
            avg_train_auc = train_results['auc'].mean()
            results_dict[path]['avg_train_auc'] = avg_train_auc
        except:
            results_dict[path]['avg_train_auc'] = 0
            
        try:
            stock_predictions = pd.read_csv(os.path.join('./data', path, 'stock_predictions.csv'))
            true_values = pd.read_csv(os.path.join('./data', path, 'stock_true_values.csv'))

            avg_auc_score = calculate_auc_avg(tickers, stock_predictions, true_values)
            results_dict[path]['avg_test_auc'] = avg_auc_score
        except:
            results_dict[path]['avg_test_auc'] = 0

results = pd.DataFrame.from_dict(results_dict, orient='index')

In [None]:
results

In [None]:
results_test.loc[
    [
        'test_gradient_boosting_train_window_38_train_period_1_trading_strategy_strategies.ml_bband_strategy',
        'test_gradient_boosting_train_window_38_train_period_1_trading_strategy_strategies.ml_macd_strategy',
        'test_gradient_boosting_train_window_461_train_period_1_trading_strategy_strategies.ml_bband_strategy',
        'test_gradient_boosting_train_window_461_train_period_1_trading_strategy_strategies.ml_macd_strategy',
        'test_logistic_regression_train_window_38_train_period_1_trading_strategy_strategies.ml_bband_strategy'
    ]
]