In [1]:
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, auc, roc_curve
import itertools
import yaml
import os
import numpy as np
from sklearn.metrics import auc
from statistics import mean
import re

In [2]:
def calculate_auc_avg(tickers, predictions, true_values):
    predictions = predictions.sort_values(by='fecha')
    true_values = true_values.sort_values(by='fecha')
    
    
    # renombro las columnas
    column_names = {}
    for column in predictions.columns:
        if column != 'fecha':
            column_names[column] = f'{column}_proba'
    
    predictions = predictions.rename(columns=column_names)
    
    column_names = {}
    for column in true_values.columns:
        if column != 'fecha':
            column_names[column] = f'{column}_true'
    
    true_values = true_values.rename(columns=column_names)
    performance = pd.concat(
        [
            predictions,
            true_values
        ], axis=1, join="inner"
    )
    
    auc_list = []
    for ticker in tickers:
        y_true = performance[performance[f'{ticker}_true'].notna()][f'{ticker}_true']
        y_pred = performance[performance[f'{ticker}_proba'].notna()][f'{ticker}_proba']
        
        fpr, tpr, thresholds = roc_curve(y_true, y_pred)
        auc_score = auc(fpr, tpr)
        
        auc_list.append(auc_score)

    return mean(auc_list)

In [3]:
with open('configs/project_config.yml', 'r') as archivo:
    config = yaml.safe_load(archivo)

tickers = config["tickers"] 
tickers

['EURUSD', 'GBPUSD', 'USDJPY']

In [4]:
results_dict = {}

for path in os.listdir('./data'):
    if not path.endswith('.csv') and path.startswith('mode_train'):
        print(path)
        results_dict[path] = {}

        try:
            wallet = pd.read_csv(os.path.join('./data', path, 'wallet.csv'))
            results_dict[path]['wallet'] = wallet.tail(1).iloc[0]['wallet']
        except:
            results_dict[path]['wallet'] = 0

        try:
            orders = pd.read_csv(os.path.join('./data', path, 'orders.csv'))
            results_dict[path]['buys'] = orders[orders['open_date'].notna()].shape[0]
            results_dict[path]['sells'] = orders[orders['close_date'].notna()].shape[0]
           
            avg_incomes = orders.groupby('ticker')['profit'].sum().mean()
            results_dict[path]['avg_incomes'] = avg_incomes
            results_dict[path]['good_operations'] = orders[orders['profit'] > 0].shape[0]
            results_dict[path]['bad_operations'] = orders[orders['profit'] <= 0].shape[0]
            results_dict[path]['operation_ratio'] = results_dict[path]['good_operations'] / (results_dict[path]['good_operations'] + results_dict[path]['bad_operations']) 

        except:
            results_dict[path]['buys'] = 0
            results_dict[path]['sells'] = 0
            avg_incomes = 0
            results_dict[path]['avg_incomes'] = 0
            results_dict[path]['good_operations'] = 0
            results_dict[path]['bad_operations'] = 0
            results_dict[path]['operation_ratio'] = 0

        try:
            train_results = pd.read_csv(os.path.join('./data', path, 'train_results.csv'))
            avg_train_auc = train_results['auc'].mean()
            results_dict[path]['avg_train_auc'] = avg_train_auc
        except:
            results_dict[path]['avg_train_auc'] = 0
            
        try:
            stock_predictions = pd.read_csv(os.path.join('./data', path, 'stock_predictions.csv'))
            true_values = pd.read_csv(os.path.join('./data', path, 'stock_true_values.csv'))

            avg_auc_score = calculate_auc_avg(tickers, stock_predictions, true_values)
            results_dict[path]['avg_test_auc'] = avg_auc_score
        except:
            results_dict[path]['avg_test_auc'] = 0

results = pd.DataFrame.from_dict(results_dict, orient='index')

mode_train-model_gradient_boosting-trainwindow_114-trainperiod_1-tradingstrategy_strategies.bband_strategy
mode_train-model_gradient_boosting-trainwindow_114-trainperiod_1-tradingstrategy_strategies.macd_strategy
mode_train-model_gradient_boosting-trainwindow_114-trainperiod_1-tradingstrategy_strategies.ma_strategy
mode_train-model_gradient_boosting-trainwindow_114-trainperiod_1-tradingstrategy_strategies.ml_strategy
mode_train-model_gradient_boosting-trainwindow_114-trainperiod_14-tradingstrategy_strategies.bband_strategy
mode_train-model_gradient_boosting-trainwindow_114-trainperiod_14-tradingstrategy_strategies.macd_strategy
mode_train-model_gradient_boosting-trainwindow_114-trainperiod_14-tradingstrategy_strategies.ma_strategy
mode_train-model_gradient_boosting-trainwindow_114-trainperiod_14-tradingstrategy_strategies.ml_strategy
mode_train-model_gradient_boosting-trainwindow_114-trainperiod_30-tradingstrategy_strategies.bband_strategy
mode_train-model_gradient_boosting-trainwindow

In [5]:
beta = 0.1

results['buy_ratio'] = results['buys'] / results['buys'].max()
results['operation_ratio_2'] = ((1 + beta**2) * results['operation_ratio'] * results['buy_ratio']) / (beta * results['operation_ratio'] + results['buy_ratio'])

results = results.sort_values(by=['operation_ratio_2'], ascending=[False])[
    [
        'operation_ratio_2',
        'operation_ratio', 
        'buy_ratio',
        'good_operations',
        'bad_operations',
        'avg_train_auc',
        'avg_test_auc',
        'wallet',
    ]
]

In [6]:
results = results.reset_index().rename(columns={'index':'config'})

# With / without model

In [7]:
results['window'] = results['config'].apply(lambda x: re.search(r"trainwindow_(.*?)-", x).group(1))
results['with_model'] = np.where(results['window']!='0', 1, 0)

pd.DataFrame(
    results.groupby('with_model')['operation_ratio_2'].mean()
).sort_values(by='operation_ratio_2', ascending=False)

Unnamed: 0_level_0,operation_ratio_2
with_model,Unnamed: 1_level_1
0,0.467023
1,0.20425


In [8]:
results[results['with_model']==0].sort_values(by='operation_ratio_2', ascending=False).head()

Unnamed: 0,config,operation_ratio_2,operation_ratio,buy_ratio,good_operations,bad_operations,avg_train_auc,avg_test_auc,wallet,window,with_model
3,mode_train-model_None-trainwindow_0-trainperio...,0.63155,0.667005,1.0,657,328,0.0,0,95.694334,0,0
46,mode_train-model_None-trainwindow_0-trainperio...,0.391626,0.428571,0.407071,171,228,0.0,0,130.672527,0,0
53,mode_train-model_None-trainwindow_0-trainperio...,0.377891,0.515625,0.136364,66,62,0.0,0,118.399752,0,0


In [9]:
results[results['with_model']==1].sort_values(by='operation_ratio_2', ascending=False).head(3)

Unnamed: 0,config,operation_ratio_2,operation_ratio,buy_ratio,good_operations,bad_operations,avg_train_auc,avg_test_auc,wallet,window,with_model
0,mode_train-model_gradient_boosting-trainwindow...,0.70336,0.859116,0.367677,311,51,0.99583,0,164.52684,38,1
1,mode_train-model_gradient_boosting-trainwindow...,0.665776,0.826087,0.326263,266,56,0.99625,0,143.678815,38,1
2,mode_train-model_gradient_boosting-trainwindow...,0.645991,0.860082,0.249495,209,34,0.99981,0,148.151279,461,1


# Window

In [10]:
results = results[results['with_model'] == 1]
pd.DataFrame(
    results.groupby('window')['operation_ratio_2'].mean()
).sort_values(by='operation_ratio_2', ascending=False)

Unnamed: 0_level_0,operation_ratio_2
window,Unnamed: 1_level_1
38,0.263682
114,0.255792
76,0.20177
461,0.170956
228,0.164837
689,0.163348


# trainperiod

In [11]:
results['trainperiod'] = results['config'].apply(lambda x: re.search(r'trainperiod_(.*?)-', x).group(1))
pd.DataFrame(
    results.groupby('trainperiod')['operation_ratio_2'].mean()
).sort_values(by='operation_ratio_2', ascending=False)

Unnamed: 0_level_0,operation_ratio_2
trainperiod,Unnamed: 1_level_1
1,0.220786
7,0.205062
14,0.201694
30,0.189456


# Model

In [12]:
results['model'] = results['config'].apply(lambda x: re.search(r'model_(.*?)-', x).group(1))
pd.DataFrame(
    results.groupby('model')['operation_ratio_2'].mean()
).sort_values(by='operation_ratio_2', ascending=False)

Unnamed: 0_level_0,operation_ratio_2
model,Unnamed: 1_level_1
gradient_boosting,0.25273
random_forest,0.218578
neural_network,0.193541
logistic_regression,0.147414


# Strategy

In [13]:
results['tradingstrategy'] = results['config'].apply(lambda x: re.search(r'tradingstrategy_(.*)', x).group(1))
pd.DataFrame(
    results.groupby('tradingstrategy')['operation_ratio_2'].mean()
).sort_values(by='operation_ratio_2', ascending=False)

Unnamed: 0_level_0,operation_ratio_2
tradingstrategy,Unnamed: 1_level_1
strategies.bband_strategy,0.349199
strategies.macd_strategy,0.201416
strategies.ma_strategy,0.162157
strategies.ml_strategy,0.108512
