In [27]:
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, auc, roc_curve
import itertools
import yaml
import os
import numpy as np
from sklearn.metrics import auc
from statistics import mean 

In [28]:
def calculate_auc_avg(tickers, predictions, true_values):
    """Return the mean ROC AUC over ``tickers``.

    Parameters
    ----------
    tickers : iterable of str
        Column names present in both frames; one AUC is computed per ticker.
    predictions : pandas.DataFrame
        Must contain a ``fecha`` column plus one score column per ticker.
    true_values : pandas.DataFrame
        Must contain a ``fecha`` column plus one label column per ticker.

    Returns
    -------
    float
        Arithmetic mean of the per-ticker AUC values.

    Raises
    ------
    KeyError
        If a ticker has no column in one of the frames.
    statistics.StatisticsError
        If ``tickers`` is empty.
    """
    predictions = predictions.sort_values(by='fecha')
    true_values = true_values.sort_values(by='fecha')

    # Suffix every non-'fecha' column so scores and labels for the same ticker
    # can live side by side in one frame.
    predictions = predictions.rename(
        columns={col: f'{col}_proba' for col in predictions.columns if col != 'fecha'}
    )
    true_values = true_values.rename(
        columns={col: f'{col}_true' for col in true_values.columns if col != 'fecha'}
    )

    # NOTE(review): concat pairs rows by index label, not by 'fecha'; this
    # presumably relies on both frames sharing the same row index — confirm.
    performance = pd.concat([predictions, true_values], axis=1, join="inner")

    auc_list = []
    for ticker in tickers:
        true_col = f'{ticker}_true'
        proba_col = f'{ticker}_proba'

        # Drop a row when EITHER value is missing. Filtering each column with
        # its own mask can yield arrays of different length, or same-length
        # arrays whose elements no longer belong to the same row.
        paired = performance[[true_col, proba_col]].dropna()

        fpr, tpr, _ = roc_curve(paired[true_col], paired[proba_col])
        auc_list.append(auc(fpr, tpr))

    return mean(auc_list)

In [29]:
# Read the project configuration and expose the list of tickers it declares.
with open('configs/project_config.yml', mode='r') as config_file:
    config = yaml.safe_load(config_file)

tickers = config["tickers"]
tickers

['YPF', 'BBAR', 'BMA', 'CEPU', 'EDN', 'TEO', 'LOMA']

In [31]:
# Build one summary row per training run found under ./data (entries named train_*).
results_dict = {}

for path in os.listdir('./data'):
    # Skip loose CSV files and anything that is not a training-run entry.
    if path.endswith('.csv') or not path.startswith('train_'):
        continue

    print(path)
    run_dir = os.path.join('./data', path)

    # A run whose orders file is missing, empty or unparsable counts as "no orders".
    try:
        orders = pd.read_csv(os.path.join(run_dir, 'orders.csv'))
    except (OSError, pd.errors.EmptyDataError, pd.errors.ParserError):
        orders = pd.DataFrame()

    wallet = pd.read_csv(os.path.join(run_dir, 'wallet.csv'))

    # Reset both frames on failure: leaving them untouched would make this run
    # reuse whatever the previous iteration (or an earlier cell) had loaded and
    # report an AUC that belongs to a different run.
    try:
        stock_predictions = pd.read_csv(os.path.join(run_dir, 'stock_predictions.csv'))
        true_values = pd.read_csv(os.path.join(run_dir, 'stock_true_values.csv'))
    except (OSError, pd.errors.EmptyDataError, pd.errors.ParserError):
        stock_predictions = None
        true_values = None

    # Orders with an open/close date; an orders frame lacking the columns counts as zero.
    try:
        buys = int(orders['open_date'].notna().sum())
        sells = int(orders['close_date'].notna().sum())
    except KeyError:
        buys = sells = 0

    # Value of the 'wallet' column on the last row of wallet.csv.
    try:
        final_wallet = wallet.tail(1).iloc[0]['wallet']
    except (KeyError, IndexError):
        final_wallet = 0

    if stock_predictions is None:
        avg_auc_score = 0
    else:
        try:
            avg_auc_score = calculate_auc_avg(tickers, stock_predictions, true_values)
        except Exception as exc:
            # Keep the summary going, but make the failure visible instead of
            # letting it pass as a genuine AUC of 0.
            print(f'  avg_auc could not be computed for {path}: {exc!r}')
            avg_auc_score = 0

    try:
        avg_incomes = orders.groupby('ticker')['profit'].sum().mean()
        # Plain ints so that a run with no profit values raises ZeroDivisionError
        # below instead of yielding NaN through numpy division.
        good_operations = int((orders['profit'] > 0).sum())
        bad_operations = int((orders['profit'] <= 0).sum())
        operation_ratio = good_operations / (good_operations + bad_operations)
    except (KeyError, ZeroDivisionError):
        # No usable orders: every trade statistic is reported as 0.
        avg_incomes = 0
        good_operations = 0
        bad_operations = 0
        operation_ratio = 0

    results_dict[path] = {
        'buys': buys,
        'sells': sells,
        'wallet': final_wallet,
        'avg_auc': avg_auc_score,
        'avg_incomes': avg_incomes,
        'good_operations': good_operations,
        'bad_operations': bad_operations,
        'operation_ratio': operation_ratio,
    }


results = pd.DataFrame.from_dict(results_dict, orient='index')

train_None_train_window_0_train_period_0_trading_strategy_strategies.bband_strategy
train_None_train_window_0_train_period_0_trading_strategy_strategies.macd_strategy
train_None_train_window_0_train_period_0_trading_strategy_strategies.ma_strategy
train_random_forest_train_window_38_train_period_1_trading_strategy_strategies.bband_strategy
train_random_forest_train_window_38_train_period_1_trading_strategy_strategies.macd_strategy
train_random_forest_train_window_38_train_period_1_trading_strategy_strategies.ma_strategy
train_random_forest_train_window_38_train_period_1_trading_strategy_strategies.ml_strategy


In [37]:
# Rank the training runs by operation_ratio, highest first.
results.sort_values(by='operation_ratio', ascending=False)

Unnamed: 0,buys,sells,wallet,avg_auc,avg_incomes,good_operations,bad_operations,operation_ratio
train_random_forest_train_window_38_train_period_1_trading_strategy_strategies.bband_strategy,142,142,128.299121,0.76519,4.042732,131,11,0.922535
train_random_forest_train_window_38_train_period_1_trading_strategy_strategies.ml_strategy,14,7,104.107113,0.76519,0.58673,6,1,0.857143
train_random_forest_train_window_38_train_period_1_trading_strategy_strategies.macd_strategy,100,99,142.099468,0.76519,6.01421,73,26,0.737374
train_random_forest_train_window_38_train_period_1_trading_strategy_strategies.ma_strategy,46,40,113.71308,0.76519,1.959011,27,13,0.675
train_None_train_window_0_train_period_0_trading_strategy_strategies.bband_strategy,990,985,95.694334,0.583195,-0.615095,657,328,0.667005
train_None_train_window_0_train_period_0_trading_strategy_strategies.ma_strategy,135,128,118.399752,0.583195,2.628536,66,62,0.515625
train_None_train_window_0_train_period_0_trading_strategy_strategies.macd_strategy,403,399,130.672527,0.583195,4.38179,171,228,0.428571


In [6]:
# Training runs singled out for a closer look.
# NOTE(review): none of these labels appear among the directories printed by the
# training-summary cell, and .loc raises KeyError when a listed label is missing
# from the index — confirm ./data still contains these runs before re-running.
train_runs_of_interest = [
    'train_gradient_boosting_train_window_38_train_period_1_trading_strategy_strategies.bband_strategy',
    'train_gradient_boosting_train_window_38_train_period_1_trading_strategy_strategies.macd_strategy',
    'train_gradient_boosting_train_window_461_train_period_1_trading_strategy_strategies.bband_strategy',
    'train_gradient_boosting_train_window_461_train_period_1_trading_strategy_strategies.macd_strategy',
    'train_logistic_regression_train_window_38_train_period_1_trading_strategy_strategies.bband_strategy',
]
results.loc[train_runs_of_interest]

Unnamed: 0,buys,sells,wallet,avg_auc,avg_incomes,good_operations,bad_operations,operation_ratio
train_gradient_boosting_train_window_38_train_period_1_trading_strategy_strategies.bband_strategy,731,728,95.667083,0.761967,-0.618988,487,241,0.668956
train_gradient_boosting_train_window_38_train_period_1_trading_strategy_strategies.macd_strategy,288,287,113.124462,0.761967,1.874923,127,160,0.442509
train_gradient_boosting_train_window_461_train_period_1_trading_strategy_strategies.bband_strategy,462,459,109.780353,0.750839,1.397193,305,154,0.664488
train_gradient_boosting_train_window_461_train_period_1_trading_strategy_strategies.macd_strategy,188,187,126.969446,0.750839,3.852778,83,104,0.44385
train_logistic_regression_train_window_38_train_period_1_trading_strategy_strategies.bband_strategy,731,728,95.667083,0.679663,-0.618988,487,241,0.668956


In [25]:
# Build one summary row per test run found under ./data (entries named test_*).
results_dict = {}

for path in os.listdir('./data'):
    # Skip loose CSV files and anything that is not a test-run entry.
    if path.endswith('.csv') or not path.startswith('test_'):
        continue

    print(path)
    run_dir = os.path.join('./data', path)

    # A run whose orders file is missing, empty or unparsable counts as "no orders".
    try:
        orders = pd.read_csv(os.path.join(run_dir, 'orders.csv'))
    except (OSError, pd.errors.EmptyDataError, pd.errors.ParserError):
        orders = pd.DataFrame()

    # These three files are required for a test run; a missing one stops the cell.
    wallet = pd.read_csv(os.path.join(run_dir, 'wallet.csv'))
    stock_predictions = pd.read_csv(os.path.join(run_dir, 'stock_predictions.csv'))
    true_values = pd.read_csv(os.path.join(run_dir, 'stock_true_values.csv'))

    # Orders with an open/close date; an orders frame lacking the columns counts as zero.
    try:
        buys = int(orders['open_date'].notna().sum())
        sells = int(orders['close_date'].notna().sum())
    except KeyError:
        buys = sells = 0

    # Value of the 'wallet' column on the last row of wallet.csv.
    try:
        final_wallet = wallet.tail(1).iloc[0]['wallet']
    except (KeyError, IndexError):
        final_wallet = 0

    try:
        avg_auc_score = calculate_auc_avg(tickers, stock_predictions, true_values)
    except Exception as exc:
        # Keep the summary going, but make the failure visible instead of
        # letting it pass as a genuine AUC of 0.
        print(f'  avg_auc could not be computed for {path}: {exc!r}')
        avg_auc_score = 0

    try:
        avg_incomes = orders.groupby('ticker')['profit'].sum().mean()
        # Plain ints so that a run with no profit values raises ZeroDivisionError
        # below instead of yielding NaN through numpy division.
        good_operations = int((orders['profit'] > 0).sum())
        bad_operations = int((orders['profit'] <= 0).sum())
        operation_ratio = good_operations / (good_operations + bad_operations)
    except (KeyError, ZeroDivisionError):
        # No usable orders: every trade statistic is reported as 0.
        avg_incomes = 0
        good_operations = 0
        bad_operations = 0
        operation_ratio = 0

    results_dict[path] = {
        'buys': buys,
        'sells': sells,
        'wallet': final_wallet,
        'avg_auc': avg_auc_score,
        'avg_incomes': avg_incomes,
        'good_operations': good_operations,
        'bad_operations': bad_operations,
        'operation_ratio': operation_ratio,
    }


results_test = pd.DataFrame.from_dict(results_dict, orient='index')

test_gradient_boosting_train_window_38_train_period_1_trading_strategy_strategies.ml_bband_strategy
test_gradient_boosting_train_window_38_train_period_1_trading_strategy_strategies.ml_macd_strategy
test_gradient_boosting_train_window_38_train_period_1_trading_strategy_strategies.ml_strategy
test_gradient_boosting_train_window_461_train_period_1_trading_strategy_strategies.ml_bband_strategy
test_gradient_boosting_train_window_461_train_period_1_trading_strategy_strategies.ml_macd_strategy
test_gradient_boosting_train_window_461_train_period_1_trading_strategy_strategies.ml_strategy
test_logistic_regression_train_window_38_train_period_1_trading_strategy_strategies.ml_bband_strategy
test_logistic_regression_train_window_38_train_period_1_trading_strategy_strategies.ml_macd_strategy
test_logistic_regression_train_window_38_train_period_1_trading_strategy_strategies.ml_strategy
test_logistic_regression_train_window_461_train_period_1_trading_strategy_strategies.ml_bband_strategy
test_logi

In [26]:
# Test runs singled out for a closer look, shown in the order listed here.
test_runs_of_interest = [
    'test_gradient_boosting_train_window_38_train_period_1_trading_strategy_strategies.ml_bband_strategy',
    'test_gradient_boosting_train_window_38_train_period_1_trading_strategy_strategies.ml_macd_strategy',
    'test_gradient_boosting_train_window_461_train_period_1_trading_strategy_strategies.ml_bband_strategy',
    'test_gradient_boosting_train_window_461_train_period_1_trading_strategy_strategies.ml_macd_strategy',
    'test_logistic_regression_train_window_38_train_period_1_trading_strategy_strategies.ml_bband_strategy',
]
results_test.loc[test_runs_of_interest]

Unnamed: 0,buys,sells,wallet,avg_auc,avg_incomes,good_operations,bad_operations,operation_ratio
test_gradient_boosting_train_window_38_train_period_1_trading_strategy_strategies.ml_bband_strategy,19,16,112.223917,0.703471,1.746274,14,2,0.875
test_gradient_boosting_train_window_38_train_period_1_trading_strategy_strategies.ml_macd_strategy,10,8,122.732629,0.703471,3.247518,8,0,1.0
test_gradient_boosting_train_window_461_train_period_1_trading_strategy_strategies.ml_bband_strategy,62,57,164.620972,0.727385,9.231567,49,8,0.859649
test_gradient_boosting_train_window_461_train_period_1_trading_strategy_strategies.ml_macd_strategy,31,28,141.174221,0.727385,5.882032,23,5,0.821429
test_logistic_regression_train_window_38_train_period_1_trading_strategy_strategies.ml_bband_strategy,30,27,125.291723,0.665824,3.613103,24,3,0.888889
