In [6]:
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, auc, roc_curve
import itertools
import yaml
import os
import numpy as np
from sklearn.metrics import auc
from statistics import mean 

In [2]:
def calculate_auc_avg(tickers, predictions, true_values):
    """Return the mean ROC AUC across ``tickers``.

    Parameters
    ----------
    tickers : iterable of str
        Ticker names. Each one must be a column in both ``predictions`` and
        ``true_values``.
    predictions : pandas.DataFrame
        Has a ``'fecha'`` (date) column plus one score column per ticker.
        The scores are passed to ``roc_curve`` as ``y_score``.
    true_values : pandas.DataFrame
        Has a ``'fecha'`` column plus one label column per ticker.
        The labels are passed to ``roc_curve`` as ``y_true``.

    Returns
    -------
    float
        Arithmetic mean of the per-ticker AUC values.

    Raises
    ------
    KeyError
        If a ticker column (or ``'fecha'``) is missing from either frame.
    statistics.StatisticsError
        If ``tickers`` is empty (raised by ``statistics.mean``).
    """
    # Rename the value columns so both frames can live side by side:
    # '<ticker>_proba' for scores and '<ticker>_true' for labels.
    predictions = predictions.rename(
        columns={col: f'{col}_proba' for col in predictions.columns if col != 'fecha'}
    )
    true_values = true_values.rename(
        columns={col: f'{col}_true' for col in true_values.columns if col != 'fecha'}
    )

    # Pair rows by date. Concatenating along axis=1 would align on the index
    # labels instead, which ignores 'fecha' entirely and silently mispairs rows
    # whenever the two frames are not stored in the same order.
    performance = (
        predictions
        .merge(true_values, on='fecha', how='inner')
        .sort_values(by='fecha')
    )

    auc_list = []
    for ticker in tickers:
        true_col = f'{ticker}_true'
        proba_col = f'{ticker}_proba'

        # Drop a row if EITHER side is missing so labels and scores stay the
        # same length and remain paired row by row.
        paired = performance[[true_col, proba_col]].dropna()

        fpr, tpr, _ = roc_curve(paired[true_col], paired[proba_col])
        auc_list.append(auc(fpr, tpr))

    return mean(auc_list)

In [3]:
# Read the project configuration (YAML) and keep the list of tickers to evaluate.
with open('configs/project_config.yml', 'r') as config_file:
    config = yaml.safe_load(config_file)

tickers = config["tickers"]
# Bare last expression so the notebook displays the ticker list.
tickers

['YPF', 'BBAR', 'BMA', 'CEPU', 'EDN', 'TEO', 'LOMA']

In [7]:
# Build one summary row per 'train_*' run directory found under ./data.
# NOTE(review): this cell is nearly identical to the 'test_' cell later in the
# notebook; consider factoring both into one function parameterised by prefix.
results_dict = {}

# sorted() makes the row order deterministic; os.listdir order is arbitrary.
for path in sorted(os.listdir('./data')):
    run_dir = os.path.join('./data', path)

    # Each run is a directory named 'train_*'. Skip CSV files, other prefixes
    # and any stray non-directory entry that would make read_csv fail below.
    if path.endswith('.csv') or not path.startswith('train_') or not os.path.isdir(run_dir):
        continue

    orders = pd.read_csv(os.path.join(run_dir, 'orders.csv'))
    wallet = pd.read_csv(os.path.join(run_dir, 'wallet.csv'))
    stock_predictions = pd.read_csv(os.path.join(run_dir, 'stock_predictions.csv'))
    true_values = pd.read_csv(os.path.join(run_dir, 'stock_true_values.csv'))

    # Last recorded wallet value; fall back to 0 only when the file has no rows
    # or no 'wallet' column, instead of hiding every possible error.
    try:
        final_wallet = wallet['wallet'].iloc[-1]
    except (KeyError, IndexError):
        final_wallet = 0

    results_dict[path] = {
        # An order counts as a buy once it has an open date, and as a sell
        # once it also has a close date.
        'buys': int(orders['open_date'].notna().sum()),
        'sells': int(orders['close_date'].notna().sum()),
        'wallet': final_wallet,
        'avg_auc': calculate_auc_avg(tickers, stock_predictions, true_values),
        # Total profit per ticker, averaged over the tickers that traded.
        'avg_incomes': orders.groupby('ticker')['profit'].sum().mean(),
        # Rows with NaN profit fall in neither bucket.
        'good_operations': int((orders['profit'] > 0).sum()),
        'bad_operations': int((orders['profit'] <= 0).sum()),
    }


results = pd.DataFrame.from_dict(results_dict, orient='index')

In [8]:
# Rank the runs: highest average income first, final wallet value as tie-breaker.
results.sort_values(by=['avg_incomes', 'wallet'], ascending=False)

Unnamed: 0,buys,sells,wallet,avg_auc,avg_incomes,good_operations,bad_operations
train_random_forest_train_window_38_train_period_1_trading_strategy_strategies.ml_macd_strategy,70,70,140.212217,0.767831,5.744602,61,9
train_random_forest_train_window_38_train_period_1_trading_strategy_strategies.ml_bband_strategy,69,68,128.533188,0.767831,4.07617,63,5
train_random_forest_train_window_38_train_period_1_trading_strategy_strategies.ml_rsi_strategy,20,19,115.732161,0.767831,2.622027,19,0
train_random_forest_train_window_38_train_period_1_trading_strategy_strategies.ml_strategy,14,7,104.107113,0.767831,0.58673,6,1


In [13]:
# Build one summary row per 'test_*' run directory found under ./data.
# NOTE(review): this cell is nearly identical to the 'train_' cell earlier in
# the notebook and overwrites its `results`; consider factoring both into one
# function parameterised by prefix.
results_dict = {}

# sorted() makes the row order deterministic; os.listdir order is arbitrary.
for path in sorted(os.listdir('./data')):
    run_dir = os.path.join('./data', path)

    # Each run is a directory named 'test_*'. Skip CSV files, other prefixes
    # and any stray non-directory entry that would make read_csv fail below.
    if path.endswith('.csv') or not path.startswith('test_') or not os.path.isdir(run_dir):
        continue

    orders = pd.read_csv(os.path.join(run_dir, 'orders.csv'))
    wallet = pd.read_csv(os.path.join(run_dir, 'wallet.csv'))
    stock_predictions = pd.read_csv(os.path.join(run_dir, 'stock_predictions.csv'))
    true_values = pd.read_csv(os.path.join(run_dir, 'stock_true_values.csv'))

    # Last recorded wallet value; fall back to 0 only when the file has no rows
    # or no 'wallet' column, instead of hiding every possible error.
    try:
        final_wallet = wallet['wallet'].iloc[-1]
    except (KeyError, IndexError):
        final_wallet = 0

    results_dict[path] = {
        # An order counts as a buy once it has an open date, and as a sell
        # once it also has a close date.
        'buys': int(orders['open_date'].notna().sum()),
        'sells': int(orders['close_date'].notna().sum()),
        'wallet': final_wallet,
        'avg_auc': calculate_auc_avg(tickers, stock_predictions, true_values),
        # Total profit per ticker, averaged over the tickers that traded.
        'avg_incomes': orders.groupby('ticker')['profit'].sum().mean(),
        # Rows with NaN profit fall in neither bucket.
        'good_operations': int((orders['profit'] > 0).sum()),
        'bad_operations': int((orders['profit'] <= 0).sum()),
    }


results = pd.DataFrame.from_dict(results_dict, orient='index')

In [14]:
# Rank the runs: highest average income first, final wallet value as tie-breaker.
results.sort_values(by=['avg_incomes', 'wallet'], ascending=False)

Unnamed: 0,buys,sells,wallet,avg_auc,avg_incomes,good_operations,bad_operations
test_random_forest_train_window_38_train_period_1_trading_strategy_strategies.ml_bband_strategy,36,32,139.177399,0.786438,5.596771,29,3
test_random_forest_train_window_38_train_period_1_trading_strategy_strategies.ml_macd_strategy,18,16,129.362787,0.786438,4.194684,14,2
test_random_forest_train_window_38_train_period_1_trading_strategy_strategies.ml_rsi_strategy,11,9,114.157683,0.786438,2.359614,9,0
test_random_forest_train_window_38_train_period_1_trading_strategy_strategies.ml_strategy,20,13,104.408988,0.786438,0.629855,12,1


In [None]:
# Look up the summary row of a single run by its directory name.
# NOTE(review): this label is not among the index labels in the test results
# table displayed above (only random_forest runs are listed), so .loc will raise
# KeyError unless a matching run directory exists under ./data — confirm.
results.loc['test_gradient_boosting_train_window_38_train_period_1_trading_strategy_strategies.macd_ml_strategy_only_one_tunning_True']