In [1]:
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, auc, roc_curve
import itertools
import yaml
import os
import numpy as np
from sklearn.metrics import auc
from statistics import mean 

In [2]:
def calculate_auc_avg(tickers, predictions, true_values):
    predictions = predictions.sort_values(by='fecha')
    true_values = true_values.sort_values(by='fecha')
    
    
    # renombro las columnas
    column_names = {}
    for column in predictions.columns:
        if column != 'fecha':
            column_names[column] = f'{column}_proba'
    
    predictions = predictions.rename(columns=column_names)
    
    column_names = {}
    for column in true_values.columns:
        if column != 'fecha':
            column_names[column] = f'{column}_true'
    
    true_values = true_values.rename(columns=column_names)
    performance = pd.concat(
        [
            predictions,
            true_values
        ], axis=1, join="inner"
    )
    
    auc_list = []
    for ticker in tickers:
        y_true = performance[performance[f'{ticker}_true'].notna()][f'{ticker}_true']
        y_pred = performance[performance[f'{ticker}_proba'].notna()][f'{ticker}_proba']
        
        fpr, tpr, thresholds = roc_curve(y_true, y_pred)
        auc_score = auc(fpr, tpr)
        
        auc_list.append(auc_score)

    return mean(auc_list)

In [3]:
with open('configs/project_config.yml', 'r') as archivo:
    config = yaml.safe_load(archivo)

tickers = config["tickers"] 
tickers

['YPF', 'BBAR', 'BMA', 'CEPU', 'EDN', 'TEO', 'LOMA']

In [11]:
results_dict = {}

for path in os.listdir('./data'):
    if not path.endswith('.csv') and path.startswith('train_'):
        print(path)
        results_dict[path] = {}

        try:
            wallet = pd.read_csv(os.path.join('./data', path, 'wallet.csv'))
            results_dict[path]['wallet'] = wallet.tail(1).iloc[0]['wallet']
        except:
            results_dict[path]['wallet'] = 0

        try:
            orders = pd.read_csv(os.path.join('./data', path, 'orders.csv'))
            results_dict[path]['buys'] = orders[orders['open_date'].notna()].shape[0]
            results_dict[path]['sells'] = orders[orders['close_date'].notna()].shape[0]
           
            avg_incomes = orders.groupby('ticker')['profit'].sum().mean()
            results_dict[path]['avg_incomes'] = avg_incomes
            results_dict[path]['good_operations'] = orders[orders['profit'] > 0].shape[0]
            results_dict[path]['bad_operations'] = orders[orders['profit'] <= 0].shape[0]
            results_dict[path]['operation_ratio'] = results_dict[path]['good_operations'] / (results_dict[path]['good_operations'] + results_dict[path]['bad_operations']) 

        except:
            results_dict[path]['buys'] = 0
            results_dict[path]['sells'] = 0
            avg_incomes = 0
            results_dict[path]['avg_incomes'] = 0
            results_dict[path]['good_operations'] = 0
            results_dict[path]['bad_operations'] = 0
            results_dict[path]['operation_ratio'] = 0

        try:
            train_results = pd.read_csv(os.path.join('./data', path, 'train_results.csv'))
            avg_train_auc = train_results['auc'].mean()
            results_dict[path]['avg_train_auc'] = avg_train_auc
        except:
            results_dict[path]['avg_train_auc'] = 0
            
        try:
            stock_predictions = pd.read_csv(os.path.join('./data', path, 'stock_predictions.csv'))
            true_values = pd.read_csv(os.path.join('./data', path, 'stock_true_values.csv'))

            avg_auc_score = calculate_auc_avg(tickers, stock_predictions, true_values)
            results_dict[path]['avg_test_auc'] = avg_auc_score
        except:
            results_dict[path]['avg_test_auc'] = 0

results = pd.DataFrame.from_dict(results_dict, orient='index')

train_None_train_window_0_train_period_0_trading_strategy_strategies.bband_strategy
train_None_train_window_0_train_period_0_trading_strategy_strategies.macd_strategy
train_None_train_window_0_train_period_0_trading_strategy_strategies.ma_strategy
train_random_forest_train_window_38_train_period_1_trading_strategy_strategies.bband_strategy
train_random_forest_train_window_38_train_period_1_trading_strategy_strategies.macd_strategy
train_random_forest_train_window_38_train_period_1_trading_strategy_strategies.ma_strategy
train_random_forest_train_window_38_train_period_1_trading_strategy_strategies.ml_strategy


In [12]:
results.sort_values(by=['operation_ratio'], ascending=[False])

Unnamed: 0,wallet,buys,sells,avg_incomes,good_operations,bad_operations,operation_ratio,avg_test_auc,avg_train_auc
train_random_forest_train_window_38_train_period_1_trading_strategy_strategies.bband_strategy,115.068151,77,77,2.152593,72,5,0.935065,0.721908,0.899962
train_random_forest_train_window_38_train_period_1_trading_strategy_strategies.ml_strategy,103.335692,14,7,0.476527,5,2,0.714286,0.721908,0.899962
train_random_forest_train_window_38_train_period_1_trading_strategy_strategies.macd_strategy,122.160818,67,66,3.165831,47,19,0.712121,0.721908,0.899962
train_None_train_window_0_train_period_0_trading_strategy_strategies.bband_strategy,95.694334,990,985,-0.615095,657,328,0.667005,0.0,0.0
train_random_forest_train_window_38_train_period_1_trading_strategy_strategies.ma_strategy,109.282468,45,39,1.326067,26,13,0.666667,0.721908,0.899962
train_None_train_window_0_train_period_0_trading_strategy_strategies.ma_strategy,118.399752,135,128,2.628536,66,62,0.515625,0.0,0.0
train_None_train_window_0_train_period_0_trading_strategy_strategies.macd_strategy,130.672527,403,399,4.38179,171,228,0.428571,0.0,0.0


In [6]:
# results.loc[
#     [
#         'train_gradient_boosting_train_window_38_train_period_1_trading_strategy_strategies.bband_strategy',
#         'train_gradient_boosting_train_window_38_train_period_1_trading_strategy_strategies.macd_strategy',
#         'train_gradient_boosting_train_window_461_train_period_1_trading_strategy_strategies.bband_strategy',
#         'train_gradient_boosting_train_window_461_train_period_1_trading_strategy_strategies.macd_strategy',
#         'train_logistic_regression_train_window_38_train_period_1_trading_strategy_strategies.bband_strategy'
#     ]
# ]

In [13]:
results_dict = {}

for path in os.listdir('./data'):
    if not path.endswith('.csv') and path.startswith('test_'):
        print(path)
        results_dict[path] = {}

        try:
            wallet = pd.read_csv(os.path.join('./data', path, 'wallet.csv'))
            results_dict[path]['wallet'] = wallet.tail(1).iloc[0]['wallet']
        except:
            results_dict[path]['wallet'] = 0

        try:
            orders = pd.read_csv(os.path.join('./data', path, 'orders.csv'))
            results_dict[path]['buys'] = orders[orders['open_date'].notna()].shape[0]
            results_dict[path]['sells'] = orders[orders['close_date'].notna()].shape[0]
           
            avg_incomes = orders.groupby('ticker')['profit'].sum().mean()
            results_dict[path]['avg_incomes'] = avg_incomes
            results_dict[path]['good_operations'] = orders[orders['profit'] > 0].shape[0]
            results_dict[path]['bad_operations'] = orders[orders['profit'] <= 0].shape[0]
            results_dict[path]['operation_ratio'] = results_dict[path]['good_operations'] / (results_dict[path]['good_operations'] + results_dict[path]['bad_operations']) 

        except:
            results_dict[path]['buys'] = 0
            results_dict[path]['sells'] = 0
            avg_incomes = 0
            results_dict[path]['avg_incomes'] = 0
            results_dict[path]['good_operations'] = 0
            results_dict[path]['bad_operations'] = 0
            results_dict[path]['operation_ratio'] = 0

        try:
            train_results = pd.read_csv(os.path.join('./data', path, 'train_results.csv'))
            avg_train_auc = train_results['auc'].mean()
            results_dict[path]['avg_train_auc'] = avg_train_auc
        except:
            results_dict[path]['avg_train_auc'] = 0
            
        try:
            stock_predictions = pd.read_csv(os.path.join('./data', path, 'stock_predictions.csv'))
            true_values = pd.read_csv(os.path.join('./data', path, 'stock_true_values.csv'))

            avg_auc_score = calculate_auc_avg(tickers, stock_predictions, true_values)
            results_dict[path]['avg_test_auc'] = avg_auc_score
        except:
            results_dict[path]['avg_test_auc'] = 0

results = pd.DataFrame.from_dict(results_dict, orient='index')

test_None_train_window_0_train_period_0_trading_strategy_strategies.bband_strategy
test_None_train_window_0_train_period_0_trading_strategy_strategies.ma_strategy
test_random_forest_train_window_38_train_period_1_trading_strategy_strategies.bband_strategy
test_random_forest_train_window_38_train_period_1_trading_strategy_strategies.ma_strategy


In [14]:
results

Unnamed: 0,wallet,buys,sells,avg_incomes,good_operations,bad_operations,operation_ratio,avg_test_auc,avg_train_auc
test_None_train_window_0_train_period_0_trading_strategy_strategies.bband_strategy,101.992645,231,226,0.284664,151,75,0.668142,0.0,0.0
test_None_train_window_0_train_period_0_trading_strategy_strategies.ma_strategy,115.21188,51,44,2.173126,24,20,0.545455,0.0,0.0
test_random_forest_train_window_38_train_period_1_trading_strategy_strategies.bband_strategy,118.385549,56,54,2.626507,47,7,0.87037,0.800286,1.0
test_random_forest_train_window_38_train_period_1_trading_strategy_strategies.ma_strategy,108.34915,28,22,1.192736,17,5,0.772727,0.800286,1.0


In [9]:
results_test.loc[
    [
        'test_gradient_boosting_train_window_38_train_period_1_trading_strategy_strategies.ml_bband_strategy',
        'test_gradient_boosting_train_window_38_train_period_1_trading_strategy_strategies.ml_macd_strategy',
        'test_gradient_boosting_train_window_461_train_period_1_trading_strategy_strategies.ml_bband_strategy',
        'test_gradient_boosting_train_window_461_train_period_1_trading_strategy_strategies.ml_macd_strategy',
        'test_logistic_regression_train_window_38_train_period_1_trading_strategy_strategies.ml_bband_strategy'
    ]
]

NameError: name 'results_test' is not defined