In [1]:
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, accuracy_score
import itertools
import yaml
import os
import numpy as np
from sklearn.metrics import auc
from statistics import mean 

In [2]:
def calculate_accuracy_avg(tickers, predictions, true_values):
    predictions = predictions.sort_values(by='fecha')
    true_values = true_values.sort_values(by='fecha')
    
    
    # renombro las columnas
    column_names = {}
    for column in predictions.columns:
        if column != 'fecha':
            column_names[column] = f'{column}_proba'
    
    predictions = predictions.rename(columns=column_names)
    
    column_names = {}
    for column in true_values.columns:
        if column != 'fecha':
            column_names[column] = f'{column}_true'
    
    true_values = true_values.rename(columns=column_names)
    
    # calculo la prediccion en base al threshold
    threshold_down = 0.5
    threshold_up = 0.5
    
    for ticker in tickers:
        predictions[f'{ticker}_pred'] = np.where(predictions[f'{ticker}_proba']<threshold_down, 0, predictions[f'{ticker}_proba'])
        predictions[f'{ticker}_pred'] = np.where(predictions[f'{ticker}_proba']>=threshold_up, 1, predictions[f'{ticker}_pred'])
    
    # calculo el accuracy final
    performance = pd.concat(
        [
            predictions,
            true_values
        ], axis=1, join="inner"
    )
    
    
    accuracy_list = []
    for ticker in tickers:
        y_true = performance[performance[f'{ticker}_true'].notna()][f'{ticker}_true']
        y_pred = performance[performance[f'{ticker}_pred'].notna()][f'{ticker}_pred']
        
        accuracy = accuracy_score(
            y_true, 
            y_pred
        )
        accuracy_list.append(accuracy)

    return mean(accuracy_list)

In [3]:
def calculate_avg_incomes(tickers, buys, sells):
    buys = buys.sort_values(by='fecha')
    sells = sells.sort_values(by='fecha')
    
    
    # renombro las columnas
    column_names = {}
    for column in buys.columns:
        if column != 'fecha':
            column_names[column] = f'{column}_buy'
    
    buys = buys.rename(columns=column_names)
    
    column_names = {}
    for column in sells.columns:
        if column != 'fecha':
            column_names[column] = f'{column}_sell'
    
    sells = sells.rename(columns=column_names)
    
    operations = pd.concat(
        [
            buys,
            sells
        ], axis=1, join="inner"
    )
    
    sum_buy = 0
    sum_sell = 0
    profit_list = []
    
    for ticker in tickers:
        sum_buy += operations[f'{ticker}_buy'].sum()
        sum_sell += operations[f'{ticker}_sell'].sum()
    
        final_profit = sum_sell - sum_buy
        profit_list.append(final_profit)
        
    return mean(profit_list)

In [4]:
with open('configs/project_config.yml', 'r') as archivo:
    config = yaml.safe_load(archivo)

tickers = config["tickers"] 
tickers

['YPF', 'BBAR', 'BMA', 'CEPU', 'EDN', 'TEO', 'LOMA']

In [5]:
results_dict = {}

for path in os.listdir('./data'):
    if not path.endswith('.csv'):
        buys = pd.read_csv(os.path.join('./data', path, 'buys.csv'))
        sells = pd.read_csv(os.path.join('./data', path, 'sells.csv'))
        wallet = pd.read_csv(os.path.join('./data', path, 'wallet.csv'))
        stock_predictions = pd.read_csv(os.path.join('./data', path, 'stock_predictions.csv'))
        true_values = pd.read_csv(os.path.join('./data', path, 'stock_true_values.csv'))

        results_dict[path] = {}
        results_dict[path]['buys'] = buys.drop(columns=['fecha']).notna().sum().sum()
        results_dict[path]['sells'] = sells.drop(columns=['fecha']).notna().sum().sum()

        try:
            results_dict[path]['wallet'] = wallet.tail(1).iloc[0]['wallet']
        except:
            results_dict[path]['wallet'] = 0


        accurac = calculate_accuracy_avg(tickers, stock_predictions, true_values)
        results_dict[path]['avg_accuracy_th_0505'] = accurac

        avg_incomes = calculate_avg_incomes(tickers, buys, sells)
        results_dict[path]['avg_incomes'] = avg_incomes
        
        # try:
        #     results_dict[path]['auc_wallet'] = auc(wallet.index, wallet['wallet'])
        # except:
        #     results_dict[path]['auc_wallet'] = 0

results = pd.DataFrame.from_dict(results_dict, orient='index')
results

Unnamed: 0,buys,sells,wallet,avg_accuracy_th_0505,avg_incomes
train_logistic_regression_train_window_38_train_period_1_trading_strategy_strategies.macd_ml_strategy_only_one_tunning_True,84,84,121.880777,0.629203,13.223591
train_logistic_regression_train_window_38_train_period_1_trading_strategy_strategies.ml_strategy_only_one_tunning_True,132,132,132.365953,0.629203,20.37353
train_logistic_regression_train_window_38_train_period_7_trading_strategy_strategies.macd_ml_strategy_only_one_tunning_True,86,86,123.071868,0.559357,16.49716
train_logistic_regression_train_window_38_train_period_7_trading_strategy_strategies.ml_strategy_only_one_tunning_True,143,143,126.63627,0.559357,18.801853
train_logistic_regression_train_window_76_train_period_1_trading_strategy_strategies.macd_ml_strategy_only_one_tunning_True,42,42,111.282157,0.554457,5.857611
train_logistic_regression_train_window_76_train_period_1_trading_strategy_strategies.ml_strategy_only_one_tunning_True,67,65,115.182329,0.554457,5.747342
train_logistic_regression_train_window_76_train_period_7_trading_strategy_strategies.macd_ml_strategy_only_one_tunning_True,44,44,106.444371,0.499594,4.285716
train_logistic_regression_train_window_76_train_period_7_trading_strategy_strategies.ml_strategy_only_one_tunning_True,80,79,113.073895,0.499594,9.724274
train_random_forest_train_window_114_train_period_1_trading_strategy_strategies.ml_strategy_only_one_tunning_True,93,93,136.030319,0.68612,17.406905
train_random_forest_train_window_38_train_period_14_trading_strategy_strategies.macd_ml_strategy_only_one_tunning_True,39,39,105.401406,0.551542,2.12414


In [6]:
results.sort_values(by=['avg_incomes','wallet'], ascending=[False, False])


Unnamed: 0,buys,sells,wallet,avg_accuracy_th_0505,avg_incomes
train_random_forest_train_window_76_train_period_1_trading_strategy_strategies.ml_strategy_only_one_tunning_True,144,143,155.582507,0.709299,27.17879
train_random_forest_train_window_38_train_period_1_trading_strategy_strategies.ml_strategy_only_one_tunning_True,111,111,156.406147,0.705135,27.111734
train_logistic_regression_train_window_38_train_period_1_trading_strategy_strategies.ml_strategy_only_one_tunning_True,132,132,132.365953,0.629203,20.37353
train_logistic_regression_train_window_38_train_period_7_trading_strategy_strategies.ml_strategy_only_one_tunning_True,143,143,126.63627,0.559357,18.801853
train_random_forest_train_window_38_train_period_1_trading_strategy_strategies.macd_ml_strategy_only_one_tunning_True,70,70,138.844331,0.705135,18.74776
train_random_forest_train_window_114_train_period_1_trading_strategy_strategies.ml_strategy_only_one_tunning_True,93,93,136.030319,0.68612,17.406905
train_logistic_regression_train_window_38_train_period_7_trading_strategy_strategies.macd_ml_strategy_only_one_tunning_True,86,86,123.071868,0.559357,16.49716
train_random_forest_train_window_76_train_period_1_trading_strategy_strategies.macd_ml_strategy_only_one_tunning_True,74,73,134.28829,0.709299,14.324715
train_logistic_regression_train_window_38_train_period_1_trading_strategy_strategies.macd_ml_strategy_only_one_tunning_True,84,84,121.880777,0.629203,13.223591
train_random_forest_train_window_38_train_period_7_trading_strategy_strategies.ml_strategy_only_one_tunning_True,98,98,122.381661,0.587262,11.262947


In [7]:
results.tail(50)

Unnamed: 0,buys,sells,wallet,avg_accuracy_th_0505,avg_incomes
train_logistic_regression_train_window_38_train_period_1_trading_strategy_strategies.macd_ml_strategy_only_one_tunning_True,84,84,121.880777,0.629203,13.223591
train_logistic_regression_train_window_38_train_period_1_trading_strategy_strategies.ml_strategy_only_one_tunning_True,132,132,132.365953,0.629203,20.37353
train_logistic_regression_train_window_38_train_period_7_trading_strategy_strategies.macd_ml_strategy_only_one_tunning_True,86,86,123.071868,0.559357,16.49716
train_logistic_regression_train_window_38_train_period_7_trading_strategy_strategies.ml_strategy_only_one_tunning_True,143,143,126.63627,0.559357,18.801853
train_logistic_regression_train_window_76_train_period_1_trading_strategy_strategies.macd_ml_strategy_only_one_tunning_True,42,42,111.282157,0.554457,5.857611
train_logistic_regression_train_window_76_train_period_1_trading_strategy_strategies.ml_strategy_only_one_tunning_True,67,65,115.182329,0.554457,5.747342
train_logistic_regression_train_window_76_train_period_7_trading_strategy_strategies.macd_ml_strategy_only_one_tunning_True,44,44,106.444371,0.499594,4.285716
train_logistic_regression_train_window_76_train_period_7_trading_strategy_strategies.ml_strategy_only_one_tunning_True,80,79,113.073895,0.499594,9.724274
train_random_forest_train_window_114_train_period_1_trading_strategy_strategies.ml_strategy_only_one_tunning_True,93,93,136.030319,0.68612,17.406905
train_random_forest_train_window_38_train_period_14_trading_strategy_strategies.macd_ml_strategy_only_one_tunning_True,39,39,105.401406,0.551542,2.12414


In [8]:
df_ = pd.read_csv('./data/df_features.csv')
df_

FileNotFoundError: [Errno 2] No such file or directory: './data/df_features.csv'

In [None]:
df_[df_['macd_flag']!=0].sample(5)

In [None]:
df_.iloc[600:604]