In [1]:
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, accuracy_score
import itertools
import yaml
import os
import numpy as np
from sklearn.metrics import auc

In [2]:
results_dict = {}

for path in os.listdir('./data'):
    if not path.endswith('.csv'):
        buys = pd.read_csv(os.path.join('./data', path, 'buys.csv'))
        sells = pd.read_csv(os.path.join('./data', path, 'sells.csv'))
        wallet = pd.read_csv(os.path.join('./data', path, 'wallet.csv'))
        performance = pd.read_csv(os.path.join('./data', path, 'ml_results.csv'))

        results_dict[path] = {}
        results_dict[path]['buys'] = buys.shape[0]
        results_dict[path]['sells'] = sells.shape[0]

        try:
            results_dict[path]['wallet'] = wallet.tail(1).iloc[0]['wallet']
        except:
            results_dict[path]['wallet'] = 0


        threshold_down = 0.5
        threshold_up = 0.5
        
        df_predictions = performance[(performance['y_pred']<threshold_down) | (performance['y_pred']>threshold_up)].copy()
        df_predictions['y_pred'] = np.where(df_predictions['y_pred']<threshold_down, 0, df_predictions['y_pred'])
        df_predictions['y_pred'] = np.where(df_predictions['y_pred']>threshold_up, 1, df_predictions['y_pred'])
        results_dict[path]['accuracy_th_0505'] = np.round(accuracy_score(df_predictions.y_true, df_predictions.y_pred), 4)

        threshold_down = 0.85
        threshold_up = 0.15
        df_predictions = performance[(performance['y_pred']<threshold_down) | (performance['y_pred']>threshold_up)].copy()
        df_predictions['y_pred'] = np.where(df_predictions['y_pred']<threshold_down, 0, df_predictions['y_pred'])
        df_predictions['y_pred'] = np.where(df_predictions['y_pred']>threshold_up, 1, df_predictions['y_pred'])
        results_dict[path]['accuracy_th_8515'] = np.round(accuracy_score(df_predictions.y_true, df_predictions.y_pred), 4)

        
        try:
            results_dict[path]['auc_wallet'] = auc(wallet.index, wallet['wallet'])
        except:
            results_dict[path]['auc_wallet'] = 0

results = pd.DataFrame.from_dict(results_dict, orient='index')

In [3]:
results.sort_values(by=['wallet', 'auc_wallet'], ascending=[False, False])

Unnamed: 0,buys,sells,wallet,accuracy_th_0505,accuracy_th_8515,auc_wallet
YPF_random_forest_train_window_76_train_period_1_trading_strategy_strategies.ml_strategy_only_one_tunning_True,40,40,126.920001,0.7612,0.6368,4374.175039
YPF_random_forest_train_window_38_train_period_1_trading_strategy_strategies.ml_strategy_only_one_tunning_True,48,48,125.599996,0.7824,0.6550,5166.779962
YPF_gradient_boosting_train_window_76_train_period_1_trading_strategy_strategies.ml_strategy_only_one_tunning_True,46,46,125.340001,0.7068,0.6510,5007.345011
YPF_logistic_regression_train_window_38_train_period_1_trading_strategy_strategies.ml_strategy_only_one_tunning_True,47,46,124.529999,0.7476,0.6731,4932.155020
YPF_random_forest_train_window_114_train_period_1_trading_strategy_strategies.ml_strategy_only_one_tunning_True,34,34,122.549998,0.7309,0.6070,3622.994980
...,...,...,...,...,...,...
YPF_gradient_boosting_train_window_76_train_period_30_trading_strategy_strategies.macd_ml_strategy_only_one_tunning_True,25,25,96.470002,0.4847,0.5011,2305.580002
YPF_logistic_regression_train_window_114_train_period_30_trading_strategy_strategies.macd_ml_strategy_only_one_tunning_True,16,16,95.930001,0.5034,0.5124,1431.765004
YPF_logistic_regression_train_window_76_train_period_30_trading_strategy_strategies.macd_ml_strategy_only_one_tunning_True,17,17,95.480000,0.4869,0.4737,1524.244998
YPF_logistic_regression_train_window_114_train_period_30_trading_strategy_strategies.sma_ml_strategy_only_one_tunning_True,18,18,95.450000,0.5034,0.5124,1612.865004


In [4]:
results.tail(50)

Unnamed: 0,buys,sells,wallet,accuracy_th_0505,accuracy_th_8515,auc_wallet
YPF_logistic_regression_train_window_38_train_period_7_trading_strategy_strategies.ml_strategy_only_one_tunning_True,44,43,113.009999,0.6102,0.5847,4326.955005
YPF_logistic_regression_train_window_38_train_period_7_trading_strategy_strategies.sma_ml_strategy_only_one_tunning_True,26,26,110.48,0.6102,0.5847,2607.970001
YPF_logistic_regression_train_window_76_train_period_14_trading_strategy_strategies.macd_ml_strategy_only_one_tunning_True,16,15,100.38,0.5263,0.5197,1392.49999
YPF_logistic_regression_train_window_76_train_period_14_trading_strategy_strategies.ml_strategy_only_one_tunning_True,40,39,99.940001,0.5263,0.5197,3733.580023
YPF_logistic_regression_train_window_76_train_period_14_trading_strategy_strategies.sma_ml_strategy_only_one_tunning_True,20,20,98.559999,0.5263,0.5197,1881.569982
YPF_logistic_regression_train_window_76_train_period_1_trading_strategy_strategies.macd_ml_strategy_only_one_tunning_True,18,17,110.620001,0.6674,0.6127,1690.655003
YPF_logistic_regression_train_window_76_train_period_1_trading_strategy_strategies.ml_strategy_only_one_tunning_True,43,42,117.630001,0.6674,0.6127,4401.410015
YPF_logistic_regression_train_window_76_train_period_1_trading_strategy_strategies.sma_ml_strategy_only_one_tunning_True,20,20,110.32,0.6674,0.6127,2004.37499
YPF_logistic_regression_train_window_76_train_period_30_trading_strategy_strategies.macd_ml_strategy_only_one_tunning_True,17,17,95.48,0.4869,0.4737,1524.244998
YPF_logistic_regression_train_window_76_train_period_30_trading_strategy_strategies.ml_strategy_only_one_tunning_True,36,35,100.740001,0.4869,0.4737,3284.45503


In [5]:
df_ = pd.read_csv('./data/df_features.csv')
df_

Unnamed: 0,Date,Close,Volume,target,ema_12,ema_26,ema_50,ema_200,rsi,upper_bband,...,macdhist,macdhist_yesterday,macd_flag,change_percent_1_day,change_percent_2_day,change_percent_3_day,change_percent_4_day,change_percent_5_day,change_percent_6_day,change_percent_7_day
0,2019-09-23,8.99,993700,1,9.019446,9.818937,11.408880,14.290672,38.041546,9.386512,...,0.288576,0.299758,0,-2.0,-3.0,2.0,3.0,0.0,4.0,3.0
1,2019-09-24,8.92,1054700,1,9.004147,9.752349,11.311277,14.237232,37.349364,9.345148,...,0.271891,0.288576,0,-1.0,-3.0,-3.0,1.0,2.0,-1.0,3.0
2,2019-09-25,9.08,1756500,1,9.015817,9.702545,11.223776,14.185917,40.035123,9.310130,...,0.266692,0.271891,0,2.0,1.0,-1.0,-2.0,3.0,4.0,1.0
3,2019-09-26,9.10,1268300,1,9.028768,9.657912,11.140491,14.135311,40.379191,9.234489,...,0.259421,0.266692,0,0.0,2.0,1.0,-1.0,-1.0,3.0,4.0
4,2019-09-27,9.46,2293900,0,9.095111,9.643252,11.074589,14.088790,46.346672,9.483095,...,0.272339,0.259421,0,4.0,4.0,6.0,5.0,3.0,2.0,7.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
963,2023-10-30,10.45,2652500,0,11.641105,12.115328,12.604269,11.963632,30.323897,11.696616,...,-0.168835,-0.127706,0,-6.0,-8.0,-8.0,-8.0,-10.0,-18.0,-21.0
964,2023-10-31,9.93,3516900,1,11.377858,11.953452,12.499396,11.943397,27.051939,11.859275,...,-0.216165,-0.168835,0,-5.0,-11.0,-14.0,-13.0,-13.0,-16.0,-24.0
965,2023-11-01,10.35,2634500,0,11.219726,11.834678,12.415106,11.927542,33.310985,11.600517,...,-0.204418,-0.216165,0,4.0,-1.0,-7.0,-9.0,-9.0,-9.0,-11.0
966,2023-11-02,10.70,1685900,0,11.139768,11.750627,12.347847,11.915328,38.078973,11.232279,...,-0.160260,-0.204418,0,3.0,7.0,2.0,-3.0,-6.0,-5.0,-5.0


In [6]:
df_[df_['macd_flag']!=0].sample(5)

Unnamed: 0,Date,Close,Volume,target,ema_12,ema_26,ema_50,ema_200,rsi,upper_bband,...,macdhist,macdhist_yesterday,macd_flag,change_percent_1_day,change_percent_2_day,change_percent_3_day,change_percent_4_day,change_percent_5_day,change_percent_6_day,change_percent_7_day
556,2022-02-08,4.05,1564000,1,4.124955,4.086858,4.088541,4.378344,48.584894,4.325727,...,-0.002572,0.007464,-1,-2.0,-1.0,-3.0,-6.0,-8.0,-7.0,-5.0
779,2023-01-30,11.15,3362900,1,11.311228,10.472626,9.506539,6.940523,58.947363,12.478045,...,-0.02395,0.053567,-1,-3.0,-8.0,-8.0,-8.0,-7.0,-7.0,-1.0
544,2022-01-21,3.83,2925500,1,4.043368,4.025099,4.069251,4.412295,43.270824,4.442562,...,-0.012438,0.007639,-1,-4.0,-7.0,-9.0,-14.0,-10.0,-8.0,-7.0
494,2021-11-09,4.56,2839100,0,4.374514,4.484175,4.587117,4.701188,52.146125,4.602881,...,0.008788,-0.016674,1,5.0,8.0,10.0,6.0,9.0,5.0,8.0
923,2023-08-31,14.44,1721600,0,14.639557,14.590743,14.301717,11.759738,47.539046,15.039638,...,-0.001363,0.02252,-1,-1.0,-3.0,-3.0,-2.0,-2.0,-4.0,-1.0


In [7]:
df_.iloc[600:604]

Unnamed: 0,Date,Close,Volume,target,ema_12,ema_26,ema_50,ema_200,rsi,upper_bband,...,macdhist,macdhist_yesterday,macd_flag,change_percent_1_day,change_percent_2_day,change_percent_3_day,change_percent_4_day,change_percent_5_day,change_percent_6_day,change_percent_7_day
600,2022-04-12,5.07,1489000,0,5.001315,4.818829,4.618428,4.463457,59.309425,5.170365,...,0.005272,0.014196,0,1.0,-1.0,-1.0,0.0,-2.0,-5.0,0.0
601,2022-04-13,5.14,1312500,0,5.022651,4.842619,4.638882,4.470189,61.236384,5.190904,...,0.002254,0.005272,0,1.0,2.0,0.0,0.0,1.0,-1.0,-3.0
602,2022-04-14,5.18,1467400,0,5.046859,4.867611,4.660102,4.477251,62.334063,5.220369,...,0.001177,0.002254,0,1.0,2.0,3.0,1.0,1.0,2.0,-0.0
603,2022-04-18,5.23,1810600,0,5.075034,4.894454,4.682451,4.484742,63.717149,5.274775,...,0.002007,0.001177,0,1.0,2.0,3.0,4.0,2.0,2.0,3.0
