This notebook tests different classification (clf) algorithms.

In [5]:
from trading import Asset
from trading.func_brokers import get_assets
import pandas as pd
from datetime import date
import numpy as np
from trading.features import pct
from functions import *
# Seed NumPy's legacy global RNG so the `np.random.choice` asset sampling further down is repeatable.
np.random.seed(1)

# Pre-trained models

In [3]:
import pickle

# Random Forest Classifier

### 40 Assets

In [4]:
# Load the pickled random-forest classifier from the local Models/ folder.
# NOTE(review): unpickling can execute arbitrary code, so only load model
# files that come from a trusted source.
with open("Models/RFC_40Assets_BruteFroce", "rb") as model_file:
    RFC_MODEL = pickle.load(model_file)

In [6]:
# Create the object for the "ETH" symbol. `new` is not defined in this notebook;
# presumably it comes from the wildcard `from functions import *` — TODO confirm.
asset = new( "ETH" )

In [9]:
# Run the `features` helper in classification mode (`clf=True`); the resulting
# `asset.df` is expected to carry a `target` column, which is dropped below.
asset = features(asset, clf=True)

# Model input: every column except the label, with NaN and +/-inf mapped to 0.
feature_frame = asset.df.drop(columns=["target"])
df = feature_frame.replace([np.nan, np.inf, -np.inf], 0)

# Predictions of the pre-trained model for every row of the input.
p = RFC_MODEL.predict(df)

In [16]:
# Store the model predictions next to the features they were computed from.
df["predict"] = p

In [13]:
from sklearn.metrics import precision_recall_fscore_support, accuracy_score


In [17]:
# Per-class precision, recall, F-score and support of the predictions against the true labels.
precision_recall_fscore_support( asset.df["target"], df["predict"] )

(array([0.75      , 0.64634146]),
 array([0.65060241, 0.74647887]),
 array([0.69677419, 0.69281046]),
 array([83, 71], dtype=int64))

In [18]:
# Fraction of rows whose predicted label equals the true label.
accuracy_score( asset.df["target"], df["predict"] )

0.6948051948051948

# Something

In [25]:
# Draw up to 15 distinct Binance symbols. `np.random.choice` samples WITH
# replacement by default, which could pick the same asset twice and duplicate
# its rows in the dataset built below, so `replace=False` is passed explicitly.
binance_symbols = list(get_assets()["binance"].keys())
random_assets = np.random.choice(
    binance_symbols,
    size=min(15, len(binance_symbols)),  # never ask for more symbols than exist
    replace=False,
)

In [26]:
# Build one feature/label table from the daily candles of every sampled asset.
# Per-asset frames are collected in a list and concatenated once at the end
# instead of growing `df` inside the loop.
asset_frames = []
for symbol in random_assets:
    sampled_asset = Asset(
        symbol=symbol,
        fiat="usdt",
        broker="binance",
        start=date(2022, 6, 1),
        end=date(2022, 10, 1),
        frequency="1d",
        from_="ext_api",
    )

    # (A leftover debugging `break` used to sit here; it made everything below
    # unreachable and left `df` empty.)

    # Remember the raw columns so only the engineered ones (and the label)
    # survive the drop further down.
    ori_cols = sampled_asset.df.columns

    df_aux = pct(sampled_asset.df, cols=["close", "volume"], lags=12, shift=True)

    # Label: 1 when the next bar closes higher than the current one, else 0.
    next_return = df_aux["close"].pct_change(1).shift(-1)
    df_aux["target"] = (next_return > 0).astype(int)

    # The last bar has no following bar, so its future return is NaN and it has
    # no valid label; drop it instead of silently labelling it 0.
    df_aux = df_aux[next_return.notna()]

    df_aux = df_aux.drop(columns=ori_cols).dropna(axis=0).round(3)

    asset_frames.append(df_aux)

# Single concatenation; fall back to an empty frame when nothing was sampled.
df = pd.concat(asset_frames, axis=0) if asset_frames else pd.DataFrame()

In [None]:
# Count how many rows fall into each target class to check the class balance.
df["target"].value_counts()

In [None]:
# `y` names the label column; `x` lists every other column (the features).
y = 'target'
x = list(df.columns)
x.remove(y)

The dataset is almost balanced.

In [None]:
# Positional 80/20 split: the first 80% of the rows train, the remainder tests.
train_size = int(0.8 * len(df))
df_train, df_test = df.iloc[:train_size], df.iloc[train_size:]

In [None]:
# Sanity check: the train and test sizes should add up to the full dataset size.
len(df_train), len(df_test), len(df)

# H2O

In [None]:
import h2o
from h2o.automl import H2OAutoML
# Connect to an H2O cluster (h2o.init starts a local one when none is reachable).
h2o.init()

In [None]:
# Upload the training split to H2O and rebuild the feature-name list `x`
# (every column except the label `y`) from the H2O frame's own column names.
h2o_frame = h2o.H2OFrame(df_train)
x = h2o_frame.columns
y = 'target'
x.remove(y)

In [None]:
# Convert the label column to a factor (categorical) so H2O treats the task as classification.
h2o_frame[y] = h2o_frame[y].asfactor()

In [None]:
# AutoML run capped at 10 models or five minutes, with the leaderboard ranked by AUC.
h2o_automl = H2OAutoML(
    max_models=10,
    max_runtime_secs=5 * 60,
    seed=666,
    sort_metric='AUC',
)
h2o_automl.train(training_frame=h2o_frame, x=x, y=y)

In [None]:
# Fetch the AutoML leaderboard with all optional extra columns and display it.
h2o_models = h2o.automl.get_leaderboard(h2o_automl, extra_columns = "ALL")
h2o_models

# Sklearn

In [None]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier

In [None]:
# Baseline: a random forest with default hyper-parameters, fitted on the training split.
X_train, y_train = df_train[x], df_train[y]
clf = RandomForestClassifier()
clf.fit(X_train, y_train)

In [None]:
# Predict the label of every row in the held-out test split.
y_predict = clf.predict( df_test[x] )

In [None]:
from sklearn.metrics import mean_squared_error, precision_score

In [None]:
# With 0/1 labels the mean squared error equals the share of misclassified test rows.
mean_squared_error( df_test[y], y_predict )

In [None]:
# Precision for the positive class. The labels are the integers 0/1, so
# `pos_label` must be the integer 1; the string "1" is not one of the labels
# and scikit-learn raises a ValueError for it.
precision_score(df_test[y], y_predict, pos_label=1)

In [None]:
# True labels of the test split side by side with the predictions.
# `.copy()` makes `results` an independent frame: `df_test` is itself a slice
# of `df`, and assigning a new column onto a slice of a slice triggers
# pandas' SettingWithCopyWarning.
results = df_test[[y]].copy()
results["predict"] = y_predict.tolist()

In [None]:
# Preview the first rows of the label/prediction table.
results.head()

In [None]:
# Flag every row where the prediction disagrees with the true label.
results["diff"] = results["target"].ne(results["predict"])

In [None]:
# Number of misclassified test rows (True counts as 1 in the sum).
results["diff"].sum()

In [None]:
# Total number of test rows, for comparison with the mismatch count.
len(results)

In [None]:
from trading.grid_search.brute_force import BruteGridSearch
from trading.variables.params_grid import RF_C_GRID
from trading.variables import params_grid as pg

In [None]:
# Brute-force grid search over random-forest hyper-parameters, ranked by
# precision (higher is better, hence `error_ascending=False`).
# The grid is passed as the imported `RF_C_GRID`: the name `params_grid` is only
# assigned in a later cell, so referencing it here raised a NameError on a
# fresh top-to-bottom run.
bgs = BruteGridSearch(
    df = df,
    regr = RandomForestClassifier(),
    parameters = RF_C_GRID,
    error = "precision",
    error_ascending=False,
)


In [None]:
# Run the grid search.
# NOTE(review): `pos_label=1` is presumably forwarded to the precision metric — confirm in BruteGridSearch.
bgs.test(pos_label = 1)

In [None]:
# Show the `best` attribute of the search (presumably the winning configuration — TODO confirm).
bgs.best

In [None]:
# Keep a handle on the search's `cache`; the cells below index it like a DataFrame with a `param` column.
cache = bgs.cache

In [None]:
# Inspect the first entry of the `param` column (used as a dict of hyper-parameters in the next cell).
cache["param"].iloc[0]

In [None]:
# Pull two hyper-parameters out of the per-row `param` dicts into their own columns.
for hyper_param in ("n_estimators", "criterion"):
    cache[hyper_param] = cache["param"].apply(
        lambda params, name=hyper_param: params[name]
    )

In [None]:
from sklearn.experimental import enable_halving_search_cv
from sklearn.model_selection import HalvingGridSearchCV

In [None]:
# Work on a shallow copy of the imported grid: a plain alias would let the
# key overrides applied to `params_grid` leak into the module-level
# `RF_C_GRID` shared by every other importer.
params_grid = dict(RF_C_GRID)

In [None]:
# Override three axes of the search grid in one step.
params_grid.update(
    {
        "n_estimators": range(10, 300, 30),
        "max_features": ["sqrt", "log2", 3, 5, 7, 0.5, 0.25],
        "max_depth": [3, 4, None],
    }
)

In [None]:
# Successive-halving grid search over the random-forest grid, scored by precision.
# NOTE(review): with `resource='n_samples'`, `max_resources=30` caps every
# candidate at 30 training rows — confirm such a small budget is intended.
sh = HalvingGridSearchCV(
    estimator=RandomForestClassifier(),
    param_grid=params_grid,
    scoring="precision",
    cv=5,
    factor=3,
    resource='n_samples',
    max_resources=30,
)

In [None]:
# Run the halving search.
# NOTE(review): this fits on the whole `df`, which includes the rows held out in `df_test` — confirm that is intended.
sh.fit( df[x], df[y] )

In [None]:
# List which result fields the search recorded.
sh.cv_results_.keys()

In [None]:
# Cross-validated score (precision, per `scoring`) of the best candidate.
sh.best_score_

In [None]:
# The estimator configured with the best-scoring hyper-parameters.
sh.best_estimator_