In [1]:
from jesse import helpers, research

from custom_indicators.all_features import feature_matrix
from research_tools import TrailingStopLabel

# Date range of the download, converted to timestamps up front so the
# request below stays short.
start_ts = helpers.date_to_timestamp("2021-01-01")
finish_ts = helpers.date_to_timestamp("2024-12-31")

# Keyword options forwarded unchanged to research.get_candles.
candle_options = {
    "warmup_candles_num": 0,
    "caching": False,
    "is_for_jesse": False,
}

# get_candles returns a pair that is unpacked into warmup_1m / trading_1m;
# the later cells only read trading_1m.
warmup_1m, trading_1m = research.get_candles(
    "Binance Perpetual Futures", "BTC-USDT", "1m", start_ts, finish_ts, **candle_options
)

In [2]:
# Labels are derived from the 1m trading candles. n_bar and k are handed
# straight to TrailingStopLabel; their exact meaning is defined in
# research_tools (presumably a bar window and a stop multiplier - TODO confirm).
label_params = {"n_bar": 15, "k": 1}
trailing_stop_label = TrailingStopLabel(trading_1m, **label_params)

# Feature matrix computed from the same candles. The next cell reads its
# `.features` (values) and `.names` (column names) attributes.
features = feature_matrix(
    trading_1m,
    sequential=True,
)

In [15]:
from copy import deepcopy
import numpy as np
import pandas as pd

# Number of rows skipped at the start of the data and again between the end
# of the train range and the start of the test range.
# NOTE(review): presumably an indicator warm-up / leakage gap - confirm.
SKIP_N = 240
# Train/test boundary: 70% of the rows that remain after the initial skip.
TRAIN_TEST_SPLIT_INDEX = int((len(trailing_stop_label.labels) - SKIP_N) * 0.7)


def _build_binary_dataset(feature_rows, raw_labels, column_names):
    """Turn one slice of features/labels into a binary-classification dataset.

    Rows whose label is 0 are dropped; of the remaining rows, label 1 is
    encoded as 1 and every other value as 0 (int8).

    Args:
        feature_rows: 2-D feature values for the slice, one row per label.
        raw_labels: labels for the same rows; must support elementwise
            comparison and boolean-mask indexing (e.g. a NumPy array).
        column_names: column names for the resulting feature frame.

    Returns:
        (x, y): a DataFrame of the kept feature rows and a Series named
        "label" with the 0/1 targets, both re-indexed from 0.

    Raises:
        ValueError: if features and labels have a different number of rows.
    """
    if len(feature_rows) != len(raw_labels):
        raise ValueError(
            f"features ({len(feature_rows)} rows) and labels "
            f"({len(raw_labels)} rows) are misaligned"
        )

    # The comparison already allocates a fresh boolean array, so no copy of
    # the labels is needed beforehand.
    keep = np.asarray(raw_labels != 0)

    x = pd.DataFrame(feature_rows, columns=column_names)[keep].reset_index(drop=True)
    y = pd.Series(
        (raw_labels[keep] == 1).astype(np.int8), name="label"
    ).reset_index(drop=True)
    return x, y


# Train range: everything after the initial skip, up to the split index.
x_train, y_train = _build_binary_dataset(
    features.features[SKIP_N:TRAIN_TEST_SPLIT_INDEX],
    trailing_stop_label.labels[SKIP_N:TRAIN_TEST_SPLIT_INDEX],
    features.names,
)

# Test range: starts SKIP_N rows after the split so the two ranges do not touch.
x_test, y_test = _build_binary_dataset(
    features.features[TRAIN_TEST_SPLIT_INDEX + SKIP_N :],
    trailing_stop_label.labels[TRAIN_TEST_SPLIT_INDEX + SKIP_N :],
    features.names,
)

print(x_train.shape)
print(y_train.shape)
print(x_test.shape)
print(y_test.shape)

(1334848, 426)
(1334848,)
(604082, 426)
(604082,)


In [16]:
from featurewiz import FeatureWiz

# Feature selection with FeatureWiz at its default settings. The recorded run
# log reports a 0.9 correlation limit, a SULOV pass, then recursive XGBoost
# selection.
fwiz = FeatureWiz()

# fit_transform hands back the reduced training frame together with the
# target; note that this rebinds y_train to the returned target.
train_selection = fwiz.fit_transform(x_train, y_train)
X_train_selected, y_train = train_selection

# Apply the already-fitted selector to the held-out rows.
X_test_selected = fwiz.transform(x_test)

# Last expression of the cell: display the names of the selected features.
fwiz.features

featurewiz is given 0.9 as correlation limit...
    Skipping feature engineering since no feature_engg input...
    final list of category encoders given: ['label', 'label']
    final list of scalers given: []
Loaded input data. Shape = (1334848, 426)
#### Starting featurewiz transform for train data ####
    Single_Label Binary_Classification problem 
Shape of dataset: (1334848, 426). Now we classify variables into different types...
Time taken to define data pipeline = 4 second(s)
No model input given...
Lazy Transformer Pipeline created...
    Time taken to fit dataset = 10 second(s)
    Time taken to transform dataset = 16 second(s)
    Shape of transformed dataset: (1334848, 426)
    Single_Label Binary_Classification problem 
Starting SULOV with 426 features...
    there are no null values in dataset...
    there are no null values in target column...
Completed SULOV. 189 features selected
Performing recursive XGBoost feature selection from 189 features...
    time taken to run e

['evenbetter_sinewave_long',
 'evenbetter_sinewave_short',
 'bandpass_dt',
 'evenbetter_sinewave_long_lag3',
 'bandpass_ddt',
 'roofing_filter_lag2',
 'roofing_filter_ddt',
 'comb_spectrum_pwr_38',
 'comb_spectrum_pwr_33',
 'hurst_coef_slow_lag2',
 'roofing_filter_dt',
 'comb_spectrum_pwr_10',
 'ac_40',
 'ac_45',
 'ac_36',
 'hurst_coef_fast_lag2',
 'comb_spectrum_pwr_21',
 'ac_1',
 'acp_pwr_6',
 'phase_accumulation_lag3',
 'acp_pwr_3',
 'phase_accumulation_lag1',
 'acp_pwr_5',
 'acp_pwr_33',
 'ac_38',
 'phase_accumulation_lag2',
 'ac_34',
 'dft_dom_cycle_lag2',
 'acp_pwr_7',
 'acp_pwr_20',
 'comb_spectrum_pwr_4',
 'mod_stochastic',
 'ac_32',
 'ac_29',
 'phase_accumulation_ddt',
 'comb_spectrum_pwr_26',
 'ac_43',
 'comb_spectrum_pwr_13',
 'acp_pwr_11',
 'phase_accumulation_dt',
 'dft_spectrum_15',
 'ac_11',
 'homodyne_lag1',
 'comb_spectrum_pwr_2',
 'acp_pwr_25',
 'acp_pwr_13',
 'comb_spectrum_pwr_18',
 'ac_16',
 'acp_pwr_9',
 'highpass_bp',
 'ac_22',
 'bandpass_lag3',
 'adaptive_cci',


In [18]:
import json

# Persist the selected feature names as JSON (path is relative to the
# kernel's working directory).
with open("research/fwiz.json", "w") as out_file:
    serialized_names = json.dumps(fwiz.features)
    out_file.write(serialized_names)