## Imports

In [36]:
import numpy as np
import pandas as pd
import pandas_ta as ta
from keras.layers import LSTM, Dense
from keras.models import Sequential
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_selection import SelectKBest, f_classif
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import f1_score, roc_auc_score
from sklearn.model_selection import TimeSeriesSplit, cross_val_score
from sklearn.preprocessing import StandardScaler

## Functions

In [37]:
def load_data(data_path):
    df = pd.read_csv(data_path)
    return df


def preprocess_data(df):
    df["color_change"] = df["color"].diff().ne(0).astype(int)
    df["color_change"].fillna(0, inplace=True)

    return df


def scale_data(df):
    scaler = StandardScaler()
    df_scaled = pd.DataFrame(scaler.fit_transform(df), columns=df.columns)
    return df_scaled


def timeseries_cv_score(X, y, n_splits):
    tscv = TimeSeriesSplit(n_splits=n_splits)

    f1_scores = []
    auc_scores = []  # list to store ROC AUC scores for each split
    for train_index, test_index in tscv.split(X):
        X_train, X_test = X[train_index], X[test_index]
        y_train, y_test = y[train_index], y[test_index]

        # Define LSTM model
        model = Sequential()
        model.add(LSTM(50, input_shape=(X_train.shape[1], X_train.shape[2])))
        model.add(Dense(1, activation="sigmoid"))  # because of binary classification

        model.compile(
            loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"]
        )

        # Train the model
        model.fit(X_train, y_train, epochs=10, verbose=0)

        # Make predictions on the test set
        y_pred = model.predict(X_test).ravel()

        # Calculate F1 score of the model on the test set
        f1 = f1_score(y_test, (y_pred > 0.5).astype("int32"))
        f1_scores.append(f1)

        # Calculate ROC AUC score of the model on the test set
        auc = roc_auc_score(y_test, y_pred)
        auc_scores.append(auc)

    return np.mean(f1_scores), np.mean(auc_scores)

## Globals

In [38]:
# Set display options to show all rows and columns
pd.set_option("display.max_rows", None)
pd.set_option("display.max_columns", None)

# Load the data
data_path = "../../../data/kc/btc/raw/kc_btc_15min.csv"

## Preprocessing

In [46]:
df = load_data(data_path)

# Preprocess the data
df = preprocess_data(df)

# Prepare TimeSeriesSplit
tscv = TimeSeriesSplit(n_splits=3)

## FEATURE PROCESSING ##

df["time"] = pd.to_datetime(df["time"])
df.set_index("time", inplace=True)

# print(df.index.duplicated().any())

df = df.loc[~df.index.duplicated(keep="first")]

print(df.ta.categories)

df.ta.strategy("all")

# Check your results and exclude as necessary.
df.ta.strategy(fast=10, slow=50, verbose=True)

# Sanity check. Make sure all the columns are there
print(df.columns)

# Forward Fill
df.ffill(inplace=True)

# Backward Fill
df.bfill(inplace=True)

# Re-scale the data to include the new feature
df_scaled = scale_data(df)

X = df.drop("color_change", axis=1)
y = df["color_change"]

df.tail()
# print(df.isna().sum())

['candles', 'cycles', 'momentum', 'overlap', 'performance', 'statistics', 'trend', 'volatility', 'volume']


131it [00:02, 52.87it/s]


[+] Strategy: All
[i] Indicator arguments: {'fast': 10, 'slow': 50, 'append': True}
[i] Excluded[12]: above, above_value, below, below_value, cross, cross_value, long_run, short_run, td_seq, tsignals, vp, xsignals
[i] Multiprocessing 131 indicators with 3 chunks and 12/12 cpus.


131it [00:02, 49.57it/s]

[i] Total indicators: 131
[i] Columns added: 30
[i] Last Run: Tuesday June 20, 2023, NYSE: 20:16:45, Local: 0:16:45 Pacific Daylight Time, Day 171/365 (47.00%)
Index(['open', 'close', 'high', 'low', 'volume', 'color', 'color_change',
       'ABER_ZG_5_15', 'ABER_SG_5_15', 'ABER_XG_5_15',
       ...
       'PVOs_10_50_9', 'SMI_10_50_5', 'SMIs_10_50_5', 'SMIo_10_50_5',
       'STC_10_10_50_0.5', 'STCmacd_10_10_50_0.5', 'STCstoch_10_10_50_0.5',
       'TSI_10_50_13', 'TSIs_10_50_13', 'UO_10_14_50'],
      dtype='object', length=315)





Unnamed: 0_level_0,open,close,high,low,volume,color,color_change,ABER_ZG_5_15,ABER_SG_5_15,ABER_XG_5_15,ABER_ATR_5_15,ACCBL_20,ACCBM_20,ACCBU_20,AD,ADOSC_3_10,ADX_14,DMP_14,DMN_14,ALMA_10_6.0_0.85,AMATe_LR_8_21_2,AMATe_SR_8_21_2,AO_5_34,OBV,OBV_min_2,OBV_max_2,OBVe_4,OBVe_12,AOBV_LR_2,AOBV_SR_2,APO_12_26,AROOND_14,AROONU_14,AROONOSC_14,ATRr_14,BBL_5_2.0,BBM_5_2.0,BBU_5_2.0,BBB_5_2.0,BBP_5_2.0,BIAS_SMA_26,BOP,AR_26,BR_26,CCI_14_0.015,CDL_2CROWS,CDL_3BLACKCROWS,CDL_3INSIDE,CDL_3LINESTRIKE,CDL_3OUTSIDE,CDL_3STARSINSOUTH,CDL_3WHITESOLDIERS,CDL_ABANDONEDBABY,CDL_ADVANCEBLOCK,CDL_BELTHOLD,CDL_BREAKAWAY,CDL_CLOSINGMARUBOZU,CDL_CONCEALBABYSWALL,CDL_COUNTERATTACK,CDL_DARKCLOUDCOVER,CDL_DOJI_10_0.1,CDL_DOJISTAR,CDL_DRAGONFLYDOJI,CDL_ENGULFING,CDL_EVENINGDOJISTAR,CDL_EVENINGSTAR,CDL_GAPSIDESIDEWHITE,CDL_GRAVESTONEDOJI,CDL_HAMMER,CDL_HANGINGMAN,CDL_HARAMI,CDL_HARAMICROSS,CDL_HIGHWAVE,CDL_HIKKAKE,CDL_HIKKAKEMOD,CDL_HOMINGPIGEON,CDL_IDENTICAL3CROWS,CDL_INNECK,CDL_INSIDE,CDL_INVERTEDHAMMER,CDL_KICKING,CDL_KICKINGBYLENGTH,CDL_LADDERBOTTOM,CDL_LONGLEGGEDDOJI,CDL_LONGLINE,CDL_MARUBOZU,CDL_MATCHINGLOW,CDL_MATHOLD,CDL_MORNINGDOJISTAR,CDL_MORNINGSTAR,CDL_ONNECK,CDL_PIERCING,CDL_RICKSHAWMAN,CDL_RISEFALL3METHODS,CDL_SEPARATINGLINES,CDL_SHOOTINGSTAR,CDL_SHORTLINE,CDL_SPINNINGTOP,CDL_STALLEDPATTERN,CDL_STICKSANDWICH,CDL_TAKURI,CDL_TASUKIGAP,CDL_THRUSTING,CDL_TRISTAR,CDL_UNIQUE3RIVER,CDL_UPSIDEGAP2CROWS,CDL_XSIDEGAP3METHODS,open_Z_30_1,high_Z_30_1,low_Z_30_1,close_Z_30_1,CFO_9,CG_10,CHOP_14_1_100,CKSPl_10_3_20,CKSPs_10_3_20,CMF_20,CMO_14,COPC_11_14_10,CTI_12,LDECAY_5,DEC_1,DEMA_10,DCL_20_20,DCM_20_20,DCU_20_20,DPO_20,EBSW_40_10,EFI_13,EMA_10,ENTP_10,EOM_14_100000000,ER_10,BULLP_13,BEARP_13,FISHERT_9_1,FISHERTs_9_1,FWMA_10,HA_open,HA_high,HA_low,HA_close,HILO_13_21,HILOl_13_21,HILOs_13_21,HL2,HLC3,HMA_10,HWM,HWU,HWL,HWMA_0.2_0.1_0.1,ISA_9,ISB_26,ITS_9,IKS_26,ICS_26,INC_1,INERTIA_20_14,JMA_7_0,KAMA_10_2_30,KCLe_20_2,KCBe_20_2,KCUe_20_2,K_9_3,D_9_3,J_9_3,KST_10_15_20_30_10_10_10_15,KSTs_9,KURT_30,KVO_34_55_13,KVOs_34_55_13,LR_14,LOGRET_1,MACD_12_26_9,MACDh_12_26_9,MACDs_12_26_9,MAD_30,MASSI_9_25,MCGD_10,MEDIAN_30,MFI_14,MIDPOINT_2,MIDPRICE_2,MOM_10,NATR_14,NVI_1,OHLC4,PDIST,PCTRET_1,PGO_14,PPO_12_26_9,PPOh_12_26_9,PPOs_12_26_9,PSARl_0.02_0.2,PSARs_0.02_0.2,PSARaf_0.02_0.2,PSARr_0.02_0.2,PSL_12,PVI_1,PVO_12_26_9,PVOh_12_26_9,PVOs_12_26_9,PVOL,PVR,PVT,PWMA_10,QQE_14_5_4.236,QQE_14_5_4.236_RSIMA,QQEl_14_5_4.236,QQEs_14_5_4.236,QS_10,QTL_30_0.5,RMA_10,ROC_10,RSI_14,RSX_14,RVGI_14_4,RVGIs_14_4,RVI_14,SINWMA_14,SKEW_30,SLOPE_1,SMA_10,SMI_5_20_5,SMIs_5_20_5,SMIo_5_20_5,SQZ_20_2.0_20_1.5,SQZ_ON,SQZ_OFF,SQZ_NO,SQZPRO_20_2.0_20_2_1.5_1,SQZPRO_ON_WIDE,SQZPRO_ON_NORMAL,SQZPRO_ON_NARROW,SQZPRO_OFF,SQZPRO_NO,SSF_10_2,STC_10_12_26_0.5,STCmacd_10_12_26_0.5,STCstoch_10_12_26_0.5,STDEV_30,STOCHk_14_3_3,STOCHd_14_3_3,STOCHRSIk_14_14_3_3,STOCHRSId_14_14_3_3,SUPERT_7_3.0,SUPERTd_7_3.0,SUPERTl_7_3.0,SUPERTs_7_3.0,SWMA_10,T3_10_0.7,TEMA_10,THERMO_20_2_0.5,THERMOma_20_2_0.5,THERMOl_20_2_0.5,THERMOs_20_2_0.5,TOS_STDEVALL_LR,TOS_STDEVALL_L_1,TOS_STDEVALL_U_1,TOS_STDEVALL_L_2,TOS_STDEVALL_U_2,TOS_STDEVALL_L_3,TOS_STDEVALL_U_3,TRIMA_10,TRIX_30_9,TRIXs_30_9,TRUERANGE_1,TSI_13_25_13,TSIs_13_25_13,TTM_TRND_6,UI_14,UO_7_14_28,VAR_30,VHF_28,VIDYA_14,VTXP_14,VTXM_14,VWAP_D,VWMA_10,WCP,WILLR_14,WMA_10,ZL_EMA_10,ZS_30,ADOSC_10_50,AMATe_LR_10_50_2,AMATe_SR_10_50_2,AO_10_50,OBVe_10,OBVe_50,APO_10_50,COPC_10_50_10,KAMA_10_10_50,KVO_10_50_13,KVOs_10_50_13,MACD_10_50_9,MACDh_10_50_9,MACDs_10_50_9,MASSI_10_50,PPO_10_50_9,PPOh_10_50_9,PPOs_10_50_9,PVO_10_50_9,PVOh_10_50_9,PVOs_10_50_9,SMI_10_50_5,SMIs_10_50_5,SMIo_10_50_5,STC_10_10_50_0.5,STCmacd_10_10_50_0.5,STCstoch_10_10_50_0.5,TSI_10_50_13,TSIs_10_50_13,UO_10_14_50
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1,Unnamed: 85_level_1,Unnamed: 86_level_1,Unnamed: 87_level_1,Unnamed: 88_level_1,Unnamed: 89_level_1,Unnamed: 90_level_1,Unnamed: 91_level_1,Unnamed: 92_level_1,Unnamed: 93_level_1,Unnamed: 94_level_1,Unnamed: 95_level_1,Unnamed: 96_level_1,Unnamed: 97_level_1,Unnamed: 98_level_1,Unnamed: 99_level_1,Unnamed: 100_level_1,Unnamed: 101_level_1,Unnamed: 102_level_1,Unnamed: 103_level_1,Unnamed: 104_level_1,Unnamed: 105_level_1,Unnamed: 106_level_1,Unnamed: 107_level_1,Unnamed: 108_level_1,Unnamed: 109_level_1,Unnamed: 110_level_1,Unnamed: 111_level_1,Unnamed: 112_level_1,Unnamed: 113_level_1,Unnamed: 114_level_1,Unnamed: 115_level_1,Unnamed: 116_level_1,Unnamed: 117_level_1,Unnamed: 118_level_1,Unnamed: 119_level_1,Unnamed: 120_level_1,Unnamed: 121_level_1,Unnamed: 122_level_1,Unnamed: 123_level_1,Unnamed: 124_level_1,Unnamed: 125_level_1,Unnamed: 126_level_1,Unnamed: 127_level_1,Unnamed: 128_level_1,Unnamed: 129_level_1,Unnamed: 130_level_1,Unnamed: 131_level_1,Unnamed: 132_level_1,Unnamed: 133_level_1,Unnamed: 134_level_1,Unnamed: 135_level_1,Unnamed: 136_level_1,Unnamed: 137_level_1,Unnamed: 138_level_1,Unnamed: 139_level_1,Unnamed: 140_level_1,Unnamed: 141_level_1,Unnamed: 142_level_1,Unnamed: 143_level_1,Unnamed: 144_level_1,Unnamed: 145_level_1,Unnamed: 146_level_1,Unnamed: 147_level_1,Unnamed: 148_level_1,Unnamed: 149_level_1,Unnamed: 150_level_1,Unnamed: 151_level_1,Unnamed: 152_level_1,Unnamed: 153_level_1,Unnamed: 154_level_1,Unnamed: 155_level_1,Unnamed: 156_level_1,Unnamed: 157_level_1,Unnamed: 158_level_1,Unnamed: 159_level_1,Unnamed: 160_level_1,Unnamed: 161_level_1,Unnamed: 162_level_1,Unnamed: 163_level_1,Unnamed: 164_level_1,Unnamed: 165_level_1,Unnamed: 166_level_1,Unnamed: 167_level_1,Unnamed: 168_level_1,Unnamed: 169_level_1,Unnamed: 170_level_1,Unnamed: 171_level_1,Unnamed: 172_level_1,Unnamed: 173_level_1,Unnamed: 174_level_1,Unnamed: 175_level_1,Unnamed: 176_level_1,Unnamed: 177_level_1,Unnamed: 178_level_1,Unnamed: 179_level_1,Unnamed: 180_level_1,Unnamed: 181_level_1,Unnamed: 182_level_1,Unnamed: 183_level_1,Unnamed: 184_level_1,Unnamed: 185_level_1,Unnamed: 186_level_1,Unnamed: 187_level_1,Unnamed: 188_level_1,Unnamed: 189_level_1,Unnamed: 190_level_1,Unnamed: 191_level_1,Unnamed: 192_level_1,Unnamed: 193_level_1,Unnamed: 194_level_1,Unnamed: 195_level_1,Unnamed: 196_level_1,Unnamed: 197_level_1,Unnamed: 198_level_1,Unnamed: 199_level_1,Unnamed: 200_level_1,Unnamed: 201_level_1,Unnamed: 202_level_1,Unnamed: 203_level_1,Unnamed: 204_level_1,Unnamed: 205_level_1,Unnamed: 206_level_1,Unnamed: 207_level_1,Unnamed: 208_level_1,Unnamed: 209_level_1,Unnamed: 210_level_1,Unnamed: 211_level_1,Unnamed: 212_level_1,Unnamed: 213_level_1,Unnamed: 214_level_1,Unnamed: 215_level_1,Unnamed: 216_level_1,Unnamed: 217_level_1,Unnamed: 218_level_1,Unnamed: 219_level_1,Unnamed: 220_level_1,Unnamed: 221_level_1,Unnamed: 222_level_1,Unnamed: 223_level_1,Unnamed: 224_level_1,Unnamed: 225_level_1,Unnamed: 226_level_1,Unnamed: 227_level_1,Unnamed: 228_level_1,Unnamed: 229_level_1,Unnamed: 230_level_1,Unnamed: 231_level_1,Unnamed: 232_level_1,Unnamed: 233_level_1,Unnamed: 234_level_1,Unnamed: 235_level_1,Unnamed: 236_level_1,Unnamed: 237_level_1,Unnamed: 238_level_1,Unnamed: 239_level_1,Unnamed: 240_level_1,Unnamed: 241_level_1,Unnamed: 242_level_1,Unnamed: 243_level_1,Unnamed: 244_level_1,Unnamed: 245_level_1,Unnamed: 246_level_1,Unnamed: 247_level_1,Unnamed: 248_level_1,Unnamed: 249_level_1,Unnamed: 250_level_1,Unnamed: 251_level_1,Unnamed: 252_level_1,Unnamed: 253_level_1,Unnamed: 254_level_1,Unnamed: 255_level_1,Unnamed: 256_level_1,Unnamed: 257_level_1,Unnamed: 258_level_1,Unnamed: 259_level_1,Unnamed: 260_level_1,Unnamed: 261_level_1,Unnamed: 262_level_1,Unnamed: 263_level_1,Unnamed: 264_level_1,Unnamed: 265_level_1,Unnamed: 266_level_1,Unnamed: 267_level_1,Unnamed: 268_level_1,Unnamed: 269_level_1,Unnamed: 270_level_1,Unnamed: 271_level_1,Unnamed: 272_level_1,Unnamed: 273_level_1,Unnamed: 274_level_1,Unnamed: 275_level_1,Unnamed: 276_level_1,Unnamed: 277_level_1,Unnamed: 278_level_1,Unnamed: 279_level_1,Unnamed: 280_level_1,Unnamed: 281_level_1,Unnamed: 282_level_1,Unnamed: 283_level_1,Unnamed: 284_level_1,Unnamed: 285_level_1,Unnamed: 286_level_1,Unnamed: 287_level_1,Unnamed: 288_level_1,Unnamed: 289_level_1,Unnamed: 290_level_1,Unnamed: 291_level_1,Unnamed: 292_level_1,Unnamed: 293_level_1,Unnamed: 294_level_1,Unnamed: 295_level_1,Unnamed: 296_level_1,Unnamed: 297_level_1,Unnamed: 298_level_1,Unnamed: 299_level_1,Unnamed: 300_level_1,Unnamed: 301_level_1,Unnamed: 302_level_1,Unnamed: 303_level_1,Unnamed: 304_level_1,Unnamed: 305_level_1,Unnamed: 306_level_1,Unnamed: 307_level_1,Unnamed: 308_level_1,Unnamed: 309_level_1,Unnamed: 310_level_1,Unnamed: 311_level_1,Unnamed: 312_level_1,Unnamed: 313_level_1,Unnamed: 314_level_1,Unnamed: 315_level_1
1970-01-01 00:00:01.679913900,27890.9,27889.5,27928.0,27888.5,42.059436,0,1,27877.326667,27950.242908,27804.410425,72.916241,27743.416193,27923.41,28107.941193,20455.571636,-3.91184,14.576909,212.373825,207.467671,27854.497251,1,0,-17.263824,13674.912509,13632.853073,13674.912509,13641.429803,13635.354292,1,0,-52.202564,50.0,14.285714,-35.714286,72.036007,27839.46458,27877.54,27915.61542,0.273162,0.657057,-0.001343,-0.035443,98.28971,98.524683,26.298176,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,100.0,-100.0,0.0,0.0,0.0,0.0,0.0,100.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,100.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-100.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.169045,-0.152821,0.298286,-0.206085,0.010032,-5.499008,50.839204,27894.75155,27958.684932,-0.138051,-0.578452,-0.836149,-0.227304,27889.5,0,27867.681236,27778.3,27964.15,28150.0,-34.505,0.606806,-68.48026,27882.775595,3.319406,-1095674000.0,0.001555,40.513693,1.013693,0.043352,-0.550643,27878.3,27866.011531,27928.0,27866.011531,27899.225,27913.730769,27839.180952,27913.730769,27908.25,27902.0,27879.277071,27896.54652,28002.017644,27791.075395,27896.54652,27783.85,27840.0,27853.15,27957.25,27903.4,1,43.838647,27868.565838,27901.638565,27756.789991,27892.992947,28029.195904,60.686712,47.100122,87.859893,-21.211465,216.6501,-1.104464,-0.453824,-1.207027,27852.890769,5.7e-05,-6.258948,-7.830829,1.571881,76.098889,24.559296,27892.080722,27888.9,34.6173,27888.7,27878.2,-0.4,0.258291,1031.095597,27899.225,80.6,5.7e-05,-0.002738,-0.186925,-0.156012,-0.030913,27919.702505,27985.952318,0.06,0,50.0,1023.767444,-12.552042,-1.294574,-11.257468,1173017.0,1.0,17662.604179,27865.391797,52.240738,48.069327,59.812861,52.240738,-0.31,27888.9,27892.68757,-0.001434,49.710774,37.909106,-0.097863,-0.133571,58.704181,27877.608943,0.036732,1.6,27865.95,-0.064755,-0.090131,0.025376,-103.683333,0,1,0,-103.683333,1,0,0,0,0,27876.390465,0.087958,-6.258948,0.010921,86.938335,38.376835,36.434225,36.982012,34.228848,27792.248881,1,27792.248881,27979.366961,27864.56,27885.323482,27867.957815,60.1,42.151976,1,1,26411.995581,23540.125134,29283.866028,20668.254688,32155.736475,17796.384241,35027.606922,27864.56,0.012238,0.014064,40.1,-2.58876,2.690762,1,0.545362,53.322244,7558.274053,0.308837,27881.156228,0.950426,1.082729,22899.265389,27866.254922,27898.875,-54.649266,27870.976364,27876.760261,-0.206085,35.253735,1,0,-3.582,13632.131449,13649.517327,-1.99,-0.398192,27762.850063,4.292881,-4.946866,16.479005,-16.126349,32.605354,47.23467,-0.007141,-0.106191,0.09905,-21.346937,-3.036943,-18.309993,0.02344,0.029697,-0.006257,7.752701,16.479005,1.149699,2.343979,5.659906,57.346802
1970-01-01 00:00:01.679914800,27889.6,27930.3,27930.9,27872.4,29.465377,1,1,27885.286667,27957.241825,27813.331508,71.955158,27740.106977,27917.135,28096.056977,20484.432595,0.163171,13.738826,197.204266,208.748551,27853.606011,1,0,-13.136765,13704.377885,13674.912509,13704.377885,13666.609036,13645.973307,1,0,-59.583974,42.857143,7.142857,-35.714286,71.069149,27829.809485,27887.16,27944.510515,0.411304,0.876108,-9.3e-05,0.695726,102.596903,102.859343,55.512484,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,100.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.205392,-0.157832,0.057184,0.206558,0.097075,-5.497763,50.375858,27894.75155,27977.83991,-0.109032,8.700842,-0.659622,0.44927,27930.3,0,27886.136212,27778.3,27964.15,28150.0,-34.505,0.850441,113.043688,27891.416396,3.320386,-935695700.0,0.189455,37.297452,-21.202548,0.551277,0.043352,27898.171329,27882.618265,27930.9,27872.4,27905.8,27888.219048,27888.219048,27913.730769,27901.65,27911.2,27896.283839,27898.737518,27998.919268,27798.555768,27898.737518,27783.85,27908.85,27854.6,27960.8,27903.4,1,49.582693,27880.733526,27902.552772,27762.171897,27896.546,28030.920103,73.66008,55.953441,109.073357,-55.875099,158.881969,-1.085484,0.167911,-1.010607,27870.526154,0.001462,-2.263685,-3.068453,0.804768,72.096,24.499644,27895.881774,27889.7,41.174635,27909.9,27901.65,53.9,0.254452,1031.241888,27905.8,76.4,0.001463,0.54191,-0.213311,-0.145918,-0.067393,27919.702505,27973.493179,0.06,0,58.333333,1023.767444,-12.346974,-0.871605,-11.475369,822976.8,2.0,17666.914716,27869.696289,52.240738,50.163025,59.812861,52.240738,5.11,27889.7,27896.448813,0.193354,54.350421,41.831009,-0.078652,-0.110892,62.733047,27873.285012,0.05137,40.8,27871.34,-0.00502,-0.061761,0.05674,-79.633333,0,1,0,-79.633333,1,0,0,0,0,27893.04912,0.043979,-2.263685,0.00546,82.107875,50.679717,42.137873,57.965632,43.504593,27792.248881,1,27792.248881,27979.366961,27870.026667,27880.119798,27894.392284,16.1,39.670835,1,0,26412.944113,23541.073666,29284.81456,20669.203219,32156.685007,17797.332773,35028.555454,27870.026667,0.011802,0.01364,58.5,-0.987071,2.165357,1,0.541845,59.534584,6741.703064,0.299559,27881.700609,0.946795,1.063997,22899.538454,27872.029587,27915.975,-38.009788,27882.676364,27894.131122,0.206558,34.820969,1,0,-1.14,13645.267165,13651.668721,-0.062,-0.136237,27763.584622,7.492356,-3.169834,22.609868,-7.996388,30.606257,47.084591,-0.000222,-0.079418,0.079196,-21.338345,-2.422682,-18.915664,0.032374,0.03059,0.001785,53.876351,22.609868,9.796761,3.237418,5.313836,59.2705
1970-01-01 00:00:01.679915700,27930.3,27930.1,27937.0,27889.9,30.737865,0,1,27892.113333,27962.411481,27821.815185,70.298148,27740.804063,27911.0,28077.804063,20506.164461,8.730073,12.843625,189.218247,193.83794,27861.965915,1,0,-9.541471,13673.64002,13673.64002,13704.377885,13669.42143,13650.229724,1,0,-59.791667,35.714286,0.0,-35.714286,69.357067,27828.560621,27895.52,27962.479379,0.480073,0.758216,-0.000227,-0.004246,94.283586,94.529867,99.509545,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,100.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-100.0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,100.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-100.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.205742,-0.157821,0.197925,0.149213,0.030855,-5.497678,50.545991,27894.75155,27972.015919,0.01067,8.647926,-0.510334,0.718707,27930.1,1,27899.884214,27778.3,27934.15,28090.0,-34.505,0.957544,96.016365,27898.449779,3.321472,-1516708000.0,0.434506,38.18353,-8.91647,1.059646,0.551277,27910.728671,27894.209133,27937.0,27889.9,27921.825,27882.642857,27882.642857,27913.730769,27913.45,27919.0,27914.529596,27900.140923,27995.207794,27805.074052,27900.140923,27783.85,27908.85,27875.75,27964.15,27903.4,0,53.223852,27895.082004,27905.483152,27769.193621,27899.741619,28030.289617,80.562502,64.156462,113.374584,-58.108391,107.823375,-1.000718,0.760106,-0.757648,27891.908132,-7e-06,0.876349,0.057265,0.819084,68.113333,24.132852,27899.286858,27910.0,42.193195,27930.2,27904.7,102.5,0.248324,1031.241888,27921.825,94.0,-7e-06,0.605776,-0.214027,-0.117308,-0.09672,27919.702505,27961.781588,0.06,0,58.333333,1023.766727,-11.748421,-0.218442,-11.529979,858511.7,3.0,17666.892706,27872.915625,52.240738,51.550004,59.812861,52.240738,9.98,27910.0,27899.813932,0.368339,54.323963,45.943901,-0.067401,-0.090253,57.586851,27872.224301,0.039481,-0.2,27881.59,0.032249,-0.030424,0.062673,-58.283333,0,1,0,-58.283333,1,0,0,0,0,27909.280927,0.021989,0.876349,0.00273,78.143106,57.067652,48.708068,75.196616,56.714753,27792.248881,1,27792.248881,27979.366961,27876.693333,27878.132353,27912.132961,17.5,37.559327,1,0,26413.892645,23542.022198,29285.763092,20670.151751,32157.633539,17798.281305,35029.503985,27876.693333,0.011408,0.013203,47.1,0.307809,1.899993,1,0.538763,61.102853,6106.345064,0.302852,27882.705307,0.930289,1.09509,22899.823723,27883.039507,27921.775,-36.137989,27893.36,27917.0891,0.149213,37.331513,1,0,3.212,13650.425866,13652.530341,7.66,0.099204,27765.289216,10.13549,-1.269074,27.239585,-2.693337,29.932922,46.906886,0.027481,-0.041372,0.068853,-20.722365,-1.445361,-19.277004,0.03949,0.033556,0.005933,76.938175,27.239585,24.950544,3.948964,5.118854,63.558362
1970-01-01 00:00:01.679916600,27930.0,27889.2,27930.0,27889.1,18.444079,0,0,27901.48,27969.824938,27833.135062,68.344938,27739.379241,27904.74,28065.404241,20487.810574,5.601198,12.028202,175.702658,180.792373,27865.438591,1,0,-2.187353,13655.195941,13655.195941,13673.64002,13663.731234,13650.993757,1,0,-54.694231,28.571429,0.0,-28.571429,67.331562,27864.887286,27905.4,27945.912714,0.290358,0.300063,-0.001547,-0.997555,77.781896,78.004451,85.359221,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-100.0,0.0,-100.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,-100.0,-100.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.14717,-0.298388,0.13337,-0.435936,-0.096501,-5.497845,60.989791,27894.75155,27972.015919,0.025887,-1.872247,-0.378508,0.766549,27929.9,1,27896.565629,27778.3,27908.35,28038.4,-34.505,0.921556,-25.466377,27896.768001,3.321904,-1098241000.0,0.093895,32.557311,-8.342689,1.465542,1.059646,27902.528671,27908.017066,27930.0,27889.1,27909.575,27882.642857,27882.642857,27882.642857,27909.55,27902.766667,27921.624344,27891.950153,27982.689109,27801.211198,27891.950153,27830.275,27908.85,27875.75,27964.15,27903.4,0,55.123027,27902.194857,27905.244534,27772.813276,27898.737655,28024.662035,74.034865,67.449263,87.206071,-81.191695,60.062072,-0.909755,0.243071,-0.614688,27900.603956,-0.001465,0.063825,-0.604208,0.668032,64.806667,23.717245,27898.276712,27910.0,42.869426,27909.65,27913.05,22.3,0.241425,1031.095451,27909.575,41.1,-0.001464,0.139413,-0.195809,-0.079271,-0.116538,27919.702505,27950.772693,0.06,0,58.333333,1023.766727,-14.301609,-2.217303,-12.084305,514390.6,4.0,17664.19181,27879.008594,52.240738,50.721295,59.812861,52.240738,1.97,27910.0,27898.752539,0.080023,49.063876,48.400954,-0.066727,-0.076116,53.301306,27874.162861,0.081885,-40.9,27883.82,0.010379,-0.016823,0.027202,-48.016667,1,0,0,-48.016667,1,1,0,0,0,27910.87172,0.010995,0.063825,0.001365,74.659332,63.695839,57.147736,78.548818,70.570356,27792.248881,1,27792.248881,27979.366961,27882.983333,27877.952514,27905.248126,7.0,34.648915,1,0,26414.841177,23542.97073,29286.711624,20671.100283,32158.582071,17799.229836,35030.452517,27882.983333,0.01101,0.01276,41.0,-0.041887,1.622582,-1,0.53042,55.538593,5574.01582,0.243595,27882.834242,0.959417,1.100224,22899.994329,27885.428363,27899.375,-34.764706,27894.743636,27912.254718,-0.435936,36.421554,1,0,10.253,13651.293152,13652.634874,8.278,0.218274,27765.632117,5.127512,-0.355276,24.852325,-4.064478,28.916803,46.767863,0.029696,-0.031325,0.061021,-24.968404,-4.55312,-20.415284,0.036337,0.034483,0.001854,88.469088,24.852325,37.247547,3.633681,4.906687,57.358823
1970-01-01 00:00:01.679917500,27889.2,27903.4,27916.6,27874.6,14.993546,1,1,27906.633333,27973.221942,27840.044725,66.588609,27741.181383,27901.405,28054.031383,20493.379605,5.583968,11.566491,163.152468,182.378632,27871.564357,0,1,3.161765,13670.189487,13655.195941,13670.189487,13666.314535,13653.946947,1,0,-45.096795,21.428571,0.0,-21.428571,65.522165,27871.607725,27908.5,27945.392275,0.26438,0.43088,-0.000922,0.338095,74.509615,74.956639,64.163125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.436218,-0.500987,-0.114105,-0.305574,-0.038386,-5.498218,58.869156,27894.75155,27972.015919,0.066297,1.687032,-0.236531,0.697253,27903.4,0,27898.794821,27778.3,27900.9,28023.5,-34.505,0.755427,8.587156,27897.973819,3.322435,-338538700.0,0.233205,18.306267,-23.693733,1.399821,1.465542,27903.017483,27908.796033,27916.6,27874.6,27895.95,27869.852381,27869.852381,27882.642857,27895.6,27898.2,27920.302526,27887.558036,27974.160327,27800.955745,27887.558036,27867.225,27908.85,27882.7,27964.15,27903.4,1,56.829477,27905.277566,27905.167062,27777.250107,27899.181688,28021.113269,72.376835,69.091787,78.946931,-79.754976,18.127945,-0.791978,-0.153565,-0.548814,27907.548132,0.000509,0.559268,-0.087012,0.646279,61.37,23.417577,27898.788665,27916.75,51.50038,27896.3,27902.3,54.5,0.234818,1031.146367,27895.95,69.8,0.000509,0.318068,-0.161469,-0.035945,-0.125524,27919.702505,27940.424331,0.06,0,66.666667,1023.766727,-17.322003,-4.190158,-13.131845,418370.9,2.0,17664.955218,27890.236719,52.240738,50.762035,59.812861,52.240738,5.19,27916.75,27899.217285,0.195699,50.843516,50.244922,-0.054294,-0.066867,57.062567,27878.362583,0.114621,14.2,27889.27,0.012203,-0.007148,0.019351,-11.7,1,0,0,-11.7,1,1,0,0,0,27909.634118,0.005497,0.559268,0.000683,71.286619,69.308427,63.357306,77.344831,77.030088,27792.248881,1,27792.248881,27979.366961,27890.693333,27879.033644,27906.735987,14.5,32.729971,1,0,26415.789709,23543.919262,29287.660156,20672.048815,32159.530602,17800.178368,35031.401049,27890.693333,0.010625,0.01232,42.0,0.167928,1.414774,1,0.506983,56.020315,5081.782053,0.256117,27882.939021,1.089552,1.041311,22900.132882,27889.462619,27899.5,-21.172023,27898.303636,27913.172042,-0.305574,36.240706,0,1,12.037,13654.728849,13653.32329,11.464,0.35127,27766.355686,1.646255,-0.069343,24.823463,-3.274672,28.098135,46.725548,0.041122,-0.015919,0.057042,-29.785089,-7.495844,-22.289245,0.036694,0.03522,0.001474,94.234544,24.823463,58.436226,3.669422,4.729935,59.300931


## Univariate Feature Selection Process

In [40]:
# Re-scale the data to include the new feature
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# feature selection
selector = SelectKBest(score_func=f_classif, k="all")
selector.fit(X_scaled, y)

# Get columns to keep and create new dataframe with those only
cols = selector.get_support(indices=True)
features_df_new = X.iloc[:, cols]

# Store the scores of each feature in a dictionary
feature_scores = {
    feature_name: score for feature_name, score in zip(X.columns, selector.scores_)
}

# Sort the dictionary by value in descending order and print the scores
for feature_name, score in sorted(
    feature_scores.items(), key=lambda item: item[1], reverse=True
):
    print(f"{feature_name}: {score}")

# Now we can apply Logistic Regression and Random Forests on the new features_df_new
# Logistic Regression
log_reg = LogisticRegression(random_state=42, max_iter=500)

# Cross-validation
cv_scores = cross_val_score(log_reg, features_df_new, y, cv=tscv, scoring="f1")

print(f"\nLogistic Regression CV F1 score: {np.mean(cv_scores)}")

# Random Forest
rf = RandomForestClassifier(n_estimators=100, random_state=42)

# Cross-validation
cv_scores = cross_val_score(rf, features_df_new, y, cv=tscv, scoring="f1")

print(f"Random Forest CV F1 score: {np.mean(cv_scores)}")

# Reshape input to be 3D [samples, timesteps, features]
X_array = X.values
X_reshaped = X_array.reshape((X_array.shape[0], 1, X_array.shape[1]))

# Call the function
mean_f1_score = timeseries_cv_score(X_reshaped, y.values, n_splits=5)
print(f"\nLSTM CV F1 score: {mean_f1_score}")

print("\n", features_df_new.columns)

  f = msb / msw


THERMOs_20_2_0.5: 273.60362108048645
volume: 91.47185887138416
BBB_5_2.0: 79.13500197982871
CDL_IDENTICAL3CROWS: 22.908645526841614
CDL_HANGINGMAN: 19.791770412852728
DMN_14: 11.907639542415298
AROONU_14: 9.647634376719118
DMP_14: 8.054813920069284
AROOND_14: 3.6653826739854787
AMATe_LR_8_21_2: 1.2859566508917912
AMATe_SR_8_21_2: 1.065560252719997
ABER_ATR_5_15: 0.7788502422916445
ADOSC_3_10: 0.6883592208277559
OBV_min_2: 0.5591854195833879
OBVe_12: 0.4798570820528162
OBVe_4: 0.4771745840486053
color: 0.4452904087414569
OBV: 0.41619803294575647
OBV_max_2: 0.34522088140368123
low: 0.30409701012902224
ALMA_10_6.0_0.85: 0.2976418393705159
AO_5_34: 0.28576718207771223
ABER_XG_5_15: 0.275972186704456
ABER_ZG_5_15: 0.2552379590935276
ACCBL_20: 0.24436544363719015
ABER_SG_5_15: 0.23539790386047119
open: 0.22470195021284903
close: 0.2124001771743569
ACCBM_20: 0.2110670855398578
AD: 0.20718604906113675
ACCBU_20: 0.1777763069827223
APO_12_26: 0.17109364723535517
ADX_14: 0.16572563017629532
high:

## Base Model

In [41]:
# Logistic Regression
log_reg = LogisticRegression(random_state=42, max_iter=500)

# Cross-validation
cv_scores = cross_val_score(log_reg, X, y, cv=tscv, scoring="roc_auc")

print(f"Logistic Regression CV ROC AUC score: {np.mean(cv_scores)}")

# Random Forest
rf = RandomForestClassifier(n_estimators=100, random_state=42)

# Cross-validation
cv_scores = cross_val_score(rf, X, y, cv=tscv, scoring="roc_auc")

print(f"Random Forest CV ROC AUC score: {np.mean(cv_scores)}")

print("\n", X.columns)

Logistic Regression CV ROC AUC score: 0.5517433276575191
Random Forest CV ROC AUC score: 0.8991939050899015

 Index(['open', 'close', 'high', 'low', 'volume', 'color', 'ABER_ZG_5_15',
       'ABER_SG_5_15', 'ABER_XG_5_15', 'ABER_ATR_5_15',
       ...
       'VIDYA_14', 'VTXP_14', 'VTXM_14', 'VWAP_D', 'VWMA_10', 'WCP',
       'WILLR_14', 'WMA_10', 'ZL_EMA_10', 'ZS_30'],
      dtype='object', length=284)


## LSTM

In [42]:
# Reshape input to be 3D [samples, timesteps, features]
X_array = X.values
X_reshaped = X_array.reshape((X_array.shape[0], 1, X_array.shape[1]))

# Call the function
mean_auc_score = timeseries_cv_score(X_reshaped, y.values, n_splits=5)
print(f"\nLSTM CV ROC AUC score: {mean_auc_score}")


LSTM CV ROC AUC score: (0.6959629098786181, 0.4926542232731929)
