**Imports** 🕵️‍♂️

In [84]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from keras.layers import LSTM, Dense, Dropout
from keras.models import Sequential
from keras.optimizers import Adam
from scipy.signal import detrend
from sklearn.metrics import accuracy_score
from sklearn.model_selection import TimeSeriesSplit
from sklearn.preprocessing import MinMaxScaler
from statsmodels.tsa.seasonal import seasonal_decompose


**Functions** 🤌

In [85]:
def create_sequences(X, y, time_steps=60):
    Xs, ys = [], []
    for i in range(len(X) - time_steps):
        Xs.append(X.iloc[i:(i + time_steps)].values)
        ys.append(y.iloc[i + time_steps])
    return np.array(Xs), np.array(ys)



**Globals** 🌎

In [86]:
# # Set display options to show all rows and columns
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)

**Preprocessing** 👻

In [99]:
# Load the data
df = pd.read_csv('../../../data/kc/btc/heiken_ashi/with_trade_indicators/raw/kc_btc_60min_ha_ti.csv')

# Convert color to 0 for 'red' and 1 for 'green'
df['color'] = df['color'].map({'red': 0, 'green': 1})

# Add 'color_change' column: 1 if color changes from the previous row, 0 otherwise
df['color_change'] = df['color'].diff().abs()

# Fill the first row's 'color_change' with 0
df['color_change'].fillna(0, inplace=True)

# Drop 'time', 'color', and 'turnover' columns
df = df.drop(['time', 'color', 'turnover'], axis=1)

# Separate features and target
features = df.drop('color_change', axis=1)
target = df['color_change']

# Fill NaNs in specific columns with 0
features['PSARl_0.01_0.1'] = features['PSARl_0.01_0.1'].fillna(0)
features['PSARs_0.01_0.1'] = features['PSARs_0.01_0.1'].fillna(0)

# Identify the first non-null row
first_valid_index = features.dropna().index[0]

# Drop the rows before this index
features = features.loc[first_valid_index:]
target = target.loc[first_valid_index:]

# Use ffill to fill any remaining missing values
features.ffill(inplace=True)

cols_to_scale = [
    'open', 'close', 'high', 'low', 'volume', 'avg_vol_last_100', 'obv',
    'RSI_5', 'RSI_10', 'RSI_14', 'ROC_14', 'ROC_10', 'ROC_5', 'ATR_14', 'ATR_10', 'ATR_5',
    'PP', 'R1', 'S1', 'R2', 'S2', 'R3', 'S3'
]

# Scale the selected columns
scaler = MinMaxScaler()
features[cols_to_scale] = scaler.fit_transform(features[cols_to_scale])

# Determine the split point
split_point = int(len(features) * 0.8)

# Split the data into train and test sets
X_train, X_test = features[:split_point], features[split_point:]
y_train, y_test = target[:split_point], target[split_point:]

# Reset indices
X_train = X_train.reset_index(drop=True)
y_train = y_train.reset_index(drop=True)
X_test = X_test.reset_index(drop=True)
y_test = y_test.reset_index(drop=True)

# Reshape input to be 3D [samples, timesteps, features] for LSTM
X_train = X_train.values.reshape(X_train.shape[0], 1, X_train.shape[1])
X_test = X_test.values.reshape(X_test.shape[0], 1, X_test.shape[1])

print('Shape of train data:', X_train.shape)
print('Shape of test data:', X_test.shape)

# print(features.head(1))



Shape of train data: (1577, 1, 85)
Shape of test data: (395, 1, 85)


**Cross Validation** 🏴‍☠️

In [100]:
# Initialize TimeSeriesSplit
tscv = TimeSeriesSplit(n_splits=5)

# Define model architecture outside the loop
model = Sequential()
model.add(LSTM(50, activation='relu', input_shape=(X_train.shape[1], X_train.shape[2])))
model.add(Dense(1))
model.compile(optimizer='adam', loss='mse')

# Loop through the splits
for train_index, test_index in tscv.split(X_train):
    X_train_cv, X_test_cv = X_train[train_index], X_train[test_index]
    y_train_cv, y_test_cv = y_train[train_index], y_train[test_index]

    # Fit the model
    model.fit(X_train_cv, y_train_cv, epochs=50, verbose=0)

    y_pred = model.predict(X_test_cv)
    y_pred = (y_pred > 0.5).astype(int)

    # Calculate accuracy (or any other metric you are interested in)
    accuracy = accuracy_score(y_test_cv, y_pred)

    print(f'Accuracy: {accuracy}')


Accuracy: 0.5114503816793893
Accuracy: 0.46564885496183206
Accuracy: 0.5114503816793893
Accuracy: 0.48854961832061067
Accuracy: 0.42748091603053434
