In [2]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from datetime import timedelta

# Define function for data preprocessing
def preprocess_data(df, cols, scaler=None):
    if scaler is None:
        scaler = StandardScaler()
        scaler = scaler.fit(df[cols].astype(float))
    df_scaled = scaler.transform(df[cols].astype(float))
    return df_scaled, scaler

# Load and preprocess data
df = pd.read_csv(r"C:\Users\Mcc\Desktop\Project\Data Preprocessing for ML\BTC Aprill-Indicators-5M.csv")
df['date_time'] = pd.to_datetime(df['date_time'])
df = df.iloc[500:-50]
df.reset_index(inplace=True)
df = df.drop(columns=['index'])
df.rename(columns={'date_time': 'Date'}, inplace=True)
cols = ['quote_volume', 'count', 'volume_true', 'volume_false', 'vdiff(false-true)', 
        'original_volume', 'RSI', 'stochastic_%K', 'stochastic_%D', 'EMA_12', 
        'EMA_26', 'EMA_50', 'EMA_100', 'EMA_200', 'EMA_500', 'close_pct']
df_for_training_scaled, scaler = preprocess_data(df, cols)

# Variables for prediction
n_past = 18  # Number of past data points to use
n_future = 1  # Number of future data points to predict

# LSTM model
model = Sequential([
    LSTM(64, activation='relu', input_shape=(n_past, len(cols)), return_sequences=True),
    LSTM(32, activation='relu', return_sequences=False),
    Dropout(0.2),
    Dense(1, activation='sigmoid')
])
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train-test split
trainX, testX, trainY, testY = train_test_split(df_for_training_scaled, df['close_pct'], test_size=0.1, random_state=42)

# Fit the model
history = model.fit(trainX, trainY, epochs=5, batch_size=16, validation_split=0.1, verbose=1)

# Evaluate the model
y_pred = (model.predict(testX) > 0.5).astype("int32")
accuracy = accuracy_score(testY, y_pred)
precision = precision_score(testY, y_pred)
recall = recall_score(testY, y_pred)
f1 = f1_score(testY, y_pred)

print("Test Accuracy:", accuracy)
print("Test Precision:", precision)
print("Test Recall:", recall)
print("Test F1-score:", f1)

# Predict next 5-minute 'close_pct'
last_n_data_points = df_for_training_scaled[-n_past:].reshape(1, n_past, len(cols))
prediction = model.predict(last_n_data_points)
predicted_label = "Positive" if prediction >= 0.5 else "Negative"
print("Predicted next 5-minute 'close_pct':", predicted_label)

# Calculate timestamp for the next 5-minute interval
last_data_timestamp = df['Date'].iloc[-1]
next_prediction_timestamp = last_data_timestamp + timedelta(minutes=5)
print("Timestamp of Prediction:", next_prediction_timestamp)


Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm_2 (LSTM)               (None, 18, 64)            20736     
                                                                 
 lstm_3 (LSTM)               (None, 32)                12416     
                                                                 
 dropout_1 (Dropout)         (None, 32)                0         
                                                                 
 dense_1 (Dense)             (None, 1)                 33        
                                                                 
Total params: 33185 (129.63 KB)
Trainable params: 33185 (129.63 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
Epoch 1/5

Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Validation Accuracy: 0.5011600928074246
Validation Precision: 0.4930555555555556
Validation Recal