In [48]:
import tensorflow as tf
import time
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, MinMaxScaler, RobustScaler
from tensorflow import keras
from tensorflow.keras import layers
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
from tensorflow.keras.regularizers import l2
import matplotlib.pyplot as plt
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.optimizers import Adam
from sklearn.metrics import confusion_matrix
import seaborn as sns

file_path = "data/AAPL_time_series.csv"

df = pd.read_csv(file_path)
df

Unnamed: 0,datetime,open,high,low,close,volume,open-trend,open-change,high-trend,high-change,...,close-change,volume-trend,volume-change,percent_b,macd,macd_signal,macd_hist,adx,ema,rsi
0,2023-12-04 10:40:00,187.85,187.90,187.80,187.84,119927,1,,1,,...,,1,,0.22,-0.32,-0.33,0.01,53.77,187.97,23.57
1,2023-12-04 10:41:00,187.85,187.90,187.77,187.87,128851,1,,1,,...,,1,,0.25,-0.31,-0.33,0.01,54.43,187.95,25.75
2,2023-12-04 10:42:00,187.86,188.02,187.84,188.00,107009,1,,1,,...,,1,,0.39,-0.29,-0.32,0.03,53.50,187.96,34.47
3,2023-12-04 10:43:00,187.99,188.14,187.98,188.10,96643,1,,1,,...,,1,,0.54,-0.26,-0.31,0.05,51.37,187.99,40.29
4,2023-12-04 10:44:00,188.12,188.14,188.02,188.02,123641,1,,1,,...,,1,,0.46,-0.24,-0.29,0.05,49.40,187.99,37.60
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4995,2023-12-20 15:55:00,195.50,195.50,195.21,195.26,497956,1,0.01,0,-0.05,...,-0.26,1,342059.0,0.22,-0.00,-0.01,0.00,14.89,195.46,40.40
4996,2023-12-20 15:56:00,195.26,195.34,195.21,195.23,309867,0,-0.26,0,-0.20,...,-0.20,0,217634.0,0.14,-0.02,-0.01,-0.01,15.49,195.42,39.28
4997,2023-12-20 15:57:00,195.22,195.28,195.06,195.07,446759,0,-0.21,0,-0.23,...,-0.41,1,331992.0,-0.10,-0.05,-0.02,-0.03,16.89,195.35,34.04
4998,2023-12-20 15:58:00,195.07,195.24,195.06,195.12,540699,0,-0.42,0,-0.29,...,-0.30,1,420099.0,0.03,-0.07,-0.03,-0.04,18.19,195.30,37.11


In [49]:
def get_recent_time_series_batch() -> tuple:
    seq_length: int = 10
    target_column_name: str = "target"
    time_series_batch = []
    time_series_target = []

    for row in range(len(df) - seq_length):
        time_series_batch.append(df.iloc[row:row + seq_length].values)
        time_series_target.append(df[target_column_name].iloc[row + seq_length - 1])

    time_series_batch = np.array(time_series_batch)
    time_series_target= np.array(time_series_target)
    return time_series_batch, time_series_target

X, y = get_recent_time_series_batch()

"""
X is a list with contains (len(df) - seq_length) entries. The first entry is a list of the first seq_length rows of df. X[index][-1] gives you the (index + seq_length)-th row of df.
y is a list that contains (len(df) - seq_length) entries. The first entry is the "target" column of the seq_length row. X[index][-1][-1] and y[index] are the same "target" value.
If df has 5000 entries, X.shape y.shape will give you: ((4980, 10, 25), (4980,))
"""
X.shape, y.shape

KeyError: 'target'

In [None]:
# Split the data into training and temporary sets

test_size = 0.2

X_train_temp, X_test, y_train_temp, y_test = train_test_split(X, y, test_size=test_size, random_state=42)

# Further split the temporary set into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(X_train_temp, y_train_temp, test_size=test_size, random_state=42)

scaler = MinMaxScaler()
X_train = scaler.fit_transform(X_train.reshape(-1, X_train.shape[-1])).reshape(X_train.shape)
X_val = scaler.transform(X_val.reshape(-1, X_val.shape[-1])).reshape(X_val.shape)
X_test = scaler.transform(X_test.reshape(-1, X_test.shape[-1])).reshape(X_test.shape)

# Print the shapes of the sets
print("Training set shape:", X_train.shape, y_train.shape)
print("Validation set shape:", X_val.shape, y_val.shape)
print("Test set shape:", X_test.shape, y_test.shape)

In [None]:
from keras.src.layers import LSTM, Dropout, Dense
from keras import Sequential

model = Sequential()
model.add(LSTM(50, input_shape=(X_train.shape[1], X_train.shape[2])))
model.add(Dropout(0.2))
model.add(Dense(1, activation='sigmoid', kernel_regularizer=l2(0.001)))

early_stopping = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)

# Compile the model
learning_rate = 0.0001
optimizer = Adam(learning_rate=learning_rate)
model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])

start_time = time.time()
history = model.fit(X_train, y_train, epochs=30, batch_size=16, validation_data=(X_val, y_val), callbacks=[early_stopping])
end_time = time.time()
training_time = end_time - start_time
print(f"Training time: {training_time:.2f} seconds")

In [None]:
test_loss, test_accuracy = model.evaluate(X_test, y_test)
print(f"Test Loss: {test_loss:.4f}, Test Accuracy: {test_accuracy * 100:.2f}%")

fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(10, 4))

ax1.plot(history.history['accuracy'][1:], label='Training Accuracy', color="green")
ax1.plot(history.history['val_accuracy'][1:], label='Validation Accuracy', color="#ff4d4d")

ax1.set_title('Accuracy Plot')
ax1.set_xlabel('Epoch')
ax1.set_ylabel('Accuracy')
ax1.legend()

ax2.plot(history.history['loss'][1:], label='Training Loss', color="green")
ax2.plot(history.history['val_loss'][1:], label='Validation Loss', color="#ff4d4d")

ax2.set_title('Accuracy Plot')
ax2.set_xlabel('Epoch')
ax2.set_ylabel('Loss')
ax2.legend()

plt.tight_layout()
plt.show()

In [None]:
y_pred_prob = model.predict(X_test)
y_pred = (y_pred_prob > 0.5).astype(int)

y_actual = y_test
results_df = pd.DataFrame({'Actual': y_actual, 'Predicted': y_pred.flatten(), 'Predicted_Prob': y_pred_prob.flatten()})
results_df

In [None]:
# Calculate evaluation metrics
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)
roc_auc = roc_auc_score(y_test, y_pred)

# Print the evaluation metrics
print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1 Score: {f1:.4f}")
print(f"AUC-ROC: {roc_auc:.4f}")

In [None]:
conf_matrix = confusion_matrix(y_test, y_pred)

# Display the confusion matrix using a heatmap
plt.figure(figsize=(4, 4))

group_names = ["True Neg.","False Pos.","False Neg.","True Pos."]
group_counts = ["{0:0.0f}".format(value) for value in conf_matrix.flatten()]
group_percentages = ["{0:.2%}".format(value) for value in conf_matrix.flatten()/np.sum(conf_matrix)]

labels = [f"{v1}\n{v2}\n{v3}" for v1, v2, v3 in zip(group_names,group_counts,group_percentages)]
labels = np.asarray(labels).reshape(2,2)

sns.heatmap(conf_matrix, annot=labels, fmt='', cmap='Blues', cbar=True,
            xticklabels=['Predicted 0', 'Predicted 1'],
            yticklabels=['Actual 0', 'Actual 1'])
plt.title('Confusion Matrix')
plt.show()