In [None]:
# Open the Colab file-upload prompt and keep whatever files.upload() returns
# in `uploaded`.
from google.colab import files
uploaded = files.upload()
# Upload your 3 CSV files: aligned_all_stock_features.csv,
# aligned_exogenous_features.csv and aligned_nasdaq10_log_returns.csv
# (the names the loading cell reads from the working directory).

Saving aligned_all_stock_features.csv to aligned_all_stock_features.csv
Saving aligned_exogenous_features.csv to aligned_exogenous_features.csv
Saving aligned_nasdaq10_log_returns.csv to aligned_nasdaq10_log_returns.csv


In [None]:
# Mount Google Drive at /content/drive; the prediction checkpoint is written
# under /content/drive/MyDrive by later cells.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# STEP 2: Import libraries
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout,Input
from tensorflow.keras.callbacks import EarlyStopping
import matplotlib.pyplot as plt
import gc
from tensorflow.keras import backend as K

In [None]:
# Load the three aligned inputs in one pass. Every file is read the same way:
# first column becomes the index and is parsed as dates.
features_df, exogenous_df, log_returns_df = (
    pd.read_csv(csv_name, index_col=0, parse_dates=True)
    for csv_name in (
        "aligned_all_stock_features.csv",
        "aligned_exogenous_features.csv",
        "aligned_nasdaq10_log_returns.csv",
    )
)

In [None]:
# Target stock: `stock_name` is the ticker column to predict (change it to
# model another stock); `stock_prefix` is the prefix its feature columns use.
stock_name, stock_prefix = "AMZN", "Amazon"

# Sequence length fed to the LSTM, and training window size (days).
time_steps, rolling_window = 10, 500

# Training epochs per window, and batch size.
epochs, batch_size = 10, 32


In [None]:
# STEP 5: Prepare input data
def get_model_input(stock_prefix, features=None, exogenous=None):
    """Assemble the model input frame for one stock.

    Selects every column of ``features`` whose name starts with
    ``stock_prefix`` and concatenates it column-wise with ``exogenous``
    shifted down by one row.

    Parameters
    ----------
    stock_prefix : str
        Prefix identifying the stock's feature columns (e.g. ``"Amazon"``).
    features : pandas.DataFrame, optional
        Per-stock feature table. Defaults to the notebook-level
        ``features_df``.
    exogenous : pandas.DataFrame, optional
        Exogenous feature table. Defaults to the notebook-level
        ``exogenous_df``.

    Returns
    -------
    pandas.DataFrame
        Stock feature columns followed by the shifted exogenous columns,
        joined on the index by ``pd.concat(axis=1)``.

    Raises
    ------
    ValueError
        If no column of ``features`` starts with ``stock_prefix``.
    """
    # Fall back to the notebook globals so existing one-argument calls keep working.
    if features is None:
        features = features_df
    if exogenous is None:
        exogenous = exogenous_df

    stock_cols = [col for col in features.columns if col.startswith(stock_prefix)]
    if not stock_cols:
        # Without this check a mistyped prefix would silently produce an
        # input frame containing only the exogenous columns.
        raise ValueError(
            f"No feature columns start with prefix {stock_prefix!r}"
        )
    stock_features = features[stock_cols]

    # Shift exogenous by 1 row: each row then carries the previous row's
    # exogenous values (presumably so only already-known values are used).
    exog_shifted = exogenous.shift(1)
    return pd.concat([stock_features, exog_shifted], axis=1)

In [None]:
# Feature matrix for the configured stock prefix, and the target series:
# the configured ticker's column of the log-returns table.
X_all = get_model_input(stock_prefix=stock_prefix)
y_all = log_returns_df.loc[:, stock_name]


In [None]:
# Preview the first rows of the assembled model input.
X_all.head()

Unnamed: 0_level_0,Amazon_lag_return_1d,Amazon_lag_return_2d,Amazon_lag_return_3d,Amazon_lag_return_4d,Amazon_lag_return_5d,Amazon_ema_12,Amazon_ema_26,Amazon_macd_line,Amazon_macd_signal,Amazon_rsi_14,SPY_ret,QQQ_ret,VIX
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
2015-01-23,0.043456,0.026595,-0.004826,0.013154,-0.021373,15.001515,15.074325,-0.072809,-0.155173,52.835821,-0.005498,0.002209,16.66
2015-01-26,0.006423,0.043456,0.026595,-0.004826,0.013154,15.075128,15.104375,-0.029246,-0.129987,55.835962,0.002339,-0.001152,15.52
2015-01-27,-0.009003,0.006423,0.043456,0.026595,-0.004826,15.115878,15.121828,-0.00595,-0.10518,59.797297,-0.013278,-0.026268,17.219999
2015-01-28,-0.009085,-0.009003,0.006423,0.043456,0.026595,15.12882,15.127619,0.001201,-0.083904,54.761905,-0.012907,-0.00514,20.440001
2015-01-29,-0.009168,-0.009085,-0.009003,0.006423,0.043456,15.199771,15.161869,0.037901,-0.059543,58.823529,0.009201,0.009566,18.76


In [None]:
# Checkpoint CSV on Google Drive that holds the predictions made so far.
output_file = f"/content/drive/MyDrive/{stock_name}_LSTM_predictions_n.csv"

# Row position in X_all where a fresh run begins. A resumed run continues at
# this base plus the number of predictions already saved.
start_idx = 2000
# Try to resume from saved file
import os
try:
    existing_df = pd.read_csv(output_file, index_col=0)
    predictions = existing_df[f"{stock_name}_pred"].tolist()
    # The index comes back from the CSV as strings, while the training loop
    # appends entries taken from X_all.index. Convert each saved date to a
    # Timestamp so the combined list is homogeneous and later saves write a
    # single, consistent date format.
    dates = [pd.Timestamp(saved_date) for saved_date in existing_df.index]
    start_idx += len(existing_df)
    print(f"✅ Resuming from index {start_idx}")
except FileNotFoundError:
    # No checkpoint yet: start with empty results at the base index.
    predictions, dates = [], []
    print("🆕 Starting fresh (no saved file found)")

✅ Resuming from index 2350


In [None]:
# Show where the loop will start and how many rows are available, one per line.
print(start_idx, len(X_all), sep="\n")

2350
2502


In [None]:
# Rolling-window forecast: for every end_idx, fit a fresh LSTM on the
# `rolling_window` rows before end_idx and store one prediction.
# All sizes come from the configuration cell (time_steps, rolling_window,
# epochs, batch_size) instead of repeated literals, so changing the config
# actually changes the run.
num_features = X_all.shape[1]
print(num_features)

# Number of loop positions between checkpoint writes to Google Drive.
checkpoint_every = 50

# A negative slice start would make .iloc wrap around and silently train on
# the wrong rows, so refuse to run in that case.
if start_idx < rolling_window:
    raise ValueError(
        f"start_idx ({start_idx}) must be >= rolling_window ({rolling_window})"
    )

for end_idx in range(start_idx, len(X_all) - 1):
    # Rolling window: the rolling_window rows immediately before end_idx.
    window_start = end_idx - rolling_window
    X_window = X_all.iloc[window_start:end_idx]
    y_window = y_all.iloc[window_start:end_idx]

    # Scale: the scaler is fit on the current window only.
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X_window)

    # Build sequences: window rows [i - time_steps, i) are the input and the
    # target is the return at window row i.
    X_seq, y_seq = [], []
    for i in range(time_steps, rolling_window):
        X_seq.append(X_scaled[i - time_steps:i])
        y_seq.append(y_window.iloc[i])
    X_seq, y_seq = np.array(X_seq), np.array(y_seq)

    # A new model is built and trained from scratch for every window.
    model = Sequential([
        Input(shape=(time_steps, num_features)),
        LSTM(32, recurrent_dropout=0.1),
        Dense(1)
    ])
    model.compile(optimizer='adam', loss='mse')
    model.fit(X_seq, y_seq, epochs=epochs, batch_size=batch_size, verbose=0,
              callbacks=[EarlyStopping(monitor='loss', patience=3, restore_best_weights=True)])

    # Predict from the last time_steps rows of the scaled window.
    X_pred_input = X_scaled[-time_steps:].reshape(1, time_steps, -1)
    y_pred = model.predict(X_pred_input, verbose=0)[0][0]
    # Store a plain Python float so resumed (loaded from CSV) and newly
    # generated predictions share one type.
    predictions.append(float(y_pred))
    # NOTE(review): training pairs rows [i - time_steps, i) with the target at
    # row i, so this input (rows [end_idx - time_steps, end_idx)) corresponds
    # to row end_idx, yet the prediction is labelled with row end_idx + 1.
    # Left unchanged so new rows stay consistent with the existing checkpoint
    # file; confirm which date is intended.
    dates.append(X_all.index[end_idx + 1])

    # Save to Google Drive every checkpoint_every steps. This matches the
    # previous (end_idx - 500 + 1) % 50 test because 500 is a multiple of 50.
    if (end_idx + 1) % checkpoint_every == 0:
        pd.DataFrame({f"{stock_name}_pred": predictions}, index=dates).to_csv(output_file)
        print(f"📦 Saved {len(predictions)} predictions so far for {stock_name} at index {end_idx}")
        print(f"💾 Checkpoint saved to: {output_file}")

    # Clean up memory: drop Keras global state and the model before the next
    # window. `K` and `gc` come from the notebook's import cell; they are no
    # longer re-imported on every iteration.
    K.clear_session()
    del model
    gc.collect()

13




📦 Saved 400 predictions so far for AMZN at index 2399
💾 Checkpoint saved to: /content/drive/MyDrive/AMZN_LSTM_predictions_n.csv
📦 Saved 450 predictions so far for AMZN at index 2449
💾 Checkpoint saved to: /content/drive/MyDrive/AMZN_LSTM_predictions_n.csv
📦 Saved 500 predictions so far for AMZN at index 2499
💾 Checkpoint saved to: /content/drive/MyDrive/AMZN_LSTM_predictions_n.csv


In [None]:
# Save final results
# Writes every prediction currently held in `predictions` / `dates` to the
# same path used for the periodic checkpoints, overwriting that file.
final_df = pd.DataFrame({f"{stock_name}_pred": predictions}, index=dates)
final_df.to_csv(output_file)
print(f"✅ Final saved to Google Drive: {output_file}")

# Optional: download to your computer
# (triggers a browser download of the saved CSV from the Colab runtime)
from google.colab import files
files.download(output_file)

✅ Final saved to Google Drive: /content/drive/MyDrive/AMZN_LSTM_predictions_n.csv


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>