In [17]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
import os
import joblib
from pandas import DataFrame as df
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.callbacks import EarlyStopping

In [8]:
# --- 1. Read the combined price history produced in step 2.1 ---
# The first two CSV columns are used together as a two-level row index.
data_path = "/content/historical_prices_combined.csv"
df = pd.read_csv(
    filepath_or_buffer=data_path,
    index_col=[0, 1],
)  # requires the CSV to exist already


In [12]:

# Remove leading/trailing whitespace from every column label
stripped_columns = df.columns.str.strip()
df.columns = stripped_columns

# Show the resulting column labels as a plain list
print("Columns in CSV:", df.columns.to_list())

Columns in CSV: ['High', 'Low', 'Close', 'Volume', 'Dividends', 'Stock_Splits', 'Symbol', 'Capital_Gains']


In [13]:

# Numeric input columns for the LSTM; 'Close' sits at index 2
features = [
    'High',
    'Low',
    'Close',
    'Volume',
]
# Per-ticker container; later cells read 'train', 'val' and 'scaler' from each value
processed_data = dict()

In [22]:
def create_sequences(data, seq_length=60, target_col=2):
    """Slice a 2-D array into overlapping look-back windows and next-step targets.

    Args:
        data: Array-like of shape (n_rows, n_columns), rows in time order.
        seq_length: Number of consecutive rows in each input window.
        target_col: Column whose value on the row right after a window is the
            target. The default 2 is 'Close' in ['High', 'Low', 'Close', 'Volume'].

    Returns:
        (X, y): X has shape (n_windows, seq_length, n_columns) and y has shape
        (n_windows,), where n_windows = max(n_rows - seq_length, 0). When the
        input is too short X is still 3-D (with zero windows), so downstream
        code that reads X.shape[1] / X.shape[2] keeps working.
    """
    data = np.asarray(data)
    n_windows = max(len(data) - seq_length, 0)
    # Row k of `window_rows` lists the indices k .. k + seq_length - 1.
    window_rows = np.arange(n_windows)[:, None] + np.arange(seq_length)[None, :]
    X = data[window_rows]
    # Target for window k is the row immediately after it; copy so y does not alias `data`.
    y = data[seq_length:, target_col].copy()
    return X, y


# Defined once above instead of being re-created on every loop iteration.
for ticker, data_dict in processed_data.items():
    train_data = data_dict['train']
    val_data = data_dict['val']
    scaler = data_dict['scaler']  # kept next to the sequences so later cells can save it

    X_train, y_train = create_sequences(train_data)
    X_val, y_val = create_sequences(val_data)

    sequence_data[ticker] = {
        'X_train': X_train,
        'y_train': y_train,
        'X_val': X_val,
        'y_val': y_val,
        'scaler': scaler
    }

    print(f"{ticker} sequences created: X_train {X_train.shape}, X_val {X_val.shape}")


AAPL sequences created: X_train (944, 60, 4), X_val (192, 60, 4)
MSFT sequences created: X_train (944, 60, 4), X_val (192, 60, 4)
GOOGL sequences created: X_train (944, 60, 4), X_val (192, 60, 4)
AMZN sequences created: X_train (944, 60, 4), X_val (192, 60, 4)
TSLA sequences created: X_train (944, 60, 4), X_val (192, 60, 4)
META sequences created: X_train (944, 60, 4), X_val (192, 60, 4)
NVDA sequences created: X_train (944, 60, 4), X_val (192, 60, 4)
JPM sequences created: X_train (944, 60, 4), X_val (192, 60, 4)
V sequences created: X_train (944, 60, 4), X_val (192, 60, 4)
JNJ sequences created: X_train (944, 60, 4), X_val (192, 60, 4)
SPY sequences created: X_train (944, 60, 4), X_val (192, 60, 4)
QQQ sequences created: X_train (944, 60, 4), X_val (192, 60, 4)
VTI sequences created: X_train (944, 60, 4), X_val (192, 60, 4)
VOO sequences created: X_train (944, 60, 4), X_val (192, 60, 4)
IWM sequences created: X_train (944, 60, 4), X_val (192, 60, 4)
DIA sequences created: X_train (94

In [15]:
# Look-back window: the previous 60 days are used to predict the next day
sequence_length = 60

# Per-ticker store for the generated sequences, keyed by ticker symbol
sequence_data = dict()

In [16]:

def create_sequences(data, seq_length=sequence_length, target_col=2):
    """Slice a 2-D array into overlapping look-back windows and next-step targets.

    Args:
        data: Array-like of shape (n_rows, n_columns), rows in time order.
        seq_length: Number of consecutive rows in each input window. The default
            is the value of `sequence_length` at the time this cell runs.
        target_col: Column whose value on the row right after a window is the
            target. The default 2 is 'Close' in ['High', 'Low', 'Close', 'Volume'].

    Returns:
        (X, y): X has shape (n_windows, seq_length, n_columns) and y has shape
        (n_windows,), where n_windows = max(n_rows - seq_length, 0). When the
        input is too short X is still 3-D (with zero windows).
    """
    data = np.asarray(data)
    n_windows = max(len(data) - seq_length, 0)
    # Row k of `window_rows` lists the indices k .. k + seq_length - 1.
    window_rows = np.arange(n_windows)[:, None] + np.arange(seq_length)[None, :]
    X = data[window_rows]
    # Target for window k is the row immediately after it; copy so y does not alias `data`.
    y = data[seq_length:, target_col].copy()
    return X, y


# Defined once above instead of being re-created on every loop iteration.
for ticker, data_dict in processed_data.items():
    train_data = data_dict['train']
    val_data = data_dict['val']

    X_train, y_train = create_sequences(train_data)
    X_val, y_val = create_sequences(val_data)

    sequence_data[ticker] = {
        'X_train': X_train,
        'y_train': y_train,
        'X_val': X_val,
        'y_val': y_val
    }

    print(f"{ticker} sequences created:")
    print(f"  X_train: {X_train.shape}, y_train: {y_train.shape}")
    print(f"  X_val: {X_val.shape}, y_val: {y_val.shape}")

AAPL sequences created:
  X_train: (944, 60, 4), y_train: (944,)
  X_val: (192, 60, 4), y_val: (192,)
MSFT sequences created:
  X_train: (944, 60, 4), y_train: (944,)
  X_val: (192, 60, 4), y_val: (192,)
GOOGL sequences created:
  X_train: (944, 60, 4), y_train: (944,)
  X_val: (192, 60, 4), y_val: (192,)
AMZN sequences created:
  X_train: (944, 60, 4), y_train: (944,)
  X_val: (192, 60, 4), y_val: (192,)
TSLA sequences created:
  X_train: (944, 60, 4), y_train: (944,)
  X_val: (192, 60, 4), y_val: (192,)
META sequences created:
  X_train: (944, 60, 4), y_train: (944,)
  X_val: (192, 60, 4), y_val: (192,)
NVDA sequences created:
  X_train: (944, 60, 4), y_train: (944,)
  X_val: (192, 60, 4), y_val: (192,)
JPM sequences created:
  X_train: (944, 60, 4), y_train: (944,)
  X_val: (192, 60, 4), y_val: (192,)
V sequences created:
  X_train: (944, 60, 4), y_train: (944,)
  X_val: (192, 60, 4), y_val: (192,)
JNJ sequences created:
  X_train: (944, 60, 4), y_train: (944,)
  X_val: (192, 60, 4)

In [18]:
# Training hyper-parameters
# num_features counts the input columns ['High','Low','Close','Volume']
num_features, epochs, batch_size = 4, 50, 32

# Output folder for trained models; no error if it is already there
lstm_model_dir = os.path.join("models", "lstm")
os.makedirs(lstm_model_dir, exist_ok=True)


In [24]:
# Train one single-layer LSTM regressor per ticker on its scaled sequences.
for ticker, seq_data in sequence_data.items():
    X_train, y_train = seq_data['X_train'], seq_data['y_train']
    X_val, y_val = seq_data['X_val'], seq_data['y_val']

    # The Keras progress log does not name the ticker, so label each run.
    print(f"\nTraining LSTM for {ticker} ...")

    # Build LSTM model: input is (window length, number of features)
    model = Sequential()
    model.add(LSTM(50, return_sequences=False, input_shape=(X_train.shape[1], X_train.shape[2])))
    model.add(Dense(1))  # predict next-day Close price

    model.compile(optimizer='adam', loss='mse', metrics=['mae'])

    # Stop once validation loss stops improving and keep the best weights,
    # instead of always running the full epoch budget.
    early_stop = EarlyStopping(
        monitor='val_loss',
        patience=10,
        restore_best_weights=True
    )

    # Train model using the notebook-level `epochs` / `batch_size` settings
    # rather than repeating their values as literals here.
    history = model.fit(
        X_train, y_train,
        validation_data=(X_val, y_val),
        epochs=epochs,
        batch_size=batch_size,
        callbacks=[early_stop],
        verbose=1
    )

    # Save model reference in sequence_data so later cells can persist it
    sequence_data[ticker]['model'] = model
    sequence_data[ticker]['history'] = history


Epoch 1/50
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 23ms/step - loss: 0.0356 - mae: 0.1396 - val_loss: 0.0138 - val_mae: 0.1041
Epoch 2/50
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 17ms/step - loss: 0.0017 - mae: 0.0314 - val_loss: 0.0035 - val_mae: 0.0462
Epoch 3/50
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step - loss: 9.7921e-04 - mae: 0.0252 - val_loss: 0.0028 - val_mae: 0.0396
Epoch 4/50
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 19ms/step - loss: 9.2608e-04 - mae: 0.0243 - val_loss: 0.0029 - val_mae: 0.0408
Epoch 5/50
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 19ms/step - loss: 9.2024e-04 - mae: 0.0240 - val_loss: 0.0025 - val_mae: 0.0366
Epoch 6/50
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 20ms/step - loss: 9.1007e-04 - mae: 0.0241 - val_loss: 0.0025 - val_mae: 0.0371
Epoch 7/50
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 28m

In [25]:
# Base path of the project. Defaults to the original laptop location; set the
# PIRS_BASE_PATH environment variable to redirect output (e.g. on Colab)
# without editing this cell.
base_path = os.environ.get(
    "PIRS_BASE_PATH",
    "C:/Users/Swara/Desktop/Projects/Personalized Investment Recommendation System/Personalized-Investment-Recommendation-System",
)
processed_data_path = os.path.join(base_path, "data/processed")
models_path = os.path.join(base_path, "models/lstm")

# Create folders if they don't exist
os.makedirs(processed_data_path, exist_ok=True)
os.makedirs(models_path, exist_ok=True)

# Check every entry before writing anything, so a ticker that lacks e.g. its
# 'scaler' or 'model' does not abort the loop after a partial save.
required_keys = ('X_train', 'y_train', 'X_val', 'y_val', 'scaler', 'model')
incomplete_entries = {
    name: [key for key in required_keys if key not in entry]
    for name, entry in sequence_data.items()
}
incomplete_entries = {name: missing for name, missing in incomplete_entries.items() if missing}
if incomplete_entries:
    raise KeyError(f"sequence_data entries are missing required keys: {incomplete_entries}")

# Save processed sequence data, scaler and trained model per ticker
for ticker, data in sequence_data.items():
    # --- Save sequences (X_train, y_train, X_val, y_val) ---
    seq_file = os.path.join(processed_data_path, f"sequence_data_{ticker}.npz")
    np.savez(seq_file,
             X_train=data['X_train'],
             y_train=data['y_train'],
             X_val=data['X_val'],
             y_val=data['y_val'])
    print(f"Saved sequences for {ticker} → {seq_file}")

    # --- Save scaler ---
    scaler_file = os.path.join(processed_data_path, f"scaler_{ticker}.pkl")
    joblib.dump(data['scaler'], scaler_file)
    print(f"Saved scaler for {ticker} → {scaler_file}")

    # --- Save trained LSTM model ---
    model_file = os.path.join(models_path, f"lstm_model_{ticker}.h5")
    data['model'].save(model_file)
    print(f"Saved LSTM model for {ticker} → {model_file}")

print("\nAll models and processed data saved locally!")




Saved sequences for AAPL → C:/Users/Swara/Desktop/Projects/Personalized Investment Recommendation System/Personalized-Investment-Recommendation-System/data/processed/sequence_data_AAPL.npz
Saved scaler for AAPL → C:/Users/Swara/Desktop/Projects/Personalized Investment Recommendation System/Personalized-Investment-Recommendation-System/data/processed/scaler_AAPL.pkl
Saved LSTM model for AAPL → C:/Users/Swara/Desktop/Projects/Personalized Investment Recommendation System/Personalized-Investment-Recommendation-System/models/lstm/lstm_model_AAPL.h5
Saved sequences for MSFT → C:/Users/Swara/Desktop/Projects/Personalized Investment Recommendation System/Personalized-Investment-Recommendation-System/data/processed/sequence_data_MSFT.npz
Saved scaler for MSFT → C:/Users/Swara/Desktop/Projects/Personalized Investment Recommendation System/Personalized-Investment-Recommendation-System/data/processed/scaler_MSFT.pkl
Saved LSTM model for MSFT → C:/Users/Swara/Desktop/Projects/Personalized Investme



Saved sequences for V → C:/Users/Swara/Desktop/Projects/Personalized Investment Recommendation System/Personalized-Investment-Recommendation-System/data/processed/sequence_data_V.npz
Saved scaler for V → C:/Users/Swara/Desktop/Projects/Personalized Investment Recommendation System/Personalized-Investment-Recommendation-System/data/processed/scaler_V.pkl
Saved LSTM model for V → C:/Users/Swara/Desktop/Projects/Personalized Investment Recommendation System/Personalized-Investment-Recommendation-System/models/lstm/lstm_model_V.h5
Saved sequences for JNJ → C:/Users/Swara/Desktop/Projects/Personalized Investment Recommendation System/Personalized-Investment-Recommendation-System/data/processed/sequence_data_JNJ.npz
Saved scaler for JNJ → C:/Users/Swara/Desktop/Projects/Personalized Investment Recommendation System/Personalized-Investment-Recommendation-System/data/processed/scaler_JNJ.pkl
Saved LSTM model for JNJ → C:/Users/Swara/Desktop/Projects/Personalized Investment Recommendation Syste



Saved LSTM model for TLT → C:/Users/Swara/Desktop/Projects/Personalized Investment Recommendation System/Personalized-Investment-Recommendation-System/models/lstm/lstm_model_TLT.h5
Saved sequences for BTC-USD → C:/Users/Swara/Desktop/Projects/Personalized Investment Recommendation System/Personalized-Investment-Recommendation-System/data/processed/sequence_data_BTC-USD.npz
Saved scaler for BTC-USD → C:/Users/Swara/Desktop/Projects/Personalized Investment Recommendation System/Personalized-Investment-Recommendation-System/data/processed/scaler_BTC-USD.pkl
Saved LSTM model for BTC-USD → C:/Users/Swara/Desktop/Projects/Personalized Investment Recommendation System/Personalized-Investment-Recommendation-System/models/lstm/lstm_model_BTC-USD.h5
Saved sequences for ETH-USD → C:/Users/Swara/Desktop/Projects/Personalized Investment Recommendation System/Personalized-Investment-Recommendation-System/data/processed/sequence_data_ETH-USD.npz
Saved scaler for ETH-USD → C:/Users/Swara/Desktop/Proje

In [26]:
# Bundle the saved artifacts into a single archive, then download it through the Colab file API.
# NOTE(review): the archive is built from /content/data/processed and /content/models/lstm,
# while the save cell in this notebook writes under its own `base_path` — confirm both
# refer to the same folders before relying on the archive contents.
!zip -r saved_models_data.zip /content/data/processed /content/models/lstm
from google.colab import files
files.download("saved_models_data.zip")


  adding: content/data/processed/ (stored 0%)
  adding: content/data/processed/scaler_AAPL.pkl (deflated 32%)
  adding: content/data/processed/scaler_VOO.pkl (deflated 32%)
  adding: content/data/processed/scaler_V.pkl (deflated 31%)
  adding: content/data/processed/scaler_NVDA.pkl (deflated 31%)
  adding: content/data/processed/scaler_QQQ.pkl (deflated 33%)
  adding: content/data/processed/scaler_MSFT.pkl (deflated 32%)
  adding: content/data/processed/scaler_META.pkl (deflated 31%)
  adding: content/data/processed/scaler_TLT.pkl (deflated 32%)
  adding: content/data/processed/scaler_VTI.pkl (deflated 32%)
  adding: content/data/processed/scaler_SPY.pkl (deflated 32%)
  adding: content/data/processed/scaler_BTC-USD.pkl (deflated 33%)
  adding: content/data/processed/scaler_ETH-USD.pkl (deflated 33%)
  adding: content/data/processed/scaler_GOOGL.pkl (deflated 32%)
  adding: content/data/processed/scaler_AMZN.pkl (deflated 33%)
  adding: content/data/processed/scaler_DIA.pkl (deflated 3

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Evaluation

In [27]:
!pip install pytorch-forecasting pytorch-lightning


Collecting pytorch-forecasting
  Downloading pytorch_forecasting-1.5.0-py3-none-any.whl.metadata (13 kB)
Collecting pytorch-lightning
  Downloading pytorch_lightning-2.5.6-py3-none-any.whl.metadata (20 kB)
Collecting lightning<3.0.0,>=2.0.0 (from pytorch-forecasting)
  Downloading lightning-2.5.6-py3-none-any.whl.metadata (42 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m42.4/42.4 kB[0m [31m2.1 MB/s[0m eta [36m0:00:00[0m
Collecting torchmetrics>0.7.0 (from pytorch-lightning)
  Downloading torchmetrics-1.8.2-py3-none-any.whl.metadata (22 kB)
Collecting lightning-utilities>=0.10.0 (from pytorch-lightning)
  Downloading lightning_utilities-0.15.2-py3-none-any.whl.metadata (5.7 kB)
Downloading pytorch_forecasting-1.5.0-py3-none-any.whl (391 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m391.5/391.5 kB[0m [31m9.7 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pytorch_lightning-2.5.6-py3-none-any.whl (831 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━

In [30]:
import os
import numpy as np
import pandas as pd
import joblib
from tensorflow.keras.models import load_model
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error
import matplotlib.pyplot as plt

# --- Paths ---
# Defaults to the original laptop location; set the PIRS_BASE_PATH environment
# variable to point at the project root on another machine (e.g. Colab).
base_path = os.environ.get(
    "PIRS_BASE_PATH",
    "C:/Users/Swara/Desktop/Projects/Personalized Investment Recommendation System/Personalized-Investment-Recommendation-System",
)
processed_data_path = os.path.join(base_path, "data/processed")
models_path = os.path.join(base_path, "models/lstm")
raw_data_path = os.path.join(base_path, "data/raw")  # CSVs stored here

# --- Parameters ---
sequence_length = 60  # same as during training
# Must be the same columns, in the same order, that the models were trained on:
# the training sequences have 4 features per time step, so the evaluation
# windows cannot carry extra columns such as Dividends or Stock_Splits.
features = ['High', 'Low', 'Close', 'Volume']

# NOTE(review): the save log in this notebook also lists V and JNJ; they are not
# evaluated here — add them if that is unintended.
tickers = ['AAPL','MSFT','GOOGL','AMZN','TSLA','META','NVDA','JPM','SPY','QQQ','VTI','VOO','IWM','DIA','GLD','TLT','BTC-USD','ETH-USD','BNB-USD']  # add more if needed

# --- Sliding window sequence creation ---
def create_sequences(data, seq_len=60, target_col=2):
    """Slice a 2-D array into overlapping look-back windows and next-step targets.

    Args:
        data: Array-like of shape (n_rows, n_columns), rows in time order.
        seq_len: Number of consecutive rows in each input window.
        target_col: Column whose value on the row right after a window is the
            target. The default 2 is the position of 'Close' in `features`.

    Returns:
        (X, y): X has shape (n_windows, seq_len, n_columns) and y has shape
        (n_windows,), where n_windows = max(n_rows - seq_len, 0). When the
        input is too short X is still 3-D (with zero windows), so it can be
        sliced and shape-checked like any other result.
    """
    data = np.asarray(data)
    n_windows = max(len(data) - seq_len, 0)
    # Row k of `window_rows` lists the indices k .. k + seq_len - 1.
    window_rows = np.arange(n_windows)[:, None] + np.arange(seq_len)[None, :]
    X = data[window_rows]
    # Target for window k is the row immediately after it; copy so y does not alias `data`.
    y = data[seq_len:, target_col].copy()
    return X, y

# --- Evaluation loop ---
# For each ticker: load raw prices, scale with the saved scaler, rebuild the
# windows, predict with the saved model, and report errors in price units.
results = {}

for ticker in tickers:
    try:
        # Load raw CSV
        csv_file = os.path.join(raw_data_path, f"{ticker}.csv")
        df = pd.read_csv(csv_file)
        df = df.sort_values('Date')  # ensure chronological order

        # Select features
        df_features = df[features].values

        # Load scaler and apply
        # NOTE: joblib.load unpickles the file; only load scaler files produced by this project.
        scaler_file = os.path.join(processed_data_path, f"scaler_{ticker}.pkl")
        scaler = joblib.load(scaler_file)
        scaled_data = scaler.transform(df_features)

        # Create sequences
        X, y = create_sequences(scaled_data, sequence_length)

        # Hold out the last 20% of the windows for evaluation.
        # NOTE(review): this splits after windowing — confirm it matches how the
        # training notebook produced its validation set.
        train_size = int(len(X) * 0.8)
        X_val, y_val = X[train_size:], y[train_size:]
        if len(X_val) == 0:
            # Fail with a readable message instead of passing an empty batch to the model.
            raise ValueError(
                f"not enough rows ({len(df)}) to build validation windows of length {sequence_length}"
            )

        # Load trained model
        model_file = os.path.join(models_path, f"lstm_model_{ticker}.h5")
        model = load_model(model_file)

        # Predict
        y_pred = model.predict(X_val)

        # Inverse scale for 'Close' column: the scaler works on full feature rows,
        # so place the values in the Close column of a zero matrix and read that
        # column back after inverse_transform.
        close_index = features.index('Close')
        y_val_scaled = np.zeros((len(y_val), len(features)))
        y_val_scaled[:, close_index] = y_val
        y_val_actual = scaler.inverse_transform(y_val_scaled)[:, close_index]

        y_pred_scaled = np.zeros((len(y_pred), len(features)))
        y_pred_scaled[:, close_index] = y_pred.flatten()
        y_pred_actual = scaler.inverse_transform(y_pred_scaled)[:, close_index]

        # Metrics
        rmse = np.sqrt(mean_squared_error(y_val_actual, y_pred_actual))
        mae = mean_absolute_error(y_val_actual, y_pred_actual)
        # MAPE is undefined where the actual value is 0; leave those points out
        # rather than dividing by zero, and report NaN if nothing is left.
        nonzero = y_val_actual != 0
        if nonzero.any():
            mape = np.mean(
                np.abs((y_val_actual[nonzero] - y_pred_actual[nonzero]) / y_val_actual[nonzero])
            ) * 100
        else:
            mape = np.nan

        results[ticker] = {'RMSE': rmse, 'MAE': mae, 'MAPE': mape}

        # Plot predicted vs actual
        fig = plt.figure(figsize=(10,4))
        plt.plot(y_val_actual, label='Actual')
        plt.plot(y_pred_actual, label='Predicted')
        plt.title(f"{ticker} - Predicted vs Actual")
        plt.xlabel("Time")
        plt.ylabel("Close Price")
        plt.legend()
        plt.show()
        plt.close(fig)  # release the figure so they do not pile up across tickers

        print(f"✅ {ticker} - RMSE: {rmse:.2f}, MAE: {mae:.2f}, MAPE: {mape:.2f}%")

    except Exception as e:
        # Best effort: report the failure and continue with the next ticker.
        print(f"[❌] Error with {ticker}: {e}")

# --- Summary table ---
# One row per successfully evaluated ticker, one column per metric.
results_df = pd.DataFrame(results).T
print("\nAll tickers evaluation metrics:")
print(results_df)


[❌] Error with AAPL: [Errno 2] No such file or directory: 'C:/Users/Swara/Desktop/Projects/Personalized Investment Recommendation System/Personalized-Investment-Recommendation-System/data/raw/AAPL.csv'
[❌] Error with MSFT: [Errno 2] No such file or directory: 'C:/Users/Swara/Desktop/Projects/Personalized Investment Recommendation System/Personalized-Investment-Recommendation-System/data/raw/MSFT.csv'
[❌] Error with GOOGL: [Errno 2] No such file or directory: 'C:/Users/Swara/Desktop/Projects/Personalized Investment Recommendation System/Personalized-Investment-Recommendation-System/data/raw/GOOGL.csv'
[❌] Error with AMZN: [Errno 2] No such file or directory: 'C:/Users/Swara/Desktop/Projects/Personalized Investment Recommendation System/Personalized-Investment-Recommendation-System/data/raw/AMZN.csv'
[❌] Error with TSLA: [Errno 2] No such file or directory: 'C:/Users/Swara/Desktop/Projects/Personalized Investment Recommendation System/Personalized-Investment-Recommendation-System/data/ra