## Imports 

In [3]:
import numpy as np
import keras
import pandas as pd
import yfinance as yf
import matplotlib.pyplot as plt
import tensorflow as tf
from datetime import datetime, timedelta
import os

## Getting data from yfinance

In [None]:
ticker = 'AAPL'
stockDataName = f'{ticker}_StockData.csv'
end_date = datetime.today()
# print(end_date)
start_date = end_date - timedelta(days=365 * 5)
# print(start_date)
data = yf.download(ticker, start=start_date, end=end_date)
output_path = os.path.join(os.getcwd(), 'dataset', stockDataName)
data.to_csv(output_path)

[*********************100%***********************]  1 of 1 completed

1 Failed download:
['AAPL']: YFRateLimitError('Too Many Requests. Rate limited. Try after a while.')


## Cleaning data

In [19]:
# # Load data and parse dates
# df = pd.read_csv(f'dataset/{stockDataName}', skiprows=[1, 2], parse_dates=['Price'])

# # Rename 'Price' to 'Date'
# df = df.rename(columns={'Price': 'Date'})
# df = df.set_index('Date')
df = pd.read_csv(f'dataset/AAPL_CleanedStockData.csv', parse_dates=['Date'])
df = df.set_index('Date')

print(df.head())  # Check the first few rows
print(df.isnull().sum())  # Check for missing values

# df = df.drop('Volume', axis=1)  # Drop the 'Volume' column

# df['Range'] = df['High'] - df['Low']  # Create a new column 'Range'
# df = df.drop(['High', 'Low'], axis=1)  # Drop the 'High' and 'Low' columns

# Calculate the 20-day moving average
# df['MA-20'] = df['Close'].rolling(window=20).mean()
# # Calculate the 50-day moving average
# df['MA-50'] = df['Close'].rolling(window=50).mean()
df = df.dropna()  # Drop rows with missing values


# Fill missing values

# Generate a complete date range and forward-fill missing values
df = df.asfreq('D', method='ffill')  # Forward-fill prices on non-trading days
df = df[~df.index.duplicated(keep='first')]  # Remove duplicate dates

# Save cleaned data to a new file
cleanedStockDataName = f'{ticker}_CleanedStockData.csv'
df.to_csv(f'dataset/{cleanedStockDataName}')

               Close      Open     Range     MA-20     MA-50
Date                                                        
1995-07-18  0.359957  0.366501  0.013557  0.357058  0.335771
1995-07-19  0.340323  0.351542  0.022438  0.356357  0.336536
1995-07-20  0.352010  0.344062  0.017764  0.355492  0.337423
1995-07-21  0.327233  0.321624  0.014024  0.353482  0.337786
1995-07-22  0.327233  0.321624  0.014024  0.353482  0.337786
Close    0
Open     0
Range    0
MA-20    0
MA-50    0
dtype: int64


## Defining the function to build the model

In [None]:
from tensorflow.keras.models import Model
from tensorflow.keras.layers import (
    Input, LSTM, Dropout, BatchNormalization,
    AdditiveAttention, Dense, Flatten, Permute, Reshape, Multiply
)
from tensorflow.keras.callbacks import EarlyStopping, CSVLogger
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error


features = ['Close', 'Open', 'High', 'Low', 'Volume', 'MA20', 'MA50']


def train_lstm_model(
    csv_path,
    sequence_length=100,
    batch_size=25,
    epochs=100,
    validation_split=0.2,
    patience=10
):
    """
    Trains an LSTM model with attention mechanism on stock Close price data.

    Args:
        csv_path (str): Path to the cleaned CSV file.
        sequence_length (int): Number of days to use for each input sequence.
        batch_size (int): Batch size for model training.
        epochs (int): Number of epochs to train.
        validation_split (float): Fraction of training data to use for validation.
        patience (int): Number of epochs to wait for EarlyStopping.

    Returns:
        model (tf.keras.Model): Trained Keras model.
        history (History object): Training history.
        scaler (MinMaxScaler): Fitted scaler (for inverse transforming predictions later).
    """
    # --- Load and Preprocess Data ---
    data = pd.read_csv(csv_path)
    data = data.drop(columns=['Date'])

    # Scale the 'Close' price
    scaler = MinMaxScaler(feature_range=(0, 1))
    close_scaled = scaler.fit_transform(data['Close'].values.reshape(-1, 1))

    # Create sequences
    X, y = [], []
    for i in range(sequence_length, len(close_scaled)):
        X.append(close_scaled[i-sequence_length:i, 0])
        y.append(close_scaled[i, 0])

    X, y = np.array(X), np.array(y)

    # Train/Test Split
    train_size = int(0.8 * len(X))
    X_train, X_test = X[:train_size], X[train_size:]
    y_train, y_test = y[:train_size], y[train_size:]

    # Reshape input for LSTM
    X_train = X_train.reshape((X_train.shape[0], X_train.shape[1], 1))
    X_test = X_test.reshape((X_test.shape[0], X_test.shape[1], 1))

    # --- Build Model ---
    input_layer = Input(shape=(X_train.shape[1], 1))

    x = LSTM(50, return_sequences=True)(input_layer)
    x = Dropout(0.2)(x)
    x = BatchNormalization()(x)
    x = LSTM(50, return_sequences=True)(x)

    # Attention mechanism
    permute = Permute((2, 1))(x)
    reshape = Reshape((50, X_train.shape[1]))(permute)
    attention = AdditiveAttention(name='attention_weight')([reshape, reshape])
    attention = Permute((2, 1))(attention)
    attention = Reshape((X_train.shape[1], 50))(attention)

    x = Multiply()([x, attention])
    x = Flatten()(x)
    output = Dense(1)(x)

    model = Model(inputs=input_layer, outputs=output)
    model.compile(optimizer='adam', loss='mean_squared_error')

    # --- Callbacks ---
    early_stopping = EarlyStopping(
        monitor='val_loss', patience=patience, restore_best_weights=True)
    csv_logger = CSVLogger('training_log.csv')

    # --- Train Model ---
    history = model.fit(
        X_train, y_train,
        epochs=epochs,
        batch_size=batch_size,
        validation_split=validation_split,
        callbacks=[early_stopping, csv_logger]
    )

    # --- Evaluate Model ---
    y_pred = model.predict(X_test)

    # Inverse transform the predictions and actual values
    y_pred_real = scaler.inverse_transform(y_pred)
    y_test_real = scaler.inverse_transform(y_test.reshape(-1, 1))

    # ERROR metrics
    # mae = mean_absolute_error(y_test, y_pred)
    # rmse = mean_squared_error(y_test, y_pred, squared=False)
    mae_real = mean_absolute_error(y_test_real, y_pred_real)
    # rmse_real = mean_squared_error(y_test_real, y_pred_real, squared=False)
    rmse_real = np.sqrt(mean_squared_error(y_test_real, y_pred_real))

    print(f"Real MAE: ${mae_real:.2f}")
    print(f"Real RMSE: ${rmse_real:.2f}")

    # print(f"\nTest MAE: {mae:.5f}")
    # print(f"Test RMSE: {rmse:.5f}")

    return model, history, scaler


def plot_training_history(history, save_path=None):
    """
    Plots the training and validation loss from a Keras model history.

    Args:
        history: History object returned by model.fit()
        save_path (str, optional): If provided, saves the plot to this filepath.
    """
    plt.figure(figsize=(10, 6))
    plt.plot(history.history['loss'], label='Training Loss', linewidth=2)
    plt.plot(history.history['val_loss'], label='Validation Loss', linewidth=2)
    plt.title('Training vs Validation Loss Over Epochs')
    plt.xlabel('Epochs')
    plt.ylabel('Loss (MSE)')
    plt.legend()
    plt.grid(True)

    if save_path:
        plt.savefig(save_path, bbox_inches='tight')  # Save the plot
        print(f"Training history plot saved to {save_path}")

    plt.show()

SyntaxError: invalid syntax (3879831280.py, line 117)

## Building the model

In [37]:
print(cleanedStockDataName)
model, history, scaler = train_lstm_model(f'dataset/AAPL_CleanedStockData.csv',)
plot_training_history(history, save_path='training_history.png')
model.save('models/my_lstm_model.h5')  # or use .keras

AAPL_CleanedStockData.csv
X shape: (10778, 100, 5)
Epoch 1/100
[1m276/276[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 33ms/step - loss: 9.8584e-05 - val_loss: 0.0113
Epoch 2/100
[1m276/276[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 32ms/step - loss: 2.3326e-05 - val_loss: 0.0051
Epoch 3/100
[1m276/276[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 33ms/step - loss: 2.4111e-05 - val_loss: 0.0029
Epoch 4/100
[1m276/276[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 33ms/step - loss: 1.3051e-05 - val_loss: 0.0036
Epoch 5/100
[1m276/276[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 33ms/step - loss: 1.3049e-05 - val_loss: 0.0014
Epoch 6/100
[1m276/276[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 33ms/step - loss: 9.4109e-06 - val_loss: 0.0012
Epoch 7/100
[1m276/276[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 33ms/step - loss: 5.6955e-06 - val_loss: 0.0011
Epoch 8/100
[1m276/276[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m

ValueError: non-broadcastable output operand with shape (2156,1) doesn't match the broadcast shape (2156,5)

In [36]:
from tensorflow.keras.models import Model
from tensorflow.keras.layers import (
    Input, LSTM, Dropout, BatchNormalization,
    AdditiveAttention, Dense, Flatten, Permute, Reshape, Multiply
)
from tensorflow.keras.callbacks import EarlyStopping, CSVLogger
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error


features = ['Close', 'Open', 'Range', 'MA20', 'MA50']


def train_lstm_model(
    csv_path,
    sequence_length=100,
    batch_size=25,
    epochs=100,
    validation_split=0.2,
    patience=10
):
    """
    Trains an LSTM model with attention mechanism on stock Close price data.

    Args:
        csv_path (str): Path to the cleaned CSV file.
        sequence_length (int): Number of days to use for each input sequence.
        batch_size (int): Batch size for model training.
        epochs (int): Number of epochs to train.
        validation_split (float): Fraction of training data to use for validation.
        patience (int): Number of epochs to wait for EarlyStopping.

    Returns:
        model (tf.keras.Model): Trained Keras model.
        history (History object): Training history.
        scaler (MinMaxScaler): Fitted scaler (for inverse transforming predictions later).
    """
    # --- Load and Preprocess Data ---
    data = pd.read_csv(csv_path)
    data = data.drop(columns=['Date'])

    # Scale the 'Close' price
    scaler = MinMaxScaler()
    data_scaled = scaler.fit_transform(data[features])

    # Create sequences
    X, y = [], []
    for i in range(sequence_length, len(data_scaled)):
        X.append(data_scaled[i-sequence_length:i])  # (100, 5)
        y.append(data_scaled[i, 0])

    X, y = np.array(X), np.array(y)

    print("X shape:", X.shape)  # should be (samples, 100, 5)

    # Train/Test Split
    train_size = int(0.8 * len(X))
    X_train, X_test = X[:train_size], X[train_size:]
    y_train, y_test = y[:train_size], y[train_size:]



    # --- Build Model ---
    input_layer = Input(shape=(X_train.shape[1], X_train.shape[2]))

    x = LSTM(50, return_sequences=True)(input_layer)
    x = Dropout(0.2)(x)
    x = BatchNormalization()(x)
    x = LSTM(50, return_sequences=True)(x)

    # Attention mechanism
    permute = Permute((2, 1))(x)
    reshape = Reshape((50, X_train.shape[1]))(permute)
    attention = AdditiveAttention(name='attention_weight')([reshape, reshape])
    attention = Permute((2, 1))(attention)
    attention = Reshape((X_train.shape[1], 50))(attention)

    x = Multiply()([x, attention])
    x = Flatten()(x)
    output = Dense(1)(x)

    model = Model(inputs=input_layer, outputs=output)
    model.compile(optimizer='adam', loss='mean_squared_error')

    # --- Callbacks ---
    early_stopping = EarlyStopping(
        monitor='val_loss', patience=patience, restore_best_weights=True)
    csv_logger = CSVLogger('training_log.csv')

    # --- Train Model ---
    history = model.fit(
        X_train, y_train,
        epochs=epochs,
        batch_size=batch_size,
        validation_split=validation_split,
        callbacks=[early_stopping, csv_logger]
    )

    # --- Evaluate Model ---
    y_pred = model.predict(X_test)

    # Inverse transform the predictions and actual values
    y_pred_real = scaler.inverse_transform(y_pred)
    y_test_real = scaler.inverse_transform(y_test.reshape(-1, 1))

    # ERROR metrics
    # mae = mean_absolute_error(y_test, y_pred)
    # rmse = mean_squared_error(y_test, y_pred, squared=False)
    mae_real = mean_absolute_error(y_test_real, y_pred_real)
    # rmse_real = mean_squared_error(y_test_real, y_pred_real, squared=False)
    rmse_real = np.sqrt(mean_squared_error(y_test_real, y_pred_real))

    print(f"Real MAE: ${mae_real:.2f}")
    print(f"Real RMSE: ${rmse_real:.2f}")

    # print(f"\nTest MAE: {mae:.5f}")
    # print(f"Test RMSE: {rmse:.5f}")

    return model, history, scaler


def plot_training_history(history, save_path=None):
    """
    Plots the training and validation loss from a Keras model history.

    Args:
        history: History object returned by model.fit()
        save_path (str, optional): If provided, saves the plot to this filepath.
    """
    plt.figure(figsize=(10, 6))
    plt.plot(history.history['loss'], label='Training Loss', linewidth=2)
    plt.plot(history.history['val_loss'], label='Validation Loss', linewidth=2)
    plt.title('Training vs Validation Loss Over Epochs')
    plt.xlabel('Epochs')
    plt.ylabel('Loss (MSE)')
    plt.legend()
    plt.grid(True)

    if save_path:
        plt.savefig(save_path, bbox_inches='tight')  # Save the plot
        print(f"Training history plot saved to {save_path}")

    plt.show()