# Predict apple stock evolution using LSTM 

## libraries needed


In [15]:
from datetime import datetime
import numpy as np
import pandas as pd
import plotly.graph_objs as go
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
import yfinance as yf
import matplotlib.pyplot as plt

## data 


### data laoding and visualization

In [16]:
# Load data
def load_data(ticker):
    end_date = datetime.now().date()
    data = yf.download('AAPL', start='2010-01-01', end=end_date)
    data.reset_index(inplace=True)
    return data

# Fetch Apple stock data
data = load_data('AAPL')

[*********************100%%**********************]  1 of 1 completed


In [17]:
# Display raw data
print("Raw Data:")
print(data.tail())

Raw Data:
           Date        Open        High         Low       Close   Adj Close  \
3742 2024-11-14  225.020004  228.869995  225.000000  228.220001  228.220001   
3743 2024-11-15  226.399994  226.919998  224.270004  225.000000  225.000000   
3744 2024-11-18  225.250000  229.740005  225.169998  228.020004  228.020004   
3745 2024-11-19  226.979996  230.160004  226.660004  228.279999  228.279999   
3746 2024-11-20  228.059998  229.929993  225.889999  229.000000  229.000000   

        Volume  
3742  44923900  
3743  47923700  
3744  44686000  
3745  36211800  
3746  35112500  


In [18]:
# Plot raw data
def plot_raw_data(data):
    fig = go.Figure()
    fig.add_trace(go.Scatter(x=data['Date'], y=data['Open'], name="stock_open"))
    fig.add_trace(go.Scatter(x=data['Date'], y=data['Close'], name="stock_close"))
    fig.layout.update(title_text='Time Series data with Rangeslider', xaxis_rangeslider_visible=True)
    fig.show()

plot_raw_data(data)

### data preprocessing

In [None]:

close_prices = data['Close'].values.reshape(-1, 1)
scaler = MinMaxScaler(feature_range=(0, 1))
scaled_prices = scaler.fit_transform(close_prices)

In [None]:

sequence_length = 60
X, y = [], []
for i in range(sequence_length, len(scaled_prices)):
    X.append(scaled_prices[i-sequence_length:i, 0])
    y.append(scaled_prices[i, 0])
X, y = np.array(X), np.array(y)
X = X.reshape((X.shape[0], X.shape[1], 1))

In [21]:
import pandas as pd

# Ensure the index is in datetime format
if not isinstance(data.index, pd.DatetimeIndex):
    if 'Date' in data.columns:
        data['Date'] = pd.to_datetime(data['Date'])  # Convert the date column to datetime
        data.set_index('Date', inplace=True)  # Set the datetime column as the index

# Calculate the test size dynamically as the last quintile (10%)
test_size = int(len(data) * 0.1)

# Define the start index for the test set
test_start_index = len(data) - test_size

# Get the test set index (dates) for slicing
test_index = data.index[test_start_index:]

# Get the first test set observation date
first_test_date = test_index[0]
print(f"The first observation date in the test set is: {first_test_date}")

# Split the test set
X_test, y_test = X[-test_size:], y[-test_size:]

# Remaining data for training and validation
X_train_val, y_train_val = X[:-test_size], y[:-test_size]

# Define train and validation split sizes (80% training, 20% validation)
train_size = int(len(X_train_val) * 0.8)

# Training and validation splits
X_train, y_train = X_train_val[:train_size], y_train_val[:train_size]
X_val, y_val = X_train_val[train_size:], y_train_val[train_size:]

# Get the date indices for training and validation sets
train_index = data.index[:train_size]
val_index = data.index[train_size:len(data) - test_size]

# Print Shapes for Verification
print(f"Train set: {X_train.shape}, Validation set: {X_val.shape}, Test set: {X_test.shape}")



The first observation date in the test set is: 2023-05-30 00:00:00
Train set: (2650, 60, 1), Validation set: (663, 60, 1), Test set: (374, 60, 1)


## model building 

In [22]:
from tensorflow.keras.optimizers import Adam

model = Sequential([
    LSTM(50, return_sequences=True, input_shape=(X.shape[1], 1)),
    Dropout(0.2),
    LSTM(50, return_sequences=False),
    Dropout(0.2),
    Dense(25),
    Dense(1)
])

# Define the Adam optimizer
adam_optimizer = Adam(
    learning_rate=0.001,  # Set your desired learning rate
    beta_1=0.9,          # First moment decay rate
    beta_2=0.999,        # Second moment decay rate
    epsilon=1e-07        # Small value to prevent division by zero
)
model.compile(loss='mse', 
              optimizer=adam_optimizer,
              metrics=['mean_absolute_error'])

# Print the model summary
model.summary()



Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead.



In [23]:

#Train the model
model.fit(X_train, y_train, batch_size=32, epochs=10, validation_data=(X_val, y_val))

Epoch 1/10
[1m83/83[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 38ms/step - loss: 0.0027 - mean_absolute_error: 0.0314 - val_loss: 9.6424e-04 - val_mean_absolute_error: 0.0256
Epoch 2/10
[1m83/83[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 37ms/step - loss: 2.6494e-04 - mean_absolute_error: 0.0093 - val_loss: 6.1748e-04 - val_mean_absolute_error: 0.0204
Epoch 3/10
[1m83/83[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 47ms/step - loss: 1.9794e-04 - mean_absolute_error: 0.0085 - val_loss: 6.7934e-04 - val_mean_absolute_error: 0.0206
Epoch 4/10
[1m83/83[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 41ms/step - loss: 1.5523e-04 - mean_absolute_error: 0.0078 - val_loss: 0.0012 - val_mean_absolute_error: 0.0285
Epoch 5/10
[1m83/83[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 40ms/step - loss: 1.7824e-04 - mean_absolute_error: 0.0080 - val_loss: 5.5571e-04 - val_mean_absolute_error: 0.0192
Epoch 6/10
[1m83/83[0m [32m━━━━━━━━━━━━━━━━━━━━[0

<keras.src.callbacks.history.History at 0x1426e8c10>

## evaluation

In [24]:
# Step 7: Evaluate the Model on the Test Set
test_loss = model.evaluate(X_test, y_test)
print(f"Test Loss: {test_loss}")

[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - loss: 4.9397e-04 - mean_absolute_error: 0.0178
Test Loss: [0.0006366003071889281, 0.01985158957540989]


In [25]:
import plotly.graph_objs as go
import pandas as pd
import numpy as np

# Ensure a date index or column exists
if 'Date' in data.columns:
    data['Date'] = pd.to_datetime(data['Date'])  # Convert 'Date' column to datetime
    data.set_index('Date', inplace=True)  # Set 'Date' as index
elif isinstance(data.index, pd.DatetimeIndex):
    print("The index is already a datetime object.")
else:
    raise KeyError("No 'Date' column or datetime index is present. Check your dataset.")

# Predictions for the test set
predictions_test = model.predict(X_test)

# Inverse transform predictions and actual test values
scaled_zeros = np.zeros((len(predictions_test), 1))  # Ensure one column for 'Close'
scaled_zeros[:, 0] = predictions_test.flatten()  # Place predictions in the first column
predictions_test = scaler.inverse_transform(scaled_zeros).flatten()  # Inverse transform and flatten

scaled_zeros_actual = np.zeros((len(y_test), 1))  # Ensure one column for 'Close'
scaled_zeros_actual[:, 0] = y_test.flatten()  # Place actual values in the first column
y_test_scaled = scaler.inverse_transform(scaled_zeros_actual).flatten()  # Inverse transform and flatten

# Compute test_start_index and extract test dates
test_dates = data.index[test_start_index:]

# Plot Actual vs. Predicted Prices for the Test Set
trace_actual_test = go.Scatter(
    x=test_dates,
    y=y_test_scaled,
    mode='lines',
    name='Actual Test Prices',
    line=dict(color='blue')
)

trace_predicted_test = go.Scatter(
    x=test_dates,
    y=predictions_test,
    mode='lines',
    name='Predicted Test Prices',
    line=dict(color='red')
)

evaluation_layout = go.Layout(
    title='Evaluation: Actual vs. Predicted Prices on Test Set',
    xaxis=dict(title='Date', tickformat='%b %d, %Y', showgrid=True, gridcolor='lightgray'),
    yaxis=dict(title='Stock Price', showgrid=True, gridcolor='lightgray'),
    legend=dict(x=0, y=1),
    plot_bgcolor='white'
)

evaluation_fig = go.Figure(data=[trace_actual_test, trace_predicted_test], layout=evaluation_layout)
evaluation_fig.show()



The index is already a datetime object.
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 67ms/step


In [26]:
import plotly.graph_objs as go
import numpy as np

# Inverse transform actual and predicted values
real_close_prices = scaler.inverse_transform(y.reshape(-1, 1)).flatten()

# Predictions
train_predictions = model.predict(X_train).flatten()
val_predictions = model.predict(X_val).flatten()
test_predictions = model.predict(X_test).flatten()

# Inverse transform predictions
train_predictions = scaler.inverse_transform(train_predictions.reshape(-1, 1)).flatten()
val_predictions = scaler.inverse_transform(val_predictions.reshape(-1, 1)).flatten()
test_predictions = scaler.inverse_transform(test_predictions.reshape(-1, 1)).flatten()

# Define date ranges for each dataset
train_dates = data.index[:len(y_train)]
val_dates = data.index[len(y_train):len(y_train) + len(y_val)]
test_dates = data.index[len(y_train) + len(y_val):]

# Plot Real Data
trace_real = go.Scatter(
    x=data.index,
    y=real_close_prices,
    mode='lines',
    name='Real Closing Prices',
    line=dict(color='blue', width=2),
    hoverinfo='x+y',
)

# Plot Training Predictions
trace_train = go.Scatter(
    x=train_dates,
    y=train_predictions,
    mode='lines',
    name='Training Predictions',
    line=dict(color='green', width=2),
    hoverinfo='x+y',
)

# Plot Validation Predictions
trace_val = go.Scatter(
    x=val_dates,
    y=val_predictions,
    mode='lines',
    name='Validation Predictions',
    line=dict(color='orange', width=2),
    hoverinfo='x+y',
)

# Plot Test Predictions
trace_test = go.Scatter(
    x=test_dates,
    y=test_predictions,
    mode='lines',
    name='Test Predictions',
    line=dict(color='red', width=2),
    hoverinfo='x+y',
)

# Layout customization
layout = go.Layout(
    title=dict(
        text='Real and Predicted Closing Prices',
        x=0.5,  # Center title
        font=dict(size=20, color='black'),
    ),
    xaxis=dict(
        title='Date',
        tickformat='%b %d, %Y',
        showgrid=True,
        gridcolor='lightgray',
    ),
    yaxis=dict(
        title='Stock Price',
        showgrid=True,
        gridcolor='lightgray',
    ),
    legend=dict(
        x=0.01, y=0.99,
        bgcolor='rgba(255, 255, 255, 0.8)',
        bordercolor='lightgray',
        borderwidth=1,
    ),
    plot_bgcolor='white',
)

# Combine all traces into a single figure
fig = go.Figure(data=[trace_real, trace_train, trace_val, trace_test], layout=layout)

# Show the figure
fig.show()


[1m83/83[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 15ms/step
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step


### saving the model


In [27]:
# Save the model to a Keras-compatible .h5 file
model.save('lstm_apple_stock_model.h5')
print("Model saved successfully to 'lstm_apple_stock_model.h5'!")




Model saved successfully to 'lstm_apple_stock_model.h5'!


## forcasting 

In [28]:
from datetime import datetime
from tensorflow.keras.models import load_model
from sklearn.preprocessing import MinMaxScaler
from pandas.tseries.offsets import BDay
from tensorflow.keras.metrics import MeanSquaredError
# Step 1: Load the trained model
model = load_model('lstm_apple_stock_model.h5',custom_objects={'mse': MeanSquaredError()})
print("Model loaded successfully!")

# Step 2: Fetch historical and real-time data
symbol = 'AAPL'
start_date = '2023-11-01'
end_date = datetime.now().date()

real_time_data = yf.download(symbol, start=start_date, end=end_date)
real_close_prices = real_time_data['Close'].values.reshape(-1, 1)

# Step 3: Scale the data using the same scaler used during training
scaler = MinMaxScaler(feature_range=(0, 1))
scaler.fit(real_close_prices)
scaled_prices = scaler.transform(real_close_prices)

# Step 4: Forecast future prices, skipping weekends
forecast_days = 10
sequence_length = 60
forecast_start_sequence = scaled_prices[-sequence_length:].reshape(1, sequence_length, 1)

forecasted_prices = []
forecast_dates = []

last_date = real_time_data.index[-1]  # Start from the last available date

for _ in range(forecast_days):
    predicted_price = model.predict(forecast_start_sequence)
    forecasted_prices.append(predicted_price[0, 0])
    next_date = last_date + BDay(1)  # Skip weekends
    forecast_dates.append(next_date)
    last_date = next_date
    predicted_price_reshaped = np.reshape(predicted_price, (1, 1, 1))
    forecast_start_sequence = np.append(forecast_start_sequence[:, 1:, :], predicted_price_reshaped, axis=1)

forecasted_prices = scaler.inverse_transform(np.array(forecasted_prices).reshape(-1, 1))

# Step 5: Enhanced Interactive Visualization

# Historical Prices
trace_historical = go.Scatter(
    x=real_time_data.index,
    y=scaler.inverse_transform(scaled_prices).flatten(),
    mode='lines',
    name='Historical Prices',
    line=dict(color='blue', width=2),
    hoverinfo='x+y',
    text="Historical Price",
)

# Forecasted Prices
trace_forecasted = go.Scatter(
    x=forecast_dates,
    y=forecasted_prices.flatten(),
    mode='lines+markers',
    name='Forecasted Prices (Future)',
    line=dict(color='orange', width=2, dash='dot'),
    marker=dict(size=10, color='red'),
    hoverinfo='x+y',
    text="Forecasted Price",
)

# Layout customization
layout = go.Layout(
    title=dict(
        text=f'Interactive {symbol} Stock Price: Predictions and Forecasts',
        x=0.5,  # Center title
        font=dict(size=20, color='black')
    ),
    xaxis=dict(
        title='Date',
        tickformat='%b %d, %Y',
        showgrid=True,
        gridcolor='lightgray',
        rangeslider=dict(visible=True)
    ),
    yaxis=dict(
        title='Stock Price (USD)',
        showgrid=True,
        gridcolor='lightgray'
    ),
    legend=dict(
        x=0.01, y=0.99,
        bgcolor='rgba(255, 255, 255, 0.8)',
        bordercolor='lightgray',
        borderwidth=1
    ),
    plot_bgcolor='white'
)

# Combine traces into figure
fig = go.Figure(data=[trace_historical, trace_forecasted], layout=layout)
fig.show()

# Display forecasted prices
print("\nForecasted Prices (Next 10 Business Days):")
for date, price in zip(forecast_dates, forecasted_prices.flatten()):
    print(f"Date: {date.date()}, Forecasted Price: ${price:.2f}")



[*********************100%%**********************]  1 of 1 completed


Model loaded successfully!
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 470ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 45ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 44ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step



Forecasted Prices (Next 10 Business Days):
Date: 2024-11-21, Forecasted Price: $225.44
Date: 2024-11-22, Forecasted Price: $225.74
Date: 2024-11-25, Forecasted Price: $225.87
Date: 2024-11-26, Forecasted Price: $225.90
Date: 2024-11-27, Forecasted Price: $225.85
Date: 2024-11-28, Forecasted Price: $225.76
Date: 2024-11-29, Forecasted Price: $225.64
Date: 2024-12-02, Forecasted Price: $225.49
Date: 2024-12-03, Forecasted Price: $225.34
Date: 2024-12-04, Forecasted Price: $225.18
