This dataset from neptune.ai: https://app.neptune.ai/o/showcase/org/StockPrediction/metadata?path=&attribute=data

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
df = pd.read_csv("/content/stock_market_data-AAPL.csv")
df.head()

Unnamed: 0,Date,Low,High,Close,Open
0,1999-11-01,77.37,80.69,77.62,80.0
1,1999-11-02,77.31,81.69,80.25,78.0
2,1999-11-03,81.0,83.25,81.5,81.62
3,1999-11-04,80.62,85.37,83.62,82.06
4,1999-11-05,84.0,88.37,88.31,84.62


In [3]:
df.shape

(5454, 5)

In [4]:
df.describe()

Unnamed: 0,Low,High,Close,Open
count,5454.0,5454.0,5454.0,5454.0
mean,172.681087,176.702928,174.747033,174.761299
std,156.094862,158.912581,157.542041,157.608887
min,12.72,13.19,13.12,12.99
25%,62.885,65.0,63.885,64.2075
50%,122.24,126.03,124.39,124.145
75%,213.2275,219.42,215.8875,216.29
max,699.57,705.07,702.1,702.41


In [5]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5454 entries, 0 to 5453
Data columns (total 5 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   Date    5454 non-null   object 
 1   Low     5454 non-null   float64
 2   High    5454 non-null   float64
 3   Close   5454 non-null   float64
 4   Open    5454 non-null   float64
dtypes: float64(4), object(1)
memory usage: 213.2+ KB


In [6]:
test_ratio = 0.2
train_ratio = 1 - test_ratio

# Calculate the split point
split_point = int(len(df) * train_ratio)

# Split the data
train = df.iloc[:split_point][["Close"]]
test = df.iloc[split_point:][["Close"]]

print(f"Train size: {len(train)}")
print(f"Test size: {len(test)}")

Train size: 4363
Test size: 1091


In [7]:
import plotly.graph_objects as go
import pandas as pd

df['Date'] = pd.to_datetime(df['Date'])
df = df.set_index('Date')  # Set Date as the index

# Create the figure
fig = go.Figure()

# Add the line
fig.add_trace(go.Scatter(
    x=df.index,
    y=df['Close'],
    mode="lines",
    name="Close Price",
    line=dict(color="blue"),
    hovertemplate="Date: %{x|%Y-%m-%d}<br>Close: %{y:.2f}<extra></extra>"
))

# Update the layout
fig.update_layout(
    title="Apple Stock Closing Price (1999-2021)",
    xaxis_title="Date",
    yaxis_title="Close Price",
    width=800,
    height=400,
    hovermode="x unified"
)

# Show the plot
fig.show()

In [8]:
def seqX_outY(data, N, offset):
  X = []
  y = []

  for i in range(offset, len(data)):
    X.append(data[i - N : i])
    y.append(data[i])

  return np.array(X), np.array(y)

In [9]:
def calculate_rmse(y_true, y_pred):
  rmse = np.sqrt(np.mean((y_true - y_pred) ** 2))
  return rmse

def calculate_mape(y_true, y_pred):
  y_pred, y_true = np.array(y_pred), np.array(y_true)
  mape = np.mean(np.abs((y_true - y_pred) / y_true)) * 100
  return mape



In [10]:
def calculate_perf_metrics(df, split_point, var):
    ### RMSE
    rmse = calculate_rmse(
        np.array(df.iloc[split_point:]["Close"]),
        np.array(df.iloc[split_point:][var]),
    )
    ### MAPE
    mape = calculate_mape(
        np.array(df.iloc[split_point:]["Close"]),
        np.array(df.iloc[split_point:][var]),
    )
    print(f"RMSE: {rmse}")
    print(f"MAPE: {mape}")
    return rmse, mape

In [11]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots

def plot_stock_trend(var, cur_title, stockprices=df):
    # Create figure
    fig = make_subplots(specs=[[{"secondary_y": True}]])

    # Add traces
    fig.add_trace(
        go.Scatter(x=stockprices.index, y=stockprices["Close"], name="Close"),
        secondary_y=False,
    )

    fig.add_trace(
        go.Scatter(x=stockprices.index, y=stockprices[var], name=var),
        secondary_y=True,
    )

    fig.add_trace(
        go.Scatter(x=stockprices.index, y=stockprices["200day"], name="200 day MA"),
        secondary_y=False,
    )

    # Set x-axis title
    fig.update_xaxes(title_text="Date")

    # Set y-axes titles
    fig.update_yaxes(title_text="Stock Price ($)", secondary_y=False)
    fig.update_yaxes(title_text=var, secondary_y=True)

    # Update layout
    fig.update_layout(
        title_text=cur_title,
        width=1200,
        height=600,
        legend=dict(
            orientation="h",
            yanchor="bottom",
            y=1.02,
            xanchor="right",
            x=1
        )
    )

    # Show plot
    fig.show()

# **Simple Moving Average**

In [12]:
window_size = 50

window_var = f"{window_size}day"

df[window_var] = df["Close"].rolling(window_size).mean()

### Include a 200-day SMA for reference
df["200day"] = df["Close"].rolling(200).mean()

### Plot and performance metrics for SMA model
plot_stock_trend(var=window_var, cur_title="Simple Moving Averages")
rmse_sma, mape_sma = calculate_perf_metrics(df=df, split_point=split_point, var=window_var)


RMSE: 43.76848947969163
MAPE: 12.529053089133422


# **Exponential Moving Average**

In [13]:
window_ema_var = f"{window_var}_EMA"

# Calculate the 50-day exponentially weighted moving average
df[window_ema_var] = (
    df["Close"].ewm(span=window_size, adjust=False).mean()
)
df["200day"] = df["Close"].rolling(200).mean()

### Plot and performance metrics for EMA model
plot_stock_trend(
    var=window_ema_var, cur_title="Exponential Moving Averages")
rmse_ema, mape_ema = calculate_perf_metrics(df=df, split_point=split_point, var=window_ema_var)

RMSE: 36.67593508712043
MAPE: 10.71389916884245


# **LSTM**

In [14]:
layer_units = 50
optimizer = "adam"
cur_epochs = 15
cur_batch_size = 20

cur_LSTM_args = {
    "units": layer_units,
    "optimizer": optimizer,
    "batch_size": cur_batch_size,
    "epochs": cur_epochs,
}


In [15]:
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
scaled_data = scaler.fit_transform(df[["Close"]])
scaled_data_train = scaled_data[: train.shape[0]]

X_train, y_train = seqX_outY(scaled_data_train, window_size, window_size)


In [16]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import plotly.graph_objs as go

# Define the LSTM model in PyTorch
class LSTMModel(nn.Module):
    def __init__(self, input_size=1, hidden_layer_size=50, output_size=1):
        super(LSTMModel, self).__init__()
        self.hidden_layer_size = hidden_layer_size
        self.lstm = nn.LSTM(input_size, hidden_layer_size, batch_first=True)
        self.linear = nn.Linear(hidden_layer_size, output_size)

    def forward(self, x):
        h_0 = torch.zeros(1, x.size(0), self.hidden_layer_size).requires_grad_()
        c_0 = torch.zeros(1, x.size(0), self.hidden_layer_size).requires_grad_()

        lstm_out, _ = self.lstm(x, (h_0.detach(), c_0.detach()))
        predictions = self.linear(lstm_out[:, -1])
        return predictions

# Convert numpy data to PyTorch tensors
X_train_tensor = torch.from_numpy(X_train).float()
y_train_tensor = torch.from_numpy(y_train).float()

# Instantiate the model, loss function, and optimizer
model = LSTMModel(input_size=1, hidden_layer_size=layer_units, output_size=1)
loss_function = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training loop
epochs = cur_epochs
batch_size = cur_batch_size
train_loss = []

for epoch in range(epochs):
    epoch_loss = 0
    for i in range(0, len(X_train_tensor), batch_size):
        X_batch = X_train_tensor[i:i + batch_size]
        y_batch = y_train_tensor[i:i + batch_size]

        optimizer.zero_grad()
        model_output = model(X_batch)
        loss = loss_function(model_output, y_batch)
        loss.backward()
        optimizer.step()

        epoch_loss += loss.item()

    epoch_loss /= (len(X_train_tensor) / batch_size)
    train_loss.append(epoch_loss)
    print(f'Epoch {epoch + 1}/{epochs}, Loss: {epoch_loss:.4f}')

# Plot training loss using Plotly
fig = go.Figure()
fig.add_trace(go.Scatter(x=list(range(epochs)), y=train_loss, mode='lines', name='Training Loss'))
fig.update_layout(title='Training Loss Over Epochs', xaxis_title='Epoch', yaxis_title='Loss')
fig.show()


Epoch 1/15, Loss: 0.3019
Epoch 2/15, Loss: 0.2149
Epoch 3/15, Loss: 0.0490
Epoch 4/15, Loss: 0.0353
Epoch 5/15, Loss: 0.0192
Epoch 6/15, Loss: 0.0119
Epoch 7/15, Loss: 0.0115
Epoch 8/15, Loss: 0.0095
Epoch 9/15, Loss: 0.0089
Epoch 10/15, Loss: 0.0083
Epoch 11/15, Loss: 0.0080
Epoch 12/15, Loss: 0.0078
Epoch 13/15, Loss: 0.0076
Epoch 14/15, Loss: 0.0074
Epoch 15/15, Loss: 0.0072


In [17]:
# predict stock prices using past window_size stock prices
def preprocess_testdata(data=df, scaler=scaler, window_size=window_size, test=test):
    raw = data["Close"][len(data) - len(test) - window_size:].values
    raw = raw.reshape(-1,1)
    raw = scaler.transform(raw)

    X_test = [raw[i-window_size:i, 0] for i in range(window_size, raw.shape[0])]
    X_test = np.array(X_test)

    X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1))
    return X_test

X_test = preprocess_testdata()
X_test_tensor = torch.Tensor(X_test)

model.eval() # Set the model to evaluation mode
with torch.no_grad(): # Disable gradient calculations as we are making predictions
    predicted_price_ = model(X_test_tensor)

predicted_price_ = predicted_price_.numpy() # Convert to numpy array
predicted_price = scaler.inverse_transform(predicted_price_)

# Plot predicted price vs actual closing price
test["Predictions_lstm"] = predicted_price


X does not have valid feature names, but StandardScaler was fitted with feature names



In [19]:
import pandas as pd
import plotly.graph_objs as go

# Evaluate performance
rmse_lstm = calculate_rmse(np.array(test["Close"]), np.array(test["Predictions_lstm"]))
mape_lstm = calculate_mape(np.array(test["Close"]), np.array(test["Predictions_lstm"]))
# Print performance metrics
print(f"RMSE: {rmse_lstm:.2f}")
print(f"MAPE: {mape_lstm:.2f}%")

# Assuming the start date of your dataset
start_date = "1999-11-01"
end_date = "2021-07-09"

# Generate the full date range for the entire dataset
all_dates = pd.date_range(start=start_date, end=end_date, freq='B')  # 'B' frequency assumes business days

# Ensure the dates match the length of your train and test data
dates_train = all_dates[:len(train)]
dates_test = all_dates[len(train):len(train) + len(test)]

# Assign these date ranges to your DataFrames
train.index = dates_train
test.index = dates_test

# Plotting function for LSTM predictions
def plot_stock_trend_lstm(train, test):
    fig = go.Figure()

    # Add traces for training, testing, and predicted closing prices
    fig.add_trace(go.Scatter(x=train.index, y=train["Close"], mode='lines', name='Train Closing Price'))
    fig.add_trace(go.Scatter(x=test.index, y=test["Close"], mode='lines', name='Test Closing Price'))
    fig.add_trace(go.Scatter(x=test.index, y=test["Predictions_lstm"], mode='lines', name='Predicted Closing Price'))

    # Update layout to add titles and axis labels
    fig.update_layout(
        title="LSTM Model",
        xaxis_title="Date",
        yaxis_title="Stock Price ($)",
        legend=dict(x=0, y=1)
    )

    # Show the plot
    fig.show()

# Call the function to plot
plot_stock_trend_lstm(train, test)


RMSE: 15.62
MAPE: 2.89%
