In [22]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from sklearn.preprocessing import OneHotEncoder
from sklearn.metrics import mean_absolute_error, mean_absolute_percentage_error, mean_squared_error

In [23]:
# Load the historical data
data = pd.read_csv('historical_data.csv')

In [24]:
# Filter the required columns and sort by date and ticker
data = data[['Date', 'Open', 'High', 'Low', 'Close', 'Volume', 'Ticker']]
data['Date'] = pd.to_datetime(data['Date'])
data.sort_values(['Ticker', 'Date'], inplace=True)

In [25]:
# Encode the Ticker column
encoder = OneHotEncoder(sparse=False)
tickers_encoded = encoder.fit_transform(data[['Ticker']])



In [26]:
# Add the encoded tickers as additional features
encoded_columns = [f'Ticker_{i}' for i in range(tickers_encoded.shape[1])]
encoded_df = pd.DataFrame(tickers_encoded, columns=encoded_columns, index=data.index)
data = pd.concat([data.reset_index(drop=True), encoded_df], axis=1)

In [27]:
# Scale numerical features (Open, High, Low, Close, Volume)
scaler = MinMaxScaler(feature_range=(0, 1))
numerical_features = ['Open', 'High', 'Low', 'Close', 'Volume']
data[numerical_features] = scaler.fit_transform(data[numerical_features])

In [28]:
# Prepare data for LSTM
def preprocess_data(data, n_steps=10):
    X, y = [], []
    feature_columns = numerical_features + encoded_columns
    for ticker in data['Ticker'].unique():
        ticker_data = data[data['Ticker'] == ticker]
        ticker_data = ticker_data[feature_columns].values
        for i in range(n_steps, len(ticker_data)):
            X.append(ticker_data[i - n_steps:i, :])  # Last n_steps rows as features
            y.append(ticker_data[i, 3])  # Predict 'Close' price
    return np.array(X), np.array(y)

In [29]:
# Define the number of timesteps
n_steps = 10


In [30]:
# Preprocess the data
X, y = preprocess_data(data, n_steps=n_steps)

In [31]:
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [32]:
# Build the LSTM model
model = Sequential([
    LSTM(50, return_sequences=True, input_shape=(X_train.shape[1], X_train.shape[2])),
    Dropout(0.2),
    LSTM(50, return_sequences=False),
    Dropout(0.2),
    Dense(25, activation='relu'),
    Dense(1)  # Output layer for predicting 'Close' price
])

In [33]:
# Compile the model
model.compile(optimizer='adam', loss='mse')

In [34]:
# Train the model
history = model.fit(X_train, y_train, epochs=20, batch_size=32, validation_split=0.1)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [35]:
# Evaluate the model on the test set
loss = model.evaluate(X_test, y_test)
print(f"Test Loss: {loss}")


Test Loss: 7.024426304269582e-05


In [36]:
# Make predictions
predictions = model.predict(X_test)



In [37]:
# Inverse scale the predictions and actual values
y_test_scaled = scaler.inverse_transform(
    np.concatenate((np.zeros((y_test.shape[0], 4)), y_test.reshape(-1, 1)), axis=1))[:, -1]
predictions_scaled = scaler.inverse_transform(
    np.concatenate((np.zeros((predictions.shape[0], 4)), predictions), axis=1))[:, -1]

In [38]:
mae = mean_absolute_error(predictions, y_test)
mape = mean_absolute_percentage_error(predictions, y_test)

# Define a percentage threshold for accuracy
threshold_percentage = 5  # 5% tolerance

# Calculate percentage errors
percentage_errors = np.abs((y_test_scaled - predictions_scaled) / y_test_scaled) * 100

# Count predictions within the threshold
acc = np.mean(percentage_errors <= threshold_percentage) * 100


In [44]:
print(f"Mean Absolute Error = {mae}")
print(f"Mean Absolute Percentage Error = {mape*100:.2f}%")
print(f"Accuracy = {acc:.2f}%")

Mean Absolute Error = 0.0031873823263481663
Mean Absolute Percentage Error = 9.89%
Accuracy = 44.86%


In [17]:
# Calculate RMSE
rmse = np.sqrt(mean_squared_error(y_test_scaled, predictions_scaled))
print(f"Root Mean Squared Error (RMSE): {rmse}")

Root Mean Squared Error (RMSE): 7043705.054648481


In [18]:
# Calculate MAPE
def mean_absolute_percentage_error(y_true, y_pred):
    y_true, y_pred = np.array(y_true), np.array(y_pred)
    return np.mean(np.abs((y_true - y_pred) / y_true)) * 100

In [19]:
mape = mean_absolute_percentage_error(y_test_scaled, predictions_scaled)
print(f"Mean Absolute Percentage Error (MAPE): {mape:.2f}%")

Mean Absolute Percentage Error (MAPE): 24.21%


In [20]:
# Calculate MAE
mae = np.mean(np.abs(y_test_scaled - predictions_scaled))
print(f"Mean Absolute Error (MAE): {mae}")

Mean Absolute Error (MAE): 4685994.636200323


In [21]:
# Define a percentage threshold for accuracy
threshold_percentage = 5  # 5% tolerance

# Calculate percentage errors
percentage_errors = np.abs((y_test_scaled - predictions_scaled) / y_test_scaled) * 100

# Count predictions within the threshold
accuracy = np.mean(percentage_errors <= threshold_percentage) * 100
print(f"Accuracy (within {threshold_percentage}% threshold): {accuracy:.2f}%")

Accuracy (within 5% threshold): 44.42%
