In [1]:
import numpy as np
import pandas as pd

In [2]:
df = pd.read_csv('cleaned_data.csv')

In [3]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error

# Function to build the LSTM model
def build_lstm_model(X_train, units=50, dropout_rate=0.2, optimizer='adam'):
    """
    Builds and compiles the LSTM model based on input parameters.
    
    Parameters:
    - X_train: Training data (used to get input shape)
    - units: Number of LSTM units
    - dropout_rate: Dropout rate to prevent overfitting
    - optimizer: Optimizer to be used for model compilation
    
    Returns:
    - model: Compiled LSTM model
    """
    model = Sequential()
    model.add(LSTM(units=units, return_sequences=True, input_shape=(X_train.shape[1], X_train.shape[2])))
    model.add(Dropout(dropout_rate))
    model.add(LSTM(units=units))
    model.add(Dropout(dropout_rate))
    model.add(Dense(1))  # Output layer with 1 neuron (regression task)
    model.compile(optimizer=optimizer, loss='mean_squared_error')
    return model

# Function to preprocess the data
def preprocess_data(df, timesteps=10):
    """
    Preprocesses the data, scaling features and reshaping into a format suitable for LSTM.
    
    Parameters:
    - df: DataFrame containing the features and target column ('close' in this case)
    - timesteps: Number of previous days to use as input features
    
    Returns:
    - X_train_scaled, X_test_scaled, y_train_scaled, y_test_scaled: Preprocessed and scaled data
    """
    # Scaling the features and target
    scaler = MinMaxScaler()
    X = df.drop(['close'], axis=1)
    y = df['close']
    
    X_scaled = scaler.fit_transform(X)
    y_scaled = scaler.fit_transform(y.values.reshape(-1, 1))
    
    # Reshaping the data to fit LSTM input format
    X_reshaped, y_reshaped = [], []
    for i in range(timesteps, len(X_scaled)):
        X_reshaped.append(X_scaled[i-timesteps:i])
        y_reshaped.append(y_scaled[i])

    X_reshaped, y_reshaped = np.array(X_reshaped), np.array(y_reshaped)

    # Train-test split
    from sklearn.model_selection import train_test_split
    X_train, X_test, y_train, y_test = train_test_split(X_reshaped, y_reshaped, test_size=0.2, random_state=42, shuffle=False)

    return X_train, X_test, y_train, y_test, scaler

# Function to train the LSTM model and calculate performance metrics
def train_and_evaluate_lstm(df, timesteps=10, units=50, dropout_rate=0.2, optimizer='adam', batch_size=32, epochs=20):
    """
    Trains the LSTM model, evaluates it, and returns error metrics.
    
    Parameters:
    - df: DataFrame containing features and target
    - timesteps: Number of previous days to use as input features
    - units: Number of units in the LSTM layer
    - dropout_rate: Dropout rate for regularization
    - optimizer: Optimizer to use for training
    - batch_size: Batch size for training
    - epochs: Number of epochs to train the model
    
    Returns:
    - model: Trained LSTM model
    - rmse: Root Mean Squared Error
    - mae: Mean Absolute Error
    - mape: Mean Absolute Percentage Error
    """
    # Preprocess the data
    X_train, X_test, y_train, y_test, scaler = preprocess_data(df, timesteps)

    # Build the LSTM model
    model = build_lstm_model(X_train, units=units, dropout_rate=dropout_rate, optimizer=optimizer)

    # Train the model
    model.fit(X_train, y_train, batch_size=batch_size, epochs=epochs, verbose=0)

    # Make predictions
    y_pred_scaled = model.predict(X_test)

    # Inverse transform the predictions and true values
    y_pred = scaler.inverse_transform(y_pred_scaled)
    y_test_orig = scaler.inverse_transform(y_test)

    # Calculate error metrics
    rmse = np.sqrt(mean_squared_error(y_test_orig, y_pred))
    mae = mean_absolute_error(y_test_orig, y_pred)
    mape = np.mean(np.abs((y_test_orig - y_pred) / y_test_orig)) * 100

    print("RMSE - LSTM:", rmse)
    print("MAE - LSTM:", mae)
    print("MAPE - LSTM:", mape)

    return model, rmse, mae, mape

# Example usage
# Assuming df is your DataFrame containing the stock data with 'close' as the target column
# df = ...  # Load your stock price dataset

# Hyperparameters
timesteps = 10
units = 50
dropout_rate = 0.2
optimizer = 'adam'
batch_size = 32
epochs = 20

# Train and evaluate the model
model, rmse, mae, mape = train_and_evaluate_lstm(df, timesteps, units, dropout_rate, optimizer, batch_size, epochs)


RMSE - LSTM: 11.137208872725754
MAE - LSTM: 9.180786132812496
MAPE - LSTM: 4.460068097216628


In [None]:

#pip install --upgrade keras
#pip install --upgrade tensorflow
#pip install --upgrade scikit-learn