In [4]:
import numpy as np
import pandas as pd
from sklearn.model_selection import TimeSeriesSplit, train_test_split
from sklearn.preprocessing import MinMaxScaler

# Load the dataset
data = pd.read_csv('AirPassengers.csv')

# Display the first few rows of the dataset (raw, un-scaled values)
print(data.head())

# Preprocessing: pull the passenger counts out as an (n, 1) column vector,
# the 2-D layout MinMaxScaler works on
passengers = data['#Passengers'].values.reshape(-1, 1)

# Identify the chronological training portion BEFORE fitting the scaler.
# Using the same train_test_split call as for the frame below guarantees that
# both splits share exactly the same boundary.
passengers_train, _ = train_test_split(passengers, test_size=0.2, shuffle=False)

# Normalization: learn min/max from the training portion only, so no statistic
# of the test period leaks into training. Test-period values may therefore fall
# outside [0, 1]; that is expected for a series that keeps growing.
scaler = MinMaxScaler()
scaler.fit(passengers_train)
passengers_normalized = scaler.transform(passengers)

# Replace the original '#Passengers' column with the normalized values
data['#Passengers'] = passengers_normalized.ravel()

# Splitting the dataset into training and test sets (80% train, 20% test),
# without shuffling so the test set is the most recent part of the series
train_data, test_data = train_test_split(data, test_size=0.2, shuffle=False)

# Display the dimensions of the training and test sets
print("Training set size:", len(train_data))
print("Test set size:", len(test_data))

# --- Supervised windows for the LSTM -------------------------------------
# Later cells use sequence_length, num_features, X_train, y_train, X_test and
# y_test; they are built here so the notebook runs top to bottom.
sequence_length = 12  # months of history per input window (one seasonal cycle)
num_features = 1      # univariate series: only the passenger count


def make_windows(series, window):
    """Turn a 1-D series into (inputs, targets) pairs for one-step-ahead forecasting.

    Args:
        series: 1-D array-like of observations in chronological order.
        window: number of consecutive observations in each input sample.

    Returns:
        inputs of shape (n_samples, window, 1) and targets of shape
        (n_samples, 1), where targets[i] is the observation that directly
        follows inputs[i] and n_samples == len(series) - window.

    Raises:
        ValueError: if window < 1 or the series is not longer than the window.
    """
    series = np.asarray(series, dtype=float).reshape(-1)
    n_windows = len(series) - window
    if window < 1 or n_windows < 1:
        raise ValueError(
            f"need window >= 1 and more than {window} observations, got {len(series)}"
        )
    inputs = np.stack([series[start:start + window] for start in range(n_windows)])
    targets = series[window:]
    return inputs.reshape(n_windows, window, 1), targets.reshape(-1, 1)


train_values = train_data['#Passengers'].to_numpy()
test_values = test_data['#Passengers'].to_numpy()

X_train, y_train = make_windows(train_values, sequence_length)

# Prepend the last `sequence_length` training points as context so that every
# test month gets a full history window (these are past values, not leakage).
X_test, y_test = make_windows(
    np.concatenate([train_values[-sequence_length:], test_values]),
    sequence_length,
)

assert X_train.shape[1:] == (sequence_length, num_features)
assert len(X_test) == len(test_data)


     Month  #Passengers
0  1949-01          112
1  1949-02          118
2  1949-03          132
3  1949-04          129
4  1949-05          121
Training set size: 115
Test set size: 29


In [None]:
from keras.models import Sequential
from keras.layers import LSTM, Dropout, Dense

def create_lstm_model(input_shape, lstm_units=(50, 50), dropout_rate=0.2):
    """Build and compile a stacked LSTM regressor with a single-value output.

    Exactly one LSTM layer, followed by a Dropout layer, is created per entry
    of ``lstm_units``. Every LSTM except the last returns the full sequence so
    the next LSTM receives one vector per time step; the last one returns only
    its final state, which feeds a ``Dense(1)`` output layer.

    Args:
        input_shape: ``(sequence_length, num_features)`` of one input sample.
        lstm_units: sequence of layer sizes, or a single int for one layer.
        dropout_rate: rate passed to the Dropout layer after each LSTM.

    Returns:
        The model, compiled with the 'adam' optimizer and mean-squared-error loss.

    Raises:
        ValueError: if ``lstm_units`` is empty.
    """
    # Accept a bare int as shorthand for a single-layer network.
    layer_sizes = [lstm_units] if isinstance(lstm_units, int) else list(lstm_units)
    if not layer_sizes:
        raise ValueError("lstm_units must contain at least one layer size")

    model = Sequential()

    last_index = len(layer_sizes) - 1
    for index, units in enumerate(layer_sizes):
        layer_kwargs = {
            'units': units,
            # Only intermediate layers hand a full sequence to the next LSTM.
            'return_sequences': index < last_index,
        }
        if index == 0:
            # Only the first layer needs to be told the input shape.
            layer_kwargs['input_shape'] = input_shape
        model.add(LSTM(**layer_kwargs))
        model.add(Dropout(dropout_rate))

    # Adding a Dense output layer (one predicted value per sample)
    model.add(Dense(units=1))

    # Compiling the model
    model.compile(optimizer='adam', loss='mean_squared_error')

    return model


# Build the network for windows of `sequence_length` time steps with
# `num_features` values per step (both names must already exist in the
# kernel), then print its layer summary.
input_shape = (sequence_length, num_features)
model = create_lstm_model(input_shape=input_shape, lstm_units=[50, 50], dropout_rate=0.2)
model.summary()


In [None]:
from keras.models import Sequential
from keras.layers import LSTM, Dropout, Dense
from keras.optimizers import Adam
import matplotlib.pyplot as plt

def compile_and_train_model(model, train_data, epochs=100, batch_size=32):
    """Compile ``model`` with Adam / MSE and fit it on the given training pair.

    Args:
        model: object exposing Keras-style ``compile`` and ``fit`` methods.
        train_data: two-item ``(inputs, targets)`` pair.
        epochs: number of training epochs forwarded to ``fit``.
        batch_size: batch size forwarded to ``fit``.

    Returns:
        Whatever ``model.fit`` returns (the training history).
    """
    features, targets = train_data

    # (Re)compile with a default-configured Adam optimizer and MSE loss.
    model.compile(optimizer=Adam(), loss='mean_squared_error')

    # 20% of the training pair is held out by `fit` for validation; progress is shown.
    fit_options = {
        'epochs': epochs,
        'batch_size': batch_size,
        'validation_split': 0.2,
        'verbose': 1,
    }
    return model.fit(features, targets, **fit_options)

# Example usage: build a fresh network and train it.
# `sequence_length` / `num_features` describe one input window and must
# already be defined in the kernel.
input_shape = (sequence_length, num_features)
model = create_lstm_model(input_shape=input_shape, lstm_units=[50, 50], dropout_rate=0.2)

# Bundle the training arrays (expected to exist already) into the
# (inputs, targets) pair that compile_and_train_model unpacks.
train_data = (X_train, y_train)

# Compile and train the model
history = compile_and_train_model(model=model, train_data=train_data, epochs=100, batch_size=32)

# Learning curves: training loss vs. validation loss per epoch
for history_key, curve_label in (('loss', 'Training Loss'), ('val_loss', 'Validation Loss')):
    plt.plot(history.history[history_key], label=curve_label)
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()
plt.show()


In [None]:
from sklearn.metrics import mean_absolute_error, mean_squared_error
import matplotlib.pyplot as plt

# Assuming you have test data X_test, y_test
test_data = (X_test, y_test)

# Evaluate the trained model on the test set.
# The model was trained on scaled values, so its outputs are on the scaled axis too.
predictions = model.predict(X_test)

# Map targets and predictions back to passenger counts with the scaler fitted
# during preprocessing, so the errors and the plot are in the units the axis
# label promises.
actual_passengers = scaler.inverse_transform(np.asarray(y_test).reshape(-1, 1))
predicted_passengers = scaler.inverse_transform(np.asarray(predictions).reshape(-1, 1))

mae = mean_absolute_error(actual_passengers, predicted_passengers)
# Take the square root explicitly: the `squared=False` shortcut of
# mean_squared_error is deprecated and absent from newer scikit-learn releases.
rmse = mean_squared_error(actual_passengers, predicted_passengers) ** 0.5

print("Mean Absolute Error (MAE):", mae)
print("Root Mean Squared Error (RMSE):", rmse)

# Visualize the model's predictions against the ground truth
plt.figure(figsize=(12, 6))
plt.plot(actual_passengers, label='Actual')
plt.plot(predicted_passengers, label='Predicted')
plt.title('AirPassengers Forecasting')
plt.xlabel('Time')
plt.ylabel('Number of Passengers')
plt.legend()
plt.show()


In [None]:
from keras.wrappers.scikit_learn import KerasRegressor
from sklearn.model_selection import RandomizedSearchCV
import numpy as np

# Define function to create LSTM model
def create_lstm_model(learning_rate=0.01, lstm_units=50, dropout_rate=0.2):
    """Build a single-layer LSTM regressor for the hyperparameter search.

    NOTE: this re-uses the name of the stacked-LSTM builder defined in an
    earlier cell and replaces it in the kernel once this cell has run.

    The input shape is read from the module-level ``sequence_length`` and
    ``num_features``.

    Args:
        learning_rate: value passed to the Adam optimizer.
        lstm_units: size of the single LSTM layer.
        dropout_rate: rate of the Dropout layer after the LSTM.

    Returns:
        The model, compiled with mean-squared-error loss.
    """
    window_shape = (sequence_length, num_features)

    network = Sequential()
    stack = (
        LSTM(units=lstm_units, input_shape=window_shape),
        Dropout(dropout_rate),
        Dense(units=1),
    )
    for layer in stack:
        network.add(layer)

    network.compile(optimizer=Adam(learning_rate), loss='mean_squared_error')
    return network

# Create KerasRegressor wrapper around the model-building function
# NOTE(review): `keras.wrappers.scikit_learn` appears to be unavailable in
# recent Keras/TensorFlow releases — confirm the installed version provides it.
model = KerasRegressor(build_fn=create_lstm_model)

# Define hyperparameter search space (keys match create_lstm_model's parameters)
param_dist = {
    'learning_rate': [0.001, 0.01, 0.1],
    'lstm_units': [50, 100, 150],
    'dropout_rate': [0.1, 0.2, 0.3]
}

# The samples are time-ordered, so validate only on data that comes AFTER the
# data used for fitting. A plain integer `cv` would use unordered K-fold, where
# some folds train on the future and validate on the past.
time_series_cv = TimeSeriesSplit(n_splits=3)

# Perform random search; a fixed random_state makes the sampled candidates repeatable
random_search = RandomizedSearchCV(
    model,
    param_dist,
    n_iter=10,
    cv=time_series_cv,
    scoring='neg_mean_squared_error',
    random_state=42,
    verbose=1,
)
random_search_results = random_search.fit(X_train, y_train)

# Print best parameters and the corresponding mean cross-validated score.
# The scorer is negated MSE, so flip the sign back to report a plain MSE.
print("Best Parameters:", random_search_results.best_params_)
print("Best Mean Test Score:", -random_search_results.best_score_)


Challenges encountered during model training and optimization:

One challenge is selecting appropriate hyperparameters, such as learning rate, batch size, number of LSTM layers and units, and dropout rate. Tuning these hyperparameters can be time-consuming and computationally expensive, especially when performing grid search or random search.
Another challenge is dealing with overfitting, especially when working with limited data. Dropout layers and early stopping techniques are commonly used to mitigate overfitting.
Decision on the number of LSTM layers and units:

The number of LSTM layers and units is often determined through experimentation and validation. Starting with a single LSTM layer and gradually increasing the complexity by adding more layers and units can help find the optimal architecture.
Factors such as the complexity of the dataset, available computational resources, and the trade-off between model complexity and generalization ability influence the decision.
Preprocessing steps performed on the time series data before training the model:

Common preprocessing steps include:
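Removing any trend and seasonality from the data through techniques like differencing or decomposition (optional; this notebook passes the scaled series to the LSTM without detrending).
Normalizing the data to a common scale to ensure stable training (here, Min-Max scaling of the `#Passengers` column).
Splitting the data chronologically (without shuffling) into training and test sets, and possibly further dividing the training set into validation sets for hyperparameter tuning.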
Preparing the data into sequences suitable for input to the LSTM model.
Purpose of dropout layers in LSTM networks and how they prevent overfitting:

Dropout layers randomly deactivate a fraction of neurons at each training step, so those neurons contribute to neither the forward pass nor the backward pass for that step; at inference time all neurons are active. This regularization technique helps prevent overfitting by reducing the model's reliance on specific neurons and encouraging the network to learn more robust features.
Dropout layers provide a form of ensemble learning within a single model, effectively reducing the model's variance and improving generalization performance.
Analysis of the model's ability to capture long-term dependencies and make accurate predictions:

The LSTM architecture is specifically designed to capture long-term dependencies in sequential data. By maintaining a memory cell whose contents are regulated by gating mechanisms, LSTMs can effectively learn and remember patterns over long sequences.
The accuracy of predictions depends on various factors such as the quality and quantity of training data, the complexity of the underlying patterns in the data, and the effectiveness of the chosen architecture and hyperparameters.
Evaluation metrics such as mean absolute error (MAE) or root mean squared error (RMSE) can provide quantitative insights into the model's predictive performance.
Potential improvements or alternative approaches for enhancing forecasting performance:

Experimenting with more complex architectures, such as bidirectional LSTMs or stacked LSTMs, may capture more intricate patterns in the data.
Feature engineering and incorporating domain knowledge can improve model performance by providing additional relevant information to the model.
Ensembling multiple models or using techniques like model averaging can further enhance forecasting accuracy and robustness.
Regularization techniques such as L1 and L2 regularization, in addition to dropout, can help prevent overfitting.
Continual monitoring and fine-tuning of hyperparameters and model architecture based on validation performance can lead to better forecasting results over time.