In [None]:
from sklearn.preprocessing import MinMaxScaler
from keras.preprocessing.sequence import TimeseriesGenerator

# Prepare the univariate series for the LSTM.
# Relies on `data` (a DataFrame with an 'OperatingCashFlow' column) and `train_size`
# having been defined by earlier cells.
data_series = data['OperatingCashFlow'].values.reshape(-1, 1)  # 2-D column, as MinMaxScaler expects

# Normalize the data.
# The scaler is fitted on the training segment only, so the min/max of the held-out
# segment cannot leak into the training inputs; the whole series is then transformed
# with those training-derived parameters (test values may therefore fall outside [0, 1]).
scaler = MinMaxScaler(feature_range=(0, 1))
scaler.fit(data_series[:train_size])
scaled_data = scaler.transform(data_series)

# Define sequence length (number of time steps to look back for predicting the next value)
sequence_length = 4

# Prepare data generators for training and testing.
# Each sample is a window of `sequence_length` consecutive values, with the value
# immediately following the window as its target.
train_data_gen = TimeseriesGenerator(scaled_data[:train_size], scaled_data[:train_size],
                                     length=sequence_length, batch_size=1)
test_data_gen = TimeseriesGenerator(scaled_data[train_size:], scaled_data[train_size:],
                                    length=sequence_length, batch_size=1)

# Check the shape of the first generated batch (skipped when the generator is empty)
if len(train_data_gen) > 0:
    x, y = train_data_gen[0]
    print('Input data shape:', x.shape, 'Output data shape:', y.shape)


In [None]:
from keras.models import Sequential
from keras.layers import LSTM, Dense

# Build the network in one go: an LSTM layer with 50 units reading windows of
# `sequence_length` steps with one feature each, followed by a single-unit Dense output.
model = Sequential([
    LSTM(50, activation='relu', input_shape=(sequence_length, 1)),
    Dense(1),
])

# Mean squared error loss, optimized with Adam
model.compile(loss='mean_squared_error', optimizer='adam')

# Print the layer-by-layer summary
model.summary()

In [None]:
# Fit the network on the training generator for 20 epochs, logging progress per epoch
model.fit(
    x=train_data_gen,
    verbose=1,
    epochs=20,
)


In [None]:
from math import sqrt
from sklearn.metrics import mean_squared_error

# Run the trained network over every window produced by the test generator
scaled_predictions = model.predict(test_data_gen)

# Undo the scaling so predictions and targets are compared in original units.
# The first target of the test generator sits `sequence_length` steps into the
# test segment, hence the offset on the actuals slice.
predictions = scaler.inverse_transform(scaled_predictions)
actuals = scaler.inverse_transform(scaled_data[train_size + sequence_length:])

# Root mean squared error on the test windows
mse = mean_squared_error(actuals, predictions)
rmse = sqrt(mse)
print('Test RMSE:', rmse)


In [None]:
import pandas as pd
from statsmodels.tsa.statespace.sarimax import SARIMAX

# Extend the dataset with one empty row for the next quarter (March 2024 per the
# original note) so the exogenous variables can be carried forward onto it.
# Relies on `data` (date-indexed DataFrame) and `test` from earlier cells.
future_dates = pd.date_range(start=data.index[-1] + pd.DateOffset(months=3), periods=1, freq='Q')
future_df = pd.DataFrame(index=future_dates, columns=data.columns)
extended_data = pd.concat([data, future_df])

# Forward fill the exogenous variables to cover all dates including the extended future date
extended_exog = extended_data[['Exog1', 'Exog2', 'Exog3']]  # Replace with actual names of exogenous variables
extended_exog_ffill = extended_exog.ffill()

# Convert all columns to numeric; values that cannot be parsed become NaN
extended_data_numeric = extended_data.apply(pd.to_numeric, errors='coerce')
extended_exog_numeric = extended_exog_ffill.apply(pd.to_numeric, errors='coerce')

# Split into the fitting window and the forecast horizon.
# The horizon is the test period plus the appended future quarter, i.e. the last
# len(test) + 1 rows of the extended frame (assumes `test` is the tail of `data`,
# as the original exog slice did -- TODO confirm against the cell that builds `test`).
forecast_steps = len(test) + 1
n_fit = len(extended_data_numeric) - forecast_steps
if n_fit <= 0:
    raise ValueError('Not enough observations before the forecast horizon to fit SARIMAX.')

endog_fit = extended_data_numeric['OperatingCashFlow'].iloc[:n_fit]
exog_fit = extended_exog_numeric.iloc[:n_fit]
exog_horizon = extended_exog_numeric.iloc[n_fit:]

# Fit SARIMAX on the pre-horizon data only. Fitting on the full extended series
# would (a) leak the test period into the model and (b) make get_forecast start
# *after* the appended quarter, misaligning the exog rows supplied below.
# NOTE: this rebinds `model`, which earlier referred to the Keras LSTM.
model = SARIMAX(endog_fit, exog=exog_fit,
                order=(1, 1, 1), seasonal_order=(0, 0, 0, 0))
model_fit = model.fit(disp=False)

# Forecast the test period and the appended future quarter, using the exog rows
# that correspond to exactly those periods
forecast = model_fit.get_forecast(steps=forecast_steps, exog=exog_horizon)
forecast_mean = forecast.predicted_mean

# Display the forecasted values
forecast_mean