**Importing Libraries and Data**

In [None]:
#Importing libraries
import pandas as pd
import statsmodels.api as sm
from sklearn.metrics import root_mean_squared_error
from statsmodels.tsa.stattools import acf
import matplotlib.pyplot as plt
import prophet
import numpy as np
import warnings
import gc
import os
import sys
import json
from keras.models import Sequential
from keras.layers import LSTM
from keras.layers import Dense
from keras.layers import Dropout
from keras.callbacks import EarlyStopping
from tensorflow.keras.preprocessing.sequence import TimeseriesGenerator
from sklearn.preprocessing import MinMaxScaler

#Ignoring warnings
warnings.filterwarnings("ignore")

#Reading in data
BoostedTransactions = pd.read_csv("Boosted Transaction Dataset.csv")
#Extracting unique account ID values in first-appearance order.
#A list built via dict.fromkeys is used instead of a set: set iteration order for
#string IDs can differ between kernel sessions, and the batch loops further down
#select accounts by their position in UniqueAccounts.
AccountIDs = BoostedTransactions["AccountId"].values
UniqueAccounts = list(dict.fromkeys(AccountIDs))

#Converting the transaction dates into dates and converting them to months
#(each date is replaced by the first day of its month)
BoostedTransactions["TransactionDate"] = pd.to_datetime(BoostedTransactions["TransactionDate"])
BoostedTransactions["TransactionDate"] = BoostedTransactions["TransactionDate"].dt.to_period("M").dt.to_timestamp()

#Grouping the transactions by account and month and calculating the net sum of transactions
#(.agg(["sum"])["Amount"] yields a single column that is literally named "sum")
GroupedSumOfTransactionsByAccountAndMonth = BoostedTransactions.groupby(by=["AccountId", "TransactionDate"]).agg(["sum"])["Amount"]
print(GroupedSumOfTransactionsByAccountAndMonth)

#Resetting the dataset index so AccountId and TransactionDate become ordinary columns
GroupedSumOfTransactionsByAccountAndMonth = GroupedSumOfTransactionsByAccountAndMonth.reset_index()

                                                          sum
AccountId                            TransactionDate         
0003a5ae-0c77-4372-b44d-882ef9874a28 2019-05-01      -5541.31
                                     2019-06-01      -3601.46
                                     2019-07-01       3151.74
                                     2019-08-01      -4801.13
                                     2019-09-01       3890.05
...                                                       ...
fff7f00c-c869-4310-b705-4503538f5ecf 2020-03-01       2974.84
                                     2020-04-01        382.93
                                     2020-05-01        166.15
                                     2020-06-01       -550.89
                                     2020-07-01      -2721.18

[10995 rows x 1 columns]


**Creating Account IDs set and writing unique accounts list to JSON file (only execute before running first batch)**

Only execute this code block before running the first batch. This is to maintain the order of accounts and prevent any duplicate account forecasts.

In [None]:
#Building the de-duplicated account list (first-appearance order is kept by dict.fromkeys)
#and saving it to disk so every batch cell iterates the accounts in the same order
AccountIDs = BoostedTransactions["AccountId"].values
UniqueAccounts = [*dict.fromkeys(AccountIDs)]
with open("UniqueAccounts.json", mode="w") as File:
  File.write(json.dumps(UniqueAccounts))


In [None]:
#Reading in the unique accounts list
with open("UniqueAccounts.json", "r") as File:
  UniqueAccounts = json.load(File)

#Converting the transaction dates to months
BoostedTransactions["TransactionDate"] = pd.to_datetime(BoostedTransactions["TransactionDate"])
BoostedTransactions["TransactionDate"] = BoostedTransactions["TransactionDate"].dt.to_period("M").dt.to_timestamp()

#Grouping the transactions by account and month and calculating the net sum
GroupedSumOfTransactionsByAccountAndMonth = BoostedTransactions.groupby(by=["AccountId", "TransactionDate"]).agg(["sum"])["Amount"]
print(GroupedSumOfTransactionsByAccountAndMonth)

#Flattening the grouped index once, outside the loop (it does not change per account)
ResetDataFrame = GroupedSumOfTransactionsByAccountAndMonth.reset_index()

#Creating a list to store the processed time series in
DataFrames = []

#Imputing 0 values into any missing months in each account's time series and adding it to the DataFrames list
for AccountId in UniqueAccounts:
  #Only the numeric "sum" column is resampled; months with no transactions come out as 0
  TimeSeries = ResetDataFrame.loc[ResetDataFrame["AccountId"] == AccountId, ["TransactionDate", "sum"]]
  TimeSeries = TimeSeries.set_index("TransactionDate").resample("MS").sum()

  #Attaching the account ID to every month (including imputed ones) directly, rather than
  #summing the ID column and patching the rows where that sum came out as 0
  TimeSeries.insert(0, "AccountId", AccountId)

  DataFrames.append(TimeSeries)

#Grouping the time series together into one dataset containing all time series
#(columns after the reset: TransactionDate, AccountId, sum)
NewGroupedDataset = pd.concat(DataFrames)
NewGroupedDataset = NewGroupedDataset.reset_index()

print(NewGroupedDataset)

                                                          sum
AccountId                            TransactionDate         
0003a5ae-0c77-4372-b44d-882ef9874a28 2019-05-01      -5541.31
                                     2019-06-01      -3601.46
                                     2019-07-01       3151.74
                                     2019-08-01      -4801.13
                                     2019-09-01       3890.05
...                                                       ...
fff7f00c-c869-4310-b705-4503538f5ecf 2020-03-01       2974.84
                                     2020-04-01        382.93
                                     2020-05-01        166.15
                                     2020-06-01       -550.89
                                     2020-07-01      -2721.18

[10995 rows x 1 columns]
      TransactionDate                             AccountId        sum
0          2019-05-01                                 16268 -263236.06
1          2019-06-01     

**Key LSTM Loops (split up for memory reasons)**

Batch for accounts 0-149 (the first 150 entries of the unique accounts list)

In [None]:
#Creating models and forecasting for the first 150 accounts (list positions 0-149).
#For each account: fit an LSTM on the first 70% of its monthly series, score it on the
#remaining 30%, forecast 12 further months, and append the results to two CSV files.
for idx, i in enumerate(UniqueAccounts):
    #Ensuring that only the first 150 accounts are processed in this batch
    if idx >= 150:
        break
    print(i)
    #Pre-binding the per-account names so the cleanup in `finally` cannot raise a
    #NameError when an iteration fails before all of them have been assigned
    Model = Forecast = TrainData = TestData = ForecastingTimeSeries = None
    try:
        #Selecting only the data for the chosen account and ensuring there are no missing months
        ForecastingTimeSeries = NewGroupedDataset[NewGroupedDataset["AccountId"] == i][["TransactionDate", "sum"]]
        ForecastingTimeSeries = ForecastingTimeSeries.set_index("TransactionDate")
        ForecastingTimeSeries = ForecastingTimeSeries.resample("MS").sum()
        ForecastingTimeSeries = ForecastingTimeSeries.reset_index()

        #Calculating the cutoff point between training and testing sets
        TrainEnd = int(0.7 * len(ForecastingTimeSeries))

        #Splitting the data into training and testing sets (chronological, no shuffling)
        TrainData = ForecastingTimeSeries[:TrainEnd]
        TestData = ForecastingTimeSeries[TrainEnd:]

        #Setting transaction month as the index
        TrainData = TrainData.set_index("TransactionDate")

        #Scaling the training data using min/max scaling (fitted on the training set only)
        Scaler = MinMaxScaler(feature_range=(0,1))
        ScaledTrain = Scaler.fit_transform(TrainData)

        #Generating a batched time series that the LSTM can process
        TimeStep = 1
        TimeSeriesData = TimeseriesGenerator(ScaledTrain, ScaledTrain, length=TimeStep, batch_size=1)

        #Declaring the model layers
        Model = Sequential()
        Model.add(LSTM(units=16, return_sequences=True, activation="tanh", input_shape=(TimeStep, 1)))
        Model.add(LSTM(units=16, return_sequences=True, activation="tanh"))
        Model.add(LSTM(units=16, return_sequences=True, activation="tanh"))
        Model.add(LSTM(units=16, return_sequences=True, activation="tanh"))
        Model.add(LSTM(units=16, activation="tanh"))
        Model.add(Dense(units=1, activation="sigmoid"))
        Model.add(Dense(units=1, activation="sigmoid"))
        Model.add(Dense(1))
        Model.compile(optimizer="adam", loss="mean_squared_error")

        #Training the model
        Model.fit(TimeSeriesData, epochs=20, batch_size=1, verbose=0)

        #Creating a list for the test data predictions
        TestPredictions = []

        #Creating the first testing batch from the last TimeStep training values
        FirstTestingBatch = ScaledTrain[-TimeStep:]
        CurrentBatch = FirstTestingBatch.reshape((1, TimeStep, 1))

        #Forecasting the test dataset one step at a time, feeding each prediction back in
        #(verbose=0 matches the fit call and avoids one progress bar per predicted month)
        for h in range(len(TestData)):
            CurrentPrediction = Model.predict(CurrentBatch, verbose=0)[0]
            TestPredictions.append(CurrentPrediction)
            CurrentBatch = np.append(CurrentBatch[:, 1:, :], [[CurrentPrediction]], axis=1)

        #Unscaling the test prediction values
        Forecast = Scaler.inverse_transform(TestPredictions)

        #Calculating the error rates and appending them into the errors CSV
        #(SI = RMSE divided by the mean absolute test value)
        try:
            RMSE = root_mean_squared_error(Forecast, TestData["sum"])
            SI = abs(RMSE/abs(TestData["sum"]).mean())
            AccountError = pd.DataFrame({"AccountID" : [i],
                                         "RMSE" : [RMSE],
                                         "SI" : [SI]})
            #The header is written only when the file does not exist yet
            AccountError.to_csv("LSTM Errors (Month-by-Month).csv", mode='a', header=not os.path.exists("LSTM Errors (Month-by-Month).csv"), index=False)
        #Where a ValueError occurs, the loop is broken
        except ValueError:
            print("ValueError encountered")
            break

        #Creating a list to store the future predictions in
        FuturePredictions = []

        #Creating the first future batch from the last TimeStep test predictions
        FirstFutureBatch = np.array(TestPredictions[-TimeStep:])
        CurrentBatch = FirstFutureBatch.reshape((1, TimeStep, 1))

        #Forecasting the next 12 months
        for j in range(12):
            CurrentPrediction = Model.predict(CurrentBatch, verbose=0)[0]
            FuturePredictions.append(CurrentPrediction)
            CurrentBatch = np.append(CurrentBatch[:, 1:, :], [[CurrentPrediction]], axis=1)

        #Unscaling the future forecast values
        FutureForecast = Scaler.inverse_transform(FuturePredictions)

        #Calculating highest amount, lowest amount and difference and appending them to the forecasts CSV
        HighestAmount = FutureForecast.max()
        LowestAmount = FutureForecast.min()
        Difference = HighestAmount - LowestAmount

        ForecastData = pd.DataFrame({"AccountID" : [i],
                                     "Difference over Forecast Period" : [Difference],
                                     "Highest Amount" : [HighestAmount],
                                     "Lowest Amount" : [LowestAmount]})

        ForecastData.to_csv("LSTM Forecasts (Month-by-Month).csv", mode='a', header=not os.path.exists("LSTM Forecasts (Month-by-Month).csv"), index=False)

        #Informing the user of successful execution
        print("Processed account number",i)
    #Where an error occurs, the user is informed about it and the batch is stopped
    except Exception as e:
        print(f"Error encountered processing account number {i}.")
        ErrorType, ErrorObject, ErrorTraceback = sys.exc_info()

        ErrorFilename = os.path.split(
            ErrorTraceback.tb_frame.f_code.co_filename
        )[1]

        ErrorMessage = str(e)

        ErrorLineNumber = ErrorTraceback.tb_lineno

        print(f'Exception Type: {ErrorType}')

        print(f'Exception Filename: {ErrorFilename}')

        print(f'Exception Line Number: {ErrorLineNumber}')

        print(f'Exception Message: {ErrorMessage}')
        break
    #Wiping the model info from memory (to conserve RAM); safe on every exit path
    #because all five names were bound before the try block
    finally:
        del Model, Forecast, TrainData, TestData, ForecastingTimeSeries
        gc.collect()


16268
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 670ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 43ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 39ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s

Batch for accounts 150-299

In [None]:
#Creating models and forecasting for accounts at list positions 150-299.
#For each account: fit an LSTM on the first 70% of its monthly series, score it on the
#remaining 30%, forecast 12 further months, and append the results to two CSV files.
for idx, i in enumerate(UniqueAccounts):
    #Ensuring that only accounts 150-299 are processed
    if idx < 150:
        continue
    if idx >= 300:
        break
    print(i)
    #Pre-binding the per-account names so the cleanup in `finally` cannot raise a
    #NameError when an iteration fails before all of them have been assigned
    Model = Forecast = TrainData = TestData = ForecastingTimeSeries = None
    try:
        #Selecting only the selected account's time series and ensuring it contains no missing values
        ForecastingTimeSeries = NewGroupedDataset[NewGroupedDataset["AccountId"] == i][["TransactionDate", "sum"]]
        ForecastingTimeSeries = ForecastingTimeSeries.set_index("TransactionDate")
        ForecastingTimeSeries = ForecastingTimeSeries.resample("MS").sum()
        ForecastingTimeSeries = ForecastingTimeSeries.reset_index()

        #Calculating the cutoff point between training and testing sets
        TrainEnd = int(0.7 * len(ForecastingTimeSeries))

        #Splitting the data into training and testing sets (chronological, no shuffling)
        TrainData = ForecastingTimeSeries[:TrainEnd]
        TestData = ForecastingTimeSeries[TrainEnd:]

        #Setting transaction month as the index
        TrainData = TrainData.set_index("TransactionDate")

        #Scaling the training data using min/max scaling (fitted on the training set only)
        Scaler = MinMaxScaler(feature_range=(0,1))
        ScaledTrain = Scaler.fit_transform(TrainData)

        #Generating a batched time series that the LSTM can process
        TimeStep = 1
        TimeSeriesData = TimeseriesGenerator(ScaledTrain, ScaledTrain, length=TimeStep, batch_size=1)

        #Declaring the model layers
        Model = Sequential()
        Model.add(LSTM(units=16, return_sequences=True, activation="tanh", input_shape=(TimeStep, 1)))
        Model.add(LSTM(units=16, return_sequences=True, activation="tanh"))
        Model.add(LSTM(units=16, return_sequences=True, activation="tanh"))
        Model.add(LSTM(units=16, return_sequences=True, activation="tanh"))
        Model.add(LSTM(units=16, activation="tanh"))
        Model.add(Dense(units=1, activation="sigmoid"))
        Model.add(Dense(units=1, activation="sigmoid"))
        Model.add(Dense(1))
        Model.compile(optimizer="adam", loss="mean_squared_error")

        #Training the model
        Model.fit(TimeSeriesData, epochs=20, batch_size=1, verbose=0)

        #Creating a list to store test predictions
        TestPredictions = []

        #Creating the first test dataset batch from the last TimeStep training values
        FirstTestingBatch = ScaledTrain[-TimeStep:]
        CurrentBatch = FirstTestingBatch.reshape((1, TimeStep, 1))

        #Forecasting on the test dataset one step at a time, feeding each prediction back in
        #(verbose=0 matches the fit call and avoids one progress bar per predicted month)
        for h in range(len(TestData)):
            CurrentPrediction = Model.predict(CurrentBatch, verbose=0)[0]
            TestPredictions.append(CurrentPrediction)
            CurrentBatch = np.append(CurrentBatch[:, 1:, :], [[CurrentPrediction]], axis=1)

        #Unscaling the test predictions
        Forecast = Scaler.inverse_transform(TestPredictions)

        #Calculating the error rates and appending them into the errors CSV
        #(SI = RMSE divided by the mean absolute test value)
        try:
            RMSE = root_mean_squared_error(Forecast, TestData["sum"])
            SI = abs(RMSE/abs(TestData["sum"]).mean())
            AccountError = pd.DataFrame({"AccountID" : [i],
                                         "RMSE" : [RMSE],
                                         "SI" : [SI]})
            #The header is written only when the file does not exist yet
            AccountError.to_csv("LSTM Errors (Month-by-Month).csv", mode='a', header=not os.path.exists("LSTM Errors (Month-by-Month).csv"), index=False)
        #Where a ValueError occurs, the loop is broken
        except ValueError:
            print("ValueError encountered")
            break

        #Creating a list to store future predictions
        FuturePredictions = []

        #Creating the first future batch from the last TimeStep test predictions
        FirstFutureBatch = np.array(TestPredictions[-TimeStep:])
        CurrentBatch = FirstFutureBatch.reshape((1, TimeStep, 1))

        #Forecasting the next 12 months
        for j in range(12):
            CurrentPrediction = Model.predict(CurrentBatch, verbose=0)[0]
            FuturePredictions.append(CurrentPrediction)
            CurrentBatch = np.append(CurrentBatch[:, 1:, :], [[CurrentPrediction]], axis=1)

        #Unscaling the future forecast values
        FutureForecast = Scaler.inverse_transform(FuturePredictions)

        #Calculating highest amount, lowest amount and difference and appending them into the forecasts CSV
        HighestAmount = FutureForecast.max()
        LowestAmount = FutureForecast.min()
        Difference = HighestAmount - LowestAmount

        ForecastData = pd.DataFrame({"AccountID" : [i],
                                     "Difference over Forecast Period" : [Difference],
                                     "Highest Amount" : [HighestAmount],
                                     "Lowest Amount" : [LowestAmount]})

        ForecastData.to_csv("LSTM Forecasts (Month-by-Month).csv", mode='a', header=not os.path.exists("LSTM Forecasts (Month-by-Month).csv"), index=False)

        print("Processed account number",i)
    #Where an error occurs, the user is informed about it and the batch is stopped
    except Exception as e:
        print(f"Error encountered processing account number {i}.")
        ErrorType, ErrorObject, ErrorTraceback = sys.exc_info()

        ErrorFilename = os.path.split(
            ErrorTraceback.tb_frame.f_code.co_filename
        )[1]

        ErrorMessage = str(e)

        ErrorLineNumber = ErrorTraceback.tb_lineno

        print(f'Exception Type: {ErrorType}')

        print(f'Exception Filename: {ErrorFilename}')

        print(f'Exception Line Number: {ErrorLineNumber}')

        print(f'Exception Message: {ErrorMessage}')
        break
    #Deleting the model data from memory to conserve RAM; safe on every exit path
    #because all five names were bound before the try block
    finally:
        del Model, Forecast, TrainData, TestData, ForecastingTimeSeries
        gc.collect()

4190cd47-6c10-434a-99b3-51ed9499ff1c
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 709ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 56ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 39ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 42ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 40ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step
[1m1/1[0m [32m━━━━━━━━━

Batch for accounts 300-449

In [None]:
#Creating models and forecasting for accounts at list positions 300-449.
#For each account: fit an LSTM on the first 70% of its monthly series, score it on the
#remaining 30%, forecast 12 further months, and append the results to two CSV files.
for idx, i in enumerate(UniqueAccounts):
    #Ensuring that only accounts 300-449 are processed
    if idx < 300:
        continue
    if idx >= 450:
        break
    print(i)
    #Pre-binding the per-account names so the cleanup in `finally` cannot raise a
    #NameError when an iteration fails before all of them have been assigned
    Model = Forecast = TrainData = TestData = ForecastingTimeSeries = None
    try:
        #Selecting only the selected account's time series and ensuring it has no gaps
        ForecastingTimeSeries = NewGroupedDataset[NewGroupedDataset["AccountId"] == i][["TransactionDate", "sum"]]
        ForecastingTimeSeries = ForecastingTimeSeries.set_index("TransactionDate")
        ForecastingTimeSeries = ForecastingTimeSeries.resample("MS").sum()
        ForecastingTimeSeries = ForecastingTimeSeries.reset_index()

        #Calculating the cutoff point between training and testing sets
        TrainEnd = int(0.7 * len(ForecastingTimeSeries))

        #Splitting the dataset into training and testing sets (chronological, no shuffling)
        TrainData = ForecastingTimeSeries[:TrainEnd]
        TestData = ForecastingTimeSeries[TrainEnd:]

        #Setting transaction month as the index
        TrainData = TrainData.set_index("TransactionDate")

        #Scaling the training data using min/max scaling (fitted on the training set only)
        Scaler = MinMaxScaler(feature_range=(0,1))
        ScaledTrain = Scaler.fit_transform(TrainData)

        #Generating a batched time series that the LSTM can process
        TimeStep = 1
        TimeSeriesData = TimeseriesGenerator(ScaledTrain, ScaledTrain, length=TimeStep, batch_size=1)

        #Declaring the model layers
        Model = Sequential()
        Model.add(LSTM(units=16, return_sequences=True, activation="tanh", input_shape=(TimeStep, 1)))
        Model.add(LSTM(units=16, return_sequences=True, activation="tanh"))
        Model.add(LSTM(units=16, return_sequences=True, activation="tanh"))
        Model.add(LSTM(units=16, return_sequences=True, activation="tanh"))
        Model.add(LSTM(units=16, activation="tanh"))
        Model.add(Dense(units=1, activation="sigmoid"))
        Model.add(Dense(units=1, activation="sigmoid"))
        Model.add(Dense(1))
        Model.compile(optimizer="adam", loss="mean_squared_error")

        #Training the model
        Model.fit(TimeSeriesData, epochs=20, batch_size=1, verbose=0)

        #Generating a list to store test predictions
        TestPredictions = []

        #Generating the first testing batch from the last TimeStep training values
        FirstTestingBatch = ScaledTrain[-TimeStep:]
        CurrentBatch = FirstTestingBatch.reshape((1, TimeStep, 1))

        #Forecasting on the test dataset one step at a time, feeding each prediction back in
        #(verbose=0 matches the fit call and avoids one progress bar per predicted month)
        for h in range(len(TestData)):
            CurrentPrediction = Model.predict(CurrentBatch, verbose=0)[0]
            TestPredictions.append(CurrentPrediction)
            CurrentBatch = np.append(CurrentBatch[:, 1:, :], [[CurrentPrediction]], axis=1)

        #Unscaling the test prediction values
        Forecast = Scaler.inverse_transform(TestPredictions)

        #Calculating the error rates and appending them into the errors CSV
        #(SI = RMSE divided by the mean absolute test value)
        try:
            RMSE = root_mean_squared_error(Forecast, TestData["sum"])
            SI = abs(RMSE/abs(TestData["sum"]).mean())
            AccountError = pd.DataFrame({"AccountID" : [i],
                                         "RMSE" : [RMSE],
                                         "SI" : [SI]})
            #The header is written only when the file does not exist yet
            AccountError.to_csv("LSTM Errors (Month-by-Month).csv", mode='a', header=not os.path.exists("LSTM Errors (Month-by-Month).csv"), index=False)
        #Where a ValueError occurs, the loop is broken
        except ValueError:
            print("ValueError encountered")
            break

        #Creating a list to store future predictions
        FuturePredictions = []

        #Generating the first future batch from the last TimeStep test predictions
        FirstFutureBatch = np.array(TestPredictions[-TimeStep:])
        CurrentBatch = FirstFutureBatch.reshape((1, TimeStep, 1))

        #Forecasting the next 12 months
        for j in range(12):
            CurrentPrediction = Model.predict(CurrentBatch, verbose=0)[0]
            FuturePredictions.append(CurrentPrediction)
            CurrentBatch = np.append(CurrentBatch[:, 1:, :], [[CurrentPrediction]], axis=1)

        #Unscaling the future forecast values
        FutureForecast = Scaler.inverse_transform(FuturePredictions)

        #Calculating highest amount, lowest amount and difference and appending them into the forecasts CSV
        HighestAmount = FutureForecast.max()
        LowestAmount = FutureForecast.min()
        Difference = HighestAmount - LowestAmount

        ForecastData = pd.DataFrame({"AccountID" : [i],
                                     "Difference over Forecast Period" : [Difference],
                                     "Highest Amount" : [HighestAmount],
                                     "Lowest Amount" : [LowestAmount]})

        ForecastData.to_csv("LSTM Forecasts (Month-by-Month).csv", mode='a', header=not os.path.exists("LSTM Forecasts (Month-by-Month).csv"), index=False)

        print("Processed account number",i)
    #Where an error occurs, the user is informed about it and the batch is stopped
    except Exception as e:
        print(f"Error encountered processing account number {i}.")
        ErrorType, ErrorObject, ErrorTraceback = sys.exc_info()

        ErrorFilename = os.path.split(
            ErrorTraceback.tb_frame.f_code.co_filename
        )[1]

        ErrorMessage = str(e)

        ErrorLineNumber = ErrorTraceback.tb_lineno

        print(f'Exception Type: {ErrorType}')

        print(f'Exception Filename: {ErrorFilename}')

        print(f'Exception Line Number: {ErrorLineNumber}')

        print(f'Exception Message: {ErrorMessage}')
        break
    #Wiping the model data from memory to conserve RAM; safe on every exit path
    #because all five names were bound before the try block
    finally:
        del Model, Forecast, TrainData, TestData, ForecastingTimeSeries
        gc.collect()

d3cfd119-8562-4318-9eee-98b9cc1c775c
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 715ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 43ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 58ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 55ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 53ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 69ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 55ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 53ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 55ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 52ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 53ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 53ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 60ms/step
[1m1/1[0m [32m━━━━━━━━━

Batch for accounts 450-599

In [None]:
#Creating models and forecasting for accounts at list positions 450-599.
#For each account: fit an LSTM on the first 70% of its monthly series, score it on the
#remaining 30%, forecast 12 further months, and append the results to two CSV files.
for idx, i in enumerate(UniqueAccounts):
    #Ensuring that only accounts 450-599 are processed
    if idx < 450:
        continue
    if idx >= 600:
        break
    print(i)
    #Pre-binding the per-account names so the cleanup in `finally` cannot raise a
    #NameError when an iteration fails before all of them have been assigned
    Model = Forecast = TrainData = TestData = ForecastingTimeSeries = None
    try:
        #Selecting only the current account's time series and ensuring that no gaps exist
        ForecastingTimeSeries = NewGroupedDataset[NewGroupedDataset["AccountId"] == i][["TransactionDate", "sum"]]
        ForecastingTimeSeries = ForecastingTimeSeries.set_index("TransactionDate")
        ForecastingTimeSeries = ForecastingTimeSeries.resample("MS").sum()
        ForecastingTimeSeries = ForecastingTimeSeries.reset_index()

        #Calculating the cutoff point between training and testing sets
        TrainEnd = int(0.7 * len(ForecastingTimeSeries))

        #Splitting the dataset into training and testing sets (chronological, no shuffling)
        TrainData = ForecastingTimeSeries[:TrainEnd]
        TestData = ForecastingTimeSeries[TrainEnd:]

        #Setting the transaction month as the index
        TrainData = TrainData.set_index("TransactionDate")

        #Scaling the training dataset using min/max scaling (fitted on the training set only)
        Scaler = MinMaxScaler(feature_range=(0,1))
        ScaledTrain = Scaler.fit_transform(TrainData)

        #Generating a batched time series that the LSTM can process
        TimeStep = 1
        TimeSeriesData = TimeseriesGenerator(ScaledTrain, ScaledTrain, length=TimeStep, batch_size=1)

        #Declaring model layers
        Model = Sequential()
        Model.add(LSTM(units=16, return_sequences=True, activation="tanh", input_shape=(TimeStep, 1)))
        Model.add(LSTM(units=16, return_sequences=True, activation="tanh"))
        Model.add(LSTM(units=16, return_sequences=True, activation="tanh"))
        Model.add(LSTM(units=16, return_sequences=True, activation="tanh"))
        Model.add(LSTM(units=16, activation="tanh"))
        Model.add(Dense(units=1, activation="sigmoid"))
        Model.add(Dense(units=1, activation="sigmoid"))
        Model.add(Dense(1))
        Model.compile(optimizer="adam", loss="mean_squared_error")

        #Training the model
        Model.fit(TimeSeriesData, epochs=20, batch_size=1, verbose=0)

        #Creating a list to store the test dataset predictions
        TestPredictions = []

        #Creating the first testing batch from the last TimeStep training values
        FirstTestingBatch = ScaledTrain[-TimeStep:]
        CurrentBatch = FirstTestingBatch.reshape((1, TimeStep, 1))

        #Forecasting on the test dataset one step at a time, feeding each prediction back in
        #(verbose=0 matches the fit call and avoids one progress bar per predicted month)
        for h in range(len(TestData)):
            CurrentPrediction = Model.predict(CurrentBatch, verbose=0)[0]
            TestPredictions.append(CurrentPrediction)
            CurrentBatch = np.append(CurrentBatch[:, 1:, :], [[CurrentPrediction]], axis=1)

        #Unscaling the test predictions
        Forecast = Scaler.inverse_transform(TestPredictions)

        #Calculating the error rates and appending them into the errors CSV
        #(SI = RMSE divided by the mean absolute test value)
        try:
            RMSE = root_mean_squared_error(Forecast, TestData["sum"])
            SI = abs(RMSE/abs(TestData["sum"]).mean())
            AccountError = pd.DataFrame({"AccountID" : [i],
                                         "RMSE" : [RMSE],
                                         "SI" : [SI]})
            #The header is written only when the file does not exist yet
            AccountError.to_csv("LSTM Errors (Month-by-Month).csv", mode='a', header=not os.path.exists("LSTM Errors (Month-by-Month).csv"), index=False)
        #Where a ValueError occurs, the loop is broken
        except ValueError:
            print("ValueError encountered")
            break

        #Creating a list to store future predictions
        FuturePredictions = []

        #Creating the first future batch from the last TimeStep test predictions
        FirstFutureBatch = np.array(TestPredictions[-TimeStep:])
        CurrentBatch = FirstFutureBatch.reshape((1, TimeStep, 1))

        #Forecasting the next 12 months
        for j in range(12):
            CurrentPrediction = Model.predict(CurrentBatch, verbose=0)[0]
            FuturePredictions.append(CurrentPrediction)
            CurrentBatch = np.append(CurrentBatch[:, 1:, :], [[CurrentPrediction]], axis=1)

        #Unscaling the future forecast
        FutureForecast = Scaler.inverse_transform(FuturePredictions)

        #Calculating highest amount, lowest amount and difference and appending them into the forecasts CSV
        HighestAmount = FutureForecast.max()
        LowestAmount = FutureForecast.min()
        Difference = HighestAmount - LowestAmount

        ForecastData = pd.DataFrame({"AccountID" : [i],
                                     "Difference over Forecast Period" : [Difference],
                                     "Highest Amount" : [HighestAmount],
                                     "Lowest Amount" : [LowestAmount]})

        ForecastData.to_csv("LSTM Forecasts (Month-by-Month).csv", mode='a', header=not os.path.exists("LSTM Forecasts (Month-by-Month).csv"), index=False)

        print("Processed account number",i)
    #Where an error occurs, the user is informed about it and the batch is stopped
    except Exception as e:
        print(f"Error encountered processing account number {i}.")
        ErrorType, ErrorObject, ErrorTraceback = sys.exc_info()

        ErrorFilename = os.path.split(
            ErrorTraceback.tb_frame.f_code.co_filename
        )[1]

        ErrorMessage = str(e)

        ErrorLineNumber = ErrorTraceback.tb_lineno

        print(f'Exception Type: {ErrorType}')

        print(f'Exception Filename: {ErrorFilename}')

        print(f'Exception Line Number: {ErrorLineNumber}')

        print(f'Exception Message: {ErrorMessage}')
        break
    #Deleting model data from memory to conserve RAM; safe on every exit path
    #because all five names were bound before the try block
    finally:
        del Model, Forecast, TrainData, TestData, ForecastingTimeSeries
        gc.collect()

1a5f93c5-21ff-4929-bde0-e7d6587a48a9
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 735ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 39ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 41ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 40ms/step
[1m1/1[0m [32m━━━━━━━━━

Batch for accounts 600-749

In [None]:
#Creating models and forecasting for accounts 600-750
#
#For every account in this batch the loop builds a gap-free monthly series, trains an
#LSTM on the first 70% of it, scores a recursive forecast against the remaining 30%
#(appending RMSE and SI to the errors CSV) and then forecasts 12 further months
#(appending the highest/lowest forecast and their difference to the forecasts CSV).
#NOTE(review): UniqueAccounts is a set, so these position-based batches only line up
#across cells while the same set is enumerated in the same kernel session - confirm
#that no kernel restart happened between batches.
for idx, i in enumerate(UniqueAccounts):
    #Ensuring that only accounts 600-750 are processed (positions 600 to 749)
    if idx < 600:
        continue
    if idx >= 750:
        break
    print(i)

    #Binding every name that the cleanup deletes, so that a failure early in the try
    #block cannot make the "del" in "finally" raise a NameError of its own
    Model = Forecast = TrainData = TestData = ForecastingTimeSeries = None
    try:
        #Select only the chosen account's time series and ensure it contains no gaps
        ForecastingTimeSeries = NewGroupedDataset[NewGroupedDataset["AccountId"] == i][["TransactionDate", "sum"]]
        ForecastingTimeSeries = ForecastingTimeSeries.set_index("TransactionDate")
        ForecastingTimeSeries = ForecastingTimeSeries.resample("MS").sum()
        ForecastingTimeSeries = ForecastingTimeSeries.reset_index()

        #Calculating the cutoff point between training and testing sets
        TrainEnd = int(0.7 * len(ForecastingTimeSeries))

        #Splitting the dataset into training and testing sets
        TrainData = ForecastingTimeSeries[:TrainEnd]
        TestData = ForecastingTimeSeries[TrainEnd:]

        #Setting the transaction month as the index
        TrainData = TrainData.set_index("TransactionDate")

        #Scaling the data using min/max scaling (fitted on the training data only)
        Scaler = MinMaxScaler(feature_range=(0,1))
        ScaledTrain = Scaler.fit_transform(TrainData)

        #Generating a batched time series that the LSTM can process
        TimeStep = 1
        TimeSeriesData = TimeseriesGenerator(ScaledTrain, ScaledTrain, length=TimeStep, batch_size=1)

        #Declaring model layers
        Model = Sequential()
        Model.add(LSTM(units=16, return_sequences=True, activation="tanh", input_shape=(TimeStep, 1)))
        Model.add(LSTM(units=16, return_sequences=True, activation="tanh"))
        Model.add(LSTM(units=16, return_sequences=True, activation="tanh"))
        Model.add(LSTM(units=16, return_sequences=True, activation="tanh"))
        Model.add(LSTM(units=16, activation="tanh"))
        Model.add(Dense(units=1, activation="sigmoid"))
        Model.add(Dense(units=1, activation="sigmoid"))
        Model.add(Dense(1))
        Model.compile(optimizer="adam", loss="mean_squared_error")

        #Training the model
        Model.fit(TimeSeriesData, epochs=20, batch_size=1, verbose=0)

        #Creating a list to store test predictions
        TestPredictions = []

        #Generating the first testing batch (the last TimeStep scaled training values)
        FirstTestingBatch = ScaledTrain[-TimeStep:]
        CurrentBatch = FirstTestingBatch.reshape((1, TimeStep, 1))

        #Forecasting on the test data; each prediction is fed back in as the next input.
        #verbose=0 stops Keras printing one progress bar per predicted month
        for _ in range(len(TestData)):
            CurrentPrediction = Model.predict(CurrentBatch, verbose=0)[0]
            TestPredictions.append(CurrentPrediction)
            CurrentBatch = np.append(CurrentBatch[:, 1:, :], [[CurrentPrediction]], axis=1)

        #Unscaling the test forecasts
        Forecast = Scaler.inverse_transform(TestPredictions)

        #Calculating the error rates and appending them into the errors CSV
        try:
            #Arguments are given as (actual, predicted); RMSE itself is symmetric
            RMSE = root_mean_squared_error(TestData["sum"], Forecast)
            #SI is the RMSE divided by the mean absolute value of the test data
            SI = abs(RMSE/abs(TestData["sum"]).mean())
            AccountError = pd.DataFrame({"AccountID" : [i],
                                         "RMSE" : [RMSE],
                                         "SI" : [SI]})
            AccountError.to_csv("LSTM Errors (Month-by-Month).csv", mode='a', header=not os.path.exists("LSTM Errors (Month-by-Month).csv"), index=False)
        #Where a ValueError occurs, the loop is broken
        except ValueError:
            print("ValueError encountered")
            break

        #Creating a list to store future predictions
        FuturePredictions = []

        #Creating the first future batch
        #NOTE(review): this is seeded with the last test *prediction*, not the last
        #observed value, and the model is not refitted on the test period - confirm intended
        FirstFutureBatch = np.array(TestPredictions[-TimeStep:])
        CurrentBatch = FirstFutureBatch.reshape((1, TimeStep, 1))

        #Forecasting the next 12 months
        for _ in range(12):
            CurrentPrediction = Model.predict(CurrentBatch, verbose=0)[0]
            FuturePredictions.append(CurrentPrediction)
            CurrentBatch = np.append(CurrentBatch[:, 1:, :], [[CurrentPrediction]], axis=1)

        #Unscaling the future forecast
        FutureForecast = Scaler.inverse_transform(FuturePredictions)

        #Calculating highest amount, lowest amount and difference and appending them into the forecasts CSV
        HighestAmount = FutureForecast.max()
        LowestAmount = FutureForecast.min()
        Difference = HighestAmount - LowestAmount

        ForecastData = pd.DataFrame({"AccountID" : [i],
                                     "Difference over Forecast Period" : [Difference],
                                     "Highest Amount" : [HighestAmount],
                                     "Lowest Amount" : [LowestAmount]})

        ForecastData.to_csv("LSTM Forecasts (Month-by-Month).csv", mode='a', header=not os.path.exists("LSTM Forecasts (Month-by-Month).csv"), index=False)

        print("Processed account number",i)
    #Where an error occurs, the user is informed about it and the batch is stopped
    except Exception as e:
        print(f"Error encountered processing account number {i}.")
        ErrorType, _, ErrorTraceback = sys.exc_info()

        ErrorFilename = os.path.split(
            ErrorTraceback.tb_frame.f_code.co_filename
        )[1]

        ErrorMessage = str(e)

        ErrorLineNumber = ErrorTraceback.tb_lineno

        print(f'Exception Type: {ErrorType}')

        print(f'Exception Filename: {ErrorFilename}')

        print(f'Exception Line Number: {ErrorLineNumber}')

        print(f'Exception Message: {ErrorMessage}')
        break
    #Deleting model data from memory to conserve RAM
    #(runs on success, on error and on either "break" above)
    finally:
        del Model, Forecast, TrainData, TestData, ForecastingTimeSeries
        gc.collect()

9201d207-6a15-4aa6-b240-4c622f360bd3
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 680ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 39ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 56ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 39ms/step
[1m1/1[0m [32m━━━━━━━━━

Batch for last 122 accounts

In [None]:
#Creating models and forecasting for the last 122 accounts
#
#For every account from position 750 onwards the loop builds a gap-free monthly series,
#trains an LSTM on the first 70% of it, scores a recursive forecast against the
#remaining 30% (appending RMSE and SI to the errors CSV) and then forecasts 12 further
#months (appending the highest/lowest forecast and their difference to the forecasts CSV).
#NOTE(review): UniqueAccounts is a set, so these position-based batches only line up
#across cells while the same set is enumerated in the same kernel session - confirm
#that no kernel restart happened between batches.
for idx, i in enumerate(UniqueAccounts):
    #Ensuring that only the last 122 accounts are processed (position 750 onwards)
    if idx < 750:
        continue
    print(i)

    #Binding every name that the cleanup deletes, so that a failure early in the try
    #block cannot make the "del" in "finally" raise a NameError of its own
    Model = Forecast = TrainData = TestData = ForecastingTimeSeries = None
    try:
        #Selecting only the selected account's time series and ensuring it has no gaps
        ForecastingTimeSeries = NewGroupedDataset[NewGroupedDataset["AccountId"] == i][["TransactionDate", "sum"]]
        ForecastingTimeSeries = ForecastingTimeSeries.set_index("TransactionDate")
        ForecastingTimeSeries = ForecastingTimeSeries.resample("MS").sum()
        ForecastingTimeSeries = ForecastingTimeSeries.reset_index()

        #Calculating the cutoff point between training and testing sets
        TrainEnd = int(0.7 * len(ForecastingTimeSeries))

        #Splitting the data into training and testing sets
        TrainData = ForecastingTimeSeries[:TrainEnd]
        TestData = ForecastingTimeSeries[TrainEnd:]

        #Setting transaction month as the index
        TrainData = TrainData.set_index("TransactionDate")

        #Scaling using min/max scaling (fitted on the training data only)
        Scaler = MinMaxScaler(feature_range=(0,1))
        ScaledTrain = Scaler.fit_transform(TrainData)

        #Generating a batched time series that LSTM can process
        TimeStep = 1
        TimeSeriesData = TimeseriesGenerator(ScaledTrain, ScaledTrain, length=TimeStep, batch_size=1)

        #Declaring model layers
        Model = Sequential()
        Model.add(LSTM(units=16, return_sequences=True, activation="tanh", input_shape=(TimeStep, 1)))
        Model.add(LSTM(units=16, return_sequences=True, activation="tanh"))
        Model.add(LSTM(units=16, return_sequences=True, activation="tanh"))
        Model.add(LSTM(units=16, return_sequences=True, activation="tanh"))
        Model.add(LSTM(units=16, activation="tanh"))
        Model.add(Dense(units=1, activation="sigmoid"))
        Model.add(Dense(units=1, activation="sigmoid"))
        Model.add(Dense(1))
        Model.compile(optimizer="adam", loss="mean_squared_error")

        #Training the model
        Model.fit(TimeSeriesData, epochs=20, batch_size=1, verbose=0)

        #Creating a list to store test predictions
        TestPredictions = []

        #Creating the first test batch (the last TimeStep scaled training values)
        FirstTestingBatch = ScaledTrain[-TimeStep:]
        CurrentBatch = FirstTestingBatch.reshape((1, TimeStep, 1))

        #Forecasting for the test dataset; each prediction is fed back in as the next input.
        #verbose=0 stops Keras printing one progress bar per predicted month
        for _ in range(len(TestData)):
            CurrentPrediction = Model.predict(CurrentBatch, verbose=0)[0]
            TestPredictions.append(CurrentPrediction)
            CurrentBatch = np.append(CurrentBatch[:, 1:, :], [[CurrentPrediction]], axis=1)

        #Unscaling the test forecast
        Forecast = Scaler.inverse_transform(TestPredictions)

        #Calculating error rates and appending to the errors CSV
        try:
            #Arguments are given as (actual, predicted); RMSE itself is symmetric
            RMSE = root_mean_squared_error(TestData["sum"], Forecast)
            #SI is the RMSE divided by the mean absolute value of the test data
            SI = abs(RMSE/abs(TestData["sum"]).mean())
            AccountError = pd.DataFrame({"AccountID" : [i],
                                         "RMSE" : [RMSE],
                                         "SI" : [SI]})
            AccountError.to_csv("LSTM Errors (Month-by-Month).csv", mode='a', header=not os.path.exists("LSTM Errors (Month-by-Month).csv"), index=False)
        #Where a ValueError occurs, the loop is broken
        except ValueError:
            print("ValueError encountered")
            break

        #Creating a list to store future predictions
        FuturePredictions = []

        #Creating first future batch
        #NOTE(review): this is seeded with the last test *prediction*, not the last
        #observed value, and the model is not refitted on the test period - confirm intended
        FirstFutureBatch = np.array(TestPredictions[-TimeStep:])
        CurrentBatch = FirstFutureBatch.reshape((1, TimeStep, 1))

        #Forecasting the next 12 months
        for _ in range(12):
            CurrentPrediction = Model.predict(CurrentBatch, verbose=0)[0]
            FuturePredictions.append(CurrentPrediction)
            CurrentBatch = np.append(CurrentBatch[:, 1:, :], [[CurrentPrediction]], axis=1)

        #Unscaling the future forecast
        FutureForecast = Scaler.inverse_transform(FuturePredictions)

        #Calculating the highest amount, lowest amount and difference and appending them into the forecasts CSV
        HighestAmount = FutureForecast.max()
        LowestAmount = FutureForecast.min()
        Difference = HighestAmount - LowestAmount

        ForecastData = pd.DataFrame({"AccountID" : [i],
                                     "Difference over Forecast Period" : [Difference],
                                     "Highest Amount" : [HighestAmount],
                                     "Lowest Amount" : [LowestAmount]})

        ForecastData.to_csv("LSTM Forecasts (Month-by-Month).csv", mode='a', header=not os.path.exists("LSTM Forecasts (Month-by-Month).csv"), index=False)

        print("Processed account number",i)
    #Where an error occurs, the user is informed about it and the batch is stopped
    except Exception as e:
        print(f"Error encountered processing account number {i}.")
        ErrorType, _, ErrorTraceback = sys.exc_info()

        ErrorFilename = os.path.split(
            ErrorTraceback.tb_frame.f_code.co_filename
        )[1]

        ErrorMessage = str(e)

        ErrorLineNumber = ErrorTraceback.tb_lineno

        print(f'Exception Type: {ErrorType}')

        print(f'Exception Filename: {ErrorFilename}')

        print(f'Exception Line Number: {ErrorLineNumber}')

        print(f'Exception Message: {ErrorMessage}')
        break
    #Deleting model data from memory to conserve RAM
    #(runs on success, on error and on either "break" above)
    finally:
        del Model, Forecast, TrainData, TestData, ForecastingTimeSeries
        gc.collect()

5e9b429a-79a2-4deb-9377-9f3187c3e194
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 717ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 39ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 39ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 40ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 39ms/step
[1m1/1[0m [32m━━━━━━━━━

**Calculating Error Statistics**

In [None]:
#Calculating averaged error statistics
#Reads the per-account rows from the LSTM errors CSV and reports the mean and the
#median of each error column, in the order: RMSE mean/median, then SI mean/median.
Errors = pd.read_csv("LSTM Errors (Month-by-Month).csv")

#Each entry is (printed label, CSV column, name of the pandas Series statistic)
ErrorSummary = {}
for Label, Column, Statistic in (
    ("Mean RMSE:", "RMSE", "mean"),
    ("Median RMSE:", "RMSE", "median"),
    ("Mean SI:", "SI", "mean"),
    ("Median SI:", "SI", "median"),
):
    #getattr(series, "mean")() is the same call as series.mean()
    ErrorSummary[Label] = getattr(Errors[Column], Statistic)()
    print(Label, ErrorSummary[Label])

#Exposing the results under their individual names as well
MeanRMSE = ErrorSummary["Mean RMSE:"]
MedianRMSE = ErrorSummary["Median RMSE:"]
MeanSI = ErrorSummary["Mean SI:"]
MedianSI = ErrorSummary["Median SI:"]

Mean RMSE: 54483.260308369245
Median RMSE: 6940.984124898985
Mean SI: 8.622570778514547
Median SI: 2.9518230476756626
