**Importing Libraries and Data**

In [1]:
#Importing libraries
import pandas as pd
import statsmodels.api as sm
from sklearn.metrics import root_mean_squared_error
from statsmodels.tsa.stattools import acf
import matplotlib.pyplot as plt
import prophet
import numpy as np
import warnings
import gc
import os
import sys

#Ignoring warnings
#NOTE(review): this silences every warning for the rest of the kernel session,
#including pandas chained-assignment and deprecation warnings - consider narrowing it
warnings.filterwarnings("ignore")

#Reading in data and extracting unique account values
BoostedTransactions = pd.read_csv("Longer Subset of Transaction Data (14 Months).csv")
AccountIDs = BoostedTransactions["AccountId"].values
UniqueAccounts = set(AccountIDs)

#Converting transaction dates into months (every date becomes the first day of its month)
BoostedTransactions["TransactionDate"] = (
    pd.to_datetime(BoostedTransactions["TransactionDate"])
    .dt.to_period("M")
    .dt.to_timestamp()
)

#Grouping transactions by account and month and summing net transaction amounts for each grouping
#"Amount" is selected BEFORE aggregating so that only that column is summed; aggregating the
#whole frame first would also sum (or fail on) every other column just to throw the result away
GroupedSumOfTransactionsByAccountAndMonth = (
    BoostedTransactions
    .groupby(by=["AccountId", "TransactionDate"])["Amount"]
    .agg(["sum"])
)
print(GroupedSumOfTransactionsByAccountAndMonth)

#Turning the (AccountId, TransactionDate) index back into ordinary columns for later filtering
GroupedSumOfTransactionsByAccountAndMonth = GroupedSumOfTransactionsByAccountAndMonth.reset_index()

                                                          sum
AccountId                            TransactionDate         
0003a5ae-0c77-4372-b44d-882ef9874a28 2019-05-01      -5541.31
                                     2019-06-01      -3601.46
                                     2019-07-01       3151.74
                                     2019-08-01      -4801.13
                                     2019-09-01       3890.05
...                                                       ...
fff7f00c-c869-4310-b705-4503538f5ecf 2020-03-01       2974.84
                                     2020-04-01        382.93
                                     2020-05-01        166.15
                                     2020-06-01       -550.89
                                     2020-07-01      -2721.18

[7632 rows x 1 columns]


**Key Naive Forecast Loop**

In [3]:
#Creating models and forecasting for all accounts
#Each account contributes one row to each CSV below; rows are appended as the loop runs
ErrorsCSVPath = "Naive Longer Subset Errors (Month-by-Month).csv"
ForecastsCSVPath = "Naive Longer Subset Forecasts (Month-by-Month).csv"

for i in UniqueAccounts:
    print(i)
    #Binding the per-account names up front so the clean-up in "finally" can never raise
    #a NameError (which would hide the real exception) when a failure happens early
    TrainData = TestData = ForecastingTimeSeries = None
    try:
        #Selecting only the selected account's time series and ensuring it has no gaps
        #(months without transactions are filled with a sum of 0 by the resample)
        ForecastingTimeSeries = (
            GroupedSumOfTransactionsByAccountAndMonth
            .loc[GroupedSumOfTransactionsByAccountAndMonth["AccountId"] == i, ["TransactionDate", "sum"]]
            .set_index("TransactionDate")
            .resample("MS")
            .sum()
            .reset_index()
        )

        #Calculating the cutoff point between the training and testing sets (70% / 30%)
        TrainEnd = int(0.7 * len(ForecastingTimeSeries))

        #A series of fewer than two months leaves the training set empty, so there is no
        #last observation to carry forward; the account is skipped instead of aborting the run
        if TrainEnd == 0:
            print(f"Skipping account number {i}: not enough monthly data to split into training and testing sets.")
            continue

        #Splitting the data into training and testing sets
        #The test set is copied so that adding a column does not write into a slice of the parent frame
        TrainData = ForecastingTimeSeries[:TrainEnd]
        TestData = ForecastingTimeSeries[TrainEnd:].copy()

        #Performing a naive forecast on the test data (last training value carried forward)
        TestData["Naive Forecast"] = TrainData.iloc[-1]["sum"]

        #Calculating error rates and appending them into the errors CSV
        try:
            RMSE = root_mean_squared_error(TestData["sum"], TestData["Naive Forecast"])
            #SI (scatter index) = RMSE / mean absolute actual; it is undefined when the
            #actuals are all zero, so NaN is stored rather than an infinite value
            MeanAbsoluteActual = TestData["sum"].abs().mean()
            SI = RMSE / MeanAbsoluteActual if MeanAbsoluteActual != 0 else float("nan")
            AccountError = pd.DataFrame({"AccountID" : [i],
                                         "RMSE" : [RMSE],
                                         "SI" : [SI]})
            #The header is only written when the file does not exist yet
            AccountError.to_csv(ErrorsCSVPath, mode='a', header=not os.path.exists(ErrorsCSVPath), index=False)
        #Where a ValueError occurs, the loop is broken
        except ValueError:
            print("ValueError encountered")
            break

        #Performing a naive forecast for the next 12 months
        LastDate = TestData["TransactionDate"].max()

        FutureDates = pd.date_range(start=LastDate + pd.tseries.frequencies.to_offset("MS"),
                                    periods=12,
                                    freq="MS")

        FutureForecast = pd.DataFrame({"TransactionDate": FutureDates})

        #NOTE(review): the future forecast repeats the last TRAINING value, not the most recent
        #observed month in the test set - confirm this is the intended baseline
        FutureForecast["Forecast"] = TestData.iloc[-1]["Naive Forecast"]
        FutureForecastValues = FutureForecast["Forecast"].values

        #Calculating highest amount, lowest amount and difference and appending them into the forecasts CSV
        #(for a constant naive forecast highest == lowest, so the difference is always 0)
        HighestAmount = FutureForecastValues.max()
        LowestAmount = FutureForecastValues.min()
        Difference = HighestAmount - LowestAmount

        ForecastData = pd.DataFrame({"AccountID" : [i],
                                     "Difference" : [Difference],
                                     "Highest Amount" : [HighestAmount],
                                     "Lowest Amount" : [LowestAmount]})

        ForecastData.to_csv(ForecastsCSVPath, mode='a', header=not os.path.exists(ForecastsCSVPath), index=False)

        print("Processed account number",i)
    #Where an error occurs, the user is told about it and the run stops
    except Exception as e:
        print(f"Error encountered processing account number {i}.")
        ErrorType, ErrorObject, ErrorTraceback = sys.exc_info()

        ErrorFilename = os.path.split(
            ErrorTraceback.tb_frame.f_code.co_filename
        )[1]

        ErrorMessage = str(e)

        ErrorLineNumber = ErrorTraceback.tb_lineno

        print(f'Exception Type: {ErrorType}')

        print(f'Exception Filename: {ErrorFilename}')

        print(f'Exception Line Number: {ErrorLineNumber}')

        print(f'Exception Message: {ErrorMessage}')
        break
    #Deleting model data from memory to conserve RAM
    #(runs on success, skip and failure alike; the names are always bound at this point)
    finally:
        del TrainData, TestData, ForecastingTimeSeries
        gc.collect()

a2c44597-5156-49b0-b0c8-c9570d9ef30b
Processed account number a2c44597-5156-49b0-b0c8-c9570d9ef30b
8f26459d-4da4-4c12-b2a1-56cb3f2559b4
Processed account number 8f26459d-4da4-4c12-b2a1-56cb3f2559b4
16279
Processed account number 16279
16432
Processed account number 16432
e991898e-301f-4ccd-8794-851e1cc158df
Processed account number e991898e-301f-4ccd-8794-851e1cc158df
b0f82d53-42ac-409e-bb42-5bc0c4902a38
Processed account number b0f82d53-42ac-409e-bb42-5bc0c4902a38
4190cd47-6c10-434a-99b3-51ed9499ff1c
Processed account number 4190cd47-6c10-434a-99b3-51ed9499ff1c
ba72a5e9-81c8-4933-9888-5619324bebe7
Processed account number ba72a5e9-81c8-4933-9888-5619324bebe7
204854f5-c4e5-4eee-92ed-f2ff134a8e83
Processed account number 204854f5-c4e5-4eee-92ed-f2ff134a8e83
58e29f83-897c-462e-ae79-b9a67895dbce
Processed account number 58e29f83-897c-462e-ae79-b9a67895dbce
608a67d0-8359-403e-8883-c185053923ba
Processed account number 608a67d0-8359-403e-8883-c185053923ba
fce1f270-ed5c-4ae7-9b52-a3dd8995bb6

**Calculating Error Statistics**

In [5]:
#Calculating averaged error statistics
#The errors CSV is written in append mode, so re-running the forecasting loop leaves several
#rows per account; only the most recent row of each account is kept so none is counted twice.
#Infinite values (an SI computed against all-zero actuals) are treated as missing so that
#they are skipped by mean/median instead of turning the mean into infinity.
Errors = (
    pd.read_csv("Naive Longer Subset Errors (Month-by-Month).csv")
    .drop_duplicates(subset="AccountID", keep="last")
    .replace([float("inf"), float("-inf")], float("nan"))
)

MeanRMSE = Errors["RMSE"].mean()
print("Mean RMSE:",MeanRMSE)
MedianRMSE = Errors["RMSE"].median()
print("Median RMSE:",MedianRMSE)
MeanSI = Errors["SI"].mean()
print("Mean SI:",MeanSI)
MedianSI = Errors["SI"].median()
print("Median SI:",MedianSI)

Mean RMSE: 22773.9794017371
Median RMSE: 6098.024077482134
Mean SI: 1.7382433002162874
Median SI: 1.3477383139091526
