**Importing Libraries and Data**

In [1]:
#Importing libraries
import pandas as pd
import statsmodels.api as sm
from sklearn.metrics import root_mean_squared_error
from statsmodels.tsa.stattools import acf
import matplotlib.pyplot as plt
import prophet
import numpy as np
import warnings
import gc
import os
import sys

#Ignoring warnings
#NOTE(review): this silences every warning for the rest of the notebook, including pandas warnings - consider narrowing the filter
warnings.filterwarnings("ignore")

#Reading in data and extracting unique account ID values
BoostedTransactions = pd.read_csv("Boosted Transaction Dataset.csv")
AccountIDs = BoostedTransactions["AccountId"].values
UniqueAccounts = set(AccountIDs)

#Converting transaction dates to months
#(to_period("M") drops the day component and to_timestamp() turns the period back into a timestamp)
BoostedTransactions["TransactionDate"] = pd.to_datetime(BoostedTransactions["TransactionDate"])
BoostedTransactions["TransactionDate"] = BoostedTransactions["TransactionDate"].dt.to_period("M").dt.to_timestamp()

#Grouping transactions by account and month and summing the net transaction amount for each grouping
#"Amount" is selected BEFORE aggregating so that only that column is summed; aggregating the whole frame
#and selecting afterwards sums every other column as well, which is wasted work and fails on columns
#whose dtype cannot be summed. The result is the same single-column ("sum") frame.
GroupedSumOfTransactionsByAccountAndMonth = BoostedTransactions.groupby(by=["AccountId", "TransactionDate"])["Amount"].agg(["sum"])
print(GroupedSumOfTransactionsByAccountAndMonth)
#Turning the (AccountId, TransactionDate) index back into ordinary columns for the per-account filtering below
GroupedSumOfTransactionsByAccountAndMonth = GroupedSumOfTransactionsByAccountAndMonth.reset_index()

                                                          sum
AccountId                            TransactionDate         
0003a5ae-0c77-4372-b44d-882ef9874a28 2019-05-01      -5541.31
                                     2019-06-01      -3601.46
                                     2019-07-01       3151.74
                                     2019-08-01      -4801.13
                                     2019-09-01       3890.05
...                                                       ...
fff7f00c-c869-4310-b705-4503538f5ecf 2020-03-01       2974.84
                                     2020-04-01        382.93
                                     2020-05-01        166.15
                                     2020-06-01       -550.89
                                     2020-07-01      -2721.18

[10995 rows x 1 columns]


**Key Naive Forecast Loop**

In [2]:
#Creating models and forecasting for all accounts
#
#For every account this cell:
#  1. builds a gap-free monthly series of net transaction amounts,
#  2. splits it 70/30 (chronologically) into training and testing sets,
#  3. scores a naive forecast (last training value carried forward) on the testing set,
#  4. extends that naive forecast over the 12 months after the last month in the series.
#
#Results are collected in memory and written once after the loop, overwriting any previous output.
#Appending to the CSVs on every iteration meant that re-running the cell duplicated every row,
#which silently skewed the averaged error statistics computed from the errors CSV.
ErrorsPath = "Naive Errors (Month-by-Month).csv"
ForecastsPath = "Naive Forecasts (Month-by-Month).csv"
ErrorRows = []
ForecastRows = []

for i in UniqueAccounts:
    print(i)
    #Pre-binding the per-account names so the clean-up in "finally" can never raise a NameError
    #(and mask the real error) when a failure happens before they are assigned
    ForecastingTimeSeries = TrainData = TestData = None
    try:
        #Selecting only the selected account's time series and ensuring it has no gaps
        #(resample("MS").sum() inserts any missing month with a net amount of 0)
        ForecastingTimeSeries = GroupedSumOfTransactionsByAccountAndMonth[GroupedSumOfTransactionsByAccountAndMonth["AccountId"] == i][["TransactionDate", "sum"]]
        ForecastingTimeSeries = ForecastingTimeSeries.set_index("TransactionDate")
        ForecastingTimeSeries = ForecastingTimeSeries.resample("MS").sum()
        ForecastingTimeSeries = ForecastingTimeSeries.reset_index()

        #Calculating the cutoff between training and testing sets
        TrainEnd = int(0.7 * len(ForecastingTimeSeries))

        #A series with fewer than two months leaves the training set empty, so there is no last
        #training value to carry forward; such accounts are skipped instead of aborting the whole run
        if TrainEnd == 0:
            print(f"Skipping account number {i}: not enough monthly data for a train/test split.")
            continue

        #Splitting the data into training and testing sets
        #(the testing set is copied so that adding a column does not write into a slice of the full series)
        TrainData = ForecastingTimeSeries[:TrainEnd]
        TestData = ForecastingTimeSeries[TrainEnd:].copy()

        #Performing a naive forecast on the test data
        TestData["Naive Forecast"] = TrainData.iloc[-1]["sum"]

        #Calculating error rates and recording them for the errors CSV
        try:
            #Arguments are passed as (actual, forecast); RMSE is symmetric so the value is unaffected
            RMSE = root_mean_squared_error(TestData["sum"], TestData["Naive Forecast"])
            #SI is the RMSE divided by the mean absolute actual value of the testing set
            SI = abs(RMSE/abs(TestData["sum"]).mean())
            ErrorRows.append({"AccountID" : i,
                              "RMSE" : RMSE,
                              "SI" : SI})
        #Where a ValueError occurs, the loop is broken
        except ValueError:
            print("ValueError encountered")
            break

        #Performing a naive forecast over the next 12 months
        LastDate = TestData["TransactionDate"].max()

        FutureDates = pd.date_range(start=LastDate + pd.tseries.frequencies.to_offset("MS"),
                                    periods=12,
                                    freq="MS")

        FutureForecast = pd.DataFrame({"TransactionDate": FutureDates})

        #NOTE(review): this carries forward the last TRAINING value (via the "Naive Forecast" column),
        #not the most recent observed month - confirm this is the intended baseline
        FutureForecast["Forecast"] = TestData.iloc[-1]["Naive Forecast"]
        FutureForecastValues = FutureForecast["Forecast"].values

        #Calculating highest amount, lowest amount and difference and recording them for the forecasts CSV
        #(the forecast is one repeated value, so the highest and lowest amounts coincide)
        HighestAmount = FutureForecastValues.max()
        LowestAmount = FutureForecastValues.min()
        Difference = HighestAmount - LowestAmount

        ForecastRows.append({"AccountID" : i,
                             "Difference" : Difference,
                             "Highest Amount" : HighestAmount,
                             "Lowest Amount" : LowestAmount})

        print("Processed account number",i)
    #Where an error occurs, the user is informed about it and the loop is stopped
    except Exception as e:
        print(f"Error encountered processing account number {i}.")
        ErrorType, ErrorObject, ErrorTraceback = sys.exc_info()

        ErrorFilename = os.path.split(
            ErrorTraceback.tb_frame.f_code.co_filename
        )[1]

        ErrorMessage = str(e)

        ErrorLineNumber = ErrorTraceback.tb_lineno

        print(f'Exception Type: {ErrorType}')

        print(f'Exception Filename: {ErrorFilename}')

        print(f'Exception Line Number: {ErrorLineNumber}')

        print(f'Exception Message: {ErrorMessage}')
        break
    #Deleting model data from memory to conserve RAM
    finally:
        del TrainData, TestData, ForecastingTimeSeries
        gc.collect()

#Writing the collected results once, replacing any file left over from a previous run.
#Rows gathered before an early "break" are still written; the explicit column lists keep
#the header present even when no account was processed.
pd.DataFrame(ErrorRows, columns=["AccountID", "RMSE", "SI"]).to_csv(ErrorsPath, index=False)
pd.DataFrame(ForecastRows, columns=["AccountID", "Difference", "Highest Amount", "Lowest Amount"]).to_csv(ForecastsPath, index=False)

ec5f06dc-4601-48f7-9f98-5e1cab7fef30
Processed account number ec5f06dc-4601-48f7-9f98-5e1cab7fef30
5b74e76a-5211-4eeb-a58b-b952ef4dad05
Processed account number 5b74e76a-5211-4eeb-a58b-b952ef4dad05
75c5de1e-ea8e-4292-bbdd-208619c52bf9
Processed account number 75c5de1e-ea8e-4292-bbdd-208619c52bf9
d0e8786d-0722-4201-b92b-98971320da47
Processed account number d0e8786d-0722-4201-b92b-98971320da47
5c9a8148-80f7-4f50-a743-e5aa862df2f6
Processed account number 5c9a8148-80f7-4f50-a743-e5aa862df2f6
ef6c06ee-8674-4267-8c69-f0671308c180
Processed account number ef6c06ee-8674-4267-8c69-f0671308c180
16448
Processed account number 16448
e520d0a6-a1d7-468a-9d5b-3e554dafe615
Processed account number e520d0a6-a1d7-468a-9d5b-3e554dafe615
de5ae23a-7bc6-46b7-9dd1-bbd280221ce8
Processed account number de5ae23a-7bc6-46b7-9dd1-bbd280221ce8
37a4fd57-0597-4e55-bd80-30b98e0a213a
Processed account number 37a4fd57-0597-4e55-bd80-30b98e0a213a
42f9221c-4768-4a06-86b5-fca66c9df450
Processed account number 42f9221c-4

**Calculating Error Statistics**

In [3]:
#Summarising the per-account naive forecast errors across all accounts
Errors = pd.read_csv("Naive Errors (Month-by-Month).csv")

#Pulling out the two error columns once so each statistic reads from a named series
RMSEValues = Errors["RMSE"]
SIValues = Errors["SI"]

#Root mean squared error: mean and median over accounts
MeanRMSE = RMSEValues.mean()
print("Mean RMSE:", MeanRMSE)
MedianRMSE = RMSEValues.median()
print("Median RMSE:", MedianRMSE)

#SI (the "SI" column of the errors CSV): mean and median over accounts
MeanSI = SIValues.mean()
print("Mean SI:", MeanSI)
MedianSI = SIValues.median()
print("Median SI:", MedianSI)

Mean RMSE: 14612.522393405205
Median RMSE: 3957.4117233143825
Mean SI: 4.91332048091912
Median SI: 1.5343829333992682
