In [6]:
import pandas as pd
import statsmodels.api as sm
from statsmodels.tsa.statespace.sarimax import SARIMAX

def process_user_data(q):
    # Import all packages and libraries
    from statsmodels.tsa.arima.model import ARIMA
    import matplotlib.pyplot as plt

    # import transaction data - feed user csv into function
    def read_transaction_by_Q(q):
        transactions = pd.read_csv('user'+q+'.csv')
        transactions = transactions.sort_values(by="postDate")
        transactions = transactions.reset_index(drop=True)
        return transactions

    data = read_transaction_by_Q(q)

    # Add the new total_balance variable which we will use as our predictor variable
    def process_transactions(data):
        initial_balance = data.at[0, 'balance']
        data.at[0, 'total_balance'] = initial_balance
        for index, row in data.iterrows():
            if index == 0:
                continue
            amount = row['amount']
            initial_balance += amount
            data.at[index, 'total_balance'] = initial_balance
        data.insert(7, 'total_balance', data.pop('total_balance'))
        return data

    data = process_transactions(data)

    # Data Preparation
    data['postDate'] = pd.to_datetime(data.postDate)
    # Keep important columns - postDate and total_balance
    model_df = data[['postDate', 'total_balance']]
    # Split data into train and test
    total_rows = len(model_df)
    train_rows = int(0.8 * total_rows)
    test_rows = total_rows - train_rows
    train_data = model_df.iloc[:train_rows]
    test_data = model_df.iloc[train_rows:]
    # Set postDate to index in train and test
    train_data.set_index('postDate', inplace=True)
    test_data.set_index('postDate', inplace=True)

    # Use SARIMAX
    sa_model = SARIMAX(train_data, order=(2, 1, 2), seasonal_order=(2, 1, 2, 4))
    sa_results = sa_model.fit()

    # Generate SARIMAX forecasts for the next 365 days
    forecast_steps = 365
    sa_future = sa_results.get_forecast(steps=forecast_steps)
    sa_forecast_values = sa_future.predicted_mean
    forecast_index = pd.date_range(start=test_data.index[-1] + pd.DateOffset(days=1), periods=forecast_steps, freq='D')

    # Create a DataFrame with postDate and predicted values
    predicted_df = pd.DataFrame({'postDate': forecast_index, 'Predicted_Total_Balance': sa_forecast_values})

    # Get the last 'total_balance' value from the test_data dataframe
    last_balance = test_data['total_balance'].iloc[-1]

    # Define the indices for the desired time periods
    time_periods = [6, 29, 89, 179, 364]  # 7 days, 30 days, 90 days, 180 days, 365 days

    # Empty dictionary to store the results
    savings_dict = {}

    # Calculate and store the savings for each time period
    for i, period in enumerate(["1 Week", "1 Month", "3 Months", "6 Months", "12 Months"]):
        # Get the index for the current time period
        period_index = time_periods[i]

        # Get the predicted balance at the current time period
        predicted_balance = predicted_df['Predicted_Total_Balance'].iloc[period_index]

        # Calculate the savings
        savings = predicted_balance - last_balance

        # Store the savings in the dictionary
        savings_dict[period] = savings

    # Convert the dictionary to a DataFrame
    savings_df = pd.DataFrame(list(savings_dict.items()), columns=['Time Period', 'Savings'])

    return savings_df

# Example usage:
savings_result = process_user_data('1')
print(savings_result)


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


  Time Period       Savings
0      1 Week  -5751.519237
1     1 Month   5040.588085
2    3 Months  15181.871406
3    6 Months  28708.481022
4   12 Months  60573.039177


  return get_prediction_index(
  return get_prediction_index(
