In [1]:
import pandas as pd
import numpy as np
from neuralforecast import NeuralForecast
from neuralforecast.models import NBEATSx
from neuralforecast.losses.pytorch import MQLoss
import optuna
from statsmodels.tsa.seasonal import STL
from sklearn.metrics import mean_absolute_error  

In [2]:

def tune_prediction_model(df, horizon, n_trials):

    df = df.copy()
    if not pd.api.types.is_datetime64_any_dtype(df['date']):
        df['date'] = pd.to_datetime(df['date'])
        
    df['date'] = pd.to_datetime(df['date'])
    df_model = df.rename(columns={'date': 'ds', 'payment': 'y'})
    df_model['unique_id'] = 'main_series'
    
    
    def create_future_features(test_df):
        # Create future_df from test_df dates
        future_df = test_df[['ds']].copy()
        future_df['unique_id'] = 'main_series'
        
        # Merge with test_df features
        test_features = test_df[['ds', 'balance', 'balance_changed' ,'day_of_week_sin', 'day_of_week_cos', 
                               'is_weekend','payment_made']]
        future_df = future_df.merge(test_features, on='ds', how='left')
        
        return future_df[[
            'ds', 'unique_id', 'balance', 'balance_changed',
            'day_of_week_sin', 'day_of_week_cos', 'is_weekend','payment_made']]
    
    main_df=df_model[:-horizon].copy()
    train_df = main_df[:-horizon].copy()
    test_df = main_df[-horizon:].copy()

    #create future features to tune parameters 
    future_exog_df = create_future_features(test_df)


    h_exog_vars = [
        'payment_count', 'days_since_last_payment', '7day_avg_payment', '30day_avg_payment',
        'cumulative_avg_zero_days', 'num_payments_from_start', 'payment_made', 
        'day_of_week_sin', 'day_of_week_cos', 'is_weekend', 
        'balance', 'balance_changed'
    ]

    f_exog_vars = [
        'day_of_week_sin', 'day_of_week_cos', 'is_weekend',
        'balance', 'balance_changed',
        'payment_made'
    ]

    
    def objective(trial):
        try:
            input_size = trial.suggest_int('input_size', 14, 42, step=7)
            dropout_prob_theta = trial.suggest_float('dropout_prob_theta', 0.1, 0.3)
            max_steps = trial.suggest_int('max_steps', 100, 300, step=50)

            model = NBEATSx(
                h=horizon,
                input_size=input_size,
                loss=MQLoss(level=[90]),
                scaler_type='minmax',
                dropout_prob_theta=dropout_prob_theta,
                futr_exog_list=f_exog_vars,
                hist_exog_list=h_exog_vars,
                max_steps=max_steps,
                val_check_steps=20,
                early_stop_patience_steps=5
            )

            nf = NeuralForecast(models=[model], freq='D')
            nf.fit(df=train_df, val_size=horizon)

            Y_hat_df = nf.predict(futr_df=future_exog_df)
            Y_hat_df['NBEATSx-median'] *= future_exog_df['payment_made'].values

            # Create ground truth from validation part of train set
            val_target = test_df['y'].values
            pred = Y_hat_df['NBEATSx-median'].values

            mae = mean_absolute_error(val_target, pred)

            print(f"Trial {trial.number}: input_size={input_size}, dropout={dropout_prob_theta:.2f}, steps={max_steps}, MAE={mae:.4f}")
            return mae
        except Exception as e:
            print(f"Trial {trial.number} failed: {e}")
            return float('inf')  # Penalize failed trial

    study = optuna.create_study(direction='minimize')
    study.optimize(objective, n_trials=n_trials)

    if len(study.trials) == 0 or all(t.state != optuna.trial.TrialState.COMPLETE for t in study.trials):
        print("No successful trials were completed.")
        return {}

    print("Best trial:", study.best_trial)
    return study.best_trial.params

In [11]:
def train_and_predict_payment(df, best_params, horizon, balance_pred_df, classification_results_df):
    df = df.copy()
    if not pd.api.types.is_datetime64_any_dtype(df['date']):
        df['date'] = pd.to_datetime(df['date'])

    df['unique_id'] = 'main_series'
    df = df.rename(columns={'date': 'ds', 'payment': 'y'})

    def create_future_features(df, balance_pred_df,classification_results_df):
        last_date = df['ds'].max()
        future_dates = pd.date_range(start=last_date + pd.Timedelta(days=1), periods=horizon, freq='D')

        future_df = pd.DataFrame({'ds': future_dates})
        future_df['unique_id'] = 'main_series'

        # Load external forecasts
        occur_pred = classification_results_df.copy()
        #occur_pred['date'] = pd.to_datetime(occur_pred['date']) 
        occur_pred = occur_pred.rename(columns={
            'predicted_payment_occurence': 'payment_made',
            'predicted_occurence_probability': 'payment_occurenc_probability'
        })
        occur_pred = occur_pred[['ds', 'payment_made', 'payment_occurenc_probability']]

        balance_pred = balance_pred_df.copy()
        min_v = balance_pred['Forecast'].min()
        max_v = balance_pred['Forecast'].max()
        balance_pred['Forecast'] = (balance_pred['Forecast'] - min_v) / (max_v - min_v) if max_v != min_v else 0
        #balance_pred['date'] = pd.to_datetime(balance_pred['date']) 
        balance_pred = balance_pred.rename(columns={'Forecast': 'balance'})
        balance_pred = balance_pred[['ds', 'balance']]

        # Merge
        future_df = future_df.merge(occur_pred, on='ds', how='left')
        future_df = future_df.merge(balance_pred, on='ds', how='left')

        # Add time features
        future_df['day_of_week'] = future_df['ds'].dt.dayofweek
        future_df['day_of_week_sin'] = np.sin(2 * np.pi * future_df['day_of_week'] / 7)
        future_df['day_of_week_cos'] = np.cos(2 * np.pi * future_df['day_of_week'] / 7)
        future_df['is_weekend'] = future_df['day_of_week'].isin([5, 6]).astype(int)

        # Balance change features
        future_df = future_df.sort_values('ds').reset_index(drop=True)
        future_df['balance_changed'] = (future_df['balance'] != future_df['balance'].shift(1)).astype(int)
        future_df['balance_changed'] = future_df['balance_changed'].fillna(0)

        future_df['payment_made'] = future_df['payment_made'].fillna(0)
        future_df['payment_occurenc_probability'] = future_df['payment_occurenc_probability'].fillna(0)
        future_df['balance'] = future_df['balance'].fillna(method='ffill').fillna(0)


        

        return future_df[[
            'ds', 'unique_id', 'balance', 'balance_changed',
            'day_of_week_sin', 'day_of_week_cos', 'is_weekend', 'payment_made', 'payment_occurenc_probability'
        ]]

    amount_train_df = df[:-horizon].copy()

    future_exog_df = create_future_features(amount_train_df, balance_pred_df, classification_results_df)

    h_exog_vars = [
        'payment_count', 'days_since_last_payment', '7day_avg_payment', '30day_avg_payment',
        'cumulative_avg_zero_days', 'num_payments_from_start', 'payment_made',
        'day_of_week_sin', 'day_of_week_cos', 'is_weekend',
        'balance', 'balance_changed'
    ]

    f_exog_vars = [
        'day_of_week_sin', 'day_of_week_cos', 'is_weekend',
        'balance', 'balance_changed', 'payment_made'
    ]

    model = NBEATSx(
        h=horizon,
        input_size=best_params['input_size'],
        loss=MQLoss(level=[90]),
        scaler_type='minmax',
        dropout_prob_theta=best_params['dropout_prob_theta'],
        futr_exog_list=f_exog_vars,
        hist_exog_list=h_exog_vars,
        max_steps=best_params['max_steps'],
        val_check_steps=20,
        early_stop_patience_steps=5
    )

    nf = NeuralForecast(models=[model], freq='D')
    nf.fit(df=amount_train_df, val_size=horizon)

    amount_Y_hat_df = nf.predict(futr_df=future_exog_df)
    amount_Y_hat_df['NBEATSx-median'] *= future_exog_df['payment_made'].values
    amount_Y_hat_df['NBEATSx-hi-90'] *= future_exog_df['payment_made'].values
    amount_Y_hat_df['NBEATSx-lo-90'] *= future_exog_df['payment_made'].values

    final_regression_result = amount_Y_hat_df[['ds', 'NBEATSx-median', 'NBEATSx-lo-90', 'NBEATSx-hi-90']].copy()
    final_regression_result['uncertainty_range'] = (
        final_regression_result['NBEATSx-hi-90'] - final_regression_result['NBEATSx-lo-90']
    )
    epsilon = 1e-6
    final_regression_result['uncertainty_percent'] = (
        final_regression_result['uncertainty_range'] / (final_regression_result['NBEATSx-median'] + epsilon)
    ) * 100

    series = final_regression_result['NBEATSx-median']
    series.index = pd.to_datetime(final_regression_result['ds'])
    stl = STL(series, period=7)
    result = stl.fit()
    final_regression_result['trend'] = result.trend.values
    final_regression_result['seasonal'] = result.seasonal.values
    final_regression_result['residual'] = result.resid.values

    final_regression_result['occurence'] = future_exog_df['payment_made']
    final_regression_result['probability'] = future_exog_df['payment_occurenc_probability']

    def get_top_correlation_features(train_data, target_col, future_features, historical_features, top_k=3):
        feature_importance = {}
        for feature in future_features + historical_features:
            if feature in train_data.columns:
                try:
                    x = train_data[feature].values
                    y = train_data[target_col].values
                    if np.std(x) == 0 or np.std(y) == 0:
                        feature_importance[feature] = 0.0
                    else:
                        corr = np.corrcoef(x, y)[0, 1]
                        feature_importance[feature] = round(np.abs(corr), 2) if not np.isnan(corr) else 0.0
                except Exception:
                    feature_importance[feature] = 0.0
        sorted_features = sorted(feature_importance.items(), key=lambda x: x[1], reverse=True)
        return sorted_features[:top_k]

    top_features = get_top_correlation_features(
        train_data=df,
        target_col='y',
        future_features=h_exog_vars,
        historical_features=f_exog_vars,
        top_k=5
    )

    return final_regression_result, top_features

In [4]:
if __name__ == "__main__":
    # Load data
    
    classification_df = pd.read_excel('classification_result_shiharadata.xlsx')
    df = pd.read_excel('shihara_processed_data.xlsx')
    balance_pred_df = pd.read_excel('forecast.xlsx')
    
    # Ensure dates are parsed
    df['date'] = pd.to_datetime(df['date'])
    balance_pred_df['ds'] = pd.to_datetime("2025-01-02") + pd.to_timedelta(balance_pred_df['Day'] - 1, unit='D')
    classification_df['ds'] = pd.to_datetime(classification_df['ds'])

    


    best_params = tune_prediction_model(
    df=df,
    horizon=30,
    n_trials=7,
    
    )

    final_df,top_feats= train_and_predict_payment(
        df=df,
        best_params=best_params,
        horizon=30,
        balance_pred_df=balance_pred_df,
        classification_results_df=classification_df
    )

[I 2025-07-12 11:43:27,063] A new study created in memory with name: no-name-24eda479-dc12-4ade-abe1-ac99b1e599eb
Seed set to 1
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name         | Type          | Params
-----------------------------------------------
0 | loss         | MQLoss        | 3     
1 | padder_train | ConstantPad1d | 0     
2 | scaler       | TemporalNorm  | 0     
3 | blocks       | ModuleList    | 3.6 M 
-----------------------------------------------
3.6 M     Trainable params
3.5 K     Non-trainable params
3.6 M     Total params
14.593    Total estimated model params size (MB)


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_steps=200` reached.
Trainer already configured with model summary callbacks: [<class 'pytorch_lightning.callbacks.model_summary.ModelSummary'>]. Skipping setting a default `ModelSummary` callback.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


Predicting: |          | 0/? [00:00<?, ?it/s]

[I 2025-07-12 11:44:51,533] Trial 0 finished with value: 257.33853059895836 and parameters: {'input_size': 28, 'dropout_prob_theta': 0.15453335388995676, 'max_steps': 200}. Best is trial 0 with value: 257.33853059895836.
Seed set to 1


Trial 0: input_size=28, dropout=0.15, steps=200, MAE=257.3385


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name         | Type          | Params
-----------------------------------------------
0 | loss         | MQLoss        | 3     
1 | padder_train | ConstantPad1d | 0     
2 | scaler       | TemporalNorm  | 0     
3 | blocks       | ModuleList    | 3.9 M 
-----------------------------------------------
3.9 M     Trainable params
4.0 K     Non-trainable params
3.9 M     Total params
15.426    Total estimated model params size (MB)


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_steps=100` reached.
Trainer already configured with model summary callbacks: [<class 'pytorch_lightning.callbacks.model_summary.ModelSummary'>]. Skipping setting a default `ModelSummary` callback.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


Predicting: |          | 0/? [00:00<?, ?it/s]

[I 2025-07-12 11:45:26,381] Trial 1 finished with value: 328.9357761332194 and parameters: {'input_size': 35, 'dropout_prob_theta': 0.18866214153498523, 'max_steps': 100}. Best is trial 0 with value: 257.33853059895836.
Seed set to 1


Trial 1: input_size=35, dropout=0.19, steps=100, MAE=328.9358


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name         | Type          | Params
-----------------------------------------------
0 | loss         | MQLoss        | 3     
1 | padder_train | ConstantPad1d | 0     
2 | scaler       | TemporalNorm  | 0     
3 | blocks       | ModuleList    | 4.1 M 
-----------------------------------------------
4.1 M     Trainable params
4.4 K     Non-trainable params
4.1 M     Total params
16.260    Total estimated model params size (MB)


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_steps=100` reached.
Trainer already configured with model summary callbacks: [<class 'pytorch_lightning.callbacks.model_summary.ModelSummary'>]. Skipping setting a default `ModelSummary` callback.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


Predicting: |          | 0/? [00:00<?, ?it/s]

[I 2025-07-12 11:45:40,001] Trial 2 finished with value: 329.4515959370931 and parameters: {'input_size': 42, 'dropout_prob_theta': 0.29575054557457847, 'max_steps': 100}. Best is trial 0 with value: 257.33853059895836.
Seed set to 1
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name         | Type          | Params
-----------------------------------------------
0 | loss         | MQLoss        | 3     
1 | padder_train | ConstantPad1d | 0     
2 | scaler       | TemporalNorm  | 0     
3 | blocks       | ModuleList    | 3.4 M 
-----------------------------------------------
3.4 M     Trainable params
3.1 K     Non-trainable params
3.4 M     Total params
13.760    Total estimated model params size (MB)


Trial 2: input_size=42, dropout=0.30, steps=100, MAE=329.4516


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Trainer already configured with model summary callbacks: [<class 'pytorch_lightning.callbacks.model_summary.ModelSummary'>]. Skipping setting a default `ModelSummary` callback.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


Predicting: |          | 0/? [00:00<?, ?it/s]

[I 2025-07-12 11:46:21,474] Trial 3 finished with value: 294.0886486002604 and parameters: {'input_size': 21, 'dropout_prob_theta': 0.24872995578278637, 'max_steps': 200}. Best is trial 0 with value: 257.33853059895836.
Seed set to 1
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name         | Type          | Params
-----------------------------------------------
0 | loss         | MQLoss        | 3     
1 | padder_train | ConstantPad1d | 0     
2 | scaler       | TemporalNorm  | 0     
3 | blocks       | ModuleList    | 3.9 M 
-----------------------------------------------
3.9 M     Trainable params
4.0 K     Non-trainable params
3.9 M     Total params
15.426    Total estimated model params size (MB)


Trial 3: input_size=21, dropout=0.25, steps=200, MAE=294.0886


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_steps=200` reached.
Trainer already configured with model summary callbacks: [<class 'pytorch_lightning.callbacks.model_summary.ModelSummary'>]. Skipping setting a default `ModelSummary` callback.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


Predicting: |          | 0/? [00:00<?, ?it/s]

[I 2025-07-12 11:46:43,379] Trial 4 finished with value: 278.4719651489258 and parameters: {'input_size': 35, 'dropout_prob_theta': 0.16725657328426152, 'max_steps': 200}. Best is trial 0 with value: 257.33853059895836.
Seed set to 1
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name         | Type          | Params
-----------------------------------------------
0 | loss         | MQLoss        | 3     
1 | padder_train | ConstantPad1d | 0     
2 | scaler       | TemporalNorm  | 0     
3 | blocks       | ModuleList    | 3.9 M 
-----------------------------------------------
3.9 M     Trainable params
4.0 K     Non-trainable params
3.9 M     Total params
15.426    Total estimated model params size (MB)


Trial 4: input_size=35, dropout=0.17, steps=200, MAE=278.4720


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_steps=300` reached.
Trainer already configured with model summary callbacks: [<class 'pytorch_lightning.callbacks.model_summary.ModelSummary'>]. Skipping setting a default `ModelSummary` callback.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


Predicting: |          | 0/? [00:00<?, ?it/s]

[I 2025-07-12 11:48:24,255] Trial 5 finished with value: 209.62702343750001 and parameters: {'input_size': 35, 'dropout_prob_theta': 0.12490567664577282, 'max_steps': 300}. Best is trial 5 with value: 209.62702343750001.
Seed set to 1
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name         | Type          | Params
-----------------------------------------------
0 | loss         | MQLoss        | 3     
1 | padder_train | ConstantPad1d | 0     
2 | scaler       | TemporalNorm  | 0     
3 | blocks       | ModuleList    | 4.1 M 
-----------------------------------------------
4.1 M     Trainable params
4.4 K     Non-trainable params
4.1 M     Total params
16.260    Total estimated model params size (MB)


Trial 5: input_size=35, dropout=0.12, steps=300, MAE=209.6270


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Trainer already configured with model summary callbacks: [<class 'pytorch_lightning.callbacks.model_summary.ModelSummary'>]. Skipping setting a default `ModelSummary` callback.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


Predicting: |          | 0/? [00:00<?, ?it/s]

[I 2025-07-12 11:49:43,676] Trial 6 finished with value: 279.9090801188151 and parameters: {'input_size': 42, 'dropout_prob_theta': 0.18429817045350427, 'max_steps': 250}. Best is trial 5 with value: 209.62702343750001.
Seed set to 1
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name         | Type          | Params
-----------------------------------------------
0 | loss         | MQLoss        | 3     
1 | padder_train | ConstantPad1d | 0     
2 | scaler       | TemporalNorm  | 0     
3 | blocks       | ModuleList    | 3.9 M 
-----------------------------------------------
3.9 M     Trainable params
4.0 K     Non-trainable params
3.9 M     Total params
15.426    Total estimated model params size (MB)


Trial 6: input_size=42, dropout=0.18, steps=250, MAE=279.9091
Best trial: FrozenTrial(number=5, state=1, values=[209.62702343750001], datetime_start=datetime.datetime(2025, 7, 12, 11, 46, 43, 379465), datetime_complete=datetime.datetime(2025, 7, 12, 11, 48, 24, 255002), params={'input_size': 35, 'dropout_prob_theta': 0.12490567664577282, 'max_steps': 300}, user_attrs={}, system_attrs={}, intermediate_values={}, distributions={'input_size': IntDistribution(high=42, log=False, low=14, step=7), 'dropout_prob_theta': FloatDistribution(high=0.3, log=False, low=0.1, step=None), 'max_steps': IntDistribution(high=300, log=False, low=100, step=50)}, trial_id=5, value=None)


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

ValueError: Found null values in `futr_df`

In [12]:
final_df,top_feats= train_and_predict_payment(
        df=df,
        best_params=best_params,
        horizon=30,
        balance_pred_df=balance_pred_df,
        classification_results_df=classification_df
    )

  future_df['balance'] = future_df['balance'].fillna(method='ffill').fillna(0)
Seed set to 1


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name         | Type          | Params
-----------------------------------------------
0 | loss         | MQLoss        | 3     
1 | padder_train | ConstantPad1d | 0     
2 | scaler       | TemporalNorm  | 0     
3 | blocks       | ModuleList    | 3.9 M 
-----------------------------------------------
3.9 M     Trainable params
4.0 K     Non-trainable params
3.9 M     Total params
15.426    Total estimated model params size (MB)


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Trainer already configured with model summary callbacks: [<class 'pytorch_lightning.callbacks.model_summary.ModelSummary'>]. Skipping setting a default `ModelSummary` callback.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


Predicting: |          | 0/? [00:00<?, ?it/s]

In [13]:
final_df.to_excel('final_shiharadata_payments.xlsx', index=False)
print("Saved predictions to final_payment_predictions.xlsx")

Saved predictions to final_payment_predictions.xlsx


In [14]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import numpy as np

def evaluate_prediction_results(df, final_df):
    # Ensure date types are correct
    df['date'] = pd.to_datetime(df['date'])
    final_df['ds'] = pd.to_datetime(final_df['ds'])

    # Filter ground truth (actuals) for prediction period (next 30 days)
    start_date = final_df['ds'].min()
    end_date = final_df['ds'].max()
    actual_df = df[(df['date'] >= start_date) & (df['date'] <= end_date)].copy()

    # Filter only days where payment was actually made (expenses > 0)
    actual_expenses = actual_df[actual_df['payment'] > 0].copy()

    # Merge with predictions
    merged = pd.merge(
        actual_expenses[['date', 'payment']],
        final_df[['ds', 'NBEATSx-median']],
        left_on='date',
        right_on='ds',
        how='inner'
    )

    # Drop rows with missing predictions
    merged = merged.dropna()

    # Rename for clarity
    y_true = merged['payment'].values
    y_pred = merged['NBEATSx-median'].values

    # Compute metrics
    mae = mean_absolute_error(y_true, y_pred)
    rmse = np.sqrt(mean_squared_error(y_true, y_pred))
    mape = np.mean(np.abs((y_true - y_pred) / (y_true + 1e-6))) * 100  # avoid division by zero
    r2 = r2_score(y_true, y_pred)

    # Print results
    print("---- Evaluation on Actual Expense Days (Payment > 0) ----")
    print(f"MAE:  {mae:.4f}")
    print(f"RMSE: {rmse:.4f}")
    print(f"MAPE: {mape:.2f}%")
    print(f"R²:   {r2:.4f}")

    return {
        'mae': mae,
        'rmse': rmse,
        'mape': mape,
        'r2': r2
    }

# Usage
metrics = evaluate_prediction_results(df, final_df)

---- Evaluation on Actual Expense Days (Payment > 0) ----
MAE:  1455.5006
RMSE: 1628.6489
MAPE: 96.81%
R²:   -1.8524
