In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/datasets/dimaspashaakrilian/dsc-itb/Data_Klaim.csv
/kaggle/input/datasets/dimaspashaakrilian/dsc-itb/sample_submission.csv
/kaggle/input/datasets/dimaspashaakrilian/dsc-itb/Data_Polis.csv


# DATA FOUNDATION

In [2]:
# ============================================================
# STAGE 1 v5 — CLEAN MINIMAL FOUNDATION
# Payment/Service switch • No lag drop • No exposure drift
# ============================================================

import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings("ignore")

BASE_PATH = "/kaggle/input/datasets/dimaspashaakrilian/dsc-itb/"

klaim = pd.read_csv(BASE_PATH + "Data_Klaim.csv")
polis = pd.read_csv(BASE_PATH + "Data_Polis.csv")

# =============================
# CLEAN COLUMNS
# =============================

def clean_columns(df):
    df = df.copy()
    df.columns = (
        df.columns
        .str.strip()
        .str.lower()
        .str.replace(" ", "_", regex=False)
        .str.replace("/", "_", regex=False)
        .str.replace("-", "_", regex=False)
    )
    return df

klaim = clean_columns(klaim)
polis = clean_columns(polis)

klaim = klaim.drop_duplicates().reset_index(drop=True)
polis = polis.drop_duplicates().reset_index(drop=True)

# =============================
# DATE PARSING
# =============================

for col in klaim.columns:
    if "tanggal" in col:
        klaim[col] = pd.to_datetime(klaim[col], errors="coerce")

for col in polis.columns:
    if "tanggal" in col:
        polis[col] = pd.to_datetime(polis[col], errors="coerce")

# =============================
# BASIC CLEANING
# =============================

klaim = klaim.dropna(subset=["nomor_polis"])
klaim["nominal_klaim_yang_disetujui"] = klaim["nominal_klaim_yang_disetujui"].fillna(0)

df = klaim.merge(polis, on="nomor_polis", how="left")

# =============================
# MONTH AXIS (VERY IMPORTANT)
# =============================

MONTH_TYPE = "PAYMENT"   # <<< GANTI KE PAYMENT

if MONTH_TYPE == "SERVICE":
    month_col = "tanggal_pasien_masuk_rs"
elif MONTH_TYPE == "PAYMENT":
    month_col = "tanggal_pembayaran_klaim"
else:
    raise ValueError("MONTH_TYPE must be SERVICE or PAYMENT")

df = df.dropna(subset=[month_col]).copy()
df["year_month"] = df[month_col].dt.to_period("M")

# =============================
# MONTHLY AGGREGATION
# =============================

monthly = (
    df.groupby("year_month")
      .agg(
          frequency=("claim_id","count"),
          total_claim=("nominal_klaim_yang_disetujui","sum")
      )
      .reset_index()
      .sort_values("year_month")
      .reset_index(drop=True)
)

monthly["severity"] = (
    monthly["total_claim"] /
    monthly["frequency"].replace(0,np.nan)
)

print("Month type:", MONTH_TYPE)
print("Monthly shape:", monthly.shape)
print("Unique months:", monthly["year_month"].nunique())
print("\nSTAGE 1 v5 READY")


Month type: PAYMENT
Monthly shape: (24, 4)
Unique months: 24

STAGE 1 v5 READY


In [3]:
# ============================================================
# STAGE 1 v4 — USE PAYMENT DATE (CRITICAL TEST)
# ============================================================

df["year_month"] = df["tanggal_pembayaran_klaim"].dt.to_period("M")

monthly = (
    df.groupby("year_month")
      .agg(
          frequency=("claim_id","count"),
          total_claim=("nominal_klaim_yang_disetujui","sum")
      )
      .reset_index()
      .sort_values("year_month")
      .reset_index(drop=True)
)

print(monthly)

   year_month  frequency   total_claim
0     2024-01          8  1.283162e+08
1     2024-02         92  2.684171e+09
2     2024-03         97  3.809944e+09
3     2024-04        221  9.281203e+09
4     2024-05        233  1.103847e+10
5     2024-06        221  1.127720e+10
6     2024-07        205  1.159773e+10
7     2024-08        285  1.895989e+10
8     2024-09        250  1.484250e+10
9     2024-10        242  1.114198e+10
10    2024-11        365  1.740396e+10
11    2024-12        295  1.409901e+10
12    2025-01        293  1.697253e+10
13    2025-02        183  9.559585e+09
14    2025-03        234  1.494105e+10
15    2025-04        184  7.538943e+09
16    2025-05        201  9.628068e+09
17    2025-06        204  1.617766e+10
18    2025-07        272  1.862361e+10
19    2025-08        245  1.546896e+10
20    2025-09        197  1.041073e+10
21    2025-10         58  4.900102e+09
22    2025-11          3  1.356322e+08
23    2025-12          2  1.366003e+08


# TIME-SERIES DATASET ENGINEERING

In [4]:
# ============================================================
# STAGE 2 v3 — STABLE SEGMENT SHARE PANEL
# Plan-Level Only • Share-Based • Short Series Safe
# ============================================================

import numpy as np
import pandas as pd

# ============================================================
# ENSURE PLAN CODE EXISTS
# ============================================================

if "plan_code" not in df.columns:
    df["plan_code"] = "UNKNOWN"

df["plan_code"] = df["plan_code"].fillna("UNKNOWN")

# ============================================================
# BUILD MONTHLY PLAN-LEVEL PANEL
# ============================================================

seg_monthly = (
    df.groupby(["year_month", "plan_code"])
      .agg(
          frequency=("claim_id","count"),
          total_claim=("nominal_klaim_yang_disetujui","sum")
      )
      .reset_index()
      .sort_values(["plan_code","year_month"])
      .reset_index(drop=True)
)

# ============================================================
# MERGE GLOBAL TOTAL (UNTUK SHARE)
# ============================================================

global_monthly = (
    df.groupby("year_month")
      .agg(total_global=("nominal_klaim_yang_disetujui","sum"))
      .reset_index()
)

seg_monthly = seg_monthly.merge(global_monthly, on="year_month", how="left")

# ============================================================
# SHARE OF TOTAL (STABLE TARGET)
# ============================================================

seg_monthly["share_total"] = (
    seg_monthly["total_claim"] /
    seg_monthly["total_global"].replace(0,np.nan)
)

# ============================================================
# SMOOTH SHARE (SHORT SERIES SAFE)
# ============================================================

seg_monthly["share_roll3"] = (
    seg_monthly.groupby("plan_code")["share_total"]
    .transform(lambda x: x.rolling(3, min_periods=1).mean())
)

# ============================================================
# LOG DOMAIN (OPTIONAL)
# ============================================================

seg_monthly["log_total"] = np.log1p(seg_monthly["total_claim"])
seg_monthly["log_freq"]  = np.log1p(seg_monthly["frequency"])

# ============================================================
# LAG FEATURES (SAFE)
# ============================================================

for col in ["log_total","log_freq","share_roll3"]:
    seg_monthly[f"{col}_lag1"] = \
        seg_monthly.groupby("plan_code")[col].shift(1)
    
    seg_monthly[f"{col}_lag2"] = \
        seg_monthly.groupby("plan_code")[col].shift(2)

# ============================================================
# DROP EARLY NA (MINIMAL LOSS)
# ============================================================

seg_model = seg_monthly.dropna().reset_index(drop=True)

# ============================================================
# FINAL CHECK
# ============================================================

print("SEGMENT PANEL SHAPE:", seg_model.shape)
print("Unique plan codes:", seg_model["plan_code"].nunique())
print("Months per plan (min):", 
      seg_model.groupby("plan_code")["year_month"].nunique().min())

print("\nSTAGE 2 v3 — STABLE SEGMENT SHARE PANEL READY")


SEGMENT PANEL SHAPE: (62, 15)
Unique plan codes: 3
Months per plan (min): 19

STAGE 2 v3 — STABLE SEGMENT SHARE PANEL READY


# MODEL DEVELOPMENT

In [5]:
# ============================================================
# STAGE 3 FINAL — SHORT SERIES SAFE VERSION
# SERVICE + DAMPED + ENSEMBLE (NO SEASONAL)
# ============================================================

import numpy as np
import pandas as pd
from statsmodels.tsa.holtwinters import ExponentialSmoothing
import warnings
warnings.filterwarnings("ignore")

def mape(y_true, y_pred):
    y_true = np.array(y_true)
    y_pred = np.array(y_pred)
    mask = y_true != 0
    return np.mean(np.abs((y_true[mask] - y_pred[mask]) / y_true[mask])) * 100

# ==============================
# BUILD MONTHLY (SERVICE)
# ==============================

df["year_month"] = df["tanggal_pasien_masuk_rs"].dt.to_period("M")

monthly = (
    df.groupby("year_month")
      .agg(
          frequency=("claim_id","count"),
          total_claim=("nominal_klaim_yang_disetujui","sum")
      )
      .reset_index()
      .sort_values("year_month")
      .reset_index(drop=True)
)

monthly["severity"] = (
    monthly["total_claim"] /
    monthly["frequency"].replace(0,np.nan)
)

monthly["log_freq"]  = np.log1p(monthly["frequency"])
monthly["log_sev"]   = np.log1p(monthly["severity"])
monthly["log_total"] = np.log1p(monthly["total_claim"])

# ==============================
# SPLIT
# ==============================

train = monthly.iloc[:-5].copy()
valid = monthly.iloc[-5:].copy()

H = len(valid)

# ==============================
# MODEL FREQUENCY
# ==============================

model_freq = ExponentialSmoothing(
    train["log_freq"],
    trend="add",
    damped_trend=True,
    seasonal=None,
    initialization_method="estimated"
).fit()

# ==============================
# MODEL SEVERITY
# ==============================

model_sev = ExponentialSmoothing(
    train["log_sev"],
    trend="add",
    damped_trend=True,
    seasonal=None,
    initialization_method="estimated"
).fit()

# ==============================
# MODEL TOTAL DIRECT
# ==============================

model_total = ExponentialSmoothing(
    train["log_total"],
    trend="add",
    damped_trend=True,
    seasonal=None,
    initialization_method="estimated"
).fit()

# ==============================
# FORECAST
# ==============================

freq_forecast = np.expm1(model_freq.forecast(H))
sev_forecast  = np.expm1(model_sev.forecast(H))

total_direct = np.expm1(model_total.forecast(H))
total_mul    = freq_forecast * sev_forecast

# ENSEMBLE
total_forecast = 0.6 * total_direct + 0.4 * total_mul

# ==============================
# RESULTS
# ==============================

print("\n==============================")
print("MAPE Frequency :", round(mape(valid["frequency"], freq_forecast),4))
print("MAPE Severity  :", round(mape(valid["severity"], sev_forecast),4))
print("MAPE Total     :", round(mape(valid["total_claim"], total_forecast),4))
print("Estimated Score:", round(np.mean([
    mape(valid["frequency"], freq_forecast),
    mape(valid["severity"], sev_forecast),
    mape(valid["total_claim"], total_forecast)
]),4))
print("==============================")



MAPE Frequency : 5.9765
MAPE Severity  : 4.5584
MAPE Total     : 6.9296
Estimated Score: 5.8215


# TOTAL CLAIM OPTIMIZATION & VALIDATION, OPTUNA

In [6]:
# ============================================================
# STAGE 4 PRO — FULL KAGGLE OPTIMIZATION
# Optimize FREQ + SEV + TOTAL simultaneously
# Objective = EXACT Kaggle Score
# ============================================================

!pip install -q optuna

import optuna
import numpy as np
import pandas as pd
from statsmodels.tsa.holtwinters import ExponentialSmoothing
import warnings
warnings.filterwarnings("ignore")

# ============================================================
# METRIC
# ============================================================

def mape(y_true, y_pred):
    y_true = np.array(y_true)
    y_pred = np.array(y_pred)
    mask = y_true != 0
    return np.mean(np.abs((y_true[mask] - y_pred[mask]) / y_true[mask]))

def kaggle_score(freq_t, freq_p,
                 sev_t, sev_p,
                 total_t, total_p):
    return (
        mape(freq_t, freq_p) +
        mape(sev_t, sev_p) +
        mape(total_t, total_p)
    ) / 3

# ============================================================
# BUILD MONTHLY (SERVICE)
# ============================================================

df["year_month"] = df["tanggal_pasien_masuk_rs"].dt.to_period("M")

monthly = (
    df.groupby("year_month")
      .agg(
          frequency=("claim_id","count"),
          total_claim=("nominal_klaim_yang_disetujui","sum")
      )
      .reset_index()
      .sort_values("year_month")
      .reset_index(drop=True)
)

monthly["severity"] = (
    monthly["total_claim"] /
    monthly["frequency"].replace(0,np.nan)
)

monthly["log_freq"]  = np.log1p(monthly["frequency"])
monthly["log_sev"]   = np.log1p(monthly["severity"])
monthly["log_total"] = np.log1p(monthly["total_claim"])

train_full = monthly.iloc[:-4].copy()
valid_full = monthly.iloc[-4:].copy()

H = 4

# ============================================================
# OBJECTIVE
# ============================================================

def objective(trial):

    alpha = trial.suggest_float("alpha", 0.2, 0.9)
    beta  = trial.suggest_float("beta", 0.01, 0.3)
    phi   = trial.suggest_float("phi", 0.8, 0.99)

    sim_df = train_full.copy()

    freq_preds = []
    sev_preds = []
    total_preds = []

    for step in range(H):

        try:
            model_freq = ExponentialSmoothing(
                sim_df["log_freq"],
                trend="add",
                damped_trend=True,
                seasonal=None
            ).fit(
                smoothing_level=alpha,
                smoothing_trend=beta,
                damping_trend=phi,
                optimized=False
            )

            model_sev = ExponentialSmoothing(
                sim_df["log_sev"],
                trend="add",
                damped_trend=True,
                seasonal=None
            ).fit(
                smoothing_level=alpha,
                smoothing_trend=beta,
                damping_trend=phi,
                optimized=False
            )

            model_total = ExponentialSmoothing(
                sim_df["log_total"],
                trend="add",
                damped_trend=True,
                seasonal=None
            ).fit(
                smoothing_level=alpha,
                smoothing_trend=beta,
                damping_trend=phi,
                optimized=False
            )

            freq_fc = np.expm1(model_freq.forecast(1).iloc[0])
            sev_fc  = np.expm1(model_sev.forecast(1).iloc[0])
            total_direct = np.expm1(model_total.forecast(1).iloc[0])

        except:
            freq_fc = sim_df["frequency"].iloc[-1]
            sev_fc  = sim_df["severity"].iloc[-1]
            total_direct = sim_df["total_claim"].iloc[-1]

        total_mul = freq_fc * sev_fc
        total_final = 0.6 * total_direct + 0.4 * total_mul

        freq_preds.append(freq_fc)
        sev_preds.append(sev_fc)
        total_preds.append(total_final)

        new_row = {
            "year_month": None,
            "frequency": freq_fc,
            "severity": sev_fc,
            "total_claim": total_final,
            "log_freq": np.log1p(freq_fc),
            "log_sev": np.log1p(sev_fc),
            "log_total": np.log1p(total_final)
        }

        sim_df = pd.concat([sim_df, pd.DataFrame([new_row])], ignore_index=True)

    return kaggle_score(
        valid_full["frequency"], freq_preds,
        valid_full["severity"], sev_preds,
        valid_full["total_claim"], total_preds
    )

# ============================================================
# RUN OPTUNA
# ============================================================

study = optuna.create_study(direction="minimize")
study.optimize(objective, n_trials=150)

print("\nBest Params:", study.best_params)
print("Best 4M Kaggle Score:", round(study.best_value*100,4), "%")

[I 2026-02-19 04:45:04,098] A new study created in memory with name: no-name-b01f34f0-3b68-450f-9246-369af22d298e
[I 2026-02-19 04:45:04,124] Trial 0 finished with value: 0.055617530858484455 and parameters: {'alpha': 0.21947321672839393, 'beta': 0.11842101343321475, 'phi': 0.9624862332605874}. Best is trial 0 with value: 0.055617530858484455.
[I 2026-02-19 04:45:04,148] Trial 1 finished with value: 0.14859624406176888 and parameters: {'alpha': 0.4800518454685475, 'beta': 0.21438197878908738, 'phi': 0.9782342800424122}. Best is trial 0 with value: 0.055617530858484455.
[I 2026-02-19 04:45:04,171] Trial 2 finished with value: 0.10378922405364226 and parameters: {'alpha': 0.4624178407183691, 'beta': 0.07929891197542623, 'phi': 0.9516008755023037}. Best is trial 0 with value: 0.055617530858484455.
[I 2026-02-19 04:45:04,193] Trial 3 finished with value: 0.11963702918890702 and parameters: {'alpha': 0.5051033714367781, 'beta': 0.056183909590657465, 'phi': 0.805297730179027}. Best is trial 


Best Params: {'alpha': 0.20110931604981408, 'beta': 0.10772698958842039, 'phi': 0.9784622797245897}
Best 4M Kaggle Score: 5.1947 %


# TEST PREDICTION & KAGGLE SUBMISSION

In [7]:
# ============================================================
# STAGE 5 FINAL PRO — OPTIMIZED + ENSEMBLE + RECURSIVE
# MATCH STAGE 4 PRO
# ============================================================

import numpy as np
import pandas as pd
from statsmodels.tsa.holtwinters import ExponentialSmoothing
import warnings
warnings.filterwarnings("ignore")

BASE_PATH = "/kaggle/input/datasets/dimaspashaakrilian/dsc-itb/"
sample_sub = pd.read_csv(BASE_PATH + "sample_submission.csv")

# ==============================
# GANTI DENGAN PARAMETER TERBAIK DARI STAGE 4
# ==============================

BEST_PARAMS = {
    "alpha": 0.20,   # <-- ganti
    "beta": 0.15,    # <-- ganti
    "phi": 0.95      # <-- ganti
}

# ==============================
# BUILD MONTHLY (SERVICE)
# ==============================

df["year_month"] = df["tanggal_pasien_masuk_rs"].dt.to_period("M")

monthly = (
    df.groupby("year_month")
      .agg(
          frequency=("claim_id","count"),
          total_claim=("nominal_klaim_yang_disetujui","sum")
      )
      .reset_index()
      .sort_values("year_month")
      .reset_index(drop=True)
)

monthly["severity"] = (
    monthly["total_claim"] /
    monthly["frequency"].replace(0,np.nan)
)

monthly["log_freq"]  = np.log1p(monthly["frequency"])
monthly["log_sev"]   = np.log1p(monthly["severity"])
monthly["log_total"] = np.log1p(monthly["total_claim"])

# ==============================
# RECURSIVE FORECASTING
# ==============================

future_df = monthly.copy()
predictions = {}

sample_sub["year"]  = sample_sub["id"].str.split("_").str[0]
sample_sub["month"] = sample_sub["id"].str.split("_").str[1]
sample_sub["month_key"] = sample_sub["year"] + "-" + sample_sub["month"]

future_periods = (
    pd.PeriodIndex(sample_sub["month_key"], freq="M")
      .unique()
      .sort_values()
)

for period in future_periods:

    alpha = BEST_PARAMS["alpha"]
    beta  = BEST_PARAMS["beta"]
    phi   = BEST_PARAMS["phi"]

    # === FIT MODELS ===
    model_freq = ExponentialSmoothing(
        future_df["log_freq"],
        trend="add",
        damped_trend=True,
        seasonal=None
    ).fit(
        smoothing_level=alpha,
        smoothing_trend=beta,
        damping_trend=phi,
        optimized=False
    )

    model_sev = ExponentialSmoothing(
        future_df["log_sev"],
        trend="add",
        damped_trend=True,
        seasonal=None
    ).fit(
        smoothing_level=alpha,
        smoothing_trend=beta,
        damping_trend=phi,
        optimized=False
    )

    model_total = ExponentialSmoothing(
        future_df["log_total"],
        trend="add",
        damped_trend=True,
        seasonal=None
    ).fit(
        smoothing_level=alpha,
        smoothing_trend=beta,
        damping_trend=phi,
        optimized=False
    )

    # === FORECAST 1 STEP ===
    freq_fc = np.expm1(model_freq.forecast(1).iloc[0])
    sev_fc  = np.expm1(model_sev.forecast(1).iloc[0])
    total_direct = np.expm1(model_total.forecast(1).iloc[0])

    total_mul = freq_fc * sev_fc
    total_final = 0.6 * total_direct + 0.4 * total_mul

    # === SAVE PREDICTIONS ===
    key_total = f"{period.year}_{str(period.month).zfill(2)}_Total_Claim"
    key_freq  = f"{period.year}_{str(period.month).zfill(2)}_Claim_Frequency"
    key_sev   = f"{period.year}_{str(period.month).zfill(2)}_Claim_Severity"

    predictions[key_total] = total_final
    predictions[key_freq]  = freq_fc
    predictions[key_sev]   = sev_fc

    # === UPDATE FUTURE_DF (RECURSIVE) ===
    new_row = {
        "year_month": period,
        "frequency": freq_fc,
        "severity": sev_fc,
        "total_claim": total_final,
        "log_freq": np.log1p(freq_fc),
        "log_sev": np.log1p(sev_fc),
        "log_total": np.log1p(total_final)
    }

    future_df = pd.concat([future_df, pd.DataFrame([new_row])], ignore_index=True)

# ==============================
# BUILD SUBMISSION
# ==============================

submission = sample_sub.copy()
submission["value"] = submission["id"].map(predictions)
submission = submission[["id","value"]]

submission.to_csv("submission.csv", index=False)

print("Submission created — OPTIMIZED RECURSIVE ENSEMBLE VERSION")


Submission created — OPTIMIZED RECURSIVE ENSEMBLE VERSION


In [8]:
print(submission.head(9)) ## ganti payment buat validasi 

                        id         value
0  2025_08_Claim_Frequency  2.316477e+02
1   2025_08_Claim_Severity  5.284064e+07
2      2025_08_Total_Claim  1.224037e+10
3  2025_09_Claim_Frequency  2.314850e+02
4   2025_09_Claim_Severity  5.283280e+07
5      2025_09_Total_Claim  1.222997e+10
6  2025_10_Claim_Frequency  2.313305e+02
7   2025_10_Claim_Severity  5.282535e+07
8      2025_10_Total_Claim  1.222009e+10


In [9]:
import pandas as pd
import numpy as np

# =====================================
# BUILD BOTH AXIS
# =====================================

df["service_month"] = df["tanggal_pasien_masuk_rs"].dt.to_period("M")
df["payment_month"] = df["tanggal_pembayaran_klaim"].dt.to_period("M")

service_monthly = (
    df.groupby("service_month")
      .agg(freq=("claim_id","count"),
           total=("nominal_klaim_yang_disetujui","sum"))
      .reset_index()
      .sort_values("service_month")
)

payment_monthly = (
    df.groupby("payment_month")
      .agg(freq=("claim_id","count"),
           total=("nominal_klaim_yang_disetujui","sum"))
      .reset_index()
      .sort_values("payment_month")
)

print("===== BASIC INFO =====")
print("Service months:", service_monthly["service_month"].min(), "→", service_monthly["service_month"].max())
print("Payment months:", payment_monthly["payment_month"].min(), "→", payment_monthly["payment_month"].max())
print()

print("Total months (Service):", len(service_monthly))
print("Total months (Payment):", len(payment_monthly))
print()

# =====================================
# TAIL CHECK
# =====================================

print("===== LAST 6 MONTHS (SERVICE) =====")
print(service_monthly.tail(6))
print()

print("===== LAST 6 MONTHS (PAYMENT) =====")
print(payment_monthly.tail(6))
print()

# =====================================
# VOLATILITY CHECK
# =====================================

print("===== VOLATILITY =====")
print("Service freq std/mean:", 
      service_monthly["freq"].std() / service_monthly["freq"].mean())

print("Payment freq std/mean:", 
      payment_monthly["freq"].std() / payment_monthly["freq"].mean())
print()

# =====================================
# SEASONAL CHECK (AVG PER MONTH)
# =====================================

payment_monthly["month"] = payment_monthly["payment_month"].dt.month

seasonal = payment_monthly.groupby("month")["total"].mean()

print("===== AVG TOTAL PER CALENDAR MONTH =====")
print(seasonal.sort_values())


===== BASIC INFO =====
Service months: 2024-01 → 2025-07
Payment months: 2024-01 → 2025-12

Total months (Service): 19
Total months (Payment): 24

===== LAST 6 MONTHS (SERVICE) =====
   service_month  freq         total
13       2025-02   246  1.748054e+10
14       2025-03   230  1.367924e+10
15       2025-04   207  1.090143e+10
16       2025-05   234  1.190261e+10
17       2025-06   227  1.231230e+10
18       2025-07   260  1.322321e+10

===== LAST 6 MONTHS (PAYMENT) =====
   payment_month  freq         total
18       2025-07   272  1.862361e+10
19       2025-08   245  1.546896e+10
20       2025-09   197  1.041073e+10
21       2025-10    58  4.900102e+09
22       2025-11     3  1.356322e+08
23       2025-12     2  1.366003e+08

===== VOLATILITY =====
Service freq std/mean: 0.10975628930790748
Payment freq std/mean: 0.5146225591902012

===== AVG TOTAL PER CALENDAR MONTH =====
month
2     6.121878e+09
12    7.117807e+09
10    8.021042e+09
4     8.410073e+09
1     8.550421e+09
11    8.76