In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/datasets/dimaspashaakrilian/dsc-itb/Data_Klaim.csv
/kaggle/input/datasets/dimaspashaakrilian/dsc-itb/sample_submission.csv
/kaggle/input/datasets/dimaspashaakrilian/dsc-itb/Data_Polis.csv


# DATA FOUNDATION

In [2]:
# ============================================================
# STAGE 1 — DATA FOUNDATION (ROBUST KAGGLE VERSION)
# ============================================================

import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings("ignore")

# ============================================================
# 1. LOAD DATA (KAGGLE SAFE PATH)
# ============================================================

KLAIM_PATH = "/kaggle/input/datasets/dimaspashaakrilian/dsc-itb/Data_Klaim.csv"
POLIS_PATH = "/kaggle/input/datasets/dimaspashaakrilian/dsc-itb/Data_Polis.csv"

klaim = pd.read_csv(KLAIM_PATH)
polis = pd.read_csv(POLIS_PATH)

print("Initial Klaim shape :", klaim.shape)
print("Initial Polis shape :", polis.shape)

# ============================================================
# 2. STANDARDIZE COLUMN NAMES
# ============================================================

def clean_columns(df):
    df.columns = (
        df.columns
        .str.strip()
        .str.lower()
        .str.replace(" ", "_", regex=False)
        .str.replace("/", "_", regex=False)
        .str.replace("-", "_", regex=False)
    )
    return df

klaim = clean_columns(klaim)
polis = clean_columns(polis)

# ============================================================
# 3. REMOVE DUPLICATES
# ============================================================

klaim = klaim.drop_duplicates().reset_index(drop=True)
polis = polis.drop_duplicates().reset_index(drop=True)

# ============================================================
# 4. CONVERT DATE COLUMNS (ONLY IF EXISTS)
# ============================================================

date_columns = [
    "tanggal_pembayaran_klaim",
    "tanggal_pasien_masuk_rs",
    "tanggal_pasien_keluar_rs",
    "tanggal_lahir",
    "tanggal_efektif_polis"
]

for col in date_columns:
    if col in klaim.columns:
        klaim[col] = pd.to_datetime(klaim[col], errors="coerce")
    if col in polis.columns:
        polis[col] = pd.to_datetime(polis[col], errors="coerce")

# ============================================================
# 5. CLEAN INVALID DATA (SAFE CHECK)
# ============================================================

if "nomor_polis" in klaim.columns:
    klaim = klaim.dropna(subset=["nomor_polis"])

if "tanggal_pembayaran_klaim" in klaim.columns:
    klaim = klaim.dropna(subset=["tanggal_pembayaran_klaim"])

if "nominal_klaim_yang_disetujui" in klaim.columns:
    klaim = klaim[klaim["nominal_klaim_yang_disetujui"] > 0]

# Valid hospitalization dates
if (
    "tanggal_pasien_masuk_rs" in klaim.columns and
    "tanggal_pasien_keluar_rs" in klaim.columns
):
    klaim = klaim[
        klaim["tanggal_pasien_keluar_rs"] >=
        klaim["tanggal_pasien_masuk_rs"]
    ]

print("Klaim shape after strict cleaning :", klaim.shape)

# ============================================================
# 6. MERGE POLIS + KLAIM
# ============================================================

if "nomor_polis" in klaim.columns and "nomor_polis" in polis.columns:
    df = klaim.merge(polis, on="nomor_polis", how="left")
else:
    df = klaim.copy()

print("Merged shape :", df.shape)

# ============================================================
# 7. BASIC FEATURE ENGINEERING (SAFE)
# ============================================================

# Age
if "tanggal_pasien_masuk_rs" in df.columns and "tanggal_lahir" in df.columns:
    df["age"] = (
        (df["tanggal_pasien_masuk_rs"] - df["tanggal_lahir"]).dt.days / 365
    )

# Tenure
if "tanggal_pasien_masuk_rs" in df.columns and "tanggal_efektif_polis" in df.columns:
    df["tenure_days"] = (
        (df["tanggal_pasien_masuk_rs"] - df["tanggal_efektif_polis"]).dt.days
    )

# Length of Stay
if "tanggal_pasien_keluar_rs" in df.columns and "tanggal_pasien_masuk_rs" in df.columns:
    df["los"] = (
        (df["tanggal_pasien_keluar_rs"] - df["tanggal_pasien_masuk_rs"]).dt.days
    )

# Claim Ratio
if (
    "nominal_klaim_yang_disetujui" in df.columns and
    "nominal_biaya_rs_yang_terjadi" in df.columns
):
    df["claim_ratio"] = (
        df["nominal_klaim_yang_disetujui"] /
        df["nominal_biaya_rs_yang_terjadi"]
    )

# Year-Month
if "tanggal_pembayaran_klaim" in df.columns:
    df["year_month"] = df["tanggal_pembayaran_klaim"].dt.to_period("M")

# ============================================================
# 8. FINAL VALIDATION CHECK
# ============================================================

print("\nData Info:")
print(df.info())

print("\nMissing Values (Top 10):")
print(df.isna().sum().sort_values(ascending=False).head(10))

if "year_month" in df.columns:
    print("\nUnique Months:", df["year_month"].nunique())

print("\nSTAGE 1 COMPLETE — SAFE & READY FOR STAGE 2")


Initial Klaim shape : (4627, 13)
Initial Polis shape : (4096, 6)
Klaim shape after strict cleaning : (4579, 13)
Merged shape : (4579, 18)

Data Info:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4579 entries, 0 to 4578
Data columns (total 23 columns):
 #   Column                         Non-Null Count  Dtype         
---  ------                         --------------  -----         
 0   claim_id                       4579 non-null   object        
 1   nomor_polis                    4579 non-null   object        
 2   reimburse_cashless             4579 non-null   object        
 3   inpatient_outpatient           4544 non-null   object        
 4   icd_diagnosis                  4575 non-null   object        
 5   icd_description                4575 non-null   object        
 6   status_klaim                   4579 non-null   object        
 7   tanggal_pembayaran_klaim       4579 non-null   datetime64[ns]
 8   tanggal_pasien_masuk_rs        4579 non-null   datetime64[ns]
 9   t

# TIME-SERIES DATASET ENGINEERING

In [3]:
# ============================================================
# STAGE 2 — TIME-SERIES DATASET ENGINEERING (LEAKAGE SAFE)
# ============================================================

import pandas as pd
import numpy as np

# ============================================================
# 1. AGREGASI BULANAN (SAFE CHECK)
# ============================================================

agg_dict = {
    "nominal_klaim_yang_disetujui": "sum"
}

if "claim_id" in df.columns:
    agg_dict["claim_id"] = "count"

if "age" in df.columns:
    agg_dict["age"] = "mean"

if "tenure_days" in df.columns:
    agg_dict["tenure_days"] = "mean"

if "los" in df.columns:
    agg_dict["los"] = "mean"

if "claim_ratio" in df.columns:
    agg_dict["claim_ratio"] = "mean"

monthly = (
    df.groupby("year_month")
      .agg(agg_dict)
      .reset_index()
)

# Rename safely
if "claim_id" in monthly.columns:
    monthly = monthly.rename(columns={"claim_id": "frequency"})
else:
    monthly["frequency"] = (
        df.groupby("year_month").size().values
    )

monthly = monthly.rename(
    columns={
        "nominal_klaim_yang_disetujui": "total_claim",
        "age": "avg_age",
        "tenure_days": "avg_tenure",
        "los": "avg_los",
        "claim_ratio": "avg_claim_ratio"
    }
)

# ============================================================
# 2. SEVERITY
# ============================================================

monthly["severity"] = monthly["total_claim"] / monthly["frequency"]

# ============================================================
# 3. SORT & TIME FEATURES
# ============================================================

monthly = monthly.sort_values("year_month").reset_index(drop=True)
monthly["year_month_dt"] = monthly["year_month"].dt.to_timestamp()

monthly["month_index"] = np.arange(len(monthly))
monthly["month"] = monthly["year_month_dt"].dt.month

# Optional: cyclic seasonality (lebih kuat)
monthly["month_sin"] = np.sin(2 * np.pi * monthly["month"] / 12)
monthly["month_cos"] = np.cos(2 * np.pi * monthly["month"] / 12)

# ============================================================
# 4. LAG FEATURES
# ============================================================

monthly["freq_lag1"] = monthly["frequency"].shift(1)
monthly["freq_lag2"] = monthly["frequency"].shift(2)
monthly["freq_lag3"] = monthly["frequency"].shift(3)

monthly["sev_lag1"] = monthly["severity"].shift(1)
monthly["sev_lag2"] = monthly["severity"].shift(2)

monthly["total_lag1"] = monthly["total_claim"].shift(1)

# ============================================================
# 5. ROLLING FEATURES (LEAKAGE SAFE)
# ============================================================

monthly["freq_roll3"] = (
    monthly["frequency"].shift(1).rolling(3).mean()
)

monthly["freq_roll6"] = (
    monthly["frequency"].shift(1).rolling(6).mean()
)

monthly["sev_roll3"] = (
    monthly["severity"].shift(1).rolling(3).mean()
)

monthly["total_roll3"] = (
    monthly["total_claim"].shift(1).rolling(3).mean()
)

# ============================================================
# 6. GROWTH FEATURES (LEAKAGE SAFE)
# ============================================================

monthly["freq_growth"] = (
    monthly["frequency"].pct_change().shift(1)
)

monthly["sev_growth"] = (
    monthly["severity"].pct_change().shift(1)
)

monthly["total_growth"] = (
    monthly["total_claim"].pct_change().shift(1)
)

# ============================================================
# 7. DROP NaN FROM LAG/ROLLING
# ============================================================

monthly = monthly.dropna().reset_index(drop=True)

# ============================================================
# 8. FINAL CHECK
# ============================================================

print("Final Monthly Dataset Shape:", monthly.shape)

print("\nColumns:")
print(monthly.columns.tolist())

print("\nPreview:")
print(monthly.head())

print("\nTime Range:")
print(monthly["year_month"].min(), "to", monthly["year_month"].max())

print("\nSTAGE 2 COMPLETE — LEAKAGE SAFE & READY FOR MODEL DEVELOPMENT")


Final Monthly Dataset Shape: (18, 26)

Columns:
['year_month', 'total_claim', 'frequency', 'avg_age', 'avg_tenure', 'avg_los', 'avg_claim_ratio', 'severity', 'year_month_dt', 'month_index', 'month', 'month_sin', 'month_cos', 'freq_lag1', 'freq_lag2', 'freq_lag3', 'sev_lag1', 'sev_lag2', 'total_lag1', 'freq_roll3', 'freq_roll6', 'sev_roll3', 'total_roll3', 'freq_growth', 'sev_growth', 'total_growth']

Preview:
  year_month   total_claim  frequency    avg_age    avg_tenure   avg_los  \
0    2024-07  1.159773e+10        202  54.335874  19832.594059  1.608911   
1    2024-08  1.895989e+10        283  54.423322  19864.512367  1.572438   
2    2024-09  1.484250e+10        248  54.521310  19900.278226  1.487903   
3    2024-10  1.114198e+10        241  54.611107  19933.053942  1.132780   
4    2024-11  1.740396e+10        365  54.719670  19972.679452  1.117808   

   avg_claim_ratio      severity year_month_dt  month_index  ...  \
0         0.899471  5.741451e+07    2024-07-01            6  .

# MODEL DEVELOPMENT

In [5]:
# ============================================================
# STAGE 3 — OPTIMIZED MODEL DEVELOPMENT (REVISED SAFE)
# ============================================================

import numpy as np
import pandas as pd
import lightgbm as lgb
from catboost import CatBoostRegressor

# ============================================================
# 1. TIME-BASED SPLIT
# ============================================================

train = monthly[monthly["year_month"] < "2025-04"].copy()
valid = monthly[(monthly["year_month"] >= "2025-04") & 
                (monthly["year_month"] < "2025-08")].copy()

print("Train size:", len(train))
print("Valid size:", len(valid))

if len(valid) == 0:
    raise ValueError("Validation set kosong. Cek rentang tanggal.")

# ============================================================
# 2. FEATURE SET (LEAKAGE SAFE + SEASONAL)
# ============================================================

features = [
    "month_index",
    "month",
    "month_sin",
    "month_cos",
    "freq_lag1",
    "freq_lag2",
    "sev_lag1",
    "total_lag1",
    "freq_roll3",
    "sev_roll3"
]

# Pastikan fitur ada
features = [f for f in features if f in monthly.columns]

X_train = train[features]
X_valid = valid[features]

# ============================================================
# 3. MODEL FREQUENCY (LOG1P)
# ============================================================

y_train_freq = np.log1p(train["frequency"])
y_valid_freq = valid["frequency"]

mmodel_freq = lgb.LGBMRegressor(
    objective="regression",
    n_estimators=200,
    learning_rate=0.05,
    num_leaves=7,
    max_depth=3,
    min_child_samples=2,
    random_state=42
)

model_freq.fit(
    X_train,
    y_train_freq,
    eval_set=[(X_valid, np.log1p(y_valid_freq))],
    eval_metric="l2"
)

freq_pred_valid = np.expm1(model_freq.predict(X_valid))
freq_pred_valid = np.clip(freq_pred_valid, 1, None)

# ============================================================
# 4. MODEL SEVERITY (LOG1P)
# ============================================================

y_train_sev = np.log1p(train["severity"])
y_valid_sev = valid["severity"]

model_sev = CatBoostRegressor(
    iterations=200,
    learning_rate=0.05,
    depth=3,
    l2_leaf_reg=5,
    random_seed=42,
    verbose=False
)

model_sev.fit(X_train, y_train_sev)

sev_pred_valid = np.expm1(model_sev.predict(X_valid))
sev_pred_valid = np.clip(sev_pred_valid, 1, None)

# ============================================================
# 5. TOTAL CLAIM (ACTUARIAL)
# ============================================================

total_pred_valid = freq_pred_valid * sev_pred_valid

# ============================================================
# 6. SAFE MAPE
# ============================================================

def safe_mape(y_true, y_pred):
    y_true = np.array(y_true)
    y_pred = np.array(y_pred)
    mask = y_true != 0
    return np.mean(np.abs((y_true[mask] - y_pred[mask]) / y_true[mask]))

mape_freq = safe_mape(valid["frequency"], freq_pred_valid)
mape_sev = safe_mape(valid["severity"], sev_pred_valid)
mape_total = safe_mape(valid["total_claim"], total_pred_valid)

final_score = (mape_freq + mape_sev + mape_total) / 3

print("\nMAPE Frequency :", round(mape_freq, 4))
print("MAPE Severity  :", round(mape_sev, 4))
print("MAPE Total     :", round(mape_total, 4))
print("Final Score    :", round(final_score, 4))

print("\nSTAGE 3 COMPLETE — STABLE VERSION")


Train size: 9
Valid size: 4
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001027 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 40
[LightGBM] [Info] Number of data points in the train set: 9, number of used features: 10
[LightGBM] [Info] Start training from score 5.546629

MAPE Frequency : 0.1573
MAPE Severity  : 0.2643
MAPE Total     : 0.4501
Final Score    : 0.2906

STAGE 3 COMPLETE — STABLE VERSION


# TOTAL CLAIM OPTIMIZATION & VALIDATION

In [6]:
# ============================================================
# STAGE 4 — TOTAL CLAIM OPTIMIZATION & VALIDATION (REVISED)
# ============================================================

import numpy as np
import lightgbm as lgb

# ============================================================
# 1. SAFE MAPE (SAME AS STAGE 3)
# ============================================================

def safe_mape(y_true, y_pred):
    y_true = np.array(y_true)
    y_pred = np.array(y_pred)
    mask = y_true != 0
    return np.mean(np.abs((y_true[mask] - y_pred[mask]) / y_true[mask]))

# ============================================================
# 2. ACTUARIAL APPROACH
# ============================================================

total_pred_valid_actuarial = freq_pred_valid * sev_pred_valid

# ============================================================
# 3. DIRECT TOTAL MODEL (TWEEDIE)
# ============================================================

features = [
    "month_index",
    "month",
    "month_sin",
    "month_cos",
    "freq_lag1",
    "freq_lag2",
    "sev_lag1",
    "total_lag1",
    "freq_roll3",
    "sev_roll3"
]

features = [f for f in features if f in monthly.columns]

X_train = train[features]
X_valid = valid[features]

y_train_total = train["total_claim"]
y_valid_total = valid["total_claim"]

model_total = lgb.LGBMRegressor(
    objective="tweedie",
    tweedie_variance_power=1.5,
    n_estimators=200,
    learning_rate=0.05,
    num_leaves=7,
    max_depth=3,
    min_child_samples=2,
    random_state=42
)

model_total.fit(X_train, y_train_total)

total_pred_valid_tweedie = model_total.predict(X_valid)
total_pred_valid_tweedie = np.clip(total_pred_valid_tweedie, 1, None)

# ============================================================
# 4. BLEND MODEL (STABILIZER)
# ============================================================

total_pred_valid_blend = (
    0.6 * total_pred_valid_actuarial +
    0.4 * total_pred_valid_tweedie
)

# ============================================================
# 5. EVALUATION
# ============================================================

mape_total_actuarial = safe_mape(y_valid_total, total_pred_valid_actuarial)
mape_total_tweedie   = safe_mape(y_valid_total, total_pred_valid_tweedie)
mape_total_blend     = safe_mape(y_valid_total, total_pred_valid_blend)

print("\nMAPE Total (Actuarial) :", round(mape_total_actuarial, 4))
print("MAPE Total (Tweedie)   :", round(mape_total_tweedie, 4))
print("MAPE Total (Blend)     :", round(mape_total_blend, 4))

# ============================================================
# 6. SELECT BEST METHOD
# ============================================================

mape_dict = {
    "Actuarial": mape_total_actuarial,
    "Tweedie": mape_total_tweedie,
    "Blend": mape_total_blend
}

best_method = min(mape_dict, key=mape_dict.get)

if best_method == "Actuarial":
    best_total_pred_valid = total_pred_valid_actuarial
elif best_method == "Tweedie":
    best_total_pred_valid = total_pred_valid_tweedie
else:
    best_total_pred_valid = total_pred_valid_blend

print("\nBest Total Method:", best_method)

# ============================================================
# 7. FINAL COMPETITION SCORE
# ============================================================

mape_freq_final = safe_mape(valid["frequency"], freq_pred_valid)
mape_sev_final  = safe_mape(valid["severity"], sev_pred_valid)
mape_total_final = safe_mape(y_valid_total, best_total_pred_valid)

final_score = (mape_freq_final + mape_sev_final + mape_total_final) / 3

print("\nFinal Competition Score")
print("MAPE Frequency :", round(mape_freq_final, 4))
print("MAPE Severity  :", round(mape_sev_final, 4))
print("MAPE Total     :", round(mape_total_final, 4))
print("Final Score    :", round(final_score, 4))

print("\nSTAGE 4 COMPLETE — OPTIMAL METHOD SELECTED")


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000054 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 40
[LightGBM] [Info] Number of data points in the train set: 9, number of used features: 10
[LightGBM] [Info] Start training from score 23.389863

MAPE Total (Actuarial) : 0.4501
MAPE Total (Tweedie)   : 0.419
MAPE Total (Blend)     : 0.4377

Best Total Method: Tweedie

Final Competition Score
MAPE Frequency : 0.1573
MAPE Severity  : 0.2643
MAPE Total     : 0.419
Final Score    : 0.2802

STAGE 4 COMPLETE — OPTIMAL METHOD SELECTED


# TEST PREDICTION & KAGGLE SUBMISSION

In [7]:
import pandas as pd
import numpy as np

# ============================================================
# LOAD SAMPLE SUBMISSION
# ============================================================

BASE_PATH = "/kaggle/input/datasets/dimaspashaakrilian/dsc-itb/"
sample_sub = pd.read_csv(BASE_PATH + "sample_submission.csv")

# ============================================================
# EXTRACT FUTURE MONTHS FROM SAMPLE
# ============================================================

sample_sub["year"]  = sample_sub["id"].str.split("_").str[0]
sample_sub["month"] = sample_sub["id"].str.split("_").str[1]

sample_sub["month_key"] = sample_sub["year"] + "_" + sample_sub["month"]

unique_months = sample_sub["month_key"].unique()

future_df = monthly.copy().sort_values("year_month").reset_index(drop=True)

predictions = {}

# ============================================================
# FEATURE LIST (SYNC WITH STAGE 3 & 4)
# ============================================================

features = [
    "month_index",
    "month",
    "month_sin",
    "month_cos",
    "freq_lag1",
    "freq_lag2",
    "sev_lag1",
    "total_lag1",
    "freq_roll3",
    "sev_roll3"
]

features = [f for f in features if f in monthly.columns]

# ============================================================
# RECURSIVE FORECAST
# ============================================================

for m in unique_months:

    year, month = m.split("_")
    period = pd.Period(f"{year}-{month}", freq="M")

    last_row = future_df.iloc[-1]

    new_row = {}
    new_row["year_month"] = period
    new_row["year_month_dt"] = period.to_timestamp()
    new_row["month_index"] = last_row["month_index"] + 1
    new_row["month"] = int(month)

    # Seasonality
    new_row["month_sin"] = np.sin(2 * np.pi * int(month) / 12)
    new_row["month_cos"] = np.cos(2 * np.pi * int(month) / 12)

    # Lag features
    new_row["freq_lag1"] = last_row["frequency"]
    new_row["freq_lag2"] = future_df.iloc[-2]["frequency"] if len(future_df) > 1 else last_row["frequency"]
    new_row["sev_lag1"] = last_row["severity"]
    new_row["total_lag1"] = last_row["total_claim"]

    new_row["freq_roll3"] = future_df["frequency"].tail(3).mean()
    new_row["sev_roll3"]  = future_df["severity"].tail(3).mean()

    temp = pd.DataFrame([new_row])
    X_temp = temp[features]

    # ===============================
    # PREDICT (USE EXPM1 NOT EXP)
    # ===============================

    freq_pred = np.expm1(model_freq.predict(X_temp))[0]
    sev_pred  = np.expm1(model_sev.predict(X_temp))[0]

    freq_pred = max(freq_pred, 1)
    sev_pred  = max(sev_pred, 1)

    total_actuarial = freq_pred * sev_pred

    # Optional: if Stage 4 selected Tweedie or Blend
    if "best_method" in globals():
        if best_method == "Tweedie":
            total_pred = max(model_total.predict(X_temp)[0], 1)
        elif best_method == "Blend":
            total_tweedie = max(model_total.predict(X_temp)[0], 1)
            total_pred = 0.6 * total_actuarial + 0.4 * total_tweedie
        else:
            total_pred = total_actuarial
    else:
        total_pred = total_actuarial

    new_row["frequency"] = freq_pred
    new_row["severity"] = sev_pred
    new_row["total_claim"] = total_pred

    future_df = pd.concat(
        [future_df, pd.DataFrame([new_row])],
        ignore_index=True
    )

    # Save using correct format YYYY_MM
    predictions[f"{m}_Claim_Frequency"] = freq_pred
    predictions[f"{m}_Claim_Severity"]  = sev_pred
    predictions[f"{m}_Total_Claim"]     = total_pred

# ============================================================
# BUILD FINAL SUBMISSION
# ============================================================

submission = sample_sub.copy()
submission["value"] = submission["id"].map(predictions)

missing = submission["value"].isna().sum()

if missing > 0:
    print("ERROR: Ada ID tidak cocok sebanyak:", missing)
else:
    print("All IDs matched perfectly.")

submission = submission[["id", "value"]]
submission.to_csv("submission.csv", index=False)

print("\nSubmission file created successfully.")
print(submission.head(9))


All IDs matched perfectly.

Submission file created successfully.
                        id         value
0  2025_08_Claim_Frequency  2.372802e+02
1   2025_08_Claim_Severity  6.281779e+07
2      2025_08_Total_Claim  1.287515e+10
3  2025_09_Claim_Frequency  2.343247e+02
4   2025_09_Claim_Severity  5.966099e+07
5      2025_09_Total_Claim  1.141423e+10
6  2025_10_Claim_Frequency  2.398048e+02
7   2025_10_Claim_Severity  4.904400e+07
8      2025_10_Total_Claim  1.273590e+10
