In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import f1_score
from catboost import CatBoostClassifier


In [2]:
train = pd.read_csv("/kaggle/input/arisdata/train_processed.csv")
test  = pd.read_csv("/kaggle/input/arisdata/test_processed.csv")

y = train["RiskFlag"].astype(int)
X = train.drop(columns=["RiskFlag", "ProfileID"])
test_X = test.drop(columns=["ProfileID"])

print(X.shape, y.mean())


(204277, 25) 0.1162783867004117


In [3]:
# imbalance ratio
pos = y.sum()
neg = len(y) - pos
w_pos = neg / pos

print("Class weight:", w_pos)


Class weight: 7.6000505199343245


In [4]:
params = {
    "iterations": 1800,
    "depth": 9,
    "learning_rate": 0.022,
    "l2_leaf_reg": 6,
    "border_count": 254,
    "loss_function": "Logloss",
    "eval_metric": "F1",
    "class_weights": [1.0, w_pos],
    "random_seed": 42,
    "verbose": 200,
    "thread_count": -1
}


In [5]:
kf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

oof_proba = np.zeros(len(X))
models = []

for tr, val in kf.split(X, y):

    model = CatBoostClassifier(**params)
    model.fit(
        X.iloc[tr], y.iloc[tr],
        eval_set=(X.iloc[val], y.iloc[val]),
        verbose=False
    )

    oof_proba[val] = model.predict_proba(X.iloc[val])[:, 1]
    models.append(model)

print("OOF done")


OOF done


In [6]:
best_f1 = 0
best_t = 0.5

for t in np.arange(0.05, 0.95, 0.01):
    preds = (oof_proba > t).astype(int)
    f1 = f1_score(y, preds)
    if f1 > best_f1:
        best_f1 = f1
        best_t = t

print("Best threshold:", best_t)
print("Best OOF F1:", best_f1)


Best threshold: 0.6000000000000002
Best OOF F1: 0.3627318240498756


In [7]:
test_proba = np.mean([m.predict_proba(test_X)[:, 1] for m in models], axis=0)
test_preds = (test_proba > best_t).astype(int)

submission = pd.DataFrame({
    "ProfileID": test["ProfileID"],
    "RiskFlag": test_preds
})

submission.to_csv("/kaggle/working/submission.csv", index=False)
print("Saved submission.csv")


Saved submission.csv
