In [1]:
import os
import numpy as np
import pandas as pd
import xgboost as xgb

# ----------------------------
# Load data (auto-detect)
# ----------------------------
# Root folder that is searched recursively; the name of the dataset
# sub-folder is not assumed, only that it holds both CSV files.
ROOT = "/kaggle/input/competitions"

train_path, test_path = None, None

# Take the first directory (in os.walk order) that contains BOTH files.
for dirpath, dirnames, filenames in os.walk(ROOT):
    if "train.csv" in filenames and "test.csv" in filenames:
        train_path = os.path.join(dirpath, "train.csv")
        test_path  = os.path.join(dirpath, "test.csv")
        break

# os.walk yields nothing for a missing ROOT and the loop can also end without
# a match; fail here with a clear message instead of handing None to read_csv.
if train_path is None or test_path is None:
    raise FileNotFoundError(
        f"No directory under {ROOT!r} contains both train.csv and test.csv"
    )

train = pd.read_csv(train_path)
test  = pd.read_csv(test_path)

# Column roles: the row identifier and the two columns that are excluded
# from the model inputs (they are turned into labels during training).
ID_COL = "event_id"
TIME_COL = "time_to_hit_hours"
EVENT_COL = "event"

# Every other column of the training frame is used as a feature.
NON_FEATURE_COLS = (ID_COL, TIME_COL, EVENT_COL)
FEATURES = [col for col in train.columns if col not in NON_FEATURE_COLS]

n_train = len(train)

# Simple encoding
# Train and test rows are stacked before one-hot encoding so both splits end
# up with the same dummy columns (dummy_na=True requests NaN indicator columns).
X_all = pd.get_dummies(
    pd.concat([train[FEATURES], test[FEATURES]]),
    dummy_na=True,
)

# Remaining missing values are filled with each column's median, computed
# over the stacked train+test frame.
column_medians = X_all.median(numeric_only=True)
X_all = X_all.fillna(column_medians)

# Split back by position: the first n_train rows came from train.
X_train, X_test = X_all.iloc[:n_train], X_all.iloc[n_train:]

# ----------------------------
# Train 4 binary models
# ----------------------------
# One independent classifier per horizon (in hours). Column i of `probs`
# stores the predicted positive-class probability for HORIZONS[i].
HORIZONS = [12, 24, 48, 72]
probs = np.zeros((len(test), len(HORIZONS)))

# Hyper-parameters shared by every horizon model.
XGB_PARAMS = dict(
    n_estimators=200,
    max_depth=3,
    learning_rate=0.05,
    subsample=0.8,
    colsample_bytree=0.8,
    random_state=42,
    eval_metric="logloss",
)

for i, h in enumerate(HORIZONS):
    print(f"Training model for {h}h...")

    # Label is 1 only when the event flag is 1 AND the recorded time is <= h.
    # NOTE(review): rows with event != 1 are labelled 0 at every horizon, even
    # when their recorded time is <= h -- confirm this is the intended handling.
    hit_within_horizon = train[EVENT_COL].eq(1) & train[TIME_COL].le(h)
    y = hit_within_horizon.astype(int)

    model = xgb.XGBClassifier(**XGB_PARAMS)
    model.fit(X_train, y)

    # Keep only the probability of the positive class (second column).
    probs[:, i] = model.predict_proba(X_test)[:, 1]

# Enforce monotonicity
# A running maximum across horizons guarantees a longer horizon never gets a
# lower probability than a shorter one; the result is then bounded to [0, 1].
probs = np.clip(np.maximum.accumulate(probs, axis=1), 0, 1)

# ----------------------------
# Build CSV
# ----------------------------
# Output layout: the id column followed by one probability column per
# horizon, filled from columns 0..3 of `probs` in that order.
prob_column_names = ["prob_12h", "prob_24h", "prob_48h", "prob_72h"]

submission_data = {"event_id": test[ID_COL]}
for position, column_name in enumerate(prob_column_names):
    submission_data[column_name] = probs[:, position]

sub = pd.DataFrame(submission_data)

# index=False keeps the DataFrame index out of the written file.
sub.to_csv("submission.csv", index=False)

print("\nDone. Preview:")
display(sub.head(20))

Training model for 12h...
Training model for 24h...
Training model for 48h...
Training model for 72h...

Done. Preview:


Unnamed: 0,event_id,prob_12h,prob_24h,prob_48h,prob_72h
0,10662602,0.004193,0.007442,0.007442,0.007853
1,13353600,0.465454,0.985049,0.987638,0.987638
2,13942327,0.0027,0.004718,0.004718,0.007946
3,16112781,0.831618,0.950733,0.98485,0.98485
4,17132808,0.011127,0.011127,0.011166,0.011166
5,17445696,0.007191,0.007191,0.007191,0.007191
6,17599982,0.005685,0.0072,0.0072,0.007508
7,18750374,0.453159,0.930247,0.959829,0.976025
8,21365245,0.004472,0.004472,0.00578,0.005959
9,23634840,0.530814,0.968994,0.968994,0.977476
