In [3]:
import pandas as pd
import numpy as np
from catboost import CatBoostClassifier
from sklearn.model_selection import StratifiedKFold

# Load the raw training and test tables from the current working directory.
train, test = pd.read_csv("train.csv"), pd.read_csv("test.csv")

# ----------------------
# Feature engineering
# ----------------------
# (x, y) offset contributed by one route move; route_features sums these to
# get the net displacement of a route.
dir_map = {
    "U": (0, 1),
    "D": (0, -1),
    "L": (-1, 0),
    "R": (1, 0),
}

# (x, y) components keyed by compass-style direction codes (N and E positive).
# NOTE(review): not referenced in the visible part of this file - confirm
# whether it is still needed.
wind_map = {
    "N": (0, 1),
    "S": (0, -1),
    "E": (1, 0),
    "W": (-1, 0),
    "NE": (1, 1),
    "NW": (-1, 1),
    "SE": (1, -1),
    "SW": (-1, -1),
}

def parse_wind(token):
    """Split a wind token of the form ``<dir>-<speed>/<gust>`` into its parts.

    Args:
        token: String with exactly one "-" separating the direction from the
            numeric part, and exactly one "/" separating speed from gust
            (for example ``"NE-12/20"``).

    Returns:
        Tuple ``(direction, speed, gust)`` where direction is a string and
        speed and gust are ints.

    Raises:
        ValueError: if the token does not have exactly the two separators
            described above, or a numeric part is not a valid integer.
    """
    direction, speed_gust = token.split("-")
    speed, gust = (int(part) for part in speed_gust.split("/"))
    return direction, speed, gust

def route_features(route):
    """Summarise a route string of single-character moves as numeric features.

    Args:
        route: String of moves; every character must be a key of ``dir_map``
            (``U``, ``D``, ``L`` or ``R``). An empty string is accepted and
            yields all-zero features.

    Returns:
        dict with:
            route_len     number of moves
            turns         count of positions where a move differs from the
                          previous one
            turn_density  turns / route_len (0.0 for an empty route)
            U, D, L, R    occurrences of each move
            dx, dy        net displacement (sum of the per-move offsets)
            manhattan     |dx| + |dy|

    Raises:
        KeyError: if the route contains a character that is not in ``dir_map``.
    """
    total = len(route)
    steps = [dir_map[c] for c in route]
    # Sum each component directly instead of unpacking zip(*steps): the unpack
    # fails with ValueError when the route is empty.
    dx = sum(x for x, _ in steps)
    dy = sum(y for _, y in steps)
    counts = {d: route.count(d) for d in "UDLR"}
    # Compare each move with its predecessor.
    turns = sum(prev != cur for prev, cur in zip(route, route[1:]))
    return {
        "route_len": total,
        "turns": turns,
        # Guard the division so an empty route does not raise ZeroDivisionError.
        "turn_density": turns / total if total else 0.0,
        "U": counts["U"],
        "D": counts["D"],
        "L": counts["L"],
        "R": counts["R"],
        "dx": dx,
        "dy": dy,
        "manhattan": abs(dx) + abs(dy),
    }

def alt_features(alt):
    """Summarise a dash-separated altitude profile.

    Args:
        alt: String of integer altitude samples joined by "-". Because "-" is
            the separator, negative samples cannot be represented.

    Returns:
        dict with the mean, max and min of the samples, plus the sum of all
        upward changes between consecutive samples (``climb_total``) and the
        sum of all downward changes reported as a positive number
        (``descent_total``). Each total is 0 when there is no such change.

    Raises:
        ValueError: if any piece of the profile is not a valid integer.
    """
    samples = np.array([int(piece) for piece in alt.split("-")])
    steps = np.diff(samples)
    rises = steps[steps > 0]
    drops = steps[steps < 0]
    # Fall back to a plain 0 when there are no rises / drops at all.
    climb = rises.sum() if rises.size else 0
    descent = -drops.sum() if drops.size else 0
    return {
        "alt_mean": samples.mean(),
        "alt_max": samples.max(),
        "alt_min": samples.min(),
        "climb_total": climb,
        "descent_total": descent,
    }

def build(df):
    """Return a copy of *df* with the raw string columns expanded into features.

    Adds, in this order:
      * ``wind_dir``, ``wind_speed``, ``wind_gust`` parsed from ``wind_token``
        by ``parse_wind``, and ``gust_ratio`` = gust / (speed + 1);
      * the columns returned by ``route_features`` for ``route_turns``;
      * the columns returned by ``alt_features`` for ``alt_profile``;
      * ``slot_sin`` / ``slot_cos``, a cyclic encoding of ``slot_15min`` with
        period 96 (presumably the number of 15-minute slots in a day).

    The three raw string columns are dropped from the result.

    Args:
        df: DataFrame with ``wind_token``, ``route_turns``, ``alt_profile``
            and ``slot_15min`` columns. It is not modified.

    Returns:
        A new DataFrame carrying the engineered columns.
    """
    df = df.copy()

    wind = [parse_wind(token) for token in df["wind_token"]]
    df["wind_dir"] = [parts[0] for parts in wind]
    df["wind_speed"] = [parts[1] for parts in wind]
    df["wind_gust"] = [parts[2] for parts in wind]
    # The +1 keeps the ratio finite when the reported speed is 0.
    df["gust_ratio"] = df["wind_gust"] / (df["wind_speed"] + 1)

    # Build each feature frame in a single constructor call from the per-row
    # dicts. Expanding with `.apply(pd.Series)` creates one Series per row,
    # which is much slower and upcasts the integer features to float64.
    route_df = pd.DataFrame(
        [route_features(route) for route in df["route_turns"]],
        index=df.index,
    )
    alt_df = pd.DataFrame(
        [alt_features(alt) for alt in df["alt_profile"]],
        index=df.index,
    )
    df = pd.concat([df, route_df, alt_df], axis=1)

    # Sine/cosine pair so that the last slot of the cycle sits next to slot 0.
    slot_angle = 2 * np.pi * df["slot_15min"] / 96
    df["slot_sin"] = np.sin(slot_angle)
    df["slot_cos"] = np.cos(slot_angle)

    df = df.drop(columns=["wind_token", "route_turns", "alt_profile"])
    return df

train = build(train)
test = build(test)

# ----------------------
# Modeling
# ----------------------
target = "on_time"
features = [c for c in train.columns if c not in ["Id", target]]
cat_features = ["drone_model", "wind_dir", "landing_zone", "operator_tag"]

X = train[features]
y = train[target]
X_test = test[features]

# Stratified 5-fold (CPU-friendly)
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
test_preds = np.zeros(len(test))
# Out-of-fold predictions: each training row is scored once, by the fold
# model that was not trained on it, so the folds give a validation estimate.
oof_preds = np.zeros(len(train))

for tr, va in skf.split(X, y):
    model = CatBoostClassifier(
        iterations=1000,     # reduced from 2500
        learning_rate=0.02,
        depth=8,
        l2_leaf_reg=5,
        loss_function="Logloss",
        eval_metric="Logloss",
        random_seed=42,
        task_type="CPU",
        verbose=100
    )
    model.fit(X.iloc[tr], y.iloc[tr], cat_features=cat_features)
    oof_preds[va] = model.predict_proba(X.iloc[va])[:, 1]
    # Average the fold models' test probabilities.
    test_preds += model.predict_proba(X_test)[:, 1] / skf.n_splits

# Cross-validated log-loss, computed on probabilities clipped the same way as
# the submission. Column 1 of predict_proba corresponds to model.classes_[1],
# so compare labels against that value rather than assuming 0/1 encoding.
is_positive = y.to_numpy() == model.classes_[1]
oof_clipped = np.clip(oof_preds, 0.01, 0.99)
cv_logloss = -np.mean(
    np.where(is_positive, np.log(oof_clipped), np.log(1 - oof_clipped))
)
print(f"OOF log-loss: {cv_logloss:.5f}")

# Clip probabilities (log-loss safe)
test_preds = np.clip(test_preds, 0.01, 0.99)

submission = pd.DataFrame({
    "Id": test["Id"],
    "on_time": test_preds
})
submission.to_csv("submission.csv", index=False)
print("Submission saved!")


0:	learn: 0.6783180	total: 222ms	remaining: 3m 41s
100:	learn: 0.3779122	total: 23.9s	remaining: 3m 32s
200:	learn: 0.3242162	total: 47.5s	remaining: 3m 8s
300:	learn: 0.2663712	total: 1m 11s	remaining: 2m 46s
400:	learn: 0.2283693	total: 1m 35s	remaining: 2m 23s
500:	learn: 0.2027156	total: 2m	remaining: 1m 59s
600:	learn: 0.1857779	total: 2m 24s	remaining: 1m 36s
700:	learn: 0.1707127	total: 2m 49s	remaining: 1m 12s
800:	learn: 0.1582255	total: 3m 13s	remaining: 48.1s
900:	learn: 0.1489664	total: 3m 38s	remaining: 24s
999:	learn: 0.1399012	total: 4m 2s	remaining: 0us
0:	learn: 0.6778483	total: 229ms	remaining: 3m 48s
100:	learn: 0.3763529	total: 24.7s	remaining: 3m 39s
200:	learn: 0.3263521	total: 49.2s	remaining: 3m 15s
300:	learn: 0.2685913	total: 1m 14s	remaining: 2m 52s
400:	learn: 0.2350214	total: 1m 38s	remaining: 2m 27s
500:	learn: 0.2059553	total: 2m 2s	remaining: 2m 2s
600:	learn: 0.1873113	total: 2m 27s	remaining: 1m 37s
700:	learn: 0.1726839	total: 2m 52s	remaining: 1m 13s