# In [None]:
import pandas as pd
import numpy as np
from catboost import CatBoostClassifier
from sklearn.model_selection import StratifiedKFold

# Load the raw training and test tables (in that order) from the working
# directory; both are re-bound to their feature-expanded versions further down.
train, test = (pd.read_csv(path) for path in ("train.csv", "test.csv"))

def parse_wind(token):
    """Split a wind token shaped like ``"<dir>-<speed>/<gust>"`` into parts.

    Args:
        token: String containing exactly one ``"-"`` (between the direction
            and the speed/gust pair) and exactly one ``"/"`` inside that pair.

    Returns:
        Tuple ``(direction, speed, gust)``: the direction is returned as the
        raw string, speed and gust are converted to ``int``.

    Raises:
        ValueError: If the token does not split into the expected number of
            pieces, or a numeric piece is not a valid integer.
    """
    direction, speed_and_gust = token.split("-")
    speed, gust = map(int, speed_and_gust.split("/"))
    return direction, speed, gust

# Grid displacement (dx, dy) contributed by each route character
# (presumably Up / Down / Left / Right).
dir_map = {
    "U": (0, 1),
    "D": (0, -1),
    "L": (-1, 0),
    "R": (1, 0),
}

# Grid vector (dx, dy) for each compass wind direction. "N" shares the (0, 1)
# vector with "U" and "E" shares (1, 0) with "R"; the diagonal entries are
# plain sums of their components and are not scaled to unit length.
wind_map = {
    "N": (0, 1),
    "S": (0, -1),
    "E": (1, 0),
    "W": (-1, 0),
    "NE": (1, 1),
    "NW": (-1, 1),
    "SE": (1, -1),
    "SW": (-1, -1),
}

def route_features(route):
    """Summarise a route string of U/D/L/R steps as numeric features.

    Args:
        route: String whose characters are all keys of ``dir_map``. An empty
            string is accepted and describes a route with no steps.

    Returns:
        Dict with the keys ``route_len``, ``turns``, ``turn_density``, ``U``,
        ``D``, ``L``, ``R``, ``dx``, ``dy``, ``manhattan``, ``zigzag`` and
        ``route_entropy`` (in that order). For an empty route every value
        is zero.

    Raises:
        KeyError: If ``route`` contains a character missing from ``dir_map``.
    """
    total = len(route)

    # Net displacement: accumulate the per-step vectors.
    dx = dy = 0
    for step in route:
        step_x, step_y = dir_map[step]
        dx += step_x
        dy += step_y
    manhattan = abs(dx) + abs(dy)

    counts = {d: route.count(d) for d in "UDLR"}

    # A turn is any position whose step differs from the one before it.
    turns = sum(prev != cur for prev, cur in zip(route, route[1:]))

    if total:
        probs = np.array(list(counts.values())) / total
        # Natural-log entropy of the step distribution; the small epsilon
        # keeps log() finite for directions that never occur.
        entropy = -np.sum(probs * np.log(probs + 1e-9))
        turn_density = turns / total
    else:
        # Zero-step route: the ratios would divide by zero, and a route
        # without steps carries no directional information.
        entropy = 0.0
        turn_density = 0.0

    return {
        "route_len": total,
        "turns": turns,
        "turn_density": turn_density,
        "U": counts["U"],
        "D": counts["D"],
        "L": counts["L"],
        "R": counts["R"],
        "dx": dx,
        "dy": dy,
        "manhattan": manhattan,
        # +1 keeps the ratio finite for routes that end where they started.
        "zigzag": turns / (manhattan + 1),
        "route_entropy": entropy,
    }

def alt_features(alt):
    """Summarise a dash-separated altitude profile as numeric features.

    Args:
        alt: String of integers joined by ``"-"``, e.g. ``"10-20-15"``.

    Returns:
        Dict with the keys ``alt_mean``, ``alt_max``, ``alt_min``,
        ``alt_var``, ``climb_total`` (sum of positive consecutive
        differences), ``descent_total`` (sum of negative consecutive
        differences, sign-flipped), ``energy`` (sum of absolute consecutive
        differences) and ``slope_var`` (variance of the consecutive
        differences). For a single-sample profile all four
        difference-based features are 0.

    Raises:
        ValueError: If any dash-separated piece is not a valid integer.
    """
    levels = np.array([int(piece) for piece in alt.split("-")])
    steps = np.diff(levels)

    return {
        "alt_mean": levels.mean(),
        "alt_max": levels.max(),
        "alt_min": levels.min(),
        "alt_var": levels.var(),
        # Summing an empty selection already yields 0, so no guard is needed.
        "climb_total": steps[steps > 0].sum(),
        "descent_total": -steps[steps < 0].sum(),
        "energy": np.abs(steps).sum(),
        # With a single sample there are no differences; the variance of an
        # empty array is NaN (plus a RuntimeWarning), so report 0.0 to stay
        # consistent with the other difference-based features.
        "slope_var": steps.var() if steps.size else 0.0,
    }

def build(df):
    """Return a copy of ``df`` with its raw token columns expanded to features.

    The input frame is not modified. It must provide the columns
    ``wind_token``, ``route_turns``, ``alt_profile`` and ``slot_15min``.

    Added columns, in order: ``wind_dir``, ``wind_speed``, ``wind_gust``,
    ``gust_ratio``, every key produced by ``route_features`` and
    ``alt_features``, ``wind_alignment``, ``slot_sin`` and ``slot_cos``.
    The three raw token columns are dropped from the result.

    Args:
        df: Source DataFrame.

    Returns:
        New DataFrame holding the remaining original columns plus the
        engineered ones.
    """
    out = df.copy()

    # Each parsed token is a (direction, speed, gust) tuple; fan it out into
    # one column per position.
    parsed = out["wind_token"].apply(parse_wind)
    for position, column in enumerate(("wind_dir", "wind_speed", "wind_gust")):
        out[column] = parsed.apply(lambda parts, position=position: parts[position])
    # +1 in the denominator avoids dividing by a zero wind speed.
    out["gust_ratio"] = out["wind_gust"] / (out["wind_speed"] + 1)

    # Turn each per-row feature dict into a row of a feature frame.
    route_part = out["route_turns"].apply(route_features).apply(pd.Series)
    alt_part = out["alt_profile"].apply(alt_features).apply(pd.Series)
    out = pd.concat([out, route_part, alt_part], axis=1)

    def alignment(record):
        # Dot product of the net displacement with the wind vector, scaled by
        # (manhattan + 1). Directions missing from wind_map count as no wind.
        wind_x, wind_y = wind_map.get(record["wind_dir"], (0, 0))
        dot = record["dx"] * wind_x + record["dy"] * wind_y
        return dot / (record["manhattan"] + 1)

    out["wind_alignment"] = out.apply(alignment, axis=1)

    # Cyclic encoding of the slot index with period 96
    # (presumably the number of 15-minute slots in a day).
    angle = 2 * np.pi * out["slot_15min"] / 96
    out["slot_sin"] = np.sin(angle)
    out["slot_cos"] = np.cos(angle)

    return out.drop(columns=["wind_token", "route_turns", "alt_profile"])

# Expand the raw token columns into model features for both splits.
train = build(train)
test = build(test)

target = "on_time"
features = [c for c in train.columns if c not in ["Id", target]]

cat_features = [
    "drone_model",
    "wind_dir",
    "landing_zone",
    "operator_tag"
]

# Hyper-parameters shared by every per-drone, per-fold model.
model_params = dict(
    iterations=2500,
    learning_rate=0.015,
    depth=9,
    l2_leaf_reg=7,
    loss_function="Logloss",
    eval_metric="Logloss",
    random_seed=42,
    verbose=False,
)

# With an integer random_state every split() call reproduces the same
# shuffling, so a single splitter can be shared across drone models.
skf = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)

X_test = test[features]

# Start from the overall training positive rate so that test rows whose
# drone_model never occurs in train still get a usable probability instead of
# staying at 0 (which the final clip would turn into a confident 0.01).
# NOTE(review): assumes the target column is numeric 0/1 — confirm.
final_preds = np.full(len(test), train[target].mean(), dtype=float)

# One ensemble per drone model, trained only on that model's rows.
for drone in train["drone_model"].unique():
    tr_idx = train["drone_model"] == drone
    te_idx = test["drone_model"] == drone

    # Nothing to predict for this drone model: skip the ten fits and avoid
    # calling predict_proba on an empty frame.
    if not te_idx.any():
        continue

    X = train.loc[tr_idx, features]
    y = train.loc[tr_idx, target]
    X_drone_test = X_test.loc[te_idx]

    preds = np.zeros(len(X_drone_test))

    # The held-out fold `va` is not used for evaluation or early stopping;
    # the folds only make each model train on a different nine-tenths of the
    # rows, and the ten predictions are averaged.
    for tr, va in skf.split(X, y):
        model = CatBoostClassifier(**model_params)
        model.fit(X.iloc[tr], y.iloc[tr], cat_features=cat_features)
        preds += model.predict_proba(X_drone_test)[:, 1] / skf.n_splits

    final_preds[te_idx.to_numpy()] = preds

# Keep probabilities away from exactly 0 and 1
# (presumably to bound the per-row log-loss penalty).
final_preds = np.clip(final_preds, 0.01, 0.99)

submission = pd.DataFrame({
    "Id": test["Id"],
    "on_time": final_preds
})

submission.to_csv("submission2.csv", index=False)
