In [1]:
import pandas as pd
import numpy as np
from catboost import CatBoostClassifier
from sklearn.model_selection import train_test_split, cross_val_score
import random

# One seed shared by every random number generator used in this notebook.
SEED = 42
for _seed_fn in (random.seed, np.random.seed):
    _seed_fn(SEED)

# Load data
train, test = (pd.read_csv(csv_name) for csv_name in ("train.csv", "test.csv"))

# Feature engineering
def parse_route_turns(x):
    """Count the 'L' and 'R' characters in a route string, ignoring case.

    Any input that is not a ``str`` (for example a float NaN) yields 0.
    """
    if isinstance(x, str):
        route = x.upper()
        return route.count("L") + route.count("R")
    return 0

def parse_alt_changes(x):
    """Return the total absolute change along a dash-separated integer profile.

    ``x`` is converted with ``str`` and split on '-'. Every piece must parse
    as an ``int``; the result is the sum of ``|next - previous|`` over
    consecutive pieces, so a single-value profile gives 0.

    Returns ``np.nan`` when any piece is not an integer (missing values,
    empty strings, free text). Because '-' is the separator, a negative
    value produces an empty piece and therefore also yields ``np.nan``.
    """
    try:
        levels = [int(piece) for piece in str(x).split('-')]
    except (ValueError, TypeError):
        # Only parse failures mean "no usable profile"; anything else
        # (KeyboardInterrupt, SystemExit, ...) must keep propagating.
        return np.nan
    return sum(abs(nxt - cur) for cur, nxt in zip(levels, levels[1:]))

# Turn count per row, derived from the raw route string.
train['num_turns'] = train['route_turns'].apply(parse_route_turns)
test['num_turns'] = test['route_turns'].apply(parse_route_turns)

# Total altitude change per row; NaN where the profile could not be parsed.
train['alt_changes'] = train['alt_profile'].apply(parse_alt_changes)
test['alt_changes'] = test['alt_profile'].apply(parse_alt_changes)

# Impute with the median of the training split only, applied to both splits.
# The result is assigned back to the column: fillna(inplace=True) on
# df[col] acts on an intermediate object, which raises a chained-assignment
# warning in pandas 2.x and leaves the frame untouched under Copy-on-Write.
alt_median = train['alt_changes'].median()
train['alt_changes'] = train['alt_changes'].fillna(alt_median)
test['alt_changes'] = test['alt_changes'].fillna(alt_median)

def extract_wind_features(df):
    """Split the ``wind_token`` column into three separate feature columns.

    A token is parsed as ``"<direction>-<avg>/<gust>"`` where ``avg`` and
    ``gust`` must be integers. Tokens that do not fit that shape (wrong
    number of pieces, non-integer speeds, non-string values such as NaN)
    fall back to ``('UNK', 0, 0)``.

    The columns ``wind_direction``, ``wind_avg_speed`` and ``wind_gust`` are
    written into ``df`` itself, and that same object is returned.

    NOTE(review): the fallback encodes an unparseable token as speed 0, which
    is indistinguishable from a genuine 0 reading - confirm this is intended.
    """
    def _parse(token):
        try:
            direction, speeds = token.split('-')
            avg, gust = speeds.split('/')
            return direction, int(avg), int(gust)
        except (ValueError, AttributeError, TypeError):
            # ValueError: wrong number of pieces or a non-integer speed.
            # AttributeError: value has no .split (None, float NaN, ...).
            # TypeError: .split exists but rejects a str separator (bytes).
            # Anything else, e.g. KeyboardInterrupt, is left to propagate.
            return 'UNK', 0, 0

    parsed = df['wind_token'].apply(_parse)
    output_columns = ('wind_direction', 'wind_avg_speed', 'wind_gust')
    for position, column in enumerate(output_columns):
        # Bind the position as a default so each lambda keeps its own index.
        df[column] = parsed.apply(lambda parts, i=position: parts[i])
    return df

# Add wind_direction / wind_avg_speed / wind_gust to both splits.
train = extract_wind_features(train)
test = extract_wind_features(test)

# Identify categorical features for CatBoost
cat_features = ["drone_model", "slot_15min", "operator_tag", "wind_direction", "landing_zone"]

# CatBoost rejects NaN in categorical columns, so missing values become an
# explicit "UNK" level (the same token extract_wind_features uses for
# unparseable wind). Columns outside this mapping are left untouched.
# NOTE(review): a categorical column that pandas read as float would still
# need an int/str cast before CatBoost accepts it - confirm the dtypes.
cat_fill = {col: "UNK" for col in cat_features}

# Define features
X = train[["distance_km", "payload_kg", "num_turns", "alt_changes",
           "drone_model", "slot_15min", "operator_tag",
           "wind_direction", "wind_avg_speed", "wind_gust", "landing_zone"]].fillna(cat_fill)
y = train["on_time"]
X_test = test[X.columns].fillna(cat_fill)

# Train-validation split (stratified on the target, 20% held out)
X_tr, X_val, y_tr, y_val = train_test_split(X, y, test_size=0.2, random_state=SEED, stratify=y)

# CatBoost model: hyperparameters are gathered in one mapping so they can be
# read and tuned in a single place.
catboost_params = dict(
    iterations=2000,
    learning_rate=0.05,
    depth=6,
    eval_metric="Logloss",
    random_seed=SEED,
    verbose=100,
)
model = CatBoostClassifier(**catboost_params)

# Fit model on the training split, using the validation split as eval set
# together with early_stopping_rounds=100.
model.fit(
    X_tr,
    y_tr,
    cat_features=cat_features,
    eval_set=(X_val, y_val),
    early_stopping_rounds=100,
)

# Predict probabilities for test; column index 1 of predict_proba is kept
# (presumably the on_time == 1 class - verify against model.classes_).
test_proba = model.predict_proba(X_test)[:, 1]

# Prepare submission: the test Ids next to their predicted probability.
submission = test[["Id"]].assign(on_time=test_proba)

submission.to_csv("submission.csv", index=False)


ModuleNotFoundError: No module named 'catboost'

In [None]:
!pip install catboost