# In [None]:
import pandas as pd
from src.utils.io import load_titanic
from src.preprocessing.features import add_feature_engineering
from src.modeling.pipeline import build_full_pipeline
from src.modeling.tuning import build_grid_search
from src.modeling.evaluation import plot_roc, plot_confusion

# Train, tune and evaluate the Titanic survival model.
#
# The hyper-parameter search is fitted on a training split only, and the
# confusion matrix / ROC curve are drawn on a hold-out split that the search
# never saw. Plotting on the rows used for fitting would report training-set
# performance, which is optimistically biased.

# Hold-out configuration: share of rows reserved for the final evaluation and
# the seed that makes the split reproducible between runs.
TEST_FRACTION = 0.2
RANDOM_STATE = 42

# 1. Load data
df = load_titanic("../data/raw/titanic.csv")

# 2. Drop columns that are not used downstream
df = df.drop(columns=["Cabin", "Ticket"])

# 3. Feature engineering
df = add_feature_engineering(df)

# 4. Separate features and target
# A fresh positional index guarantees unique labels, which the label-based
# hold-out selection below relies on.
df = df.reset_index(drop=True)
X = df.drop(columns=["Survived", "Name", "PassengerId"])
y = df["Survived"]

# 5. Stratified hold-out split
# Sampling the same fraction inside each target class keeps the class balance
# of the hold-out set close to that of the full data.
test_index = y.groupby(y).sample(frac=TEST_FRACTION, random_state=RANDOM_STATE).index
X_train, y_train = X.drop(index=test_index), y.drop(index=test_index)
X_test, y_test = X.loc[test_index], y.loc[test_index]

# 6. Build pipeline
pipe = build_full_pipeline()

# 7. GridSearchCV (fitted on the training split only)
grid = build_grid_search(pipe)
grid.fit(X_train, y_train)

print("BEST PARAMS:", grid.best_params_)
print("BEST SCORE (ROC-AUC):", grid.best_score_)

# 8. Evaluation of the best model on the untouched hold-out split
# NOTE(review): `best_estimator_` presumably comes from a scikit-learn search
# object with refit enabled -- confirm against build_grid_search.
best = grid.best_estimator_
y_pred = best.predict(X_test)
y_proba = best.predict_proba(X_test)[:, 1]

plot_confusion(y_test, y_pred)
plot_roc(y_test, y_proba)