In [2]:
import sys 
import os


import random
import logging
import warnings

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from tqdm.auto import tqdm
from sklearn import set_config

from pathlib import Path

  from .autonotebook import tqdm as notebook_tqdm


In [5]:
 PROJECT_ROOT = Path("..").resolve()


In [6]:
def process_pipeline():
   
    DATA_DIR = PROJECT_ROOT / "data" / "raw"
    train_path = DATA_DIR / "train.csv"
    test_path = DATA_DIR / "test.csv"

    train_df = pd.read_csv(train_path)
    test_df = pd.read_csv(test_path)
    X_test = test_df.drop(columns=["id"])

    train_df["Heart Disease"]=train_df["Heart Disease"].map({"Presence":1,"Absence":0})
    train_df = train_df.drop(columns=["id"])

    X=train_df.iloc[:,0:-1]
    Y=train_df.iloc[:,-1]

    return X,Y,X_test

In [4]:
X,Y,X_test = process_pipeline()

## SUBMISSION PIPELINE

In [None]:
def submission_pipeline(test_preds,name):

    DATA_DIR = PROJECT_ROOT / "data" / "raw"
    submission_path= PROJECT_ROOT / "data" / "raw" / "sample_submission.csv"

    
    submission_df = pd.read_csv(submission_path) 
    submission_df["Heart Disease"] = test_preds
    Pname = PROJECT_ROOT / "submission_csvs" / f"{name}_submission.csv"
    submission_df.to_csv(Pname, index=False)

    print("Submission saved: " + name)

   

In [17]:
def get_cv(N_SPLITS=5):
    if USE_REPEATED:
        return RepeatedStratifiedKFold(
            n_splits=N_SPLITS,
            n_repeats=N_REPEATS,
            random_state=SEED
        )
    else:
        return StratifiedKFold(
            n_splits=N_SPLITS,
            shuffle=True,
            random_state=SEED
        )

In [22]:
def get_model(params):
    if MODEL_TYPE == "lgb":
        return lgb.LGBMClassifier(**params)

    elif MODEL_TYPE == "xgb":
        return xgb.XGBClassifier(**params)

    elif MODEL_TYPE == "cat":
        return CatBoostClassifier(**params)

In [18]:
def train_fold_lgb(params, X_train, y_train, X_val, y_val, trial=None, fold=None):
    """Train one LGB fold using native API for proper early stopping + pruning."""
    dtrain = lgb.Dataset(X_train, label=y_train)
    dval   = lgb.Dataset(X_val,   label=y_val, reference=dtrain)

    callbacks = [
        lgb.early_stopping(EARLY_STOPPING, verbose=False),
        lgb.log_evaluation(-1),
    ]
    if trial is not None:
        callbacks.append(
            optuna.integration.LightGBMPruningCallback(trial, "auc")
        )

    booster = lgb.train(
        params,
        dtrain,
        num_boost_round=N_BOOST_ROUNDS,
        valid_sets=[dval],
        callbacks=callbacks,
    )
    preds = booster.predict(X_val)
    return roc_auc_score(y_val, preds), booster

In [19]:
def train_fold_xgb(params, X_train, y_train, X_val, y_val, trial=None, fold=None):
    callbacks = []
    if trial is not None:
        callbacks.append(
            optuna.integration.XGBoostPruningCallback(trial, "validation_0-auc")
        )

    model = xgb.XGBClassifier(
        **params,
        n_estimators=N_BOOST_ROUNDS,
        early_stopping_rounds=EARLY_STOPPING,
        callbacks=callbacks if callbacks else None,
    )
    model.fit(
        X_train, y_train,
        eval_set=[(X_val, y_val)],
        verbose=False,
    )
    preds = model.predict_proba(X_val)[:, 1]
    return roc_auc_score(y_val, preds), model

In [20]:
def train_fold_cat(params, X_train, y_train, X_val, y_val, trial=None, fold=None):
    model = CatBoostClassifier(
        **params,
        iterations=N_BOOST_ROUNDS,
        early_stopping_rounds=EARLY_STOPPING,
    )
    model.fit(
        X_train, y_train,
        eval_set=(X_val, y_val),
        verbose=False,
    )
    preds = model.predict_proba(X_val)[:, 1]
    return roc_auc_score(y_val, preds), model


In [21]:
TRAIN_FN = {
    "lgb": train_fold_lgb,
    "xgb": train_fold_xgb,
    "cat": train_fold_cat,
}

In [None]:
def make_experiment_dir(name):
    SAVE_DIR = f"saved_models_{name}_norepeated"
    os.makedirs(SAVE_DIR, exist_ok=True)


In [None]:
# ==============================
# TRAIN FINAL MODELS + OOF
# ==============================
cv = get_cv()
dir_name=input("Enter Experiment Name")
make_experiment_dir(dir_name)
MODEL_TYPE = input("Enter Model Type")   # "lgb", "xgb", "cat"

oof_preds  = np.zeros(len(X))
test_preds = np.zeros(len(X_test))

for fold, (train_idx, val_idx) in enumerate(
    tqdm(cv.split(X, Y), total=cv.get_n_splits(X, Y))
):
    X_train, X_val = X.iloc[train_idx], X.iloc[val_idx]
    y_train, y_val = Y.iloc[train_idx], Y.iloc[val_idx]

    # âœ… Use native API with early stopping (no pruning in final run)
    auc, model = TRAIN_FN[MODEL_TYPE](
        best_params, X_train, y_train, X_val, y_val,
        trial=None, fold=fold
    )

    # OOF
    oof_preds[val_idx] = model.predict(X_val)          # lgb.Booster uses .predict()

    # Test prediction averaging
    test_preds += model.predict(X_test) / cv.get_n_splits()

    # Save fold model
    joblib.dump(model, f"{SAVE_DIR}/{MODEL_TYPE}_fold_{fold}.pkl")
    print(f"  Fold {fold}: AUC = {auc:.5f}")

# Save OOF & Test preds
np.save(f"{SAVE_DIR}/oof_preds_{MODEL_TYPE}.npy", oof_preds)
np.save(f"{SAVE_DIR}/test_preds_{MODEL_TYPE}.npy", test_preds)

print(f"\nOOF AUC: {roc_auc_score(Y, oof_preds):.5f}")
print("Saved OOF + Test predictions")

In [27]:
from sklearn.metrics import (
    roc_auc_score,
    accuracy_score,
    precision_score,
    recall_score,
    f1_score,
    confusion_matrix
)
def evaluate_model(y_true, y_prob, threshold=0.5):
    
    y_pred = (y_prob >= threshold).astype(int)
    tn, fp, fn, tp = confusion_matrix(y_true, y_pred).ravel()

    metrics = {
        "auc": roc_auc_score(y_true, y_prob),
        "accuracy": accuracy_score(y_true, y_pred),
        "precision": precision_score(y_true, y_pred),
        "recall": recall_score(y_true, y_pred),
        "f1": f1_score(y_true, y_pred),
        "tn": tn,
        "fp": fp,
        "fn": fn,
        "tp": tp
    }

    return metrics

## XGB+LIGHT+CAT TUNED MANUAL STACK - HISTGRADIENT

In [26]:
xgb_oof=np.load(PROJECT_ROOT / "models" /"XgBoost"/ "oof_preds_xgb.npy")
xgb_test=np.load(PROJECT_ROOT / "models" /"XgBoost"/ "test_preds_xgb.npy")

cat_oof=np.load(PROJECT_ROOT / "models" /"CatBoost" /"oof_preds_cat.npy")
cat_test=np.load(PROJECT_ROOT / "models" / "CatBoost" /"test_preds_cat.npy")

lgb_oof=np.load(PROJECT_ROOT / "models" /"lightGBM"/ "oof_preds_lgb.npy")
lgb_test=np.load(PROJECT_ROOT / "models" /"lightGBM"/ "test_preds_lgb.npy")


In [28]:
from sklearn.ensemble import HistGradientBoostingClassifier
from sklearn.metrics import roc_auc_score

# Build OOF matrix (add hgb if you have it)
oof_matrix = np.column_stack([
    lgb_oof,
    xgb_oof,
    cat_oof
])

test_matrix = np.column_stack([
    lgb_test,
    xgb_test,
    cat_test
])

# =========================
# Meta Model
# =========================
meta_model = HistGradientBoostingClassifier(
    learning_rate=0.05,
    max_iter=500,
    max_depth=3,
    max_leaf_nodes=15,
    l2_regularization=0.5,
    early_stopping=True,
    validation_fraction=0.1,
    n_iter_no_change=20,
    random_state=42
)

meta_model.fit(oof_matrix, Y)

# OOF stacked score
stacked_oof = meta_model.predict_proba(oof_matrix)[:, 1]
print("Stacked OOF AUC:", roc_auc_score(Y, stacked_oof))

# Final test prediction
final_test_preds = meta_model.predict_proba(test_matrix)[:, 1]


Stacked OOF AUC: 0.9556034114701177


In [30]:
submission_pipeline(final_test_preds,"stacked_submission-base-xg-light-cat-meta-hist.csv")

Submission saved: stacked_submission-base-xg-light-cat-meta-hist.csv


## XGB+LIGHT+CAT TUNED MANUAL STACK - Logistic Regression Meta Model

In [32]:
xgb_oof=np.load(PROJECT_ROOT / "models" /"XgBoost"/ "oof_preds_xgb.npy")
xgb_test=np.load(PROJECT_ROOT / "models" /"XgBoost"/ "test_preds_xgb.npy")

cat_oof=np.load(PROJECT_ROOT / "models" /"CatBoost" /"oof_preds_cat.npy")
cat_test=np.load(PROJECT_ROOT / "models" / "CatBoost" /"test_preds_cat.npy")

lgb_oof=np.load(PROJECT_ROOT / "models" /"lightGBM"/ "oof_preds_lgb.npy")
lgb_test=np.load(PROJECT_ROOT / "models" /"lightGBM"/ "test_preds_lgb.npy")


In [33]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_auc_score
oof_matrix = np.column_stack([
    lgb_oof,
    xgb_oof,
    cat_oof
])

test_matrix = np.column_stack([
    lgb_test,
    xgb_test,
    cat_test
])
meta_model = LogisticRegression(    
    solver="lbfgs",
    max_iter=2000,
    random_state=42)

meta_model.fit(oof_matrix, Y)

final_test_preds = meta_model.predict_proba(test_matrix)[:,1]

print("Stacked OOF AUC:", roc_auc_score(Y, meta_model.predict_proba(oof_matrix)[:,1]))


Stacked OOF AUC: 0.9554912964950262


In [34]:
submission_pipeline(final_test_preds,"stacked_submission-base-xg-light-cat-meta-logistic.csv")

Submission saved: stacked_submission-base-xg-light-cat-meta-logistic.csv


## XGB+LIGHT+CAT TUNED MANUAL STACK - Ridge Regression Meta Model

In [35]:
xgb_oof=np.load(PROJECT_ROOT / "models" /"XgBoost"/ "oof_preds_xgb.npy")
xgb_test=np.load(PROJECT_ROOT / "models" /"XgBoost"/ "test_preds_xgb.npy")

cat_oof=np.load(PROJECT_ROOT / "models" /"CatBoost" /"oof_preds_cat.npy")
cat_test=np.load(PROJECT_ROOT / "models" / "CatBoost" /"test_preds_cat.npy")

lgb_oof=np.load(PROJECT_ROOT / "models" /"lightGBM"/ "oof_preds_lgb.npy")
lgb_test=np.load(PROJECT_ROOT / "models" /"lightGBM"/ "test_preds_lgb.npy")


In [40]:
from sklearn.linear_model import Ridge

oof_matrix = np.column_stack([
    lgb_oof,
    xgb_oof,
    cat_oof
])

test_matrix = np.column_stack([
    lgb_test,
    xgb_test,
    cat_test
])

meta_model = Ridge(alpha=1)
meta_model.fit(oof_matrix, Y)

final_test_preds = meta_model.predict(test_matrix)
print("Stacked OOF AUC:", roc_auc_score(Y, meta_model.predict(oof_matrix)))

Stacked OOF AUC: 0.9555257956633003


## XGB+LIGHT+CAT TUNED MANUAL STACK - LightGBM  Meta Model

In [45]:
import json 
# json.load(PROJECT_ROOT / "models" / "lightGBM" / "best_params_lgb.json")
lgbm_params=json.load(open(PROJECT_ROOT / "models" / "lightGBM" / "best_params_lgb.json"))

In [46]:
lgbm_params

{'learning_rate': 0.09166541113559948,
 'num_leaves': 91,
 'max_depth': 7,
 'min_child_samples': 48,
 'min_child_weight': 0.006701718272616044,
 'subsample': 0.7332400511168536,
 'colsample_bytree': 0.6022071194477242,
 'bagging_freq': 4,
 'reg_alpha': 4.905395628360497,
 'reg_lambda': 6.096094150301336,
 'min_split_gain': 0.8430984187798867,
 'max_bin': 177,
 'scale_pos_weight': 0.8990221090900653}

In [41]:
xgb_oof=np.load(PROJECT_ROOT / "models" /"XgBoost"/ "oof_preds_xgb.npy")
xgb_test=np.load(PROJECT_ROOT / "models" /"XgBoost"/ "test_preds_xgb.npy")

cat_oof=np.load(PROJECT_ROOT / "models" /"CatBoost" /"oof_preds_cat.npy")
cat_test=np.load(PROJECT_ROOT / "models" / "CatBoost" /"test_preds_cat.npy")

lgb_oof=np.load(PROJECT_ROOT / "models" /"lightGBM"/ "oof_preds_lgb.npy")
lgb_test=np.load(PROJECT_ROOT / "models" /"lightGBM"/ "test_preds_lgb.npy")


In [49]:
from lightgbm import LGBMClassifier


oof_matrix = np.column_stack([
    lgb_oof,
    xgb_oof,
    cat_oof
])

test_matrix = np.column_stack([
    lgb_test,
    xgb_test,
    cat_test
])


meta_model = LGBMClassifier(
  **lgbm_params,
  n_estimators=2000,
    random_state=42,
    verbosity=-1
)

meta_model.fit(oof_matrix, Y)

final_test_preds = meta_model.predict_proba(test_matrix)[:,1]
print("Stacked OOF AUC:", roc_auc_score(Y, meta_model.predict_proba(oof_matrix)[:,1]))



Stacked OOF AUC: 0.9556234473720697


In [50]:
submission_pipeline(final_test_preds,"stacked_submission-base-xg-light-cat-meta-lightGBM.csv")

Submission saved: stacked_submission-base-xg-light-cat-meta-lightGBM.csv


## XGB+LIGHT+CAT TUNED MANUAL STACK - CATBOOST  Meta Model

In [53]:
import json 
# json.load(PROJECT_ROOT / "models" / "lightGBM" / "best_params_lgb.json")
cat_params=json.load(open(PROJECT_ROOT / "models" / "CatBoost" / "best_params_cat.json"))

In [54]:
cat_params

{'iterations': 1929,
 'learning_rate': 0.08881567369228521,
 'depth': 4,
 'l2_leaf_reg': 19.703757801579894,
 'border_count': 225,
 'bagging_temperature': 0.17643102746782444,
 'random_strength': 0.11651908368310085,
 'scale_pos_weight': 0.9959285437346185}

In [51]:
xgb_oof=np.load(PROJECT_ROOT / "models" /"XgBoost"/ "oof_preds_xgb.npy")
xgb_test=np.load(PROJECT_ROOT / "models" /"XgBoost"/ "test_preds_xgb.npy")

cat_oof=np.load(PROJECT_ROOT / "models" /"CatBoost" /"oof_preds_cat.npy")
cat_test=np.load(PROJECT_ROOT / "models" / "CatBoost" /"test_preds_cat.npy")

lgb_oof=np.load(PROJECT_ROOT / "models" /"lightGBM"/ "oof_preds_lgb.npy")
lgb_test=np.load(PROJECT_ROOT / "models" /"lightGBM"/ "test_preds_lgb.npy")


In [58]:
from catboost import CatBoostClassifier


oof_matrix = np.column_stack([
    lgb_oof,
    xgb_oof,
    cat_oof
])

test_matrix = np.column_stack([
    lgb_test,
    xgb_test,
    cat_test
])


meta_model = CatBoostClassifier(
  **cat_params,
    random_state=42,
    verbose=0
)

meta_model.fit(oof_matrix, Y)

final_test_preds = meta_model.predict_proba(test_matrix)[:,1]
print("Stacked OOF AUC:", roc_auc_score(Y, meta_model.predict_proba(oof_matrix)[:,1]))

Stacked OOF AUC: 0.955780639333224


In [59]:
submission_pipeline(final_test_preds,"stacked_submission-base-xg-light-cat-meta-catBoost.csv")

Submission saved: stacked_submission-base-xg-light-cat-meta-catBoost.csv


 ## XGB+LIGHT+CAT TUNED MANUAL 2 LEVEL STACK - HIST,HIST,HIST  BASE-Meta Model - CAT- META

In [63]:
import json 
# json.load(PROJECT_ROOT / "models" / "lightGBM" / "best_params_lgb.json")
cat_params=json.load(open(PROJECT_ROOT / "models" / "CatBoost" / "best_params_cat.json"))

In [62]:
xgb_oof=np.load(PROJECT_ROOT / "models" /"XgBoost"/ "oof_preds_xgb.npy")
xgb_test=np.load(PROJECT_ROOT / "models" /"XgBoost"/ "test_preds_xgb.npy")

cat_oof=np.load(PROJECT_ROOT / "models" /"CatBoost" /"oof_preds_cat.npy")
cat_test=np.load(PROJECT_ROOT / "models" / "CatBoost" /"test_preds_cat.npy")

lgb_oof=np.load(PROJECT_ROOT / "models" /"lightGBM"/ "oof_preds_lgb.npy")
lgb_test=np.load(PROJECT_ROOT / "models" /"lightGBM"/ "test_preds_lgb.npy")


In [65]:
from sklearn.ensemble import HistGradientBoostingClassifier
from sklearn.metrics import roc_auc_score

# Build OOF matrix (add hgb if you have it)
oof_matrix = np.column_stack([
    lgb_oof,
    xgb_oof,
    cat_oof
])

test_matrix = np.column_stack([
    lgb_test,
    xgb_test,
    cat_test
])

# =========================
# BASE -Meta Model
# =========================
meta_model = HistGradientBoostingClassifier(
    learning_rate=0.05,
    max_iter=1000,
    max_depth=3,
    max_leaf_nodes=15,
    l2_regularization=0.5,
    early_stopping=True,
    validation_fraction=0.1,
    n_iter_no_change=20,
    random_state=42
)

meta_model.fit(oof_matrix, Y)

# OOF stacked score
stacked_oof_hist1 = meta_model.predict_proba(oof_matrix)[:, 1]
test_preds_hist1 = meta_model.predict_proba(test_matrix)[:, 1]
print("Stacked OOF AUC:", roc_auc_score(Y, stacked_oof_hist1))





# =========================
# BASE - Meta Model
# =========================
meta_model = HistGradientBoostingClassifier(
    learning_rate=0.03,
    max_iter=1000,
    max_depth=3,
    max_leaf_nodes=15,
    l2_regularization=0.5,
    early_stopping=True,
    validation_fraction=0.1,
    n_iter_no_change=20,
    random_state=1299
)

meta_model.fit(oof_matrix, Y)

# OOF stacked score
stacked_oof_hist2 = meta_model.predict_proba(oof_matrix)[:, 1]
test_preds_hist2 = meta_model.predict_proba(test_matrix)[:, 1]
print("Stacked OOF AUC:", roc_auc_score(Y, stacked_oof_hist2))



# =========================
# BASE - Meta Model
# =========================
meta_model = HistGradientBoostingClassifier(
    learning_rate=0.1,
    max_iter=1000,
    max_depth=3,
    max_leaf_nodes=15,
    l2_regularization=0.5,
    early_stopping=True,
    validation_fraction=0.1,
    n_iter_no_change=20,
    random_state=340
)

meta_model.fit(oof_matrix, Y)

# OOF stacked score
stacked_oof_hist3 = meta_model.predict_proba(oof_matrix)[:, 1]
test_preds_hist3 = meta_model.predict_proba(test_matrix)[:, 1]
print("Stacked OOF AUC:", roc_auc_score(Y, stacked_oof_hist3))



Stacked OOF AUC: 0.9556034114701177
Stacked OOF AUC: 0.9555961234705603
Stacked OOF AUC: 0.955599126712285


In [66]:
from catboost import CatBoostClassifier


oof_matrix = np.column_stack([
    stacked_oof_hist1,
    stacked_oof_hist2,
    stacked_oof_hist3,
])

test_matrix = np.column_stack([
    test_preds_hist1,
    test_preds_hist2,
    test_preds_hist3
])


meta_model = CatBoostClassifier(
  **cat_params,
    random_state=42,
    verbose=0
)

meta_model.fit(oof_matrix, Y)

final_test_preds = meta_model.predict_proba(test_matrix)[:,1]
print("Stacked OOF AUC:", roc_auc_score(Y, meta_model.predict_proba(oof_matrix)[:,1]))

Stacked OOF AUC: 0.9558778364180902


In [67]:
submission_pipeline(final_test_preds,"stacked_submission-base-xg-light-cat-basemeta3-hist-meta-cat.csv")

Submission saved: stacked_submission-base-xg-light-cat-basemeta3-hist-meta-cat.csv


##  XGB+LIGHT+CAT TUNED VOTING CLASSIFIER - CATBOOST  Meta Model

In [80]:
from sklearn.base import BaseEstimator, ClassifierMixin
import numpy as np

class SklearnCompatCatBoost(BaseEstimator, ClassifierMixin):
    def __init__(self, **kwargs):
        self.kwargs = kwargs
        self._model = CatBoostClassifier(**kwargs)
    
    def fit(self, X, y, **fit_params):
        self._model.fit(X, y, **fit_params)
        self.classes_ = np.unique(y)
        return self
    
    def predict(self, X):
        return self._model.predict(X)
    
    def predict_proba(self, X):
        return self._model.predict_proba(X)

In [82]:
from catboost import CatBoostClassifier
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier

from sklearn.ensemble import StackingClassifier

In [83]:
cat_params=json.load(open(PROJECT_ROOT / "models" / "CatBoost" / "best_params_cat.json"))
lgb_params=json.load(open(PROJECT_ROOT / "models" / "lightGBM" / "best_params_lgb.json"))
xgb_params=json.load(open(PROJECT_ROOT / "models" / "XgBoost" / "best_params_xgb.json"))

In [84]:
print(cat_params)
print(lgb_params)
print(xgb_params)

{'iterations': 1929, 'learning_rate': 0.08881567369228521, 'depth': 4, 'l2_leaf_reg': 19.703757801579894, 'border_count': 225, 'bagging_temperature': 0.17643102746782444, 'random_strength': 0.11651908368310085, 'scale_pos_weight': 0.9959285437346185}
{'learning_rate': 0.09166541113559948, 'num_leaves': 91, 'max_depth': 7, 'min_child_samples': 48, 'min_child_weight': 0.006701718272616044, 'subsample': 0.7332400511168536, 'colsample_bytree': 0.6022071194477242, 'bagging_freq': 4, 'reg_alpha': 4.905395628360497, 'reg_lambda': 6.096094150301336, 'min_split_gain': 0.8430984187798867, 'max_bin': 177, 'scale_pos_weight': 0.8990221090900653}
{'n_estimators': 1228, 'learning_rate': 0.06054050451731425, 'max_depth': 5, 'min_child_weight': 16.899992418489692, 'gamma': 2.3855765268556994, 'max_delta_step': 9, 'subsample': 0.7670024902678183, 'colsample_bytree': 0.6741848423036123, 'reg_alpha': 3.747714572994834, 'reg_lambda': 3.9700433352288487, 'scale_pos_weight': 1.369296230986938}


In [85]:
from catboost import CatBoostClassifier
from sklearn.utils.estimator_checks import check_estimator

clf = SklearnCompatCatBoost(verbose=0)
print(hasattr(clf, '__sklearn_tags__'))

True


In [88]:
base_models = [
    ('cat_1', SklearnCompatCatBoost(**cat_params, random_seed=777, verbose=0)),
    ('cat_2', SklearnCompatCatBoost(**cat_params, random_seed=42, verbose=0)),
    ('lgb_1', LGBMClassifier(**lgb_params,n_estimators=1000, random_state=42,verbosity=-1)),
    ('lgb_2', LGBMClassifier(**lgb_params, n_estimators=2000,random_state=1234,verbosity=-1)),
    ('xgb_1', XGBClassifier(**xgb_params, random_state=99)),
    ('xgb_3', XGBClassifier(**xgb_params, random_state=2024))
]

stacking_model = StackingClassifier(estimators=base_models,
                                    final_estimator=LogisticRegression(random_state=42),
                                    cv=5,
                                    stack_method='predict_proba')

In [89]:
stacking_model.fit(X, Y)

Learning rate set to 0.16153
0:	learn: 0.5389021	total: 171ms	remaining: 2m 51s
1:	learn: 0.4485439	total: 338ms	remaining: 2m 48s
2:	learn: 0.3932501	total: 519ms	remaining: 2m 52s
3:	learn: 0.3592819	total: 670ms	remaining: 2m 46s
4:	learn: 0.3360749	total: 839ms	remaining: 2m 46s
5:	learn: 0.3214734	total: 994ms	remaining: 2m 44s
6:	learn: 0.3110239	total: 1.16s	remaining: 2m 45s
7:	learn: 0.3031617	total: 1.31s	remaining: 2m 42s
8:	learn: 0.2972461	total: 1.47s	remaining: 2m 42s
9:	learn: 0.2928862	total: 1.56s	remaining: 2m 34s
10:	learn: 0.2890940	total: 1.64s	remaining: 2m 27s
11:	learn: 0.2862542	total: 1.72s	remaining: 2m 21s
12:	learn: 0.2840381	total: 1.82s	remaining: 2m 17s
13:	learn: 0.2819526	total: 1.91s	remaining: 2m 14s
14:	learn: 0.2803368	total: 1.98s	remaining: 2m 10s
15:	learn: 0.2791294	total: 2.06s	remaining: 2m 6s
16:	learn: 0.2780374	total: 2.14s	remaining: 2m 3s
17:	learn: 0.2770784	total: 2.22s	remaining: 2m 1s
18:	learn: 0.2763448	total: 2.3s	remaining: 1m 5

0,1,2
,"estimators  estimators: list of (str, estimator) Base estimators which will be stacked together. Each element of the list is defined as a tuple of string (i.e. name) and an estimator instance. An estimator can be set to 'drop' using `set_params`. The type of estimator is generally expected to be a classifier. However, one can pass a regressor for some use case (e.g. ordinal regression).","[('cat_1', ...), ('cat_2', ...), ...]"
,"final_estimator  final_estimator: estimator, default=None A classifier which will be used to combine the base estimators. The default classifier is a :class:`~sklearn.linear_model.LogisticRegression`.",LogisticRegre...ndom_state=42)
,"cv  cv: int, cross-validation generator, iterable, or ""prefit"", default=None Determines the cross-validation splitting strategy used in `cross_val_predict` to train `final_estimator`. Possible inputs for cv are: * None, to use the default 5-fold cross validation, * integer, to specify the number of folds in a (Stratified) KFold, * An object to be used as a cross-validation generator, * An iterable yielding train, test splits, * `""prefit""`, to assume the `estimators` are prefit. In this case, the  estimators will not be refitted. For integer/None inputs, if the estimator is a classifier and y is either binary or multiclass, :class:`~sklearn.model_selection.StratifiedKFold` is used. In all other cases, :class:`~sklearn.model_selection.KFold` is used. These splitters are instantiated with `shuffle=False` so the splits will be the same across calls. Refer :ref:`User Guide ` for the various cross-validation strategies that can be used here. If ""prefit"" is passed, it is assumed that all `estimators` have been fitted already. The `final_estimator_` is trained on the `estimators` predictions on the full training set and are **not** cross validated predictions. Please note that if the models have been trained on the same data to train the stacking model, there is a very high risk of overfitting. .. versionadded:: 1.1  The 'prefit' option was added in 1.1 .. note::  A larger number of split will provide no benefits if the number  of training samples is large enough. Indeed, the training time  will increase. ``cv`` is not used for model evaluation but for  prediction.",5
,"stack_method  stack_method: {'auto', 'predict_proba', 'decision_function', 'predict'}, default='auto' Methods called for each base estimator. It can be: * if 'auto', it will try to invoke, for each estimator,  `'predict_proba'`, `'decision_function'` or `'predict'` in that  order. * otherwise, one of `'predict_proba'`, `'decision_function'` or  `'predict'`. If the method is not implemented by the estimator, it  will raise an error.",'predict_proba'
,"n_jobs  n_jobs: int, default=None The number of jobs to run in parallel for `fit` of all `estimators`. `None` means 1 unless in a `joblib.parallel_backend` context. -1 means using all processors. See :term:`Glossary ` for more details.",
,"passthrough  passthrough: bool, default=False When False, only the predictions of estimators will be used as training data for `final_estimator`. When True, the `final_estimator` is trained on the predictions as well as the original training data.",False
,"verbose  verbose: int, default=0 Verbosity level.",0

0,1,2
,boosting_type,'gbdt'
,num_leaves,91
,max_depth,7
,learning_rate,0.09166541113559948
,n_estimators,1000
,subsample_for_bin,200000
,objective,
,class_weight,
,min_split_gain,0.8430984187798867
,min_child_weight,0.006701718272616044

0,1,2
,boosting_type,'gbdt'
,num_leaves,91
,max_depth,7
,learning_rate,0.09166541113559948
,n_estimators,2000
,subsample_for_bin,200000
,objective,
,class_weight,
,min_split_gain,0.8430984187798867
,min_child_weight,0.006701718272616044

0,1,2
,"objective  objective: typing.Union[str, xgboost.sklearn._SklObjWProto, typing.Callable[[typing.Any, typing.Any], typing.Tuple[numpy.ndarray, numpy.ndarray]], NoneType] Specify the learning task and the corresponding learning objective or a custom objective function to be used. For custom objective, see :doc:`/tutorials/custom_metric_obj` and :ref:`custom-obj-metric` for more information, along with the end note for function signatures.",'binary:logistic'
,"base_score  base_score: typing.Union[float, typing.List[float], NoneType] The initial prediction score of all instances, global bias.",
,booster,
,"callbacks  callbacks: typing.Optional[typing.List[xgboost.callback.TrainingCallback]] List of callback functions that are applied at end of each iteration. It is possible to use predefined callbacks by using :ref:`Callback API `. .. note::  States in callback are not preserved during training, which means callback  objects can not be reused for multiple training sessions without  reinitialization or deepcopy. .. code-block:: python  for params in parameters_grid:  # be sure to (re)initialize the callbacks before each run  callbacks = [xgb.callback.LearningRateScheduler(custom_rates)]  reg = xgboost.XGBRegressor(**params, callbacks=callbacks)  reg.fit(X, y)",
,colsample_bylevel  colsample_bylevel: typing.Optional[float] Subsample ratio of columns for each level.,
,colsample_bynode  colsample_bynode: typing.Optional[float] Subsample ratio of columns for each split.,
,colsample_bytree  colsample_bytree: typing.Optional[float] Subsample ratio of columns when constructing each tree.,0.6741848423036123
,"device  device: typing.Optional[str] .. versionadded:: 2.0.0 Device ordinal, available options are `cpu`, `cuda`, and `gpu`.",
,"early_stopping_rounds  early_stopping_rounds: typing.Optional[int] .. versionadded:: 1.6.0 - Activates early stopping. Validation metric needs to improve at least once in  every **early_stopping_rounds** round(s) to continue training. Requires at  least one item in **eval_set** in :py:meth:`fit`. - If early stopping occurs, the model will have two additional attributes:  :py:attr:`best_score` and :py:attr:`best_iteration`. These are used by the  :py:meth:`predict` and :py:meth:`apply` methods to determine the optimal  number of trees during inference. If users want to access the full model  (including trees built after early stopping), they can specify the  `iteration_range` in these inference methods. In addition, other utilities  like model plotting can also use the entire model. - If you prefer to discard the trees after `best_iteration`, consider using the  callback function :py:class:`xgboost.callback.EarlyStopping`. - If there's more than one item in **eval_set**, the last entry will be used for  early stopping. If there's more than one metric in **eval_metric**, the last  metric will be used for early stopping.",
,enable_categorical  enable_categorical: bool See the same parameter of :py:class:`DMatrix` for details.,False

0,1,2
,"objective  objective: typing.Union[str, xgboost.sklearn._SklObjWProto, typing.Callable[[typing.Any, typing.Any], typing.Tuple[numpy.ndarray, numpy.ndarray]], NoneType] Specify the learning task and the corresponding learning objective or a custom objective function to be used. For custom objective, see :doc:`/tutorials/custom_metric_obj` and :ref:`custom-obj-metric` for more information, along with the end note for function signatures.",'binary:logistic'
,"base_score  base_score: typing.Union[float, typing.List[float], NoneType] The initial prediction score of all instances, global bias.",
,booster,
,"callbacks  callbacks: typing.Optional[typing.List[xgboost.callback.TrainingCallback]] List of callback functions that are applied at end of each iteration. It is possible to use predefined callbacks by using :ref:`Callback API `. .. note::  States in callback are not preserved during training, which means callback  objects can not be reused for multiple training sessions without  reinitialization or deepcopy. .. code-block:: python  for params in parameters_grid:  # be sure to (re)initialize the callbacks before each run  callbacks = [xgb.callback.LearningRateScheduler(custom_rates)]  reg = xgboost.XGBRegressor(**params, callbacks=callbacks)  reg.fit(X, y)",
,colsample_bylevel  colsample_bylevel: typing.Optional[float] Subsample ratio of columns for each level.,
,colsample_bynode  colsample_bynode: typing.Optional[float] Subsample ratio of columns for each split.,
,colsample_bytree  colsample_bytree: typing.Optional[float] Subsample ratio of columns when constructing each tree.,0.6741848423036123
,"device  device: typing.Optional[str] .. versionadded:: 2.0.0 Device ordinal, available options are `cpu`, `cuda`, and `gpu`.",
,"early_stopping_rounds  early_stopping_rounds: typing.Optional[int] .. versionadded:: 1.6.0 - Activates early stopping. Validation metric needs to improve at least once in  every **early_stopping_rounds** round(s) to continue training. Requires at  least one item in **eval_set** in :py:meth:`fit`. - If early stopping occurs, the model will have two additional attributes:  :py:attr:`best_score` and :py:attr:`best_iteration`. These are used by the  :py:meth:`predict` and :py:meth:`apply` methods to determine the optimal  number of trees during inference. If users want to access the full model  (including trees built after early stopping), they can specify the  `iteration_range` in these inference methods. In addition, other utilities  like model plotting can also use the entire model. - If you prefer to discard the trees after `best_iteration`, consider using the  callback function :py:class:`xgboost.callback.EarlyStopping`. - If there's more than one item in **eval_set**, the last entry will be used for  early stopping. If there's more than one metric in **eval_metric**, the last  metric will be used for early stopping.",
,enable_categorical  enable_categorical: bool See the same parameter of :py:class:`DMatrix` for details.,False

0,1,2
,"penalty  penalty: {'l1', 'l2', 'elasticnet', None}, default='l2' Specify the norm of the penalty: - `None`: no penalty is added; - `'l2'`: add a L2 penalty term and it is the default choice; - `'l1'`: add a L1 penalty term; - `'elasticnet'`: both L1 and L2 penalty terms are added. .. warning::  Some penalties may not work with some solvers. See the parameter  `solver` below, to know the compatibility between the penalty and  solver. .. versionadded:: 0.19  l1 penalty with SAGA solver (allowing 'multinomial' + L1) .. deprecated:: 1.8  `penalty` was deprecated in version 1.8 and will be removed in 1.10.  Use `l1_ratio` instead. `l1_ratio=0` for `penalty='l2'`, `l1_ratio=1` for  `penalty='l1'` and `l1_ratio` set to any float between 0 and 1 for  `'penalty='elasticnet'`.",'deprecated'
,"C  C: float, default=1.0 Inverse of regularization strength; must be a positive float. Like in support vector machines, smaller values specify stronger regularization. `C=np.inf` results in unpenalized logistic regression. For a visual example on the effect of tuning the `C` parameter with an L1 penalty, see: :ref:`sphx_glr_auto_examples_linear_model_plot_logistic_path.py`.",1.0
,"l1_ratio  l1_ratio: float, default=0.0 The Elastic-Net mixing parameter, with `0 <= l1_ratio <= 1`. Setting `l1_ratio=1` gives a pure L1-penalty, setting `l1_ratio=0` a pure L2-penalty. Any value between 0 and 1 gives an Elastic-Net penalty of the form `l1_ratio * L1 + (1 - l1_ratio) * L2`. .. warning::  Certain values of `l1_ratio`, i.e. some penalties, may not work with some  solvers. See the parameter `solver` below, to know the compatibility between  the penalty and solver. .. versionchanged:: 1.8  Default value changed from None to 0.0. .. deprecated:: 1.8  `None` is deprecated and will be removed in version 1.10. Always use  `l1_ratio` to specify the penalty type.",0.0
,"dual  dual: bool, default=False Dual (constrained) or primal (regularized, see also :ref:`this equation `) formulation. Dual formulation is only implemented for l2 penalty with liblinear solver. Prefer `dual=False` when n_samples > n_features.",False
,"tol  tol: float, default=1e-4 Tolerance for stopping criteria.",0.0001
,"fit_intercept  fit_intercept: bool, default=True Specifies if a constant (a.k.a. bias or intercept) should be added to the decision function.",True
,"intercept_scaling  intercept_scaling: float, default=1 Useful only when the solver `liblinear` is used and `self.fit_intercept` is set to `True`. In this case, `x` becomes `[x, self.intercept_scaling]`, i.e. a ""synthetic"" feature with constant value equal to `intercept_scaling` is appended to the instance vector. The intercept becomes ``intercept_scaling * synthetic_feature_weight``. .. note::  The synthetic feature weight is subject to L1 or L2  regularization as all other features.  To lessen the effect of regularization on synthetic feature weight  (and therefore on the intercept) `intercept_scaling` has to be increased.",1
,"class_weight  class_weight: dict or 'balanced', default=None Weights associated with classes in the form ``{class_label: weight}``. If not given, all classes are supposed to have weight one. The ""balanced"" mode uses the values of y to automatically adjust weights inversely proportional to class frequencies in the input data as ``n_samples / (n_classes * np.bincount(y))``. Note that these weights will be multiplied with sample_weight (passed through the fit method) if sample_weight is specified. .. versionadded:: 0.17  *class_weight='balanced'*",
,"random_state  random_state: int, RandomState instance, default=None Used when ``solver`` == 'sag', 'saga' or 'liblinear' to shuffle the data. See :term:`Glossary ` for details.",42
,"solver  solver: {'lbfgs', 'liblinear', 'newton-cg', 'newton-cholesky', 'sag', 'saga'}, default='lbfgs' Algorithm to use in the optimization problem. Default is 'lbfgs'. To choose a solver, you might want to consider the following aspects: - 'lbfgs' is a good default solver because it works reasonably well for a wide  class of problems. - For :term:`multiclass` problems (`n_classes >= 3`), all solvers except  'liblinear' minimize the full multinomial loss, 'liblinear' will raise an  error. - 'newton-cholesky' is a good choice for  `n_samples` >> `n_features * n_classes`, especially with one-hot encoded  categorical features with rare categories. Be aware that the memory usage  of this solver has a quadratic dependency on `n_features * n_classes`  because it explicitly computes the full Hessian matrix. - For small datasets, 'liblinear' is a good choice, whereas 'sag'  and 'saga' are faster for large ones; - 'liblinear' can only handle binary classification by default. To apply a  one-versus-rest scheme for the multiclass setting one can wrap it with the  :class:`~sklearn.multiclass.OneVsRestClassifier`. .. warning::  The choice of the algorithm depends on the penalty chosen (`l1_ratio=0`  for L2-penalty, `l1_ratio=1` for L1-penalty and `0 < l1_ratio < 1` for  Elastic-Net) and on (multinomial) multiclass support:  ================= ======================== ======================  solver l1_ratio multinomial multiclass  ================= ======================== ======================  'lbfgs' l1_ratio=0 yes  'liblinear' l1_ratio=1 or l1_ratio=0 no  'newton-cg' l1_ratio=0 yes  'newton-cholesky' l1_ratio=0 yes  'sag' l1_ratio=0 yes  'saga' 0<=l1_ratio<=1 yes  ================= ======================== ====================== .. note::  'sag' and 'saga' fast convergence is only guaranteed on features  with approximately the same scale. You can preprocess the data with  a scaler from :mod:`sklearn.preprocessing`. .. seealso::  Refer to the :ref:`User Guide ` for more  information regarding :class:`LogisticRegression` and more specifically the  :ref:`Table `  summarizing solver/penalty supports. .. versionadded:: 0.17  Stochastic Average Gradient (SAG) descent solver. Multinomial support in  version 0.18. .. versionadded:: 0.19  SAGA solver. .. versionchanged:: 0.22  The default solver changed from 'liblinear' to 'lbfgs' in 0.22. .. versionadded:: 1.2  newton-cholesky solver. Multinomial support in version 1.6.",'lbfgs'


In [90]:
stacked_test_preds = stacking_model.predict_proba(X_test)[:, 1]

In [91]:
stacked_test_preds 

array([0.94925755, 0.03849563, 0.95725309, ..., 0.04931751, 0.11198631,
       0.04245551], shape=(270000,))

In [92]:
submission_pipeline(stacked_test_preds,"stacked_submission-base-xg-light-cat-meta-catBoost.csv")

Submission saved: stacked_submission-base-xg-light-cat-meta-catBoost.csv


##  XGB+CAT TUNED Stacking CLASSIFIER - RidgeClassfier  Meta Model

In [None]:
from catboost import CatBoostClassifier
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier

from sklearn.ensemble import StackingClassifier,VotingClassifier
from sklearn.linear_model import LogisticRegression,RidgeClassifier

import json

In [None]:
cat_params=json.load(open("/content/best_params_cat.json"))

In [None]:
xgb_params={
        "n_estimators"     : 972,
    "learning_rate"    : 0.08233334476657686,
    "max_depth"        : 3,
    "subsample"        : 0.6967792979720865,
    "colsample_bytree" : 0.7773146292728021,
    "reg_alpha"        : 1.911349598671315,
    "reg_lambda"       : 0.6194119678307304,
    "tree_method"      : "hist",
    "eval_metric"      : "logloss",
  # GPU acceleration (Colab T4/A100)
    "n_jobs"           : -1,
}

In [None]:
base_models = [
    ('cat_1',  CatBoostClassifier(**cat_params, random_seed=777, verbose=0, task_type='GPU')),
    ('cat_2',CatBoostClassifier(**cat_params, random_seed=42, verbose=0, task_type='GPU') ),
    ('cat_3',CatBoostClassifier(**cat_params, random_seed=1200
    , verbose=0, task_type='GPU') ),
    ('xgb_1',XGBClassifier(**xgb_params, random_state=99, device='cuda') ),
    ('xgb_2', XGBClassifier(**xgb_params, random_state=2024, device='cuda')),
    ('xgb_3', XGBClassifier(**xgb_params, random_state=2026, device='cuda')),


]

stacking_model = StackingClassifier(estimators=base_models,
                                    final_estimator=RidgeClassifier(alpha=100),
                                    cv=5,
                                    stack_method='predict_proba')

In [None]:
stacking_model.fit(X, Y)

In [None]:
final_preds = stacking_model.decision_function(X_test)
final_predictions =  1 / (1 + np.exp(-final_preds))

In [None]:
submission_pipeline(final_predictions,"stacked_submission10-base-xg-light-cat-meta-rigde.csv")