In [None]:
# TargetEncoder
!pip install -q scikit-learn==1.4
# KerasClassifier
!pip install -q --no-deps scikeras

import pandas as pd
import numpy as np
import keras
from sklearn.model_selection import train_test_split
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler, FunctionTransformer
from sklearn.preprocessing import OneHotEncoder, TargetEncoder
from sklearn.pipeline import make_pipeline
from sklearn.metrics import roc_auc_score
from scikeras.wrappers import KerasClassifier

# Load the training table (indexed by 'id') and carve out a 20% hold-out,
# stratified on the target so both parts keep the same class balance.
train = pd.read_csv('/kaggle/input/playground-series-s4e7/train.csv', index_col='id')
X_tr, X_va = train_test_split(
    train,
    test_size=0.2,
    stratify=train.Response,
    random_state=1,
)
# Detach the target column from each feature frame.
y_tr, y_va = X_tr.pop('Response'), X_va.pop('Response')

# The Keras model with two hidden layers
def get_model(meta):
    """Build the (uncompiled) feed-forward network used by the classifier.

    Args:
        meta: Mapping that provides ``"X_shape_"``; every entry after the
            first is used as the model's input shape (presumably the
            per-sample feature shape supplied by scikeras -- confirm).

    Returns:
        A ``keras.models.Sequential`` with SELU hidden layers of 128 and 64
        units followed by a single sigmoid output unit.
    """
    input_shape = meta["X_shape_"][1:]
    # Both hidden layers share the same SELU / lecun_normal configuration.
    hidden_kwargs = dict(kernel_initializer='lecun_normal', activation='selu')
    return keras.models.Sequential([
        keras.layers.Input(input_shape),
        keras.layers.Dense(128, **hidden_kwargs),
        keras.layers.Dense(64, **hidden_kwargs),
        keras.layers.Dense(1, kernel_initializer='lecun_normal', activation='sigmoid'),
    ])

# The pipeline
def _as_float32(X):
    """Cast *X* to float32.

    A named function (instead of a lambda) so that the FunctionTransformer
    steps, and therefore the preprocessing pipeline, can be pickled.
    """
    return X.astype(np.float32)


# Columns that are one-hot encoded; 'Annual_Premium' is target-encoded and
# every remaining column is only cast to float32 and standardized.
ohe_columns = ['Gender', 'Region_Code', 'Vehicle_Age',
               'Vehicle_Damage', 'Policy_Sales_Channel',
               'Vintage']
pipeline = make_pipeline(
    ColumnTransformer(
        [
            ('one-hot-encode',
             OneHotEncoder(
                 drop='first',
                 sparse_output=True,
                 dtype=np.float32,
                 # Columns with more than 200 levels get their rarest
                 # levels merged into one "infrequent" bucket.
                 max_categories=200,
                 # Without this the default ('error') makes transform()
                 # raise on any category that did not occur in the fit
                 # data; unseen values now go to the infrequent bucket
                 # when one exists, otherwise they encode as all zeros.
                 handle_unknown='infrequent_if_exist',
             ),
             ohe_columns),
            ('target-encode',
             make_pipeline(TargetEncoder(),
                           FunctionTransformer(_as_float32),
                           StandardScaler()),
             ['Annual_Premium']),
        ],
        remainder=make_pipeline(FunctionTransformer(_as_float32),
                                StandardScaler(copy=False)),
    ),
    KerasClassifier(
        get_model,
        loss="binary_crossentropy",
        optimizer=keras.optimizers.AdamW(learning_rate=0.003),
        # 5% of the fit data is held out by Keras; both callbacks below
        # use their default monitored quantity on that split.
        validation_split=0.05,
        batch_size=1024,
        validation_batch_size=65536,
        epochs=20,
        callbacks=[keras.callbacks.ReduceLROnPlateau(patience=3),
                   keras.callbacks.EarlyStopping(patience=5)]
    )
)

# Train the whole pipeline on the training split, then report the ROC AUC
# obtained on the hold-out split.
pipeline.fit(X_tr, y_tr)
va_proba = pipeline.predict_proba(X_va)
y_pred = va_proba[:, 1]  # second predict_proba column is used as the score
print(f"# AUC: {roc_auc_score(y_va, y_pred):.5f}")