In [25]:
from ucimlrepo import fetch_ucirepo
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, roc_auc_score
from feature_engine.discretisation import EqualFrequencyDiscretiser
from feature_engine.encoding import WoEEncoder
from feature_engine.pipeline import Pipeline
import pandas as pd
from category_encoders.woe import WOEEncoder

# Step 1: Load data
# The dataset is pulled by its numeric repository id; the id is named so it
# can be changed in one place.
# NOTE(review): id 144 is presumably the Statlog German Credit data - confirm.
UCI_DATASET_ID = 144
data = fetch_ucirepo(id=UCI_DATASET_ID)

In [26]:
from feature_engine.imputation import CategoricalImputer, MeanMedianImputer



In [27]:
# Baseline scoring model: impute -> bin numeric columns -> WoE-encode every
# column -> logistic regression, evaluated on a stratified hold-out split.

# Tunable settings, collected here so no magic numbers are buried below.
LABEL_MAP = {1: 0, 2: 1}    # raw target code -> binary label fed to the model
# NOTE(review): presumably 1 = good credit and 2 = bad credit - confirm
# against the dataset documentation.
TEST_SIZE = 0.3             # fraction of rows held out for evaluation
SEED = 42                   # fixes the train/test split
N_BINS = 5                  # quantile bins per numeric feature
WOE_REGULARIZATION = 0.01   # passed to WOEEncoder(regularization=...)
MAX_ITER = 1000             # passed to LogisticRegression(max_iter=...)

# Features and target come from the `data` object fetched in the loading cell.
X = data.data.features.copy()
y_raw = data.data.targets.squeeze()

print(y_raw)

# Convert target to binary 0/1.
# Series.map turns every label that is missing from LABEL_MAP into NaN without
# raising, so a dataset with a different target coding would otherwise surface
# later as a harder-to-read error. Check right here and fail explicitly.
y = y_raw.map(LABEL_MAP)
unmapped = y.isna()
if unmapped.any():
    raise ValueError(
        f"Unexpected target labels {y_raw[unmapped].unique().tolist()!r}; "
        f"expected only {sorted(LABEL_MAP)!r}"
    )

# Identify categorical and numerical features
categorical_vars = X.select_dtypes(include='object').columns.tolist()
numerical_vars = X.select_dtypes(include='number').columns.tolist()

# Split train/test (stratified so both parts keep the same class balance)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=TEST_SIZE, random_state=SEED, stratify=y
)

# Build pipeline
# - WOEEncoder is the category_encoders class (it takes `cols` and
#   `regularization`), not feature_engine's WoEEncoder that is also imported.
# - return_object=True makes the discretiser return the bins as object dtype,
#   so the binned numeric columns are encoded like the categorical ones.
pipe = Pipeline([
    ("cat_imputer", CategoricalImputer(imputation_method='missing', variables=categorical_vars)),
    ("num_imputer", MeanMedianImputer(imputation_method='median', variables=numerical_vars)),
    ("discretiser", EqualFrequencyDiscretiser(q=N_BINS, variables=numerical_vars, return_object=True)),
    ("woe_encoder", WOEEncoder(cols=categorical_vars + numerical_vars, regularization=WOE_REGULARIZATION)),
    ("model", LogisticRegression(max_iter=MAX_ITER, class_weight='balanced'))
])

# Fit pipeline
pipe.fit(X_train, y_train)

# Predict and evaluate on the hold-out set
y_pred = pipe.predict(X_test)
y_prob = pipe.predict_proba(X_test)[:, 1]  # column 1 = probability of label 1

print("Classification Report:")
print(classification_report(y_test, y_pred))
print(f"ROC AUC Score: {roc_auc_score(y_test, y_prob):.4f}")

0      1
1      2
2      1
3      1
4      2
      ..
995    1
996    1
997    1
998    2
999    1
Name: class, Length: 1000, dtype: int64
Classification Report:
              precision    recall  f1-score   support

           0       0.88      0.71      0.79       210
           1       0.54      0.78      0.64        90

    accuracy                           0.73       300
   macro avg       0.71      0.75      0.71       300
weighted avg       0.78      0.73      0.74       300

ROC AUC Score: 0.8030


In [28]:
# Look up the fitted final step of the pipeline and show the shape of its
# coefficient matrix.
fitted_model = pipe.named_steps["model"]
fitted_model.coef_.shape

(1, 20)