In [None]:
import pandas as pd
from sklearn.pipeline import Pipeline
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.utils import resample
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.ensemble import ExtraTreesClassifier


class Oversampler(BaseEstimator, TransformerMixin):
    """Balance a binary-labelled DataFrame by resampling class 1 with replacement.

    Rows whose target equals 1 are drawn with replacement until there are as
    many of them as rows whose target equals 0. Both groups are then
    concatenated, shuffled, and split into features and target. Rows whose
    target is neither 0 nor 1 are not carried over.

    Note: ``transform`` returns an ``(X, y)`` tuple and changes the number of
    rows, so it does not follow the single-array transformer contract that
    ``sklearn.pipeline.Pipeline`` expects from intermediate steps. Call it
    directly on the training frame rather than using it as a pipeline step.

    Parameters
    ----------
    target_col : str, default='churn'
        Name of the label column inside the frame given to ``transform``.
    random_state : int, default=42
        Seed used for both the resampling and the final shuffle.
    """

    def __init__(self, target_col='churn', random_state=42):
        # BaseEstimator requires constructor arguments to be stored unmodified
        # under the same names so get_params()/clone() keep working.
        self.target_col = target_col
        self.random_state = random_state

    def fit(self, X, y=None):
        """Learn nothing and return ``self``; present for API compatibility."""
        return self

    def transform(self, X, y=None):
        """Return a class-balanced ``(features, target)`` pair built from ``X``.

        Parameters
        ----------
        X : pandas.DataFrame
            Frame that still contains the ``target_col`` column.
        y : ignored
            The labels are read from ``X[target_col]``.

        Returns
        -------
        tuple of (pandas.DataFrame, pandas.Series)
            The shuffled, balanced features (target column removed) and the
            matching target values, both with a fresh 0..n-1 index.

        Raises
        ------
        KeyError
            If ``target_col`` is not a column of ``X``.
        ValueError
            If there are class-0 rows but no class-1 rows to resample from.
        """
        labels = X[self.target_col]
        majority_class = X[labels == 0]
        minority_class = X[labels == 1]

        # Fail early with an actionable message instead of an opaque error
        # raised from inside the resampling routine.
        if minority_class.empty and not majority_class.empty:
            raise ValueError(
                f"Cannot oversample: no rows with {self.target_col!r} == 1."
            )

        minority_oversampled = resample(
            minority_class,
            replace=True,
            n_samples=len(majority_class),
            random_state=self.random_state,
        )

        df_balanced = pd.concat([majority_class, minority_oversampled])
        # Shuffle so the two classes are interleaved, then rebuild the index
        # because resampling with replacement produces duplicate labels.
        df_balanced = df_balanced.sample(
            frac=1, random_state=self.random_state
        ).reset_index(drop=True)
        return df_balanced.drop(columns=[self.target_col]), df_balanced[self.target_col]




In [None]:
# Columns handed to the one-hot encoder in the preprocessing step.
# NOTE(review): 'col1'/'col2' look like placeholder names — confirm against the data.
categorical_features = ["col1", "col2"]

In [None]:
# One-hot encode the categorical columns (handle_unknown='ignore' keeps
# categories unseen during fit from raising at transform time); every other
# column is passed through untouched.
preprocessor = ColumnTransformer(
    [('cat', OneHotEncoder(handle_unknown='ignore'), categorical_features)],
    remainder='passthrough',
)


# Keyword arguments unpacked into ExtraTreesClassifier when the pipeline is built.
# NOTE(review): presumably the result of an earlier hyperparameter search that
# is not part of the visible cells — confirm the provenance.
best_params = dict(
    bootstrap=False,
    ccp_alpha=0.0,
    class_weight='balanced',
    criterion='gini',
    max_depth=20,
    max_features='log2',
    max_leaf_nodes=None,
    max_samples=None,
    min_impurity_decrease=0.0,
    min_samples_leaf=10,
    min_samples_split=15,
    min_weight_fraction_leaf=0.0,
    n_estimators=250,
    n_jobs=None,
    oob_score=False,
    random_state=42,
    verbose=0,
    warm_start=False,
)


pipeline = Pipeline([
    ('oversampler', Oversampler()),
    ('preprocessor', preprocessor),
    ('classifier', ExtraTreesClassifier(**best_params))
])

In [None]:
pipeline.fit(df, df['churn'])


preds = pipeline.predict(df)