In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split


# Load the raw Telco churn export (read from the current working directory).
df = pd.read_csv("WA_Fn-UseC_-Telco-Customer-Churn.csv")

# Coerce TotalCharges to numeric; entries that cannot be parsed become NaN
# and are removed by the dropna-based row filter below.
df['TotalCharges'] = pd.to_numeric(df['TotalCharges'], errors='coerce')

# Features: every column except the target and the customer identifier.
X = df.drop(['Churn', 'customerID'], axis=1)

# Binary target: Yes -> 1, No -> 0.
# Series.map is used instead of Series.replace: replacing strings with ints
# depends on pandas silently downcasting the object result, which is
# deprecated (FutureWarning). astype("int64") fails loudly if any label
# other than Yes/No is present instead of letting it through unnoticed.
y = df['Churn'].map({"Yes": 1, "No": 0}).astype("int64")

# Keep only rows whose features are all present, and align y to those rows.
data_to_keep = X.dropna(axis=0).index
X_cleaned = X.loc[data_to_keep]
y_cleaned = y.loc[data_to_keep]

# Stratified 80/20 split with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X_cleaned,
    y_cleaned,
    test_size=0.2,
    random_state=42,
    stratify=y_cleaned,
)

print(f"Data cleaned and split. X_train size: {len(X_train)} rows.")

Data cleaned and split. X_train size: 5625 rows.


  y = df['Churn'].replace({"Yes": 1, 'No': 0})


In [13]:
import pandas as pd
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline,make_pipeline
from sklearn.preprocessing import OneHotEncoder
from sklearn.svm import SVC
from sklearn.preprocessing import PolynomialFeatures
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import RandomizedSearchCV
from sklearn.feature_selection import SelectKBest,f_classif
from sklearn.metrics import f1_score,confusion_matrix,classification_report

In [3]:
class CustomEncoder(BaseEstimator, TransformerMixin):
    """Integer-encode the binary and ordinal string columns of a DataFrame.

    Columns handled (each one only if it is present in the input):
      * Partner, Dependents, PhoneService, PaperlessBilling: Yes -> 1, No -> 0
      * gender: Male -> 1, Female -> 0
      * Contract: Month-to-month -> 0, One year -> 1, Two year -> 2

    All other columns are returned unchanged. The transformer is stateless:
    ``fit`` learns nothing from the data.
    """

    def fit(self, X, y=None):
        """No-op fit; returns ``self``."""
        return self

    @staticmethod
    def _recode(column, mapping):
        """Swap values found in ``mapping`` for their code and cast to int.

        Values that are not keys of ``mapping`` are passed through untouched
        before the cast. Element-wise ``Series.map`` is used rather than
        ``Series.replace`` because replacing strings with ints makes pandas
        downcast the object result implicitly, which is deprecated and emits
        a FutureWarning on every call.
        """
        return column.map(lambda value: mapping.get(value, value)).astype(int)

    def transform(self, X):
        """Return an encoded copy of ``X``; the input frame is not modified.

        Raises:
            ValueError: if a handled column still holds something that cannot
                be cast to int after encoding (for example an unexpected
                label or a missing value).
        """
        X_copy = X.copy()

        yes_no = {"Yes": 1, 'No': 0}
        for col in ('Partner', 'Dependents', 'PhoneService', 'PaperlessBilling'):
            if col in X_copy.columns:
                X_copy[col] = self._recode(X_copy[col], yes_no)

        if 'gender' in X_copy.columns:
            X_copy['gender'] = self._recode(X_copy['gender'], {'Male': 1, 'Female': 0})

        # Ordinal encoding: longer commitment -> larger code.
        contract_mapping = {'Month-to-month': 0, 'One year': 1, 'Two year': 2}
        if 'Contract' in X_copy.columns:
            # Labels missing from the mapping become NaN, so astype(int) raises.
            X_copy['Contract'] = X_copy['Contract'].map(contract_mapping).astype(int)

        return X_copy

In [4]:
class DropNaImputer(BaseEstimator, TransformerMixin):
    """Stateless transformer that discards every row containing a missing value.

    NOTE(review): ``transform`` can return fewer rows than it receives, while
    any target passed alongside the features is left untouched — confirm it
    is only used where no row-aligned ``y`` is required.
    """

    def fit(self, X, y=None):
        """Nothing to learn; returns ``self``."""
        return self

    def transform(self, X):
        """Return ``X`` without the rows that have at least one missing value."""
        rows_without_gaps = X.dropna(axis="index", how="any")
        return rows_without_gaps

In [5]:
# Unordered categorical columns that are one-hot encoded downstream.
nominal_cols = [
    'MultipleLines',
    'InternetService',
    'OnlineSecurity',
    'OnlineBackup',
    'DeviceProtection',
    'TechSupport',
    'StreamingTV',
    'StreamingMovies',
    'PaymentMethod',
]

# Dense one-hot encoding for the nominal columns; every other column is
# forwarded unchanged (remainder='passthrough'). Categories not seen during
# fit are ignored rather than raising, and output feature names are emitted
# without the transformer-name prefix.
one_hot_preprocessor = ColumnTransformer(
    transformers=[(
        'onehot',
        OneHotEncoder(sparse_output=False, handle_unknown='ignore'),
        nominal_cols,
    )],
    verbose_feature_names_out=False,
    remainder='passthrough',
)

In [19]:
# Two-stage feature preparation: integer-encode the binary/ordinal columns,
# then one-hot encode the nominal ones. No missing-value step is included:
# rows with NaN are already removed before the train/test split.
preprocessing_pipline = Pipeline(
    steps=[
        ('custom_encoder', CustomEncoder()),
        ('one_hot_encode', one_hot_preprocessor),
    ]
)

In [9]:
# End-to-end estimator:
#   preprocessing -> univariate feature selection (f_classif scores)
#   -> standardisation -> support vector classifier.
# The pipeline stores the preprocessing_pipline object that exists when this
# cell runs, so re-run this cell whenever preprocessing_pipline is redefined.
FULL_MODEL_PIPELINE = Pipeline(
    steps=[
        ('preprocessor', preprocessing_pipline),
        ('selector', SelectKBest(score_func=f_classif)),
        ('scaler', StandardScaler()),
        ('svc', SVC(random_state=0)),
    ]
)

In [10]:
# List every tunable parameter name (the <step>__<param> keys usable in a search space).
print(sorted(FULL_MODEL_PIPELINE.get_params()))

['memory', 'preprocessor', 'preprocessor__custom_encoder', 'preprocessor__memory', 'preprocessor__one_hot_encode', 'preprocessor__steps', 'preprocessor__transform_input', 'preprocessor__verbose', 'scaler', 'scaler__copy', 'scaler__with_mean', 'scaler__with_std', 'selector', 'selector__k', 'selector__score_func', 'steps', 'svc', 'svc__C', 'svc__break_ties', 'svc__cache_size', 'svc__class_weight', 'svc__coef0', 'svc__decision_function_shape', 'svc__degree', 'svc__gamma', 'svc__kernel', 'svc__max_iter', 'svc__probability', 'svc__random_state', 'svc__shrinking', 'svc__tol', 'svc__verbose', 'transform_input', 'verbose']


In [11]:
import pandas as pd
from sklearn.model_selection import train_test_split


# Load the raw Telco churn export (read from the current working directory).
df = pd.read_csv("WA_Fn-UseC_-Telco-Customer-Churn.csv")

# Coerce TotalCharges to numeric; entries that cannot be parsed become NaN
# and are removed by the dropna-based row filter below.
df['TotalCharges'] = pd.to_numeric(df['TotalCharges'], errors='coerce')

# Features: every column except the target and the customer identifier.
X = df.drop(['Churn', 'customerID'], axis=1)

# Binary target: Yes -> 1, No -> 0.
# Series.map is used instead of Series.replace: replacing strings with ints
# depends on pandas silently downcasting the object result, which is
# deprecated (FutureWarning). astype("int64") fails loudly if any label
# other than Yes/No is present instead of letting it through unnoticed.
y = df['Churn'].map({"Yes": 1, "No": 0}).astype("int64")

# Keep only rows whose features are all present, and align y to those rows.
data_to_keep = X.dropna(axis=0).index
X_cleaned = X.loc[data_to_keep]
y_cleaned = y.loc[data_to_keep]

# Stratified 80/20 split with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X_cleaned,
    y_cleaned,
    test_size=0.2,
    random_state=42,
    stratify=y_cleaned,
)

print(f"Data cleaned and split. X_train size: {len(X_train)} rows.")

Data cleaned and split. X_train size: 5625 rows.


  y = df['Churn'].replace({"Yes": 1, 'No': 0})


In [15]:
# Search space for the hyper-parameter search, keyed as <pipeline step>__<parameter>.
# NOTE(review): the upper bound of 100 for selector k may exceed the number of
# columns produced by preprocessing — confirm against the transformed width.
hyper_params = dict(
    selector__k=range(7, 100),
    svc__C=[1, 10, 100, 1000],
    svc__gamma=[0.001, 0.0001],
)

In [21]:
# 1. Search space, keyed as <pipeline step>__<parameter>.
# NOTE(review): the upper bound of 100 for selector k may exceed the number of
# columns produced by preprocessing — confirm against the transformed width.
hyper_params = dict(
    selector__k=range(7, 100),
    svc__C=[1, 10, 100, 1000],
    svc__gamma=[0.001, 0.0001],
)

# 2. Run the randomized hyper-parameter search
from sklearn.model_selection import RandomizedSearchCV
from sklearn.metrics import classification_report

# 40 candidates sampled from hyper_params, each scored by F1 with 4-fold CV.
# random_state pins which candidates are sampled, so re-running the cell
# evaluates the same configurations and results stay comparable.
grid = RandomizedSearchCV(
    FULL_MODEL_PIPELINE,
    hyper_params,
    scoring="f1",
    cv=4,
    n_iter=40,
    n_jobs=-1,
    verbose=2,
    random_state=42,
)

grid.fit(X_train, y_train)

print("Best Parameters:", grid.best_params_)

# 3. Evaluate on the held-out test split
y_pred = grid.predict(X_test)
print(classification_report(y_test, y_pred))

Fitting 4 folds for each of 40 candidates, totalling 160 fits


ValueError: 
All the 160 fits failed.
It is very likely that your model is misconfigured.
You can try to debug the error by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
160 fits failed with the following error:
Traceback (most recent call last):
  File "c:\Users\benmo\AppData\Local\Programs\Python\Python312\Lib\site-packages\sklearn\model_selection\_validation.py", line 859, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "c:\Users\benmo\AppData\Local\Programs\Python\Python312\Lib\site-packages\sklearn\base.py", line 1365, in wrapper
    return fit_method(estimator, *args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\benmo\AppData\Local\Programs\Python\Python312\Lib\site-packages\sklearn\pipeline.py", line 655, in fit
    Xt = self._fit(X, y, routed_params, raw_params=params)
         ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\benmo\AppData\Local\Programs\Python\Python312\Lib\site-packages\sklearn\pipeline.py", line 563, in _fit
    self._validate_steps()
  File "c:\Users\benmo\AppData\Local\Programs\Python\Python312\Lib\site-packages\sklearn\pipeline.py", line 340, in _validate_steps
    raise TypeError(
TypeError: All intermediate steps should be transformers and implement fit and transform or be the string 'passthrough' 'Pipeline(steps=[('custom_encoder', CustomEncoder()),
                ('one_hot_encode',
                 ColumnTransformer(remainder='passthrough',
                                   transformers=[('onehot',
                                                  OneHotEncoder(handle_unknown='ignore',
                                                                sparse_output=False),
                                                  ['MultipleLines',
                                                   'InternetService',
                                                   'OnlineSecurity',
                                                   'OnlineBackup',
                                                   'DeviceProtection',
                                                   'TechSupport', 'StreamingTV',
                                                   'StreamingMovies',
                                                   'PaymentMethod'])],
                                   verbose_feature_names_out=False)),
                "('imputer', DropNaImputer()),"])' (type <class 'sklearn.pipeline.Pipeline'>) doesn't
