In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, f1_score, roc_auc_score
import joblib

# PyTorch and skorch imports
import torch
import torch.nn as nn
import torch.optim as optim
from skorch import NeuralNetClassifier

# Set seed for reproducibility
# SEED is also reused as `random_state` for the train/test split and the
# scikit-learn estimators elsewhere in this file.
SEED = 42
np.random.seed(SEED)      # seeds NumPy's global random state
torch.manual_seed(SEED)   # seeds PyTorch's default random generator

# --- 1. Data Loading and Preprocessing ---
# Default location of the Telco customer churn CSV.
TELCO_CHURN_URL = "https://raw.githubusercontent.com/IBM/telco-customer-churn-on-icp4d/master/data/Telco-Customer-Churn.csv"


def load_and_preprocess_data(url=TELCO_CHURN_URL):
    """
    Loads the telco customer churn dataset and performs initial cleaning.

    Cleaning steps, in order:
      1. ``TotalCharges`` is converted to numeric; unparsable entries become NaN.
      2. The ``customerID`` column is dropped.
      3. Every row containing a NaN in any column is dropped.
      4. ``Churn`` is mapped from ``'Yes'``/``'No'`` to ``1``/``0``.

    Args:
        url: Source handed to ``pd.read_csv`` (URL, local path or file-like
            object). Defaults to the Telco churn CSV URL.

    Returns:
        The cleaned DataFrame. The index is not reset, so it keeps the row
        labels of the rows that survived the NaN filter.

    Raises:
        ValueError: If ``Churn`` contains a label other than 'Yes' or 'No'.
    """
    df = pd.read_csv(url)

    # errors='coerce' turns non-numeric entries into NaN so the dropna()
    # below removes those rows.
    df['TotalCharges'] = pd.to_numeric(df['TotalCharges'], errors='coerce')
    df = df.drop('customerID', axis=1)
    df = df.dropna()

    churn = df['Churn'].map({'Yes': 1, 'No': 0})
    # Series.map yields NaN for labels missing from the mapping; fail loudly
    # instead of handing a target with NaNs to the models.
    unmapped = churn.isna()
    if unmapped.any():
        unexpected = sorted(df.loc[unmapped, 'Churn'].astype(str).unique())
        raise ValueError(f"Unexpected Churn labels: {unexpected}")
    df['Churn'] = churn

    return df

def create_preprocessing_pipeline(X):
    """
    Builds an (unfitted) ColumnTransformer for numerical and categorical features.

    Column groups are inferred from the dtypes of ``X``: numeric columns are
    median-imputed and standardised, object columns are imputed with the
    constant ``'missing'`` and one-hot encoded into a dense array. Columns of
    any other dtype are not assigned to either transformer.

    Args:
        X: DataFrame whose dtypes determine the two column groups.

    Returns:
        A ColumnTransformer with a ``'num'`` and a ``'cat'`` branch.
    """
    numeric_cols = list(X.select_dtypes(include=[np.number]).columns)
    object_cols = list(X.select_dtypes(include=['object']).columns)

    # Numeric branch: fill gaps with the median, then standardise.
    numeric_branch = Pipeline([
        ('imputer', SimpleImputer(strategy='median')),
        ('scaler', StandardScaler()),
    ])

    # Categorical branch: fill gaps with a sentinel category, then one-hot
    # encode. Categories unseen during fit are ignored at transform time.
    categorical_branch = Pipeline([
        ('imputer', SimpleImputer(strategy='constant', fill_value='missing')),
        ('onehot', OneHotEncoder(handle_unknown='ignore', sparse_output=False)),
    ])

    return ColumnTransformer(transformers=[
        ('num', numeric_branch, numeric_cols),
        ('cat', categorical_branch, object_cols),
    ])

# --- 2. Deep Learning Model Definition (PyTorch) ---
class ChurnMLP(nn.Module):
    """
    Feed-forward network that emits one raw churn logit per input row.

    Architecture:
        Linear(num_features -> num_units) -> ReLU -> Dropout
        -> Linear(num_units -> num_units // 2) -> ReLU -> Dropout
        -> Linear(num_units // 2 -> 1)

    Args:
        num_features: Width of the input; falls back to 30 when ``None``.
        num_units: Width of the first hidden layer; the second hidden layer
            uses ``num_units // 2``.
        dropout_rate: Dropout probability applied after each hidden layer.
    """
    def __init__(self, num_features=None, num_units=100, dropout_rate=0.5):
        super().__init__()

        # Fixed fallback width for callers that do not pass the real one.
        input_width = 30 if num_features is None else num_features
        narrow_width = num_units // 2

        self.layer1 = nn.Linear(input_width, num_units)
        self.dropout1 = nn.Dropout(dropout_rate)

        self.layer2 = nn.Linear(num_units, narrow_width)
        self.dropout2 = nn.Dropout(dropout_rate)

        # Single output unit: the logit score for binary classification.
        self.layer3 = nn.Linear(narrow_width, 1)

    def forward(self, X):
        """Return raw logits of shape (batch, 1); no sigmoid is applied here."""
        # Cast so float64 inputs match the float32 layer weights.
        inputs = X.float()

        hidden = self.dropout1(self.layer1(inputs).relu())
        hidden = self.dropout2(self.layer2(hidden).relu())

        # Logits are left un-squashed for use with nn.BCEWithLogitsLoss.
        return self.layer3(hidden)


# --- 3. Main Execution Function with Comparative Analysis ---
def _positive_class_scores(estimator, X):
    """Return a 1-D array holding the positive-class probability for each row of X."""
    proba = np.asarray(estimator.predict_proba(X))
    # Collapse any trailing axes: the skorch net is trained on (N, 1) logits,
    # so its probabilities may come back as (N, 2, 1) instead of (N, 2).
    proba = proba.reshape(proba.shape[0], -1)
    positive_column = 1 if proba.shape[1] > 1 else 0
    return proba[:, positive_column]


def _predict_labels_and_scores(estimator, X):
    """Return (1-D predicted labels, 1-D positive-class scores) for X."""
    y_pred = np.asarray(estimator.predict(X)).ravel()
    return y_pred, _positive_class_scores(estimator, X)


def main_with_dl():
    """
    Trains and compares logistic regression, random forest and a skorch MLP
    on the telco churn data, then saves the best pipeline.

    Steps:
      1. Load the cleaned data and make a stratified 80/20 train/test split.
      2. For each model, grid-search hyperparameters with 3-fold CV (F1).
      3. Score each tuned model on the test split (F1, ROC AUC) and print a
         comparison table.
      4. Print a detailed report for the model with the highest test F1 and
         dump it to ``telco_churn_comparison_pipeline.joblib``.

    Side effects: downloads the dataset, prints to stdout and writes the
    joblib file into the current working directory.

    Raises:
        ValueError: If the training split contains no positive samples, so
            the positive-class weight cannot be computed.
    """
    print("--- 1. Data Loading and Preprocessing ---")
    df = load_and_preprocess_data()
    X = df.drop('Churn', axis=1)

    # float32 column vector (N, 1): the target shape/dtype the skorch net is
    # trained with, matching the (N, 1) logits of ChurnMLP.
    y = df['Churn'].values.astype(np.float32).reshape(-1, 1)

    # Split and Stratify
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=SEED, stratify=y
    )

    # 1-D views for the scikit-learn estimators and metrics, which expect a
    # flat target and emit a conversion warning for column vectors.
    y_train_flat = y_train.ravel()
    y_test_flat = y_test.ravel()

    preprocessor = create_preprocessing_pipeline(X)

    # Fit once on the training split only to learn the encoded feature width,
    # which is the input size of the MLP.
    # NOTE(review): this assumes every CV fold produces the same one-hot
    # width as the full training split - confirm for rare categories.
    num_features = preprocessor.fit_transform(X_train).shape[1]

    # Weight the positive class by the negative/positive ratio to counter
    # class imbalance in the loss.
    neg_count = np.sum(y_train == 0)
    pos_count = np.sum(y_train == 1)
    if pos_count == 0:
        raise ValueError(
            "Training split contains no positive samples; cannot compute pos_weight."
        )
    pos_weight = torch.tensor([neg_count / pos_count], dtype=torch.float32)

    dl_model = NeuralNetClassifier(
        ChurnMLP,
        module__num_features=num_features,
        criterion=nn.BCEWithLogitsLoss,
        optimizer=optim.Adam,
        max_epochs=20,
        iterator_train__shuffle=True,
        verbose=0,
        criterion__pos_weight=pos_weight
    )

    # --- Define all models and their hyperparameter grids ---
    models = {
        'logistic_regression': {
            'model': LogisticRegression(random_state=SEED, class_weight='balanced', max_iter=1000),
            'params': {'classifier__C': [0.1, 1, 10]}
        },
        'random_forest': {
            'model': RandomForestClassifier(random_state=SEED, class_weight='balanced'),
            'params': {'classifier__n_estimators': [100, 200], 'classifier__max_depth': [10, 20]}
        },
        'deep_learning_mlp': {
            'model': dl_model,
            'params': {
                'classifier__module__num_units': [50, 100],
                'classifier__lr': [0.001, 0.01],
                'classifier__max_epochs': [20, 40]
            }
        }
    }

    # Start below any attainable F1 so the first model is always recorded,
    # even if every model scores 0.
    best_score = float('-inf')
    best_model = None
    best_model_name = ""
    results = {}

    print("\n--- 2. Training and Hyperparameter Tuning (GridSearchCV) ---")

    # --- Train and evaluate each model using gridsearchCV ---
    for model_name, model_info in models.items():
        print(f"\nSTARTING: {model_name.replace('_', ' ').upper()}")

        estimator = model_info['model']
        pipeline = Pipeline(steps=[
            ('preprocessor', preprocessor),
            ('classifier', estimator)
        ])

        grid_search = GridSearchCV(
            pipeline,
            model_info['params'],
            cv=3,
            scoring='f1',
            n_jobs=-1,
            verbose=0
        )

        # Only the skorch net is given the (N, 1) target; the scikit-learn
        # classifiers get the flat one.
        if isinstance(estimator, NeuralNetClassifier):
            fit_target = y_train
        else:
            fit_target = y_train_flat
        grid_search.fit(X_train, fit_target)

        # Evaluate on test set
        y_pred, y_pred_proba = _predict_labels_and_scores(grid_search, X_test)

        f1 = f1_score(y_test_flat, y_pred)
        roc_auc = roc_auc_score(y_test_flat, y_pred_proba)

        results[model_name] = {'F1': f1, 'ROC AUC': roc_auc, 'Best Params': grid_search.best_params_}

        print(f"{model_name.replace('_', ' ').upper()} - Best F1 Score on Test: {f1:.4f}")
        print(f"Best parameters: {grid_search.best_params_}")

        # NOTE(review): the winner is chosen by test-set F1, so the final
        # test metrics are optimistically biased; consider selecting on
        # grid_search.best_score_ (CV F1) instead.
        if f1 > best_score:
            best_score = f1
            best_model = grid_search.best_estimator_
            best_model_name = model_name

    # --- 4. Final Comparative Analysis and Evaluation ---

    print("\n" + "="*70)
    print("                FINAL COMPARATIVE MODEL PERFORMANCE")
    print("="*70)

    # Display results in a table
    results_df = pd.DataFrame(results).T
    results_df = results_df.sort_values(by='F1', ascending=False)

    print(results_df[['F1', 'ROC AUC']].to_markdown())

    print("\n" + "="*70)
    print(f"BEST PERFORMING MODEL: {best_model_name.replace('_', ' ').upper()} (F1: {best_score:.4f})")
    print("="*70)

    # Final evaluation of best model
    y_pred, y_pred_proba = _predict_labels_and_scores(best_model, X_test)

    print(f"Accuracy: {accuracy_score(y_test_flat, y_pred):.4f}")
    print(f"F1 Score: {f1_score(y_test_flat, y_pred):.4f}")
    print(f"ROC AUC: {roc_auc_score(y_test_flat, y_pred_proba):.4f}")
    print("\nClassification Report (Best Model):")
    print(classification_report(y_test_flat, y_pred))

    # Save final pipeline to disk
    joblib.dump(best_model, 'telco_churn_comparison_pipeline.joblib')
    print("\nPipeline saved as telco_churn_comparison_pipeline.joblib")


# Run the full model comparison only when executed as a script, not on import.
if __name__ == "__main__":
    main_with_dl()


--- 1. Data Loading and Preprocessing ---

--- 2. Training and Hyperparameter Tuning (GridSearchCV) ---

STARTING: LOGISTIC REGRESSION


  y = column_or_1d(y, warn=True)


LOGISTIC REGRESSION - Best F1 Score on Test: 0.6069
Best parameters: {'classifier__C': 1}

STARTING: RANDOM FOREST


  return fit_method(estimator, *args, **kwargs)


RANDOM FOREST - Best F1 Score on Test: 0.6204
Best parameters: {'classifier__max_depth': 10, 'classifier__n_estimators': 100}

STARTING: DEEP LEARNING MLP




DEEP LEARNING MLP - Best F1 Score on Test: 0.6004
Best parameters: {'classifier__lr': 0.001, 'classifier__max_epochs': 40, 'classifier__module__num_units': 50}

                FINAL COMPARATIVE MODEL PERFORMANCE
|                     |       F1 |   ROC AUC |
|:--------------------|---------:|----------:|
| random_forest       | 0.62037  |  0.832012 |
| logistic_regression | 0.606925 |  0.835134 |
| deep_learning_mlp   | 0.600414 |  0.830416 |

BEST PERFORMING MODEL: RANDOM FOREST (F1: 0.6204)
Accuracy: 0.7669
F1 Score: 0.6204
ROC AUC: 0.8320

Classification Report (Best Model):
              precision    recall  f1-score   support

         0.0       0.88      0.79      0.83      1033
         1.0       0.55      0.72      0.62       374

    accuracy                           0.77      1407
   macro avg       0.72      0.75      0.73      1407
weighted avg       0.79      0.77      0.78      1407


Pipeline saved as telco_churn_comparison_pipeline.joblib
