In [3]:

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, accuracy_score, roc_auc_score
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.impute import SimpleImputer


file_path = 'Churn_Modelling.csv' 
data = pd.read_csv(file_path)


data = data.drop(['RowNumber', 'CustomerId', 'Surname'], axis=1)


X = data.drop('Exited', axis=1)
y = data['Exited']


categorical_features = ['Geography', 'Gender']
numerical_features = [col for col in X.columns if col not in categorical_features]


preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), numerical_features),
        ('cat', OneHotEncoder(), categorical_features)
    ])


models = {
    'Logistic Regression': LogisticRegression(random_state=42, max_iter=1000),
    'Random Forest': RandomForestClassifier(random_state=42, n_estimators=100),
    'Gradient Boosting': GradientBoostingClassifier(random_state=42)
}


X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


for model_name, model in models.items():
    
    pipeline = Pipeline(steps=[('preprocessor', preprocessor), ('model', model)])
    
  
    pipeline.fit(X_train, y_train)
    
    
    y_pred = pipeline.predict(X_test)
    y_proba = pipeline.predict_proba(X_test)[:, 1] if hasattr(pipeline, "predict_proba") else None
    
    
    print(f"\n{model_name} Results:")
    print("Accuracy:", accuracy_score(y_test, y_pred))
    print("Classification Report:\n", classification_report(y_test, y_pred))
    if y_proba is not None:
        print("ROC-AUC Score:", roc_auc_score(y_test, y_proba))




Logistic Regression Results:
Accuracy: 0.811
Classification Report:
               precision    recall  f1-score   support

           0       0.83      0.96      0.89      1607
           1       0.55      0.20      0.29       393

    accuracy                           0.81      2000
   macro avg       0.69      0.58      0.59      2000
weighted avg       0.78      0.81      0.77      2000

ROC-AUC Score: 0.7788792987423027

Random Forest Results:
Accuracy: 0.8635
Classification Report:
               precision    recall  f1-score   support

           0       0.88      0.96      0.92      1607
           1       0.75      0.46      0.57       393

    accuracy                           0.86      2000
   macro avg       0.81      0.71      0.75      2000
weighted avg       0.85      0.86      0.85      2000

ROC-AUC Score: 0.8562530975328991

Gradient Boosting Results:
Accuracy: 0.864
Classification Report:
               precision    recall  f1-score   support

           0       0