# <p align="center">**📊 Customer Churn Pipeline for the Best Model 📊**</p>


<p align="center">
  <img src="https://thebrainpoint.com/wp-content/uploads/2025/10/Predictive-Analytics-Helps-Reduce-Customer-Churn.jpg" width="800"/>
</p>

# **My Information**

### ‧₊**˚✿[My Name]✿˚** : **[Mohamed Reda Ramadan Khamis]**
### ‧₊**˚✿[My Phone Number]✿˚** : **[01554725661]**

In [1]:
import pandas as pd
import numpy as np
import pickle

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder ,OrdinalEncoder ,StandardScaler
from sklearn.impute import SimpleImputer
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from imblearn.pipeline import Pipeline as ImbPipeline

from xgboost import XGBClassifier
from sklearn.metrics import classification_report

from imblearn.over_sampling import SMOTE 

import warnings
# Install a blanket "ignore" filter: warnings of every category are suppressed
# from here on (unless a later-registered filter takes precedence).
warnings.filterwarnings("ignore")

In [2]:
# Load the raw Telco customer-churn CSV (path is relative to the working directory).
df = pd.read_csv(filepath_or_buffer="WA_Fn-UseC_-Telco-Customer-Churn.csv")

In [3]:
# Remove the two columns that are not used as model features.
df = df.drop(columns=['customerID', 'gender'])

In [4]:
# Convert the TotalCharges column to numeric and handle missing values:
# entries that cannot be parsed become NaN (errors="coerce") and are then set to 0.
df["TotalCharges"] = pd.to_numeric(df["TotalCharges"], errors="coerce").fillna(0)

In [5]:
# Collapse the 'No internet service' / 'No phone service' placeholders into a
# plain 'No' wherever they appear in the frame.
df = df.replace(to_replace=['No internet service', 'No phone service'], value='No')

In [6]:
# Binary-encode the target column: "Yes" -> 1, "No" -> 0.
df = df.assign(Churn=df["Churn"].map({"Yes": 1, "No": 0}))

In [7]:
# Separate the target from the features.
y = df["Churn"]
x = df.drop(columns="Churn")

# Hold out 20% of the rows for testing, stratified on the target, with a fixed seed.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

In [8]:
# Feature groups, one list per encoding strategy:
#   ordinal columns     --> ordinal encoding
#   categorical columns --> one-hot encoding
#   binary columns      --> binary mapping, or one-hot encoding with drop='if_binary'
#   numeric columns     --> listed separately from the encoded groups

numeric_cols = [
    "tenure",
    "MonthlyCharges",
    "TotalCharges",
]

binary_cols = [
    "Partner",
    "Dependents",
    "PhoneService",
    "PaperlessBilling",
    "SeniorCitizen",
]

ordinal_cols = ["Contract"]

categorical_cols = [
    "MultipleLines",
    "InternetService",
    "OnlineSecurity",
    "OnlineBackup",
    "DeviceProtection",
    "TechSupport",
    "StreamingTV",
    "StreamingMovies",
    "PaymentMethod",
]

In [9]:
# Numeric pipeline: median imputation followed by standard scaling.
# The step name "scalar" is kept as-is because it is part of the pipeline's parameter keys.
numeric_pipeline = Pipeline(
    [
        ("imputer", SimpleImputer(strategy="median")),
        ("scalar", StandardScaler()),
    ]
)

In [10]:
# Binary pipeline: most-frequent imputation, then one-hot encoding configured
# with drop='if_binary' and handle_unknown='ignore'.
binary_pipeline = Pipeline(
    [
        ("imputer", SimpleImputer(strategy="most_frequent")),
        ("onehot", OneHotEncoder(drop="if_binary", handle_unknown="ignore")),
    ]
)

In [11]:
# Inspect the distinct Contract values (they drive the ordinal category order used later).
pd.unique(df["Contract"])

array(['Month-to-month', 'One year', 'Two year'], dtype=object)

In [12]:
# Ordinal pipeline (Contract): most-frequent imputation, then ordinal encoding
# with the category order given explicitly, from shortest to longest contract.
ordinal_pipeline = Pipeline(
    [
        ("imputer", SimpleImputer(strategy="most_frequent")),
        (
            "ordinal",
            OrdinalEncoder(categories=[["Month-to-month", "One year", "Two year"]]),
        ),
    ]
)

In [13]:
# Categorical pipeline (multi-category columns): most-frequent imputation,
# then one-hot encoding with handle_unknown='ignore'.
categorical_pipeline = Pipeline(
    steps=[
        ("imputer", SimpleImputer(strategy="most_frequent")),
        ("onehot", OneHotEncoder(handle_unknown="ignore")),
    ]
)

In [14]:
# Route each column group through its own sub-pipeline; any column that is not
# listed in one of the groups is dropped (remainder="drop").
preprocessor = ColumnTransformer(
    [
        ("num", numeric_pipeline, numeric_cols),
        ("bin", binary_pipeline, binary_cols),
        ("ord", ordinal_pipeline, ordinal_cols),
        ("cat", categorical_pipeline, categorical_cols),
    ],
    remainder="drop",
)

In [15]:
# Training pipeline: preprocessing -> SMOTE oversampling -> XGBoost classifier.
# The imbalanced-learn Pipeline is used so the SMOTE sampler can sit between the
# preprocessing step and the model.
training_pipeline = ImbPipeline(steps=[
    ('preprocessing', preprocessor),   # Encoding + scaling
    ('smote', SMOTE(random_state=42)), # Handle class imbalance
    ('model', XGBClassifier(
        colsample_bytree=1.0,
        learning_rate=0.03,
        max_depth=4,
        n_estimators=200,
        subsample=0.8,
        # Churn is a binary target, so use binary log-loss; 'mlogloss' is the
        # multiclass variant. The deprecated `use_label_encoder` flag is omitted:
        # recent XGBoost releases no longer accept it and only warn that it is unused.
        eval_metric='logloss'
    ))
])

In [16]:
# Fit the pipeline on the training split.
training_pipeline.fit(X=x_train, y=y_train)

  File "c:\Users\PC\AppData\Local\Programs\Python\Python311\Lib\site-packages\joblib\externals\loky\backend\context.py", line 257, in _count_physical_cores
    cpu_info = subprocess.run(
               ^^^^^^^^^^^^^^^
  File "c:\Users\PC\AppData\Local\Programs\Python\Python311\Lib\subprocess.py", line 548, in run
    with Popen(*popenargs, **kwargs) as process:
         ^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\PC\AppData\Local\Programs\Python\Python311\Lib\subprocess.py", line 1026, in __init__
    self._execute_child(args, executable, preexec_fn, close_fds,
  File "c:\Users\PC\AppData\Local\Programs\Python\Python311\Lib\subprocess.py", line 1538, in _execute_child
    hp, ht, pid, tid = _winapi.CreateProcess(executable, args,
                       ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^


In [17]:
# Evaluation: predict on the held-out split and print the classification report
# under a heading line.
y_pred = training_pipeline.predict(x_test)
print(
    "\n classification_report",
    classification_report(y_true=y_test, y_pred=y_pred),
    sep="\n",
)


 classification_report
              precision    recall  f1-score   support

           0       0.88      0.80      0.84      1035
           1       0.55      0.69      0.62       374

    accuracy                           0.77      1409
   macro avg       0.72      0.75      0.73      1409
weighted avg       0.79      0.77      0.78      1409



In [22]:
# Persist the fitted pipeline (preprocessing + SMOTE + model) as a single pickle file.
with open("XGBOOST_pipline.pkl", mode="wb") as model_file:
    pickle.dump(training_pipeline, model_file)

print("\n Model Saved Successfully to XGBOOST_pipline.pkl ")


 Model Saved Successfully to XGBOOST_pipline.pkl 


<div style="text-align: center;">
  <h1><b>🏁 Best Preprocessing and Model Pipeline for the Telco Churn Dataset Saved Successfully</b></h1>
</div>