# Task2_End_to_End_ML_Pipeline_Telco_Churn

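Objective: Build an end-to-end scikit-learn pipeline for Telco customer churn prediction, covering preprocessing, a random-forest classifier, grid-search tuning, hold-out evaluation, and saving the fitted pipeline. When `DEMO_MODE` is `True`, the notebook runs on synthetic data instead of the real CSV.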

In [None]:
# Data-source switch: True generates a small synthetic dataset inside the
# notebook; False reads 'Telco-Customer-Churn.csv' from the working directory.
DEMO_MODE = True
import pandas as pd
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import classification_report, accuracy_score
import joblib


In [None]:
# Build the modelling DataFrame `df` with a numeric 0/1 'Churn' target:
# synthetic rows in demo mode, the real CSV otherwise.
if DEMO_MODE:
    import numpy as np

    n = 500
    rng = np.random.default_rng(0)  # fixed seed keeps the demo reproducible
    # All columns are drawn from the same generator, so the order of the
    # entries below determines the generated values -- do not reorder.
    df = pd.DataFrame({
        'tenure': rng.integers(0, 72, n),
        'MonthlyCharges': rng.uniform(20, 120, n).round(2),
        'Contract': rng.choice(['Month-to-month', 'One year', 'Two year'], n),
        'InternetService': rng.choice(['DSL', 'Fiber optic', 'No'], n),
        'gender': rng.choice(['Male', 'Female'], n),
        'SeniorCitizen': rng.choice([0, 1], n, p=[0.85, 0.15]),
    })
    # Churn probability decreases with tenure and increases with monthly
    # charges; it is clipped to [0.05, 0.95] before sampling the label.
    prob = 0.4 - 0.003 * df['tenure'] + 0.002 * (df['MonthlyCharges'] - 50)
    df['Churn'] = (rng.random(n) < prob.clip(0.05, 0.95)).astype(int)
else:
    df = pd.read_csv('Telco-Customer-Churn.csv')
    # NOTE(review): the clean-up below assumes the public IBM/Kaggle Telco
    # churn schema -- confirm against the actual file. Every step is guarded,
    # so a file that is already clean passes through unchanged.
    if 'customerID' in df.columns:
        # A unique row identifier carries no signal and would otherwise be
        # one-hot encoded into one column per customer.
        df = df.drop(columns=['customerID'])
    if 'TotalCharges' in df.columns:
        # Blank entries make pandas read this column as text; coerce them to
        # NaN so the column is numeric (the downstream imputer fills them).
        df['TotalCharges'] = pd.to_numeric(df['TotalCharges'], errors='coerce')
    if not pd.api.types.is_numeric_dtype(df['Churn']):
        # The modelling cell scores with 'f1', whose default positive label
        # is 1, so a 'Yes'/'No' target must be mapped to 1/0 first.
        churn = df['Churn'].astype(str).str.strip().map({'Yes': 1, 'No': 0})
        if churn.isna().any():
            raise ValueError("Unexpected values in 'Churn' column; expected 'Yes'/'No'")
        df['Churn'] = churn.astype(int)

print(df.head())

In [None]:
# Train, tune, evaluate and persist the churn pipeline.
# Reads `df` (features plus a binary 'Churn' column) from the previous cell.

# Separate the features from the target.
X = df.drop(columns=['Churn'])
y = df['Churn']

# Hold out 20% for the final evaluation; stratifying keeps the churn rate the
# same in the train and test splits.
Xtr, Xte, ytr, yte = train_test_split(X, y, stratify=y, random_state=42, test_size=0.2)

# Select numeric columns by the generic 'number' kind rather than by exact
# width: listing only int64/float64 sends int32/float32 columns (e.g. from
# platform-default integers) to the categorical branch to be one-hot encoded.
num_cols = X.select_dtypes(include='number').columns.tolist()
cat_cols = X.select_dtypes(exclude='number').columns.tolist()

# Numeric features: median-impute, then standardise.
num_pipeline = Pipeline([
    ('imputer', SimpleImputer(strategy='median')),
    ('scaler', StandardScaler()),
])
# Categorical features: fill with the most frequent level, then one-hot
# encode; categories unseen during fit are ignored at predict time.
cat_pipeline = Pipeline([
    ('imputer', SimpleImputer(strategy='most_frequent')),
    ('ohe', OneHotEncoder(handle_unknown='ignore')),
])
preprocessor = ColumnTransformer([
    ('num', num_pipeline, num_cols),
    ('cat', cat_pipeline, cat_cols),
])

# Preprocessing lives inside the pipeline, so it is re-fitted on each CV
# training fold and the validation folds never influence the transformers.
pipe = Pipeline([
    ('preproc', preprocessor),
    ('clf', RandomForestClassifier(n_estimators=100, random_state=0)),
])

# Small grid over tree depth, scored on F1 of the positive (churn) class.
param_grid = {'clf__n_estimators': [100], 'clf__max_depth': [5, 10, None]}
gs = GridSearchCV(pipe, param_grid, cv=3, scoring='f1', n_jobs=1)
gs.fit(Xtr, ytr)
best = gs.best_estimator_
print('Best params', gs.best_params_)

# Evaluate on the untouched hold-out split.
pred = best.predict(Xte)
print('Accuracy', accuracy_score(yte, pred))
# The model was tuned on F1, so also show per-class precision/recall/F1;
# accuracy alone does not show how well the churn class is predicted.
print(classification_report(yte, pred))

# Persist the whole fitted pipeline (preprocessing + model) as one artifact.
joblib.dump(best, 'telco_churn_pipeline.joblib')
print('Saved pipeline')