In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report
from sklearn.impute import SimpleImputer
import joblib

# Steps 1-2: Load the raw churn CSV and clean it in one method chain.
#   * customerID is dropped so the identifier is never used as a feature.
#   * TotalCharges is parsed as numeric; values that cannot be parsed become
#     NaN (errors='coerce').
#   * Churn is encoded as 1 for 'Yes' and 0 for 'No'; any other label maps
#     to NaN.
data = (
    pd.read_csv('Telco-Customer-Churn.csv')
    .drop(columns='customerID')
    .assign(
        TotalCharges=lambda df: pd.to_numeric(df['TotalCharges'], errors='coerce'),
        Churn=lambda df: df['Churn'].map({'Yes': 1, 'No': 0}),
    )
)

# Step 3: Separate the target (Churn) from the predictor columns.
y = data['Churn']
X = data.drop(columns='Churn')

# Step 4: Group predictor columns by dtype so each group can get its own
# preprocessing: object columns are treated as categorical, int64/float64
# columns as numeric.
cat_cols = X.select_dtypes(include='object').columns
num_cols = X.select_dtypes(include=['int64', 'float64']).columns

# Step 5: Preprocessing
# Categorical columns: fill missing values with the most frequent category,
# then one-hot encode. Categories unseen during fit are ignored at transform
# time instead of raising (handle_unknown='ignore').
cat_pipeline = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='most_frequent')),
    ('encoder', OneHotEncoder(handle_unknown='ignore')),
])
# Numeric columns: fill missing values with the column mean, then standardize.
num_pipeline = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='mean')),
    ('scaler', StandardScaler()),
])

# Route each column group through its own sub-pipeline.
preprocessor = ColumnTransformer(transformers=[
    ('num', num_pipeline, num_cols),
    ('cat', cat_pipeline, cat_cols),
])

# Step 6: Hold out 20% of the rows as a test set; the seed is fixed so the
# split is the same on every run.
# NOTE(review): the split is not stratified on y -- consider stratify=y if the
# class ratio should be preserved in both partitions.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Step 7: Logistic regression, tuned with a grid search.
# Preprocessing lives inside the pipeline, so it is re-fit on the training
# portion of every CV fold.
logistic_pipeline = Pipeline(steps=[
    ('preprocess', preprocessor),
    ('clf', LogisticRegression(max_iter=1000)),
])
# Candidate values for the inverse regularization strength C.
logistic_param_grid = {'clf__C': [0.1, 1.0, 10]}
logistic_grid = GridSearchCV(
    estimator=logistic_pipeline,
    param_grid=logistic_param_grid,
    cv=3,
)
logistic_grid.fit(X_train, y_train)

# Evaluate the best configuration on the held-out test set.
print("\n Logistic Regression Classification Report:")
y_pred_log = logistic_grid.predict(X_test)
print(classification_report(y_test, y_pred_log))

# Persist the best pipeline (preprocessing + classifier) for later reuse.
joblib.dump(logistic_grid.best_estimator_, 'logistic_churn_pipeline.pkl')


# Step 8: Random forest, tuned with a grid search.
# The forest is seeded with the same value as the train/test split so that the
# selected hyper-parameters, the printed report and the saved model are
# reproducible across runs (an unseeded forest gives different trees each time).
rf_pipeline = Pipeline([
    ('preprocess', preprocessor),
    ('clf', RandomForestClassifier(random_state=42))
])
# Candidate forest sizes and depth limits (None lets trees grow fully).
rf_param_grid = {
    'clf__n_estimators': [100, 200],
    'clf__max_depth': [None, 10]
}
rf_grid = GridSearchCV(rf_pipeline, rf_param_grid, cv=3)
rf_grid.fit(X_train, y_train)

# Evaluate the best configuration on the held-out test set.
print("\n🌲 Random Forest Classification Report:")
y_pred_rf = rf_grid.predict(X_test)
print(classification_report(y_test, y_pred_rf))

# Persist the best pipeline (preprocessing + classifier) for later reuse.
joblib.dump(rf_grid.best_estimator_, 'rf_churn_pipeline.pkl')



 Logistic Regression Classification Report:
              precision    recall  f1-score   support

           0       0.86      0.90      0.88      1036
           1       0.68      0.59      0.64       373

    accuracy                           0.82      1409
   macro avg       0.77      0.75      0.76      1409
weighted avg       0.81      0.82      0.82      1409

