<a href="https://colab.research.google.com/github/AbhinandanRoy7/LearNex/blob/main/CustomerChurn.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install pandas scikit-learn xgboost




In [2]:
import joblib
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.impute import SimpleImputer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, roc_auc_score
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from xgboost import XGBClassifier

In [3]:
# Location of the churn CSV; kept in one named constant so it is easy to
# repoint when the notebook runs somewhere other than under /content.
DATA_PATH = '/content/Telco_Customer_Churn.csv'
data = pd.read_csv(DATA_PATH)

In [4]:
# Handle missing values in TotalCharges.
# The column is parsed with errors='coerce', so every entry that is not a valid
# number becomes NaN; those NaNs are then replaced by the column median.
# NOTE(review): the imputer is fitted on the whole frame, i.e. before the
# train/test split done further down, so the median also reflects rows that
# later land in the test set. Consider fitting it on the training rows only.
imputer = SimpleImputer(strategy='median')
data['TotalCharges'] = pd.to_numeric(data['TotalCharges'], errors='coerce')
data['TotalCharges'] = imputer.fit_transform(data[['TotalCharges']])

# customerID is dropped because it is not useful for prediction.
# errors='ignore' keeps this cell re-runnable: on a second execution the column
# is already gone and a plain drop would raise KeyError.
data = data.drop(columns='customerID', errors='ignore')

# Categorical feature columns that get integer-encoded in the next step.
label_cols = ['gender', 'Partner', 'Dependents', 'PhoneService', 'MultipleLines', 'InternetService',
              'OnlineSecurity', 'OnlineBackup', 'DeviceProtection', 'TechSupport', 'StreamingTV',
              'StreamingMovies', 'Contract', 'PaperlessBilling', 'PaymentMethod']

In [5]:
# Encode categorical features as integer codes.
# Each column gets its own LabelEncoder, stored in `feature_encoders`, so the
# category -> code mapping of every feature stays available afterwards
# (`feature_encoders[col].classes_`, `.inverse_transform(...)`) and can be
# re-applied to new data. Refitting a single shared encoder per column would
# keep only the mapping of whichever column was fitted last.
feature_encoders = {}
for col in label_cols:
    feature_encoders[col] = LabelEncoder()
    data[col] = feature_encoders[col].fit_transform(data[col])

# Encode the target variable 'Churn' with a dedicated encoder; `le` therefore
# ends this cell fitted on the target labels.
le = LabelEncoder()
data['Churn'] = le.fit_transform(data['Churn'])

# Split into feature matrix and target vector.
X = data.drop('Churn', axis=1)
y = data['Churn']

In [6]:
# Hold out 30% of the rows for testing; stratifying on y keeps the class
# proportions the same in both partitions, and the fixed seed makes the split
# repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    stratify=y,
    random_state=42,
)

# Standardization: the scaler learns its statistics from the training
# partition only and is then applied unchanged to both partitions.
scaler = StandardScaler().fit(X_train)
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)

In [7]:
# Model selection: three candidate classifiers — Logistic Regression,
# Random Forest and XGBoost — all trained on the same standardized training
# split. The two tree ensembles are seeded so their fits are repeatable.
logreg = LogisticRegression()
rf = RandomForestClassifier(random_state=42)
xgb = XGBClassifier(random_state=42)

logreg.fit(X_train, y_train)
rf.fit(X_train, y_train)
xgb.fit(X_train, y_train)

In [8]:
# Model evaluation helper: prints the hold-out metrics of one fitted model.
def evaluate_model(model, X_test, y_test):
    """Print accuracy, precision, recall, F1 and ROC-AUC of `model` on a test set.

    Parameters
    ----------
    model : fitted classifier exposing `predict` and `predict_proba`.
    X_test : features handed to the model as-is.
    y_test : true labels the predictions are scored against.

    Returns
    -------
    None. The five metrics are written to stdout, one per line, rounded to
    two decimals.
    """
    predictions = model.predict(X_test)
    acc = accuracy_score(y_test, predictions)
    prec = precision_score(y_test, predictions)
    rec = recall_score(y_test, predictions)
    f1_val = f1_score(y_test, predictions)
    # ROC-AUC is scored on column 1 of predict_proba rather than on hard labels.
    roc_auc = roc_auc_score(y_test, model.predict_proba(X_test)[:, 1])
    print(
        f"Accuracy: {acc:.2f}",
        f"Precision: {prec:.2f}",
        f"Recall: {rec:.2f}",
        f"F1-Score: {f1_val:.2f}",
        f"ROC-AUC: {roc_auc:.2f}",
        sep="\n",
    )

# Report every baseline on the same held-out split. Each heading is paired
# with the model it announces; the later headings carry a leading newline so
# the reports are separated by a blank line.
for heading, clf in (
    ("Logistic Regression Evaluation:", logreg),
    ("\nRandom Forest Evaluation:", rf),
    ("\nXGBoost Evaluation:", xgb),
):
    print(heading)
    evaluate_model(clf, X_test, y_test)

Logistic Regression Evaluation:
Accuracy: 0.80
Precision: 0.65
Recall: 0.57
F1-Score: 0.61
ROC-AUC: 0.84

Random Forest Evaluation:
Accuracy: 0.78
Precision: 0.62
Recall: 0.48
F1-Score: 0.54
ROC-AUC: 0.82

XGBoost Evaluation:
Accuracy: 0.78
Precision: 0.60
Recall: 0.51
F1-Score: 0.55
ROC-AUC: 0.82


In [9]:
# Hyperparameter tuning: grid search over the Random Forest.
# Every combination of the values below is scored by 5-fold cross-validated
# F1 on the training split.
param_grid = {
    'n_estimators': [100, 200],
    'max_depth': [5, 10],
    'min_samples_split': [2, 5],
}
grid_rf = GridSearchCV(rf, param_grid, cv=5, scoring='f1')
grid_rf.fit(X_train, y_train)

In [10]:
# Evaluate the Random Forest configuration selected by the grid search on the
# held-out test split.
best_rf = grid_rf.best_estimator_
print("\nBest Random Forest Evaluation:")
evaluate_model(best_rf, X_test, y_test)

# Persist artifacts for deployment.
# NOTE(review): `xgb` is the XGBoost model fitted earlier with default
# hyperparameters, not the tuned `best_rf` evaluated just above. Confirm which
# model is actually meant to ship before relying on this file.
joblib.dump(xgb, 'xgboost_model.pkl')
print("Model saved as xgboost_model.pkl")

# The models were trained on StandardScaler output, so the fitted scaler has to
# be shipped with the model to transform new inputs the same way.
joblib.dump(scaler, 'scaler.pkl')
print("Scaler saved as scaler.pkl")


Best Random Forest Evaluation:
Accuracy: 0.80
Precision: 0.65
Recall: 0.50
F1-Score: 0.57
ROC-AUC: 0.83
Model saved as xgboost_model.pkl
