In [2]:
!pip install xgboost lifelines seaborn matplotlib pandas scikit-learn



In [3]:
# Required libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from xgboost import XGBClassifier
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score

from lifelines import CoxPHFitter

# Load dataset
df = pd.read_csv("sample_churn_dataset.csv")

# Convert TotalCharges to numeric (handle spaces or invalid entries)
df['TotalCharges'] = pd.to_numeric(df['TotalCharges'], errors='coerce')

# Drop rows with missing values
df = df.dropna()

# Label encode categorical columns except the target
label_encoders = {}
for column in df.select_dtypes(include=['object']).columns:
    if column != 'Churn':
        le = LabelEncoder()
        df[column] = le.fit_transform(df[column])
        label_encoders[column] = le

# Encode target column
df['Churn'] = df['Churn'].map({'Yes': 1, 'No': 0})

# Feature-target split
X = df.drop(['Churn'], axis=1)
y = df['Churn']

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train XGBoost model
model = XGBClassifier(use_label_encoder=False, eval_metric='logloss')
model.fit(X_train, y_train)

# Predictions and evaluation
y_pred = model.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))
print("\nConfusion Matrix:\n", confusion_matrix(y_test, y_pred))

# ===== Survival Analysis using Lifelines =====
df_survival = df.copy()

# Rename columns to match CoxPHFitter requirement
df_survival = df_survival.rename(columns={"tenure": "duration", "Churn": "event"})

# Fit Cox Proportional Hazards model
cph = CoxPHFitter()
cph.fit(df_survival, duration_col='duration', event_col='event')

# Summary of survival model
cph.print_summary()


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Accuracy: 1.0

Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00         2

    accuracy                           1.00         2
   macro avg       1.00      1.00      1.00         2
weighted avg       1.00      1.00      1.00         2


Confusion Matrix:
 [[2]]




0,1
model,lifelines.CoxPHFitter
duration col,'duration'
event col,'event'
baseline estimation,breslow
number of observations,10
number of events observed,4
partial log-likelihood,-1.96
time fit was run,2025-05-31 10:59:09 UTC

Unnamed: 0,coef,exp(coef),se(coef),coef lower 95%,coef upper 95%,exp(coef) lower 95%,exp(coef) upper 95%,cmp to,z,p,-log2(p)
MonthlyCharges,-0.0,1.0,0.04,-0.09,0.08,0.92,1.08,0.0,-0.07,0.95,0.08
TotalCharges,-0.0,1.0,0.0,-0.01,0.0,0.99,1.0,0.0,-1.55,0.12,3.05
Contract,1.08,2.95,1.88,-2.59,4.76,0.07,116.52,0.0,0.58,0.56,0.83

0,1
Concordance,1.00
Partial AIC,9.92
log-likelihood ratio test,12.46 on 3 df
-log2(p) of ll-ratio test,7.39
