In [1]:
import os

import pandas as pd
import xgboost as xgb
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score
from sklearn.model_selection import train_test_split

In [2]:
# Step 1: Load Data
# The dataset location defaults to the original local path, but it can be
# overridden with the CHURN_DATA_PATH environment variable so the notebook
# can run on another machine without editing this cell.
DEFAULT_DATA_PATH = r"D:\JOB\1 Pricing analyst\2 Revenue and profit optimization model\4 Long-Term Revenue Impact\churn_prediction_dataset.csv"
data_path = os.environ.get("CHURN_DATA_PATH", DEFAULT_DATA_PATH)
df = pd.read_csv(data_path)

In [9]:
# Inspect the current contents of df (rich display of the cell's last expression).
# NOTE(review): the execution count of this cell (9) is higher than that of the
# cells below, so it was run out of order; the captured output has no
# customer_id column, i.e. it shows df after the drop step, not the raw CSV.
df

Unnamed: 0,subscription_length,monthly_price,total_spent,support_tickets,competitor_price,discount_received,churn
0,29,45.185061,1310.366778,4,39.001662,0,1
1,15,37.363011,560.445166,8,41.465184,10,0
2,8,31.324673,250.597384,0,27.671630,5,0
3,21,36.016774,756.352246,3,34.755843,15,0
4,19,47.259915,897.938384,9,48.546818,10,1
...,...,...,...,...,...,...,...
9995,2,8.466012,16.932024,4,10.078524,20,0
9996,30,40.493727,1214.811797,4,39.784459,0,1
9997,30,42.056437,1261.693108,8,34.490625,0,1
9998,32,35.523039,1136.737238,8,34.017244,5,1


In [3]:
# Step 2: Drop irrelevant columns
# customer_id is not useful for modeling, so it is removed from the frame.
# errors='ignore' keeps this cell idempotent: df is reassigned here, so running
# the cell a second time (after the column is already gone) would otherwise
# raise KeyError.
df = df.drop(columns=['customer_id'], errors='ignore')

In [4]:
# Step 3: Split into features and target
# Target vector: the 'churn' column. Feature matrix: every remaining column.
y = df['churn']
X = df.drop(columns=['churn'])

In [5]:
# Step 4: Train-test split
# Hold out 20% of the rows for evaluation; the fixed random_state makes the
# split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42, test_size=0.2)

In [6]:
# Step 5: Define and train XGBoost model
# The former use_label_encoder=False argument was removed: the installed XGBoost
# no longer uses it and only emitted the warning
# 'Parameters: { "use_label_encoder" } are not used.' on every fit.
model = xgb.XGBClassifier(
    eval_metric='logloss',
    n_estimators=50,        # Speed-optimized
    max_depth=3,
    learning_rate=0.1,
    random_state=42         # fixed seed for reproducible training
)
# Last expression of the cell, so the fitted estimator is also displayed.
model.fit(X_train, y_train)

Parameters: { "use_label_encoder" } are not used.



In [7]:
# Step 6: Make predictions
# Keep the per-class probabilities and take the second column (index 1) as the
# score used for ROC AUC; with 0/1 churn labels this is presumably the
# probability of churn = 1.
class_probabilities = model.predict_proba(X_test)
y_proba = class_probabilities[:, 1]
# Hard class labels for the confusion matrix and classification report.
y_pred = model.predict(X_test)

In [8]:
# Step 7: Evaluate model
# Compute every metric first, then print them together.
conf_matrix = confusion_matrix(y_test, y_pred)         # hard-label predictions
report = classification_report(y_test, y_pred)         # precision / recall / F1 per class
auc = roc_auc_score(y_test, y_proba)                   # scored from y_proba, not y_pred

print("Confusion Matrix:\n", conf_matrix)
print("\nClassification Report:\n", report)
print("\nROC AUC Score:", auc)

Confusion Matrix:
 [[886 107]
 [144 863]]

Classification Report:
               precision    recall  f1-score   support

           0       0.86      0.89      0.88       993
           1       0.89      0.86      0.87      1007

    accuracy                           0.87      2000
   macro avg       0.87      0.87      0.87      2000
weighted avg       0.88      0.87      0.87      2000


ROC AUC Score: 0.953041199018752


In [None]:
# This model can tell me instantly whether a given customer is likely to churn, but my focus as a pricing analyst is more strategic.
# Next steps: feature importance (e.g. monthly_price, discount_received) and SHAP values to see how pricing affects churn,
# and elasticity analysis (e.g. how churn changes with a +5% price increase).
# As a pricing analyst, my core task is to answer:
# “If we change the price, how will that affect the churn rate?”