In [1]:
import os
os.chdir(r"C:\Users\debab\OneDrive\Desktop\Coustomer Churn\Notebooks")
print("Working directory:", os.getcwd())


Working directory: C:\Users\debab\OneDrive\Desktop\Coustomer Churn\Notebooks


# Decision Tree — Customer Churn Prediction

## Step 1 — Load Data

In [2]:
import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings('ignore')

df = pd.read_csv('../Data/Customer-Churn-Records.csv')
print('Shape:', df.shape)
df.head()


Shape: (10000, 18)


Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited,Complain,Satisfaction Score,Card Type,Point Earned
0,1,15634602,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88,1,1,2,DIAMOND,464
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0,1,3,DIAMOND,456
2,3,15619304,Onio,502,France,Female,42,8,159660.8,3,1,0,113931.57,1,1,3,DIAMOND,377
3,4,15701354,Boni,699,France,Female,39,1,0.0,2,0,0,93826.63,0,0,5,GOLD,350
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0,0,5,GOLD,425


## Step 2 — Data Cleaning

In [3]:
# Drop identifier columns with no predictive value
df.drop(['RowNumber', 'CustomerId', 'Surname'], axis=1, inplace=True)

# Drop Complain — data leakage (directly causes churn, not a customer feature)
df.drop(['Complain'], axis=1, inplace=True)

# Remove duplicate rows
before = len(df)
df.drop_duplicates(inplace=True)
print(f'Duplicates removed: {before - len(df)}')
print(f'Remaining rows: {len(df)}')


Duplicates removed: 0
Remaining rows: 10000


In [4]:
# Check for missing values
print('Null values:\n', df.isnull().sum())
print('\nData types:\n', df.dtypes)


Null values:
 CreditScore           0
Geography             0
Gender                0
Age                   0
Tenure                0
Balance               0
NumOfProducts         0
HasCrCard             0
IsActiveMember        0
EstimatedSalary       0
Exited                0
Satisfaction Score    0
Card Type             0
Point Earned          0
dtype: int64

Data types:
 CreditScore             int64
Geography              object
Gender                 object
Age                     int64
Tenure                  int64
Balance               float64
NumOfProducts           int64
HasCrCard               int64
IsActiveMember          int64
EstimatedSalary       float64
Exited                  int64
Satisfaction Score      int64
Card Type              object
Point Earned            int64
dtype: object


## Step 3 — Data Preprocessing

In [5]:
# Check class distribution (target imbalance)
print('Target distribution:')
print(df['Exited'].value_counts())
print(f'\nChurn rate: {df["Exited"].mean():.2%}')


Target distribution:
Exited
0    7962
1    2038
Name: count, dtype: int64

Churn rate: 20.38%


In [6]:
# Check for outliers in numeric columns using IQR
numeric_cols = df.select_dtypes(include=['int64','float64']).drop(columns=['Exited']).columns
for col in numeric_cols:
    Q1, Q3 = df[col].quantile(0.25), df[col].quantile(0.75)
    IQR = Q3 - Q1
    outliers = ((df[col] < Q1 - 1.5*IQR) | (df[col] > Q3 + 1.5*IQR)).sum()
    print(f'{col}: {outliers} outliers')


CreditScore: 15 outliers
Age: 359 outliers
Tenure: 0 outliers
Balance: 0 outliers
NumOfProducts: 60 outliers
HasCrCard: 0 outliers
IsActiveMember: 0 outliers
EstimatedSalary: 0 outliers
Satisfaction Score: 0 outliers
Point Earned: 0 outliers


In [7]:
# Descriptive statistics
df.describe()


Unnamed: 0,CreditScore,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited,Satisfaction Score,Point Earned
count,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0
mean,650.5288,38.9218,5.0128,76485.889288,1.5302,0.7055,0.5151,100090.239881,0.2038,3.0138,606.5151
std,96.653299,10.487806,2.892174,62397.405202,0.581654,0.45584,0.499797,57510.492818,0.402842,1.405919,225.924839
min,350.0,18.0,0.0,0.0,1.0,0.0,0.0,11.58,0.0,1.0,119.0
25%,584.0,32.0,3.0,0.0,1.0,0.0,0.0,51002.11,0.0,2.0,410.0
50%,652.0,37.0,5.0,97198.54,1.0,1.0,1.0,100193.915,0.0,3.0,605.0
75%,718.0,44.0,7.0,127644.24,2.0,1.0,1.0,149388.2475,0.0,4.0,801.0
max,850.0,92.0,10.0,250898.09,4.0,1.0,1.0,199992.48,1.0,5.0,1000.0


## Step 4 — Feature Engineering

In [8]:
# Separate features and target
X = df.drop('Exited', axis=1)
y = df['Exited']
print('Features:', X.columns.tolist())
print('Target shape:', y.shape)


Features: ['CreditScore', 'Geography', 'Gender', 'Age', 'Tenure', 'Balance', 'NumOfProducts', 'HasCrCard', 'IsActiveMember', 'EstimatedSalary', 'Satisfaction Score', 'Card Type', 'Point Earned']
Target shape: (10000,)


## Step 5 — Feature Encoding

In [9]:
# One-hot encode categorical columns: Geography, Gender, Card Type
# drop_first=True avoids multicollinearity (dummy variable trap)
X = pd.get_dummies(X, columns=['Geography', 'Gender', 'Card Type'], drop_first=True)
print('Encoded feature columns:')
print(X.columns.tolist())
print('Shape:', X.shape)


Encoded feature columns:
['CreditScore', 'Age', 'Tenure', 'Balance', 'NumOfProducts', 'HasCrCard', 'IsActiveMember', 'EstimatedSalary', 'Satisfaction Score', 'Point Earned', 'Geography_Germany', 'Geography_Spain', 'Gender_Male', 'Card Type_GOLD', 'Card Type_PLATINUM', 'Card Type_SILVER']
Shape: (10000, 16)


## Step 6 — Train / Test Split

In [10]:
from sklearn.model_selection import train_test_split

# stratify=y ensures both splits preserve the same churn ratio
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)
print(f'Train: {X_train.shape[0]} rows | Test: {X_test.shape[0]} rows')
print(f'Train churn rate: {y_train.mean():.2%} | Test churn rate: {y_test.mean():.2%}')


Train: 8000 rows | Test: 2000 rows
Train churn rate: 20.38% | Test churn rate: 20.40%


## Step 7 — Model Training

In [11]:
from sklearn.tree import DecisionTreeClassifier


In [12]:
# IMPROVED MODEL - Balanced regularization (FIXED VERSION)
model = DecisionTreeClassifier(
    # Tree depth (reduce overfitting but not too much)
    max_depth=8,                   # FIXED: was 7 (too strict) -> now 8
    
    # Split requirements (moderate regularization)
    min_samples_split=15,          # FIXED: was 20 (too strict) -> now 15
    min_samples_leaf=7,            # FIXED: was 10 (too strict) -> now 7
    
    # Feature selection
    max_features='sqrt',
    
    # Pruning parameters (lighter pruning)
    min_impurity_decrease=0.0005,  # FIXED: was 0.001 -> now 0.0005
    ccp_alpha=0.005,               # FIXED: was 0.01 -> now 0.005
    
    # Class imbalance
    class_weight='balanced',
    
    random_state=42
)

model.fit(X_train, y_train)
print('Decision Tree trained with FIXED parameters')
print(f'Tree depth: {model.get_depth()}')
print(f'Number of leaves: {model.get_n_leaves()}')


Decision Tree trained with FIXED parameters
Tree depth: 5
Number of leaves: 9


## Validation

In [13]:
from sklearn.metrics import (
    accuracy_score, f1_score, precision_score,
    recall_score, confusion_matrix, classification_report
)
from sklearn.model_selection import cross_val_score
import numpy as np

train_preds = model.predict(X_train)
test_preds  = model.predict(X_test)

train_acc = accuracy_score(y_train, train_preds)
test_acc  = accuracy_score(y_test,  test_preds)
f1        = f1_score(y_test,        test_preds)
precision = precision_score(y_test, test_preds)
recall    = recall_score(y_test,    test_preds)

cv_scores = cross_val_score(model, X, y, cv=5, scoring='accuracy')
cv_mean   = cv_scores.mean()
cv_std    = cv_scores.std()
gap       = train_acc - test_acc


In [14]:
# Train / Test Accuracy
print('=== Train / Test Accuracy ===')
print(f'Train Accuracy : {train_acc:.4f}')
print(f'Test  Accuracy : {test_acc:.4f}')


=== Train / Test Accuracy ===
Train Accuracy : 0.6040
Test  Accuracy : 0.6170


In [15]:
# Cross Validation Score
print('=== Cross Validation Score (5-fold) ===')
print(f'CV Mean  : {cv_mean:.4f}')
print(f'CV Std   : {cv_std:.4f}')
print(f'CV Scores: {cv_scores.round(4)}')


=== Cross Validation Score (5-fold) ===
CV Mean  : 0.5903
CV Std   : 0.0747
CV Scores: [0.728  0.502  0.559  0.5805 0.582 ]


In [16]:
# Confusion Matrix
print('=== Confusion Matrix ===')
cm = confusion_matrix(y_test, test_preds)
print(cm)
tn, fp, fn, tp = cm.ravel()
print(f'True  Negatives (Correctly predicted Stay)  : {tn}')
print(f'False Positives (Said Churn, actually Stay) : {fp}')
print(f'False Negatives (Said Stay, actually Churn) : {fn}')
print(f'True  Positives (Correctly predicted Churn) : {tp}')


=== Confusion Matrix ===
[[962 630]
 [136 272]]
True  Negatives (Correctly predicted Stay)  : 962
False Positives (Said Churn, actually Stay) : 630
False Negatives (Said Stay, actually Churn) : 136
True  Positives (Correctly predicted Churn) : 272


In [17]:
# Precision, Recall, F1
print('=== Precision / Recall / F1 ===')
print(f'Precision : {precision:.4f}')
print(f'Recall    : {recall:.4f}')
print(f'F1 Score  : {f1:.4f}')
print()
print(classification_report(y_test, test_preds, target_names=['Stayed','Churned']))


=== Precision / Recall / F1 ===
Precision : 0.3016
Recall    : 0.6667
F1 Score  : 0.4153

              precision    recall  f1-score   support

      Stayed       0.88      0.60      0.72      1592
     Churned       0.30      0.67      0.42       408

    accuracy                           0.62      2000
   macro avg       0.59      0.64      0.57      2000
weighted avg       0.76      0.62      0.65      2000



In [18]:
# Overfitting Check
print('=== Overfitting Check ===')
print(f'Train Accuracy : {train_acc:.4f}')
print(f'Test  Accuracy : {test_acc:.4f}')
print(f'Gap            : {gap:.4f}')
if gap > 0.05:
    print('Warning: Overfitting detected (gap > 5%)')
else:
    print('Good: No significant overfitting (gap <= 5%)')


=== Overfitting Check ===
Train Accuracy : 0.6040
Test  Accuracy : 0.6170
Gap            : -0.0130
Good: No significant overfitting (gap <= 5%)


In [19]:
# Class Imbalance
print('=== Class Imbalance ===')
print(f'Stayed  (0): {(y==0).sum()} ({(y==0).mean()*100:.1f}%)')
print(f'Churned (1): {(y==1).sum()} ({(y==1).mean()*100:.1f}%)')
ratio = (y==0).sum() / (y==1).sum()
print(f'Imbalance ratio: {ratio:.2f}:1')
if ratio > 2:
    print('Dataset is imbalanced — class_weight=balanced is recommended')
else:
    print('Dataset is relatively balanced')


=== Class Imbalance ===
Stayed  (0): 7962 (79.6%)
Churned (1): 2038 (20.4%)
Imbalance ratio: 3.91:1
Dataset is imbalanced — class_weight=balanced is recommended


In [20]:
# Generalization Ability
print('=== Generalization Ability ===')
print(f'CV Mean Accuracy  : {cv_mean:.4f}')
print(f'Test Accuracy     : {test_acc:.4f}')
gen_gap = abs(cv_mean - test_acc)
print(f'CV vs Test gap    : {gen_gap:.4f}')
if gen_gap < 0.02:
    print('Excellent generalization — model performs consistently on unseen data')
elif gen_gap < 0.05:
    print('Good generalization — minor variance between CV and test')
else:
    print('Poor generalization — model is unstable across different data splits')


=== Generalization Ability ===
CV Mean Accuracy  : 0.5903
Test Accuracy     : 0.6170
CV vs Test gap    : 0.0267
Good generalization — minor variance between CV and test


In [21]:
import mlflow
import mlflow.sklearn

# Use V2 to avoid deleted experiment issue
mlflow.set_experiment('Customer_Churn_DecisionTree_Improved_V2')

# Start MLflow run
with mlflow.start_run():
    # Log all parameters (FIXED VERSION)
    mlflow.log_param('max_depth',              8)
    mlflow.log_param('min_samples_split',      15)
    mlflow.log_param('min_samples_leaf',       7)
    mlflow.log_param('max_features',           'sqrt')
    mlflow.log_param('min_impurity_decrease',  0.0005)
    mlflow.log_param('ccp_alpha',              0.005)
    mlflow.log_param('class_weight',           'balanced')
    mlflow.log_param('tree_depth',             model.get_depth())
    mlflow.log_param('num_leaves',             model.get_n_leaves())
    
    # Log metrics
    mlflow.log_metric('train_accuracy',   train_acc)
    mlflow.log_metric('test_accuracy',    test_acc)
    mlflow.log_metric('f1_score',         f1)
    mlflow.log_metric('precision',        precision)
    mlflow.log_metric('recall',           recall)
    mlflow.log_metric('cv_accuracy',      cv_mean)
    mlflow.log_metric('overfitting_gap',  gap)
    
    # Log model
    mlflow.sklearn.log_model(model, 'decision_tree_model_improved')
    
    print('✅ MLflow run logged successfully!')
    print(f'   Experiment: Customer_Churn_DecisionTree_Improved_V2')
    print(f'   Test Accuracy: {test_acc:.4f}')
    print(f'   Overfitting Gap: {gap:.4f}')

2026/02/15 10:33:07 INFO alembic.runtime.plugins: setup plugin alembic.autogenerate.schemas
2026/02/15 10:33:07 INFO alembic.runtime.plugins: setup plugin alembic.autogenerate.tables
2026/02/15 10:33:07 INFO alembic.runtime.plugins: setup plugin alembic.autogenerate.types
2026/02/15 10:33:07 INFO alembic.runtime.plugins: setup plugin alembic.autogenerate.constraints
2026/02/15 10:33:07 INFO alembic.runtime.plugins: setup plugin alembic.autogenerate.defaults
2026/02/15 10:33:07 INFO alembic.runtime.plugins: setup plugin alembic.autogenerate.comments
2026/02/15 10:33:08 INFO alembic.runtime.migration: Context impl SQLiteImpl.
2026/02/15 10:33:08 INFO alembic.runtime.migration: Will assume non-transactional DDL.


✅ MLflow run logged successfully!
   Experiment: Customer_Churn_DecisionTree_Improved_V2
   Test Accuracy: 0.6170
   Overfitting Gap: -0.0130
