In [1]:
import pandas as pd
from sklearn.model_selection import GridSearchCV
from imblearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from imblearn.over_sampling import SMOTE
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report, roc_auc_score

In [3]:
# Read the churn CSV into a DataFrame.
data = pd.read_csv('churn_data_cleaned.csv')

# Separate the label column (TARGET) from the remaining feature columns.
y = data['TARGET']
X = data.drop(columns=['TARGET'])

# Hold out 20% of the rows as a test set. The split is stratified on the label
# so both partitions keep the same class ratio, and it happens here, before any
# resampling, so the test set is never touched by SMOTE.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

In [4]:
# Standardise the features BEFORE oversampling. The scaler learns its
# statistics from the training split only; the test split is transformed with
# those same statistics and is never used for fitting.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Oversample with SMOTE on the (scaled) training split ONLY; the test split
# keeps its original class balance.
smote = SMOTE(random_state=42)
X_train_smote, y_train_smote = smote.fit_resample(
    X_train_scaled,
    y_train,
)

In [7]:
# k-nearest-neighbours classifier with k=200, trained on the SMOTE-resampled
# training data (fit() returns the estimator itself, so the call is chained).
knn = KNeighborsClassifier(n_neighbors=200).fit(X_train_smote, y_train_smote)

# Display names passed to the report, in the order of the sorted class labels.
target_names = ['Not Churn', 'Churn']

# Score the untouched (scaled) test split: hard labels for the report and the
# probability in column 1 of predict_proba for ROC AUC.
# NOTE(review): column 1 is the second entry of knn.classes_ - presumably the
# churn class; confirm against the label encoding of TARGET.
y_proba = knn.predict_proba(X_test_scaled)[:, 1]
y_pred = knn.predict(X_test_scaled)

# Per-class precision / recall / F1, followed by the ROC AUC.
report_text = classification_report(
    y_test,
    y_pred,
    target_names=target_names,
)
print(report_text)

auc = roc_auc_score(y_test, y_proba)
print('ROC AUC score:', auc)

              precision    recall  f1-score   support

   Not Churn       0.96      0.51      0.66     65253
       Churn       0.12      0.76      0.21      5785

    accuracy                           0.53     71038
   macro avg       0.54      0.64      0.44     71038
weighted avg       0.89      0.53      0.63     71038

ROC AUC score: 0.6971084796056295


In [8]:
# k-nearest-neighbours classifier with k=5, trained on the SMOTE-resampled
# training data (fit() returns the estimator itself, so the call is chained).
knn = KNeighborsClassifier(n_neighbors=5).fit(X_train_smote, y_train_smote)

# Display names passed to the report, in the order of the sorted class labels.
target_names = ['Not Churn', 'Churn']

# Score the untouched (scaled) test split: hard labels for the report and the
# probability in column 1 of predict_proba for ROC AUC.
# NOTE(review): column 1 is the second entry of knn.classes_ - presumably the
# churn class; confirm against the label encoding of TARGET.
y_proba = knn.predict_proba(X_test_scaled)[:, 1]
y_pred = knn.predict(X_test_scaled)

# Per-class precision / recall / F1, followed by the ROC AUC.
report_text = classification_report(
    y_test,
    y_pred,
    target_names=target_names,
)
print(report_text)

auc = roc_auc_score(y_test, y_proba)
print('ROC AUC score:', auc)

              precision    recall  f1-score   support

   Not Churn       0.94      0.73      0.82     65253
       Churn       0.13      0.44      0.20      5785

    accuracy                           0.71     71038
   macro avg       0.53      0.58      0.51     71038
weighted avg       0.87      0.71      0.77     71038

ROC AUC score: 0.6173967198294634
