In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from imblearn.over_sampling import SMOTE
from sklearn.svm import SVC
from imblearn.pipeline import Pipeline  
from sklearn.metrics import classification_report, roc_auc_score

In [3]:
# Read the cleaned churn table from disk.
data = pd.read_csv('churn_data_cleaned.csv')

# Separate the label column from the remaining feature columns.
y = data['TARGET']
X = data.drop(columns=['TARGET'])

# Hold out 20% of the rows as a test set, stratified on the label and seeded
# for repeatability. The split is made up front, before SMOTE is applied, so
# the test rows stay separate from any resampling.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

In [None]:
# Three-stage model: standardise the features, oversample with SMOTE, then fit
# an RBF-kernel SVM. The Pipeline class imported from imblearn is used here
# because a sampler step sits in the middle of the chain.
svm_steps = [
    ('scaler', StandardScaler()),
    ('smote', SMOTE(random_state=42)),
    # probability=True is what makes predict_proba available further down.
    ('svm', SVC(kernel='rbf', probability=True, random_state=42)),
]
pipeline = Pipeline(steps=svm_steps)

# Train on the training partition only.
pipeline.fit(X_train, y_train)

# Hard class predictions for the held-out rows.
y_pred = pipeline.predict(X_test)

# Keep column index 1 of the probability matrix as the score for ROC AUC.
# NOTE(review): assumes that column corresponds to the "Churn" class - confirm
# against the TARGET encoding.
test_probabilities = pipeline.predict_proba(X_test)
y_proba = test_probabilities[:, 1]

# Display names for the report rows.
# NOTE(review): assumes the sorted TARGET values map to this order - confirm.
target_names = ['Not Churn', 'Churn']

# Report per-class metrics, then the ROC AUC computed from the scores above.
report_text = classification_report(y_test, y_pred, target_names=target_names)
print(report_text)
auc_value = roc_auc_score(y_test, y_proba)
print('ROC AUC score:', auc_value)