In [3]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder

# Load the churn dataset from a path relative to the notebook's location.
data = pd.read_csv("../data/synthetic_customer_churn.csv")

# One encoder per categorical column, keyed by column name so the fitted
# mapping stays available after the column has been overwritten.
# NOTE(review): scikit-learn documents LabelEncoder as an encoder for target
# values (y); confirm that integer codes are the intended representation for
# these input features.
label_encoders = {col: LabelEncoder() for col in ['payment_method', 'plan_type']}

# Fit each encoder on its column and replace the column with the integer codes.
for col, le in label_encoders.items():
    data[col] = le.fit_transform(data[col])


In [4]:
from sklearn.model_selection import train_test_split

# Separate the 'churn' target from the remaining predictor columns.
y = data['churn']
X = data.drop(columns=['churn'])

# Hold out 30% of the rows for evaluation. The split is stratified on y and
# uses a fixed random_state so it is repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
    stratify=y,
)


In [11]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report

# Build the logistic-regression classifier (solver iteration cap of 1000) and
# fit it on the training split in a single expression; fit() returns the
# estimator itself, so log_model is the fitted model.
log_model = LogisticRegression(max_iter=1000).fit(X_train, y_train)

# Predicted class labels for the held-out rows.
log_preds = log_model.predict(X_test)

# Text summary of precision / recall / F1 per class plus the averaged rows.
print(classification_report(y_true=y_test, y_pred=log_preds))



              precision    recall  f1-score   support

           0       0.80      0.89      0.84       222
           1       0.53      0.35      0.42        78

    accuracy                           0.75       300
   macro avg       0.66      0.62      0.63       300
weighted avg       0.73      0.75      0.73       300



In [13]:
# Pair every fitted coefficient with its feature name and rank the features
# from the most positive coefficient to the most negative.
#
# The names are read from the fitted model when it exposes them, so the table
# stays correctly labelled even if the notebook-level `X` has been redefined
# since `log_model` was trained. `X.columns` remains the fallback for
# estimators that do not carry `feature_names_in_`.
if hasattr(log_model, 'feature_names_in_'):
    feature_names = log_model.feature_names_in_
else:
    feature_names = X.columns

# coef_ is indexed with [0] to take the single row of per-feature weights.
# NOTE(review): no feature scaling is visible before the fit, so coefficient
# magnitudes may reflect each feature's units rather than its importance —
# confirm before comparing rows against each other.
coefficients = pd.DataFrame({
    'feature': feature_names,
    'coefficient': log_model.coef_[0]
}).sort_values(by='coefficient', ascending=False)

coefficients


Unnamed: 0,feature,coefficient
3,complaints,0.213251
4,discount_used_pct,0.051478
6,monthly_charges,-0.001022
2,payment_method,-0.018504
5,plan_type,-0.049976
1,usage_frequency,-0.10333
0,tenure_months,-0.132765
