In [None]:
# Load the cleaned Telco churn dataset and preview its first rows
import pandas as pd

CLEAN_DATA_PATH = "../data/processed/clean_telco_churn.csv"

df = pd.read_csv(CLEAN_DATA_PATH)
df.head()

In [None]:
# Separate the target column from the predictor columns
target_column = 'Churn'
y = df[target_column]                 # label the model will learn to predict
X = df.drop(columns=[target_column])  # every remaining column is used as a feature

In [None]:
# Hold out 20% of the rows for testing.
# stratify=y keeps the class proportions of y the same in both splits;
# random_state=42 makes the shuffle reproducible.
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, stratify=y, random_state=42)

In [None]:
# Fit the baseline logistic regression on the training split
from sklearn.linear_model import LogisticRegression

# class_weight='balanced' weights classes inversely to their frequency in y_train;
# max_iter=1000 raises the solver's iteration cap above the library default.
logreg_params = dict(class_weight='balanced', max_iter=1000)
model = LogisticRegression(**logreg_params)

model.fit(X_train, y_train)

In [None]:
# Score the held-out rows: per-class probabilities and hard class labels
class_probabilities = model.predict_proba(X_test)
y_prob = class_probabilities[:, 1]  # column 1 is the probability of model.classes_[1]
y_pred = model.predict(X_test)

In [None]:
# Report test-set performance: per-class metrics, confusion matrix, and ROC-AUC
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score

report_text = classification_report(y_test, y_pred)
confusion_counts = confusion_matrix(y_test, y_pred)
# ROC-AUC is computed from the predicted probabilities, not the hard labels
roc_auc_value = roc_auc_score(y_test, y_prob)

print(report_text)
print(confusion_counts)
print("ROC-AUC:", roc_auc_value)

In [None]:
# Visualize the test-set confusion matrix as an annotated heatmap
import seaborn as sns
import matplotlib.pyplot as plt

# Draw on an explicit Axes so the title and labels are attached to the heatmap
# itself rather than to whichever axes pyplot currently treats as active.
fig, ax = plt.subplots()
sns.heatmap(confusion_matrix(y_test, y_pred),
            annot=True, fmt='d', cmap='Blues', ax=ax)  # fmt='d' prints integer counts
ax.set_title("Confusion Matrix — Logistic Regression")
ax.set_xlabel("Predicted")  # matrix columns are the predicted labels
ax.set_ylabel("Actual")     # matrix rows are the true labels
plt.show()

In [None]:
# Pair each feature with its fitted coefficient and rank them from the most
# positive downwards; only the ten largest coefficients are displayed.
# NOTE(review): magnitudes are only comparable across features if the inputs
# share a common scale — confirm how the cleaned data was prepared.
coefficient_table = pd.DataFrame({'Feature': X.columns, 'Coefficient': model.coef_[0]})
coefficients = coefficient_table.sort_values('Coefficient', ascending=False)

coefficients.head(10)

## 📌 Baseline Model Insights

- Logistic Regression achieved a ROC-AUC of X.XX (TODO: replace X.XX with the value printed by the evaluation cell).
- Features such as month-to-month contracts and high monthly charges
  strongly increase churn probability.
- Longer tenure and bundled services reduce churn likelihood.

This baseline model provides an interpretable benchmark for more
complex models.

In [None]:
# Collect the baseline metrics into one dictionary (written to disk in a later cell)
from sklearn.metrics import roc_auc_score, precision_score, recall_score, f1_score

# Metrics that are computed from the hard class predictions
label_metrics = {
    "precision": precision_score,
    "recall": recall_score,
    "f1_score": f1_score,
}

baseline_results = {
    "model_name": "Logistic Regression",
    # ROC-AUC uses the predicted probabilities rather than the hard labels
    "roc_auc": roc_auc_score(y_test, y_prob),
    **{metric_name: metric_fn(y_test, y_pred)
       for metric_name, metric_fn in label_metrics.items()},
}

baseline_results

In [None]:
# Persist the baseline metrics as indented JSON
import json
import os

results_dir = "../results"
os.makedirs(results_dir, exist_ok=True)  # no error if the folder already exists

with open("../results/baseline_logistic_regression.json", "w") as results_file:
    results_file.write(json.dumps(baseline_results, indent=4))

In [None]:
# Reload the saved JSON and display it as a sanity check
results_path = "../results/baseline_logistic_regression.json"
with open(results_path) as results_file:
    loaded_results = json.loads(results_file.read())

loaded_results