# 02. Logistic Regression (Baseline)

This notebook trains a Logistic Regression model as a baseline. It evaluates the model on the held-out test set (Accuracy at a 0.5 threshold, ROC-AUC, PR-AUC, Brier Score) and saves the fitted model (pickle) and its metrics (JSON) under `../results/`.

In [1]:
# Imports
import pandas as pd
import numpy as np
import pickle
import json
import os
import matplotlib.pyplot as plt
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import (
    accuracy_score, roc_auc_score, roc_curve, 
    precision_recall_curve, auc, brier_score_loss,
    confusion_matrix, classification_report
)

import warnings
warnings.filterwarnings("ignore")

# Settings
%matplotlib inline
plt.rcParams['figure.figsize'] = (10, 6)
plt.rcParams['font.size'] = 12

# Create results directories
os.makedirs('../results/models', exist_ok=True)
os.makedirs('../results/metrics', exist_ok=True)
os.makedirs('../results/figures', exist_ok=True)

## 1. Load Processed Data

In [2]:
# Load data
# NOTE: pickle.load can execute arbitrary code while deserializing, so this
# file must come from a trusted source (e.g. this project's own preprocessing).
with open('../data/processed/churn_data.pkl', 'rb') as f:
    data = pickle.load(f)

# Pull the individual splits out of the loaded dictionary in one step.
X_train_scaled, y_train, X_test_scaled, y_test, feature_names = (
    data[key]
    for key in ('X_train_scaled', 'y_train', 'X_test_scaled', 'y_test', 'feature_names')
)

# Quick size check of both splits.
print("Train samples:", len(X_train_scaled))
print("Test samples:", len(X_test_scaled))

Train samples: 265
Test samples: 83


## 2. Model Training

In [3]:
print("Training Logistic Regression...")
# fit() returns the estimator itself, so construction and training can be chained.
lr_model = LogisticRegression(max_iter=1000, random_state=42).fit(X_train_scaled, y_train)

# Predictions
# Column 1 of predict_proba is the probability of the positive class (label 1).
lr_pred_proba = lr_model.predict_proba(X_test_scaled)[:, 1]
# Hard labels at a 0.5 decision threshold.
lr_pred = np.where(lr_pred_proba >= 0.5, 1, 0)

print(" Model trained")

Training Logistic Regression...
 Model trained


## 3. Evaluation

In [4]:
# Metrics
# Probability-based scores use lr_pred_proba; accuracy uses the thresholded labels.
brier = brier_score_loss(y_test, lr_pred_proba)
roc_auc = roc_auc_score(y_test, lr_pred_proba)
accuracy = accuracy_score(y_test, lr_pred)

# PR-AUC is the trapezoidal area under the precision-recall curve.
precision, recall, _ = precision_recall_curve(y_test, lr_pred_proba)
pr_auc = auc(recall, precision)

print(
    f"Accuracy: {accuracy:.4f}",
    f"ROC-AUC: {roc_auc:.4f}",
    f"PR-AUC: {pr_auc:.4f}",
    f"Brier Score: {brier:.4f}",
    sep="\n",
)

Accuracy: 0.6506
ROC-AUC: 0.6836
PR-AUC: 0.7689
Brier Score: 0.2293


In [5]:
# Confusion Matrix
# Built from the thresholded predictions (lr_pred).
cm = confusion_matrix(y_test, lr_pred)
print("Confusion Matrix:", cm, sep="\n")

# Per-class precision / recall / F1 for the same predictions.
print("\nClassification Report:", classification_report(y_test, lr_pred), sep="\n")

Confusion Matrix:
[[14 19]
 [10 40]]

Classification Report:
              precision    recall  f1-score   support

           0       0.58      0.42      0.49        33
           1       0.68      0.80      0.73        50

    accuracy                           0.65        83
   macro avg       0.63      0.61      0.61        83
weighted avg       0.64      0.65      0.64        83



## 4. Save Results

In [6]:
# Output locations: each artifact is written into a per-model subdirectory.
model_path = '../results/models/logistic_regression/logistic_regression.pkl'
metrics_path = '../results/metrics/logistic_regression/logistic_regression.json'

# open() does not create missing parent directories, and the per-model
# subdirectories may not exist yet (e.g. on a fresh checkout), so create them
# first. exist_ok=True makes this a no-op on re-runs.
for output_path in (model_path, metrics_path):
    os.makedirs(os.path.dirname(output_path), exist_ok=True)

# Save Model
with open(model_path, 'wb') as f:
    pickle.dump(lr_model, f)
print(f" Model saved to {model_path}")

# Save Metrics
# Scalar evaluation results computed in the evaluation section.
metrics = {
    'model': 'Logistic Regression',
    'accuracy': accuracy,
    'roc_auc': roc_auc,
    'pr_auc': pr_auc,
    'brier': brier
}

with open(metrics_path, 'w') as f:
    json.dump(metrics, f, indent=4)
print(f" Metrics saved to {metrics_path}")

 Model saved to ../results/models/logistic_regression/logistic_regression.pkl
 Metrics saved to ../results/metrics/logistic_regression/logistic_regression.json


In [7]:
#