In [1]:
import os
import mlflow
import mlflow.sklearn
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, ConfusionMatrixDisplay
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np 

In [2]:
# Data lives in <parent of the current working directory>/notebooks/data.
project_root = os.path.abspath(os.path.join(os.getcwd(), os.pardir))
processed_dir = os.path.join(project_root, "notebooks", "data")


def _read_processed(filename):
    """Load one CSV file from ``processed_dir`` into a DataFrame."""
    return pd.read_csv(os.path.join(processed_dir, filename))


# Training and validation splits: features and targets are stored in separate files.
X_train = _read_processed("X_train_scaled.csv")
y_train = _read_processed("y_train.csv")
X_valid = _read_processed("X_valid_scaled.csv")
y_valid = _read_processed("y_valid.csv")

# The test split is a single file; it is assumed to carry the 'Churn' target
# column alongside the features so that it can be used for evaluation.
# NOTE(review): train/valid features come from "*_scaled.csv" files while the
# test features come from "test_preprocessed.csv" -- confirm the test set was
# transformed with the same fitted scaler.
df_test = _read_processed("test_preprocessed.csv")
target_column = "Churn"
y_test = df_test[target_column]
X_test = df_test.drop(columns=[target_column])

In [4]:
mlflow.set_experiment("Customer Churn Classification")


def _classification_scores(y_true, y_pred):
    """Return ``(accuracy, precision, recall, f1)`` for one set of predictions.

    ``zero_division=0`` makes the "no positive predictions" case explicit:
    the affected scores are reported as 0.0 (the same value scikit-learn
    falls back to by default) without emitting an UndefinedMetricWarning.
    """
    return (
        accuracy_score(y_true, y_pred),
        precision_score(y_true, y_pred, zero_division=0),
        recall_score(y_true, y_pred, zero_division=0),
        f1_score(y_true, y_pred, zero_division=0),
    )


# Start a new MLflow run: parameters, metrics, the confusion-matrix plot and
# the fitted model are all attached to this single run.
with mlflow.start_run(run_name="Baseline Logistic Regression") as run:
    print(f"Starting run: {run.info.run_name}")

    # --- a. Configure the model ---
    lgr_params = {
        'class_weight': 'balanced',
    }
    lgr = LogisticRegression(**lgr_params)

    # Log the model's parameters
    print("Logging parameters...")
    mlflow.log_params(lgr_params)

    # --- b. Train the Model ---
    print("Training the model...")
    # y_train is read as a one-column DataFrame; .values.ravel() flattens it
    # to the 1D array that sklearn expects.
    lgr.fit(X_train, y_train.values.ravel())

    # --- c. Evaluate on Validation Set ---
    print("Evaluating on validation set...")
    y_pred_valid = lgr.predict(X_valid)
    valid_accuracy, valid_precision, valid_recall, valid_f1 = _classification_scores(
        y_valid, y_pred_valid
    )

    # Log validation metrics
    print("Logging validation metrics...")
    validation_metrics = {
        "validation_accuracy": valid_accuracy,
        "validation_precision": valid_precision,
        "validation_recall": valid_recall,
        "validation_f1_score": valid_f1
    }
    mlflow.log_metrics(validation_metrics)
    print(f"Validation F1-Score: {valid_f1:.4f}")

    # --- d. Evaluate on Test Set ---
    print("Evaluating on test set...")
    y_pred_test = lgr.predict(X_test)
    test_accuracy, test_precision, test_recall, test_f1 = _classification_scores(
        y_test, y_pred_test
    )

    # Log test metrics
    print("Logging test metrics...")
    test_metrics = {
        "test_accuracy": test_accuracy,
        "test_precision": test_precision,
        "test_recall": test_recall,
        "test_f1_score": test_f1
    }
    mlflow.log_metrics(test_metrics)
    print(f"Test F1-Score: {test_f1:.4f}")

    # --- e. Log the Confusion Matrix ---
    print("Generating and logging confusion matrix...")
    cm = confusion_matrix(y_test, y_pred_test)
    disp = ConfusionMatrixDisplay(confusion_matrix=cm)
    disp.plot()
    # Title the display's own axes rather than relying on pyplot's "current axes".
    disp.ax_.set_title("Test Set Confusion Matrix")

    # Log the figure straight to the run (artifact path: plots/test_confusion_matrix.png)
    # so no stray PNG is left behind in the working directory.
    mlflow.log_figure(disp.figure_, "plots/test_confusion_matrix.png")
    plt.close(disp.figure_)

    # --- f. Log the Model ---
    print("Logging the model...")
    mlflow.sklearn.log_model(lgr, "logistic_regression_model")

    print("\n✅ Run completed successfully!")
    print("To see your run, open a terminal and type: mlflow ui")

Starting run: Baseline Logistic Regression
Logging parameters...
Training the model...
Evaluating on validation set...
Logging validation metrics...
Validation F1-Score: 0.8648
Evaluating on test set...


  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])


Logging test metrics...
Test F1-Score: 0.0000
Generating and logging confusion matrix...




Logging the model...





✅ Run completed successfully!
To see your run, open a terminal and type: mlflow ui


# 📊 Summary of Results

- **Validation Set:** F1 = 0.865 ✅  
- **Test Set:** F1 = 0.0 ❌ (precision & recall also 0.0)  
- The model predicted **"Not Churned" for every customer** → complete failure.

---

# ⚠ Root Cause: Why Did It Fail?
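1. **Severe Data Drift**  
   - Training vs. test data distributions differ drastically.  
   - A preprocessing mismatch should also be ruled out: the training and validation features are loaded from `*_scaled.csv` files, while the test features are loaded from `test_preprocessed.csv`, so the test set must be confirmed to have passed through the same fitted scaler.  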

2. **Over-Reliance on "Perfect Rules"**  
   - Training data had deterministic patterns (e.g., Monthly Contract = 100% churn).  
   - Logistic Regression assigned **huge weights** to these features.  

3. **Rigid Decision Boundary**  
   - Learned weights pushed probabilities below 0.5 for all test samples.  
   - Result: model defaulted to predicting **no churn at all**.  

---

# ✅ Proposed Next Step: Regularization
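- Apply **L1 (Lasso) Regularization** (`penalty="l1"` with a compatible solver such as `liblinear` or `saga`, tuning the strength `C`) to:  
  - Penalize overly large weights  
  - Reduce reliance on single "perfect rules"  
  - Encourage balanced, generalizable feature importance  
  - Automatically shrink irrelevant features to zero (feature selection)  
- Note: the baseline above already uses scikit-learn's default L2 penalty (`C=1.0`), so this step switches to a sparsity-inducing penalty (and a tuned `C`) rather than adding regularization for the first time.  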
