In [1]:
# =============================================================================
# JUPYTER NOTEBOOK MAIN CODE (FIXED)
# =============================================================================

# Cell 1: Setup paths and imports
import sys
import os

# Add the project's module directories to sys.path so the local imports
# further down can be resolved. The membership check keeps sys.path free of
# duplicate entries when this notebook cell is executed more than once.
for _module_dir in (
    '/content/churn_pipeline/modules',
    '/content/churn_pipeline/utils',
):
    if _module_dir not in sys.path:
        sys.path.append(_module_dir)

# Set the data path (CSV file handed to the experiment runner below)
data_path = '/content/sample_data/WA_Fn-UseC_-Telco-Customer-Churn.csv'


from data_loader import DataLoader
from leakage_monitor import DataLeakageMonitor
from feature_engineer import FeatureEngineer
from preprocessor import Preprocessor
from cascade_model import CascadeModel
from experiment_runner import ExperimentRunner

# Reached only when every import above succeeded; a failing import raises
# before this line runs.
print("✅ All modules imported successfully!")


# Cell 2: Run a single experiment
# Initialize experiment runner with a fixed random_state
runner = ExperimentRunner(random_state=42)

# Run single experiment
print("Running single experiment...")
single_result = runner.run_experiment(data_path=data_path)

if single_result:
    print("\nSingle Experiment Results:")
    for metric, value in single_result.items():
        # Numeric values keep the 4-decimal format. Anything the ".4f" spec
        # cannot format (str, None, sequences, ...) is printed as-is instead
        # of aborting the cell with a TypeError/ValueError.
        try:
            formatted = f"{value:.4f}"
        except (TypeError, ValueError):
            formatted = str(value)
        print(f"  {metric}: {formatted}")
else:
    # A falsy result (None / empty mapping) is reported instead of being
    # skipped silently.
    print("\n⚠️ Single experiment returned no results.")

# Cell 3: Run multiple experiments with different seeds (optional)
banner = "=" * 60
print(f"\n{banner}")
print("RUNNING MULTIPLE EXPERIMENTS")
print(banner)

# n_splits=5 with no explicit seed list; per the original note this runs
# 5 experiments with random seeds (seed selection happens inside the runner
# and is not visible here).
results_df = runner.run_multiple_experiments(n_splits=5, data_path=data_path)

# Print summary statistics for the headline metric columns, but only when
# the run produced at least one row.
summary_columns = ['Precision', 'Recall', 'F1-Score', 'ROC AUC', 'BusinessCost']
if len(results_df) > 0:
    print("\nAggregated Results Summary:")
    print(results_df[summary_columns].describe())

# Cell 4: Alternative - Run with specific seeds for reproducibility (optional)
print("\n" + "="*60)
print("RUNNING WITH SPECIFIC SEEDS")
print("="*60)

# Use specific seeds for reproducibility
specific_seeds = [42, 101, 202, 303, 404]
specific_results_path = "artifacts/specific_seed_results.csv"

# Make sure the output directory exists before handing the path to the runner.
# NOTE(review): presumably the runner writes a CSV to results_path; whether it
# creates the directory itself is not visible here, so it is created up front
# (a no-op when it already exists).
os.makedirs(os.path.dirname(specific_results_path), exist_ok=True)

specific_results = runner.run_multiple_experiments(
    data_path=data_path,
    seeds=specific_seeds,
    results_path=specific_results_path,
)

if len(specific_results) > 0:
    print("\nSpecific Seeds Results:")
    print(specific_results[['seed', 'Precision', 'Recall', 'F1-Score', 'ROC AUC', 'BusinessCost']])

# Only claim overall success when every run above actually produced results;
# previously the success banner was printed unconditionally.
all_succeeded = (
    bool(single_result)
    and len(results_df) > 0
    and len(specific_results) > 0
)
if all_succeeded:
    print("\n✅ All experiments completed successfully!")
    print("Results saved to artifacts/ directory")
else:
    print("\n⚠️ One or more experiments returned no results; see the output above.")

✅ All modules imported successfully!
Running single experiment...
CHURN PREDICTION PIPELINE - FINAL VERSION
=== LOADING RAW DATA ===

=== HEAD OF RAW DATA ===
   gender  SeniorCitizen Partner Dependents  tenure PhoneService     MultipleLines InternetService OnlineSecurity OnlineBackup DeviceProtection TechSupport StreamingTV StreamingMovies        Contract PaperlessBilling              PaymentMethod  MonthlyCharges TotalCharges Churn
0  Female              0     Yes         No       1           No  No phone service             DSL             No          Yes               No          No          No              No  Month-to-month              Yes           Electronic check           29.85        29.85    No
1    Male              0      No         No      34          Yes                No             DSL            Yes           No              Yes          No          No              No        One year               No               Mailed check           56.95       1889.5    No
2   