In [2]:
"""
Master Script: Run All Team Member Scripts
Executes complete heart disease classification pipeline
"""

# ============================================================================
# SECTION 1: IMPORT LIBRARIES AND SETUP
# ============================================================================

import os  # For directory and file operations
import subprocess  # For running external Python scripts
import sys  # For system-specific parameters and functions
from datetime import datetime  # For timestamp generation
import glob  # For file pattern matching

# Opening banner: title framed by rules, then the start timestamp and a closing rule.
_header_rule = "=" * 80  # Same 80-character rule is used for every separator
_started_at = datetime.now().strftime('%Y-%m-%d %H:%M:%S')  # Wall-clock start time

for _header_line in (
    _header_rule,
    "HEART DISEASE CLASSIFICATION - COMPLETE PIPELINE",
    _header_rule,
    "Pipeline started at: " + _started_at,
    _header_rule,
):
    print(_header_line)

# ============================================================================
# SECTION 2: CREATE REQUIRED DIRECTORY STRUCTURE
# ============================================================================

print("\n--- PHASE 1: SETTING UP DIRECTORY STRUCTURE ---")  # Announce setup phase

# Directories the pipeline expects to exist. Parents are listed before their
# children only so the printed report reads top-down; os.makedirs would create
# intermediate directories on its own.
required_dirs = [
    'data',               # Input datasets
    'artifacts',          # Root directory for all outputs
    'artifacts/models',   # Saved models
    'artifacts/metrics',  # Evaluation metrics
    'artifacts/figures'   # Visualization outputs
]

# exist_ok=True keeps this step idempotent, so re-running the cell is harmless.
for directory in required_dirs:
    os.makedirs(directory, exist_ok=True)
    print(f" Directory ensured: {directory}/")

# Abort early when the dataset is absent instead of failing later in a child script.
data_file = 'data/heart_cleveland_upload.csv'  # Single source of truth for the dataset path
if not os.path.exists(data_file):
    print(f"\n⚠ WARNING: Data file not found at {data_file}")
    # The instruction is built from data_file so it always names the path that
    # is actually checked (it previously pointed at a different filename).
    print(f"Please ensure the Heart Disease Cleveland dataset is saved as '{data_file}'")
    print("Pipeline cannot proceed without the dataset.")
    sys.exit(1)  # Non-zero exit code signals failure to the caller
else:
    print(f"\n Data file found: {data_file}")

# ============================================================================
# SECTION 3: RUN MEMBER 1 SCRIPT (PREPROCESSING AND ML)
# ============================================================================

# ----------------------------------------------------------------------------
# Helpers shared by the Member 1 and Member 2 phases
# ----------------------------------------------------------------------------

# Executed copies of notebooks are written here so the source notebooks stay untouched.
EXECUTED_NOTEBOOK_DIR = 'artifacts/executed_notebooks'


def _is_notebook_file(path):
    """Return True when `path` contains Jupyter notebook JSON.

    Detection is by content, not by file extension: the file must parse as a
    JSON object that carries an "nbformat" key. Unreadable or non-JSON files
    (for example plain Python source saved under an .ipynb name) yield False.
    """
    import json  # Local import: only this helper needs it

    try:
        with open(path, encoding='utf-8') as handle:
            document = json.load(handle)
    except (OSError, ValueError):  # ValueError covers JSON and text-decoding errors
        return False
    return isinstance(document, dict) and 'nbformat' in document


def _build_stage_command(script):
    """Build the command line that really executes `script`.

    Handing a notebook directly to the Python interpreter does not run its
    cells: the file is JSON, so the interpreter either fails on `null`/`true`
    or evaluates the document as one literal and exits with code 0. Notebooks
    are therefore executed through nbconvert; any other file is run as a
    regular Python script, exactly as before.
    """
    if _is_notebook_file(script):
        os.makedirs(EXECUTED_NOTEBOOK_DIR, exist_ok=True)
        return [
            sys.executable, '-m', 'nbconvert',
            '--to', 'notebook',
            '--execute',
            '--ExecutePreprocessor.timeout=-1',  # No per-cell time limit; stages may be slow
            '--output-dir', EXECUTED_NOTEBOOK_DIR,
            script,
        ]
    return [sys.executable, script]


def _run_stage(script):
    """Run one pipeline stage and terminate the pipeline if it fails.

    Args:
        script: Path of the notebook or Python script to execute.

    Returns:
        The subprocess.CompletedProcess of the successful run.

    Exits the interpreter with status 1 (via sys.exit) when the file is
    missing, the child process returns a non-zero exit code, or launching
    the child fails for any other reason.
    """
    if not os.path.exists(script):
        print(f"\n✗ ERROR: {script} not found in current directory")
        print("Please ensure the script is in the same directory as run_all.ipynb")
        sys.exit(1)

    try:
        print(f"\nExecuting {script}...")
        # Output is not captured, so the child inherits this process's stdout/stderr.
        # check=True turns a non-zero exit code into CalledProcessError.
        completed = subprocess.run(_build_stage_command(script), check=True)
        print(f"\n {script} completed successfully")
        return completed
    except subprocess.CalledProcessError as e:
        print(f"\n✗ ERROR: {script} failed with exit code {e.returncode}")
        print("Please check the error messages above for details")
        sys.exit(1)
    except Exception as e:  # e.g. the interpreter itself could not be launched
        print(f"\n✗ ERROR: Unexpected error while running {script}")
        print(f"Error details: {str(e)}")
        sys.exit(1)


def _verify_outputs(expected_files):
    """Print a presence report for each expected file.

    Returns:
        True when every path in `expected_files` exists, otherwise False.
        Only existence is checked; a file left over from an earlier run
        also counts as present.
    """
    everything_present = True
    for path in expected_files:
        if os.path.exists(path):
            print(f"   {path}")
        else:
            print(f"  ✗ MISSING: {path}")
            everything_present = False
    return everything_present


print("\n" + "="*80)  # Print separator line
print("PHASE 2: EXECUTING MEMBER 1 SCRIPT")  # Announce Member 1 execution
print("Preprocessing and Traditional ML Models (Decision Tree, Random Forest)")  # Describe content
print("="*80)  # Print separator line

member1_script = 'member1_preprocessing.ipynb'  # Define script filename

# Execute Member 1 script (exits the pipeline on any failure)
result = _run_stage(member1_script)

# Verify Member 1 outputs
print("\nVerifying Member 1 outputs...")  # Announce verification
member1_expected = [  # List of expected output files
    'artifacts/models/preprocessor.pkl',  # Preprocessor object
    'artifacts/models/train_test_split.pkl',  # Train-test split
    'artifacts/models/decision_tree_model.pkl',  # Decision Tree model
    'artifacts/models/random_forest_model.pkl',  # Random Forest model
    'artifacts/metrics/decision_tree_metrics.json',  # DT metrics
    'artifacts/metrics/random_forest_metrics.json'  # RF metrics
]

all_present = _verify_outputs(member1_expected)

if not all_present:  # Missing files are reported but do not stop the pipeline
    print("\n⚠ WARNING: Some expected files from Member 1 are missing")  # Warn user
    print("Proceeding anyway, but Member 2 may encounter errors")  # Explain consequence

# ============================================================================
# SECTION 4: RUN MEMBER 2 SCRIPT (NAIVE BAYES AND DEEP LEARNING)
# ============================================================================

print("\n" + "="*80)  # Print separator line
print("PHASE 3: EXECUTING MEMBER 2 SCRIPT")  # Announce Member 2 execution
print("Naive Bayes and Deep Learning Models (MLP, LSTM)")  # Describe content
print("="*80)  # Print separator line

member2_script = 'member2_nb_dl.ipynb'  # Define script filename

# Execute Member 2 script (exits the pipeline on any failure)
result = _run_stage(member2_script)

# Verify Member 2 outputs
print("\nVerifying Member 2 outputs...")  # Announce verification
member2_expected = [  # List of expected output files
    'artifacts/models/naive_bayes_model.pkl',  # Naive Bayes model
    'artifacts/models/mlp_model.h5',  # MLP model
    'artifacts/models/lstm_model.h5',  # LSTM model
    'artifacts/metrics/naive_bayes_metrics.json',  # NB metrics
    'artifacts/metrics/mlp_metrics.json',  # MLP metrics
    'artifacts/metrics/lstm_metrics.json',  # LSTM metrics
    'artifacts/metrics/all_models_comparison.csv'  # Aggregated metrics
]

all_present = _verify_outputs(member2_expected)

if not all_present:  # If any files missing
    print("\n⚠ WARNING: Some expected files from Member 2 are missing")  # Warn user

# ============================================================================
# SECTION 5: GENERATE FINAL SUMMARY
# ============================================================================

_summary_rule = "=" * 80  # Separator rule framing the phase title
print("\n" + _summary_rule, "PHASE 4: PIPELINE COMPLETION SUMMARY", _summary_rule, sep="\n")

# Snapshot of every entry currently present in each artifact directory
model_files, metric_files, figure_files = (
    glob.glob(f'artifacts/{_kind}/*') for _kind in ('models', 'metrics', 'figures')
)

_finished_at = datetime.now().strftime('%Y-%m-%d %H:%M:%S')  # Wall-clock end time
print("\n Pipeline completed at: " + _finished_at)
print("\n ARTIFACTS GENERATED:")

# One count line per artifact category; labels are padded so the counts align.
for _label, _found, _folder in (
    ("Models: ", model_files, "models"),
    ("Metrics:", metric_files, "metrics"),
    ("Figures:", figure_files, "figures"),
):
    print(f"    {_label} {len(_found)} files in artifacts/{_folder}/")

# Static roster of the models this pipeline reports on, printed as a numbered list.
print("\n MODELS TRAINED:")
models_trained = [
    "Decision Tree (with cross-validation)",
    "Random Forest (with feature importance)",
    "Naive Bayes (with bootstrap CI)",
    "Multilayer Perceptron (MLP/ANN)",
    "LSTM (Advanced Extension)"
]
for _rank, _description in enumerate(models_trained, start=1):  # Numbering starts at 1
    print("   " + str(_rank) + ". " + _description)

# Itemised listing of the artifacts found earlier, grouped by category.
print("\n KEY OUTPUT FILES:")

# Models are listed with their on-disk size, converted from bytes to KB.
print("\n   Models:")
for _path in sorted(model_files):
    _kilobytes = os.path.getsize(_path) / 1024
    print(f"     • {_path} ({_kilobytes:.1f} KB)")

# Metrics and figures share the same plain, size-less listing format.
for _heading, _paths in (("Metrics", metric_files), ("Figures", figure_files)):
    print(f"\n   {_heading}:")
    for _path in sorted(_paths):
        print(f"     • {_path}")

# Show the aggregated comparison table when the metrics CSV is available.
comparison_file = 'artifacts/metrics/all_models_comparison.csv'
if os.path.exists(comparison_file):
    print("\n MODEL PERFORMANCE COMPARISON:")
    try:
        # pandas is imported here so that a missing or broken install only
        # costs the table, not the whole summary.
        import pandas as pd
        comparison_df = pd.read_csv(comparison_file)
        _table_text = comparison_df.to_string(index=False)  # Plain text, no index column
        print("\n" + _table_text)
    except Exception as _problem:  # Best-effort display: report the cause and continue
        print("   (Could not display comparison: " + str(_problem) + ")")

# Closing banner
_closing_rule = "=" * 80
print("\n" + _closing_rule)
print("PIPELINE EXECUTION COMPLETE")
print(_closing_rule + "\n")

HEART DISEASE CLASSIFICATION - COMPLETE PIPELINE
Pipeline started at: 2025-10-02 14:43:57

--- PHASE 1: SETTING UP DIRECTORY STRUCTURE ---
 Directory ensured: data/
 Directory ensured: artifacts/
 Directory ensured: artifacts/models/
 Directory ensured: artifacts/metrics/
 Directory ensured: artifacts/figures/

 Data file found: data/heart_cleveland_upload.csv

PHASE 2: EXECUTING MEMBER 1 SCRIPT
Preprocessing and Traditional ML Models (Decision Tree, Random Forest)

Executing member1_preprocessing.ipynb...

 member1_preprocessing.ipynb completed successfully

Verifying Member 1 outputs...
   artifacts/models/preprocessor.pkl
   artifacts/models/train_test_split.pkl
   artifacts/models/decision_tree_model.pkl
   artifacts/models/random_forest_model.pkl
   artifacts/metrics/decision_tree_metrics.json
   artifacts/metrics/random_forest_metrics.json

PHASE 3: EXECUTING MEMBER 2 SCRIPT
Naive Bayes and Deep Learning Models (MLP, LSTM)

Executing member2_nb_dl.ipynb...

 member2_nb_dl.ipynb c