# Minimal Viable Submission - Training Validation
This notebook runs and verifies the simplified `src/models/train.py` script.

In [None]:
import os
import sys

# The notebook is expected to run from a directory one level below the project
# root, so the root is the parent of the current working directory. All later
# cells build their paths from `project_root` instead of changing directory.
project_root = os.path.abspath(os.pardir)
_train_script_parts = ('src', 'models', 'train.py')
script_path = os.path.join(project_root, *_train_script_parts)

print(f"Project Root: {project_root}")
print(f"Training Script: {script_path}")

In [None]:
# Run the script
# We use !python to simulate CLI execution
!python {script_path}

In [None]:
# Confirm that the files checked below exist under the project root.
# Each entry lists the path components relative to `project_root`.
_artifact_parts = (
    ('models', 'production', 'model.pkl'),
    ('models', 'production', 'preprocessor.pkl'),
    ('data', 'processed', 'heart_disease_clean.csv'),
)
expected_files = [os.path.join(project_root, *parts) for parts in _artifact_parts]

print("Checking artifacts...")
for f in expected_files:
    # Report by file name only; the directory is implied by the list above.
    if os.path.exists(f):
        status = "EXISTS"
    else:
        status = "MISSING"
    print(f"{os.path.basename(f)}: {status}")

In [None]:
# Test Model Loading and Prediction
import joblib
import pandas as pd
import numpy as np

# Smoke test: load the persisted model and preprocessor, then score a single
# hand-written record. Failures are reported instead of raised so the rest of
# the notebook can still be executed, but the report includes the exception
# type and full traceback so the failing step can be identified.
import traceback

try:
    production_dir = os.path.join(project_root, 'models', 'production')
    model_path = os.path.join(production_dir, 'model.pkl')
    pre_path = os.path.join(production_dir, 'preprocessor.pkl')

    # NOTE(review): joblib.load unpickles arbitrary objects; only load
    # artifacts produced by a trusted run of the training script.
    model = joblib.load(model_path)
    preprocessor = joblib.load(pre_path)
    print("Models loaded successfully.")

    # One dummy record. The column names and order are assumed to match the
    # schema the preprocessor was fitted on -- confirm against train.py.
    cols = ['age', 'sex', 'cp', 'trestbps', 'chol', 'fbs', 'restecg', 'thalach', 'exang', 'oldpeak', 'slope', 'ca', 'thal']
    dummy_data = pd.DataFrame([
        [63, 1, 3, 145, 233, 1, 0, 150, 0, 2.3, 0, 0, 1]
    ], columns=cols)

    # Apply the preprocessing before handing the record to the model.
    processed = preprocessor.transform(dummy_data)
    pred = model.predict(processed)
    prob = model.predict_proba(processed)
    print(f"Prediction: {pred[0]}")
    print(f"Probabilities: {prob[0]}")

except Exception as e:
    # str(e) alone can be uninformative (a KeyError prints only the key), so
    # include the exception type and emit the traceback as well.
    print(f"Validation Failed: {type(e).__name__}: {e}")
    traceback.print_exc()