# Notebook 4: Streamlit Integration Test

## Objective
This notebook tests the integration of our trained model with Streamlit components:
1. Load the saved model
2. Test sample predictions
3. Test SHAP explanations
4. Verify all components work correctly before deployment


In [5]:
# Import required libraries
import pandas as pd
import numpy as np
import json
import joblib

print("Libraries imported successfully!")


Libraries imported successfully!


## Step 1: Load Model and Metadata


In [None]:
# Optional: Diagnostic check for LightGBM and other ML libraries
# Run this cell if you encounter DLL or import errors
print("=" * 60)
print("DIAGNOSTIC: Checking ML Library Installations")
print("=" * 60)

libraries_to_check = {
    'lightgbm': 'lightgbm',
    'xgboost': 'xgboost',
    'sklearn': 'sklearn',
    'joblib': 'joblib',
    'shap': 'shap'
}

installation_issues = []

for lib_name, import_name in libraries_to_check.items():
    try:
        __import__(import_name)
        print(f"✓ {lib_name}: Installed")
    except ImportError as e:
        print(f"✗ {lib_name}: NOT INSTALLED - {e}")
        installation_issues.append(lib_name)
    except Exception as e:
        error_msg = str(e)
        if 'lightgbm' in error_msg.lower() or 'lib_lightgbm' in error_msg.lower() or 'dll' in error_msg.lower():
            print(f"⚠ {lib_name}: DLL ERROR - Cannot load library")
            print(f"   Error: {error_msg[:100]}...")
            installation_issues.append(lib_name)
            print(f"\n   FIX: pip uninstall lightgbm && pip install lightgbm")
        else:
            print(f"⚠ {lib_name}: ERROR - {error_msg[:100]}...")
            installation_issues.append(lib_name)

if installation_issues:
    print("\n" + "=" * 60)
    print("INSTALLATION ISSUES DETECTED")
    print("=" * 60)
    print(f"\nLibraries with issues: {', '.join(installation_issues)}")
    print("\nRecommended fixes:")
    for lib in installation_issues:
        if lib == 'lightgbm':
            print(f"\n  For {lib}:")
            print("    1. pip uninstall lightgbm")
            print("    2. pip install lightgbm")
            print("    3. If that fails, try: conda install -c conda-forge lightgbm")
            print("    4. For Windows: Install Visual C++ Redistributable")
            print("       https://aka.ms/vs/17/release/vc_redist.x64.exe")
        else:
            print(f"\n  For {lib}: pip install --upgrade {lib}")
    print("\nAfter fixing, restart your kernel and try again.")
else:
    print("\n✓ All libraries are properly installed!")

print("\n" + "=" * 60)


DIAGNOSTIC: Checking ML Library Installations
⚠ lightgbm: DLL ERROR - Cannot load library
   Error: Could not find module 'c:\Users\deirw\AppData\Local\Programs\Python\Python313\Lib\site-packages\ligh...

   FIX: pip uninstall lightgbm && pip install lightgbm
✓ xgboost: Installed
✓ sklearn: Installed
✓ joblib: Installed


  from .autonotebook import tqdm as notebook_tqdm


✓ shap: Installed

INSTALLATION ISSUES DETECTED

Libraries with issues: lightgbm

Recommended fixes:

  For lightgbm:
    1. pip uninstall lightgbm
    2. pip install lightgbm
    3. If that fails, try: conda install -c conda-forge lightgbm
    4. For Windows: Install Visual C++ Redistributable
       https://aka.ms/vs/17/release/vc_redist.x64.exe

After fixing, restart your kernel and try again.



In [8]:
# Load model
model = joblib.load('../models/best_model_pipeline.pkl')

# Load metadata
with open('../models/model_metadata.json', 'r') as f:
    model_metadata = json.load(f)

# Load selected features
with open('../models/selected_features.json', 'r') as f:
    selected_features = json.load(f)

print("=" * 60)
print("MODEL LOADED")
print("=" * 60)
print(f"Model Type: {model_metadata['model_name']}")
print(f"Features: {selected_features}")
print(f"Model Metrics:")
for metric, value in model_metadata['metrics'].items():
    print(f"  {metric}: {value:.4f}")


MODEL LOADED
Model Type: LightGBM
Features: ['Age', 'BMI', 'BloodPressure', 'Glucose', 'DiabetesPedigreeFunction', 'Insulin', 'Pregnancies']
Model Metrics:
  accuracy: 0.7597
  precision: 0.6269
  recall: 0.7778
  f1_score: 0.6942
  roc_auc: 0.8300


## Step 2: Test Sample Prediction


In [9]:
# Create a sample input (example patient data)
# This simulates what a user would input in the Streamlit app
sample_input = pd.DataFrame({
    'Age': [45],
    'BMI': [28.5],
    'BloodPressure': [85],
    'Glucose': [140],
    'DiabetesPedigreeFunction': [0.5]
})

# Ensure all selected features are present
for feature in selected_features:
    if feature not in sample_input.columns:
        # Add missing feature with a default value
        sample_input[feature] = [0]  # You may want to use median or mean from training data

# Reorder columns to match training data
sample_input = sample_input[selected_features]

print("=" * 60)
print("SAMPLE PREDICTION TEST")
print("=" * 60)
print(f"\nSample Input:")
print(sample_input.to_string(index=False))

# Make prediction
prediction_proba = model.predict_proba(sample_input)[0]
prediction_class = model.predict(sample_input)[0]

print(f"\nPrediction Results:")
print(f"  Probability (No Diabetes): {prediction_proba[0]:.4f} ({prediction_proba[0]*100:.2f}%)")
print(f"  Probability (Diabetes): {prediction_proba[1]:.4f} ({prediction_proba[1]*100:.2f}%)")
print(f"  Predicted Class: {'Diabetes' if prediction_class == 1 else 'No Diabetes'}")
print(f"  Risk Level: {'High' if prediction_proba[1] > 0.7 else 'Medium' if prediction_proba[1] > 0.5 else 'Low'}")

print("\n✓ Prediction test successful!")


SAMPLE PREDICTION TEST

Sample Input:
 Age  BMI  BloodPressure  Glucose  DiabetesPedigreeFunction  Insulin  Pregnancies
  45 28.5             85      140                       0.5        0            0

Prediction Results:
  Probability (No Diabetes): 0.4994 (49.94%)
  Probability (Diabetes): 0.5006 (50.06%)
  Predicted Class: Diabetes
  Risk Level: Medium

✓ Prediction test successful!


## Step 3: Test SHAP Explanation


In [10]:
# Try to load SHAP explainer
try:
    explainer_data = joblib.load('../models/shap_explainer.pkl')
    
    # Check if it's a direct explainer or data dict
    if isinstance(explainer_data, dict) and 'explainer_type' in explainer_data:
        print("SHAP explainer data loaded (will be recreated in Streamlit)")
        print(f"Explainer type: {explainer_data['explainer_type']}")
    else:
        print("SHAP explainer loaded successfully")
        # Test SHAP values
        shap_values = explainer_data.shap_values(sample_input)
        if isinstance(shap_values, list):
            shap_values = shap_values[1]
        print(f"SHAP values shape: {shap_values.shape}")
        print(f"SHAP values: {shap_values}")
    
    print("\n✓ SHAP explainer test successful!")
except FileNotFoundError:
    print("SHAP explainer file not found. Run Notebook 3 first.")
except Exception as e:
    print(f"Error loading SHAP explainer: {e}")


SHAP explainer loaded successfully
SHAP values shape: (1, 7)
SHAP values: [[ 0.25952804  0.02792136  0.02654563  0.27657203  0.06919113 -0.3196689
  -0.05817931]]

✓ SHAP explainer test successful!




## Step 4: Verify All Files Exist

Check that all necessary files are in place for Streamlit deployment.


In [11]:
import os

print("=" * 60)
print("FILE VERIFICATION")
print("=" * 60)

required_files = {
    'Model': '../models/best_model_pipeline.pkl',
    'Model Metadata': '../models/model_metadata.json',
    'Selected Features': '../models/selected_features.json',
    'SHAP Explainer': '../models/shap_explainer.pkl',
    'SHAP Feature Importance': '../models/shap_feature_importance.json'
}

all_exist = True
for name, filepath in required_files.items():
    exists = os.path.exists(filepath)
    status = "✓" if exists else "✗"
    print(f"{status} {name}: {filepath}")
    if not exists:
        all_exist = False

if all_exist:
    print("\n✓ All required files are present!")
    print("Ready for Streamlit deployment.")
else:
    print("\n✗ Some files are missing. Please run previous notebooks.")


FILE VERIFICATION
✓ Model: ../models/best_model_pipeline.pkl
✓ Model Metadata: ../models/model_metadata.json
✓ Selected Features: ../models/selected_features.json
✓ SHAP Explainer: ../models/shap_explainer.pkl
✓ SHAP Feature Importance: ../models/shap_feature_importance.json

✓ All required files are present!
Ready for Streamlit deployment.


## Summary

This notebook verified:
1. ✅ Model loads correctly
2. ✅ Predictions work as expected
3. ✅ SHAP explainer is available
4. ✅ All required files are present

**Next Steps:** The Streamlit application is ready to be deployed!
