In [1]:
import joblib
import pandas as pd

# Sanity check for the saved model artefacts: load the feature list, the
# fitted model and the metadata, print a short summary of each, then run one
# prediction on a hand-built row to confirm the pieces work together.
#
# NOTE(security): joblib.load unpickles arbitrary Python objects. Only run
# this against artefact files you produced yourself or otherwise trust.
print("=" * 60)
print("VERIFICATION: Checking saved files")
print("=" * 60)

# 1. Check feature names
print("\n1. FEATURE NAMES:")
features = joblib.load('feature_names.pkl')
print(f"   Total features: {len(features)}")
print(f"   Type: {type(features)}")
print(f"   First 10: {features[:10]}")
print(f"   Last 5: {features[-5:]}")

# 2. Check model
print("\n2. MODEL:")
model = joblib.load('random_forest_model.joblib')
print(f"   Model type: {type(model)}")
print(f"   n_features_in_: {model.n_features_in_}")
if len(features) != model.n_features_in_:
    print(
        f"   ❌ Feature count mismatch: {len(features)} saved names "
        f"vs {model.n_features_in_} expected by the model"
    )
if hasattr(model, 'feature_names_in_'):
    print(f"   feature_names_in_ (first 10): {model.feature_names_in_[:10]}")
    # Compare the complete lists, not just the printed prefix: a difference
    # in names or order further down would otherwise go unnoticed.
    if list(model.feature_names_in_) == list(features):
        print("   ✅ Saved feature names match the model (names and order)")
    else:
        print("   ❌ Saved feature names do NOT match model.feature_names_in_!")
else:
    print("   No feature_names_in_ attribute")

# 3. Check metadata
print("\n3. METADATA:")
metadata = joblib.load('model_metadata.pkl')
print(f"   Keys: {metadata.keys()}")
print(f"   Threshold: {metadata.get('model_info', {}).get('optimal_threshold')}")

# 4. CRITICAL: Test a simple prediction
print("\n4. TEST PREDICTION:")
print("   Creating test input with default values...")

# Values for the test row; every feature not listed here stays at 0.0.
test_values = {
    # Some realistic numeric values
    'time_in_hospital': 7.0,
    'num_lab_procedures': 45.0,
    'num_medications': 12.0,
    'num_medications_changed': 0.0,
    'total_hospital_visits': 3.0,
    'number_emergency': 1.0,
    'age_numeric': 58.0,
    # One-hot encoded features
    'gender_0': 1.0,  # Female
    'admission_type_0': 1.0,  # Emergency
    'discharge_disposition_0': 1.0,  # Home
    'age_group_1': 1.0,  # 46-65
}

# Start from an all-zero row keyed by the saved feature names, so the frame
# always has exactly those columns in exactly that order.
test_row = dict.fromkeys(features, 0.0)

# Assigning an unknown name via DataFrame.at would silently append a new
# column and break the prediction later; report such names up front instead.
unknown_names = [name for name in test_values if name not in test_row]
if unknown_names:
    print(f"   ⚠️ Skipping names missing from feature_names.pkl: {unknown_names}")

test_row.update(
    {name: value for name, value in test_values.items() if name in test_row}
)
test_df = pd.DataFrame([test_row])

print(f"   Test DataFrame shape: {test_df.shape}")
print(f"   Non-zero values: {(test_df != 0).sum().sum()}")

# Make prediction. predict_proba columns follow model.classes_, so column 1
# is the probability of the second class.
prob = model.predict_proba(test_df)[0, 1]

print(f"\n   PREDICTION RESULT: {prob:.6f}")

if 0 <= prob <= 1:
    print(f"   ✅ VALID probability ({prob:.1%})")
else:
    print("   ❌ INVALID probability! Something is wrong!")

print("\n" + "=" * 60)

VERIFICATION: Checking saved files

1. FEATURE NAMES:
   Total features: 48
   Type: <class 'list'>
   First 10: ['time_in_hospital', 'num_lab_procedures', 'num_medications', 'num_medications_changed', 'total_hospital_visits', 'number_emergency', 'age_numeric', 'gender_0', 'gender_1', 'gender_2']
   Last 5: ['discharge_disposition_25', 'age_group_0', 'age_group_1', 'age_group_2', 'age_group_3']

2. MODEL:
   Model type: <class 'sklearn.ensemble._forest.RandomForestClassifier'>
   n_features_in_: 48
   feature_names_in_ (first 10): ['time_in_hospital' 'num_lab_procedures' 'num_medications'
 'num_medications_changed' 'total_hospital_visits' 'number_emergency'
 'age_numeric' 'gender_0' 'gender_1' 'gender_2']

3. METADATA:
   Keys: dict_keys(['model_info', 'performance_metrics', 'confusion_matrix', 'feature_names'])
   Threshold: 0.4800000000000001

4. TEST PREDICTION:
   Creating test input with default values...
   Test DataFrame shape: (1, 48)
   Non-zero values: 10

   PREDICTION RESUL