In [None]:
import pandas as pd
import joblib
import json
import numpy as np

# Load the persisted model and its metadata once, when the module is run.
# NOTE(review): joblib.load unpickles the file, and unpickling can execute
# arbitrary code -- only load model artifacts that come from a trusted source.
model = joblib.load('health_risk_model.pkl')
# Read the JSON as UTF-8 explicitly so the result does not depend on the
# platform's locale default encoding.
with open('model_metadata.json', encoding='utf-8') as f:
    metadata = json.load(f)

# Prediction helper built on the module-level `model` and `metadata`.
def predict_health_risk(input_data: dict, *, threshold: float = 0.2) -> dict:
    """Predict the health-risk label for a single input record.

    The label is decided by comparing the probability at index 1 of
    ``model.predict_proba`` against ``threshold``; ``model.predict`` is not
    consulted.

    Args:
        input_data: Mapping of feature name to value. Every name listed in
            ``metadata['feature_names']`` must be present. Extra keys are
            dropped before the model is called but are still echoed back
            under ``'features'``.
        threshold: Cut-off applied to the index-1 probability. The label is
            "IYA" when that probability is strictly greater than the
            threshold, otherwise "TIDAK". The default of 0.2 keeps the
            cut-off this function has always used.

    Returns:
        A dictionary with the keys:
            'prediction': "IYA" or "TIDAK".
            'prob_ya': index-1 probability, rounded to 4 decimals.
            'prob_tidak': index-0 probability, rounded to 4 decimals.
            'features': the ``input_data`` object exactly as passed in
                (the same object, not a copy).

    Raises:
        ValueError: If any feature in ``metadata['feature_names']`` is
            missing from ``input_data``.
        RuntimeError: If the model call or reading its output fails; the
            original exception is chained as ``__cause__``.
    """
    # Wrap the record in a one-row DataFrame.
    input_df = pd.DataFrame([input_data])

    # Validate that every required feature was supplied.
    required_features = metadata['feature_names']
    missing_features = [f for f in required_features if f not in input_df.columns]
    if missing_features:
        raise ValueError(f"Fitur berikut dibutuhkan: {', '.join(missing_features)}")

    # Select and order the columns according to metadata['feature_names'].
    input_df = input_df[required_features]

    try:
        # Only the probabilities are needed: the label below is derived from
        # them, so a separate model.predict call would be wasted work.
        probabilities = model.predict_proba(input_df)[0]
        # NOTE(review): assumes a binary classifier whose positive ("ya")
        # class sits at index 1 -- confirm against model.classes_.
        prob_tidak = float(probabilities[0])
        prob_ya = float(probabilities[1])
    except Exception as e:
        # Keep the RuntimeError contract for callers, but chain the cause so
        # the underlying failure stays visible in the traceback.
        raise RuntimeError(f"Prediksi gagal: {e}") from e

    prediction_label = "IYA" if prob_ya > threshold else "TIDAK"

    return {
        'prediction': prediction_label,
        'prob_ya': round(prob_ya, 4),
        'prob_tidak': round(prob_tidak, 4),
        'features': input_data,
    }

# Try the predictor on one hand-written sample record.
if __name__ == "__main__":

    sample_data = {
        'age': 40,
        'annual_income_usd': 5,
        'smokes_per_day': 100,
        'drinks_per_week': 25,
        'gender': 'Male',
        'mental_health_status': 'Good',
        'social_support': 'Weak'
    }

    # Run the prediction; on any failure report it and skip the summary.
    try:
        result = predict_health_risk(sample_data)
    except Exception as e:
        print(f"Error: {e}")
        result = None

    if result:
        # Assemble the whole report first, then emit it in one go.
        rule = "=" * 50
        report = [
            "\n" + rule,
            "HASIL PREDIKSI RISIKO KESEHATAN",
            rule,
            f"Status Risiko Kesehatan: {result['prediction'].upper()}",
            f"Probabilitas 'YA': {result['prob_ya']:.2%}",
            f"Probabilitas 'TIDAK': {result['prob_tidak']:.2%}",
            "\nDetail Input:",
        ]
        # One bullet per echoed feature, e.g. "smokes_per_day" -> "Smokes Per Day".
        report.extend(
            f"- {name.replace('_', ' ').title()}: {val}"
            for name, val in result['features'].items()
        )
        report.append(rule)
        print("\n".join(report))


HASIL PREDIKSI RISIKO KESEHATAN
Status Risiko Kesehatan: IYA
Probabilitas 'YA': 44.71%
Probabilitas 'TIDAK': 55.29%

Detail Input:
- Age: 40
- Annual Income Usd: 5
- Smokes Per Day: 100
- Drinks Per Week: 25
- Gender: Male
- Mental Health Status: Good
- Social Support: Weak
