In [88]:
import pandas as pd
import joblib

In [89]:
# Location of the serialized model pipeline; the path is relative, so it is
# resolved against the notebook's current working directory.
pipeline_path = "../models/best_income_model_pipeline.pkl"

# SECURITY: joblib.load unpickles the file, and unpickling can execute
# arbitrary code. Only load artifacts that come from a trusted source.
pipeline = joblib.load(pipeline_path)

# Fail fast if the artifact cannot serve both class and probability
# predictions, instead of failing later on the first prediction call.
_missing_methods = [
    method for method in ("predict", "predict_proba") if not hasattr(pipeline, method)
]
if _missing_methods:
    raise TypeError(
        f"Object loaded from {pipeline_path!r} lacks required method(s): "
        f"{', '.join(_missing_methods)}"
    )


In [90]:
def predict_income(user_input: dict, model=None) -> tuple:
    """
    Predicts income class (<=50K / >50K) and probability for a single record.

    Parameters:
        user_input (dict): Keys = feature names, Values = user-provided values.
            Must contain at least one feature.
        model (optional): Fitted estimator exposing ``predict`` and
            ``predict_proba``. When omitted, the module-level ``pipeline``
            is used, so existing single-argument calls keep working.

    Returns:
        prediction (str): "<=50K" or ">50K"
        probability (float): Probability of income >50K

    Raises:
        ValueError: If ``user_input`` is None or contains no features.
    """
    # Reject empty input up front rather than letting the estimator fail on it
    if user_input is None or len(user_input) == 0:
        raise ValueError("user_input must be a non-empty dict of feature values")

    # Only touch the notebook-level pipeline when no model was injected
    estimator = pipeline if model is None else model

    # Wrapping the dict in a list yields a one-row DataFrame whose columns
    # are the feature names
    input_df = pd.DataFrame([user_input])

    pred_class = estimator.predict(input_df)[0]
    # NOTE(review): assumes the estimator encodes "<=50K" as 0 and that column 1
    # of predict_proba is the ">50K" class -- TODO confirm against the
    # training-time label encoding.
    # float(...) converts the numpy scalar to the plain float the docstring promises
    pred_prob = float(estimator.predict_proba(input_df)[0][1])

    return ("<=50K" if pred_class == 0 else ">50K"), pred_prob

In [91]:
# One hand-written sample record used to exercise the predictor.
# Built from (feature name, value) pairs; insertion order is preserved.
# NOTE(review): the category labels presumably match the vocabulary the
# model was trained on -- TODO confirm.
example_input = dict(
    [
        # numeric features
        ("age", 44),
        ("capital-gain", 7688),
        ("capital-loss", 0.0),
        ("hours-per-week", 40),
        # categorical features
        ("workclass", "Private"),
        ("education", "Undergraduate"),
        ("marital-status", "Married"),
        ("occupation", "Blue collar"),
        ("relationship", "Partnered"),
        ("gender", "Male"),
    ]
)

In [92]:
# Score the sample record: `prediction` is the class label string and
# `probability` is the model's probability for the >50K class.
prediction, probability = predict_income(example_input)

# Headline plus the confidence of whichever class was predicted
if prediction == ">50K":
    print("Income Prediction: More than $50K")
    # The >50K probability is already the confidence for this class
    print(f"Confidence of earning >$50K: {probability:.2%}")
else:
    print("Income Prediction: Less than or equal to $50K")
    # Confidence for the ≤50K class is the complement of the >50K probability
    print(f"Confidence of earning ≤$50K: {(1-probability):.2%}")

# Raw values for both classes, regardless of which one was predicted
print("\n--- Prediction Details ---")
print(f"Raw prediction: {prediction}")
print(f"Probability of >50K: {probability:.4f}")
print(f"Probability of ≤50K: {1-probability:.4f}")

Income Prediction: More than $50K
Confidence of earning >$50K: 100.00%

--- Prediction Details ---
Raw prediction: >50K
Probability of >50K: 1.0000
Probability of ≤50K: 0.0000
