In [1]:
import pandas as pd
import joblib

In [2]:
# This will be preprocessing functions to allow the model to be intractable
# Please go to the next cell to test the model

FEATURE_ORDER = [
    'age', 'sex', 'bmi', 'children', 'smoker', 'northwest', 'southeast', 'southwest', 'is_obese', 'senior', 'multi_children', 'age_squared', 'bmi_smoker', 'age_smoker'
]

# This converts the data to how the model expects
def encode_data(data_dict):
    df = pd.DataFrame([data_dict])
    
    df['sex'] = df['sex'].map({'male': 1, 'female': 0})
    df['smoker'] = df['smoker'].map({'yes': 1, 'no': 0})
    
    df['northwest'] = (df['region'] == 'northwest').astype(int)
    df['southeast'] = (df['region'] == 'southeast').astype(int)
    df['southwest'] = (df['region'] == 'southwest').astype(int)

    df['is_obese'] = (df['bmi'] > 30).astype(int)
    df['senior'] = (df['age'] > 55).astype(int)
    df['multi_children'] = (df['children'] >= 2).astype(int)

    df['age_squared'] = df['age']**2
    df['bmi_smoker'] = df['bmi'] * df['smoker']
    df['age_smoker'] = df['age'] * df['smoker']
    
    try:
        encoded_df = df[FEATURE_ORDER]
    except KeyError as e:
        print(f"Feature Missing: {e}")
        return None
        
    return encoded_df

# This is for the classification models on whether it expects it to be high or low cost
def make_cost_classification(patient, model):
    try:
        model = joblib.load(model)
    except Exception:
        print(f"Model not found, make sure the model exists under the Saved-Models path within this directory")
        return
    
    print(f"Running classification model")
    print(f"Processing data for a {patient['age']} year-old {patient['sex']}")
    encoded_input = encode_data(patient)
    
    if encoded_input is not None:
        prediction = model.predict(encoded_input)
        try:
            probabilities = model.predict_proba(encoded_input)
        except:
            print(f"It appears a regression model is being used, if you believe that is not the case check the path being used.")
            return
        predicted_class = prediction[0]
        confidence = probabilities[0][predicted_class] * 100
        
        print(f"PREDICTION")
        print("-------------------------------------")
        
        if predicted_class == 1:
            print(f"Result: HIGH COSTS EXPECTED FOR THIS PATIENT")
        else:
            print(f"Result: LOW COSTS EXPECTED FOR THIS PATIENT")
        
        print(f"Model Confidence: {confidence:.2f}%")
        print("-------------------------------------")

# This is for the regression models on what cost the model expects
def make_cost_regression(patient, model):
    try:
        model = joblib.load(model)
    except Exception:
        print(f"Model not found, make sure the model exists under the Saved-Models path within this directory")
        return

    print(f"Running regression model")
    print(f"Processing data for a {patient['age']} year-old {patient['sex']}")
    encoded_input = encode_data(patient)


    if encoded_input is not None:
        prediction = model.predict(encoded_input)
        predicted_cost = prediction[0]
        
        print(f"PREDICTION")
        print("-------------------------------------")

        if predicted_cost < 0:
            predicted_cost = 0.0
        
        if predicted_cost <= 1:
            print(f"You may be using a classification model, ensure this is a regression model")
            
        print(f"Result: ${predicted_cost:.2f}")

        print("-------------------------------------")

In [3]:
# Set the model of interest here, this is an example of one of the models

MODEL = "./Saved-Models/medical_cost_gradient_boost_classifier_model.pkl"
# Remaining Models
# MODEL = "./Saved-Models/medical_cost_logistic_model.pkl"
# MODEL = "./Saved-Models/medical_cost_random_forest_classification_model.pkl"

# MODEL = "./Saved-Models/medical_cost_gradient_boost_regression_model.pkl"
# MODEL = "./Saved-Models/medical_cost_linear_regression_model.pkl"
# MODEL = "./Saved-Models/medical_cost_random_forest_regression_model.pkl"

# MODIFY THIS DICTIONARY TO CHANGE INPUT
patient_data = {
    # int
    'age': 42,
    # 'male' or 'female'
    'sex': 'female',
    # float
    'bmi': 29.5,
    # int
    'children': 2,
    # # 'yes' or 'no'
    'smoker': 'no',
    # 'northeast', 'northwest', 'southeast', or 'southwest'
    'region': 'northeast'
}

# Use this function to perform classification
# Only do for models built for Classification
# Gradient Boost Classification, Logistic Regression, Random Forest Classification
make_cost_classification(patient_data, MODEL)

print()

# Use this function to perform regression
# Only do for models built for Classification
# Gradient Boost Regression, Linear Regression, Random Forest Regression
make_cost_regression(patient_data, MODEL)

Running classification model
Processing data for a 42 year-old female
PREDICTION
-------------------------------------
Result: LOW COSTS EXPECTED FOR THIS PATIENT
Model Confidence: 98.73%
-------------------------------------

Running regression model
Processing data for a 42 year-old female
PREDICTION
-------------------------------------
You may be using a classification model, ensure this is a regression model
Result: $0.00
-------------------------------------
