In [4]:
import os

PROJECT_ROOT = "/teamspace/studios/this_studio/Medical_Cost_Project"
os.chdir(PROJECT_ROOT)

print("Current working directory:", os.getcwd())


Current working directory: /teamspace/studios/this_studio/Medical_Cost_Project


In [5]:
import pandas as pd

explanation_df = pd.read_csv(
    "models/M11_local_explanation_instance_0.csv"
)

explanation_df.head()


Unnamed: 0,feature,shap_value
0,smoker_no,8683.462906
1,smoker_yes,6633.747275
2,bmi,-5861.363774
3,age,-4949.775873
4,children,-518.426197


In [6]:
import os
print(os.path.exists("models/M11_local_explanation_instance_0.csv"))


True


In [7]:
# Sort by absolute SHAP value (descending)
explanation_df["abs_shap"] = explanation_df["shap_value"].abs()

top_features = (
    explanation_df
    .sort_values("abs_shap", ascending=False)
    .head(5)
)

top_features


Unnamed: 0,feature,shap_value,abs_shap
0,smoker_no,8683.462906,8683.462906
1,smoker_yes,6633.747275,6633.747275
2,bmi,-5861.363774,5861.363774
3,age,-4949.775873,4949.775873
4,children,-518.426197,518.426197


In [8]:
def rule_based_explanation(feature, shap_value):
    if feature == "smoker_yes":
        return "Smoking status significantly increases medical risk and insurance cost."
    
    if feature == "bmi":
        return "Higher BMI is associated with increased health risks, leading to higher insurance charges."
    
    if feature == "age":
        return "Increasing age generally leads to higher healthcare utilization and costs."
    
    if feature == "children":
        return "Having more dependents can increase overall insurance expenses."
    
    if "region" in feature:
        return "Geographical region has a moderate influence on healthcare costs."
    
    # Fallback (safe & generic)
    if shap_value > 0:
        return f"The feature '{feature}' contributes to an increase in the predicted insurance cost."
    else:
        return f"The feature '{feature}' contributes to a decrease in the predicted insurance cost."


In [9]:
sentences = []

for _, row in top_features.iterrows():
    feature = row["feature"]
    shap_value = row["shap_value"]
    
    sentence = rule_based_explanation(feature, shap_value)
    sentences.append(sentence)

final_explanation = (
    "The predicted insurance cost is influenced by multiple factors. "
    "The most important contributors are: "
    + " ".join(sentences)
)

print(final_explanation)


The predicted insurance cost is influenced by multiple factors. The most important contributors are: The feature 'smoker_no' contributes to an increase in the predicted insurance cost. Smoking status significantly increases medical risk and insurance cost. Higher BMI is associated with increased health risks, leading to higher insurance charges. Increasing age generally leads to higher healthcare utilization and costs. Having more dependents can increase overall insurance expenses.


In [10]:
output_path = "models/M13_text_explanation_instance_0.txt"

with open(output_path, "w") as f:
    f.write(final_explanation)

print(f"Saved explanation to: {output_path}")


Saved explanation to: models/M13_text_explanation_instance_0.txt


In [11]:
import os

os.path.exists("models/M13_text_explanation_instance_0.txt")


True