# Notebook 07 — Decision Tree Model for High-Risk Medication Errors

This notebook trains a simple, interpretable decision tree model.
The goal is to predict whether an event is a **high-risk** error based on engineered features.

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.tree import export_text

# Location of the source workbook, relative to the notebook's working directory.
file_path = "../data/Krista_240726_Final.xlsx"

# Read only the "Medication" worksheet of the workbook into a DataFrame.
med_df = pd.read_excel(io=file_path, sheet_name="Medication")

# Preview the first five rows as the cell output.
med_df.head(5)

## Create a binary high-risk target

Here we define a **high-risk** event as one that is either:
- a wrong medication error, or
- a dose-related error.

This is a simple example to demonstrate the decision tree workflow.

In [None]:
# Text columns that are scanned for keywords. Every match below is
# case-insensitive, and missing values count as "no match" (na=False).
pattern_text = med_df['Pattern Specifics']
first_medication = med_df['Medication 1']
branch_name = med_df['Branch']

# Keyword flags derived from the free-text error description.
# NOTE(review): 'wrong' is a plain substring match, so it flags any description
# containing that word — confirm this is limited to wrong-medication errors.
med_df['wrong_med_flag'] = pattern_text.str.contains('wrong', case=False, na=False)
med_df['dose_error_flag'] = pattern_text.str.contains('dose', case=False, na=False)

# Flag rows whose first listed medication matches one of the named drugs.
med_df['sedation_flag'] = first_medication.str.contains(
    'ketamine|fentanyl|midazolam|versed', case=False, na=False
)

# Flag rows whose branch name contains "Air"; stored as 0/1 rather than bool.
med_df['branch_air_flag'] = branch_name.str.contains('Air', case=False, na=False).astype(int)

# Target: 1 when at least one of the two error flags is set, otherwise 0.
target_flags = ['wrong_med_flag', 'dose_error_flag']
med_df['high_risk'] = med_df[target_flags].any(axis=1).astype(int)

# Show the engineered columns side by side for a quick sanity check.
preview_columns = ['wrong_med_flag', 'dose_error_flag', 'sedation_flag', 'branch_air_flag', 'high_risk']
med_df[preview_columns].head()

## Select features and split data

In [None]:
# Predictor columns for the model.
# `wrong_med_flag` and `dose_error_flag` are deliberately NOT included:
# `high_risk` is computed as their logical OR, so feeding them to the model
# leaks the label and makes any accuracy figure meaningless (the tree would
# simply re-learn the definition of the target).
feature_columns = ['sedation_flag', 'branch_air_flag']
features = med_df[feature_columns].astype(int)  # bool flags -> 0/1
target = med_df['high_risk']

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(features, target, test_size=0.2, random_state=42)
X_train.head()

## Train a simple decision tree model

In [None]:
# Fit a shallow tree (at most 3 levels) so the learned rules stay readable.
# The fixed random_state makes the fit reproducible; fit() returns the estimator itself.
model = DecisionTreeClassifier(random_state=42, max_depth=3).fit(X_train, y_train)

# Score the held-out rows.
predictions = model.predict(X_test)
test_accuracy = accuracy_score(y_test, predictions)
test_confusion = confusion_matrix(y_test, predictions)

# Report the share of correct predictions and the confusion matrix
# (rows are true classes, columns are predicted classes).
print('Accuracy:', test_accuracy)
print('Confusion Matrix:')
print(test_confusion)

## Display decision rules

In [None]:
# Render the fitted tree as indented if/else text, labelling each split
# with the name of the feature column it tests.
feature_names = features.columns.tolist()
tree_rules = export_text(model, feature_names=feature_names)
print(tree_rules)