# Notebook 05 — Clustering

This notebook performs simple k-means clustering using engineered features from Notebook 04.

The goal is to identify natural groupings in medication-error behavior.

In [None]:
import pandas as pd
from sklearn.cluster import KMeans

# Source workbook, given relative to the notebook's working directory.
file_path = "../data/Krista_240726_Final.xlsx"

# Read the "Medication" sheet and rebuild the Notebook 04 keyword flags in a
# single chain. Every flag is a case-insensitive match on a text column;
# na=False makes rows with missing text count as "no match".
med_df = pd.read_excel(file_path, sheet_name="Medication").assign(
    wrong_med_flag=lambda frame: frame['Pattern Specifics'].str.contains(
        'wrong', case=False, na=False
    ),
    sedation_flag=lambda frame: frame['Medication 1'].str.contains(
        'ketamine|fentanyl|midazolam|versed', case=False, na=False
    ),
    dose_error_flag=lambda frame: frame['Pattern Specifics'].str.contains(
        'dose', case=False, na=False
    ),
    # Unlike the three boolean flags above, this one is stored as 0/1.
    branch_air_flag=lambda frame: frame['Branch'].str.contains(
        'Air', case=False, na=False
    ).astype(int),
)

# Feature matrix for clustering: the four flags, all cast to integers.
X = med_df.loc[
    :, ['wrong_med_flag', 'sedation_flag', 'dose_error_flag', 'branch_air_flag']
].astype(int)

# Preview the first rows of the feature matrix.
X.head()

## Run k-means clustering (k = 3)

In [None]:
# Partition the binary feature matrix X into k groups with k-means.
N_CLUSTERS = 3     # k, as stated in the section heading
RANDOM_STATE = 42  # fixed seed so centroid initialisation is repeatable
# Pin the number of restarts explicitly: scikit-learn changed the default from
# 10 to 'auto' in version 1.4 (and warns about it on 1.2/1.3), so leaving it
# implicit lets the clustering vary with the installed library version.
N_INIT = 10

kmeans = KMeans(n_clusters=N_CLUSTERS, n_init=N_INIT, random_state=RANDOM_STATE)
clusters = kmeans.fit_predict(X)

# fit_predict returns one label per row of X, in row order; X was selected
# from med_df without dropping rows, so positional assignment lines up.
med_df['cluster'] = clusters

# Preview the engineered flags next to their assigned cluster label.
med_df[['wrong_med_flag','sedation_flag','dose_error_flag','branch_air_flag','cluster']].head()