# Triage dataset exploration

Load the triage dataset (CSV), inspect distributions, and visualize risk vs vitals.

In [None]:
import pandas as pd
from pathlib import Path

# Location of the triage CSV, relative to the notebook's working directory.
DATA_DIR = Path("../data")
CSV_PATH = DATA_DIR.joinpath("triage_dataset.csv")

# Load the data and show its shape and first rows when the file is present;
# otherwise print a hint on how to produce it.
if CSV_PATH.exists():
    df = pd.read_csv(CSV_PATH)
    print(df.shape)
    print(df.head())
else:
    print("Dataset not found. Run: python -m ml.train_model from backend/")

In [None]:
# Only summarise when the CSV exists (df is created by the loading cell).
if CSV_PATH.exists():
    # Row count for each distinct value in the "risk" column.
    risk_counts = df["risk"].value_counts()
    print(risk_counts)

    # Descriptive statistics as reported by DataFrame.describe().
    summary = df.describe()
    print(summary)

In [None]:
import matplotlib.pyplot as plt

if CSV_PATH.exists():
    # Bar chart of how many rows carry each distinct "risk" value.
    risk_counts = df["risk"].value_counts()
    risk_counts.plot(kind="bar", title="Risk distribution")
    plt.ylabel("Count")
    plt.show()

In [None]:
import matplotlib.pyplot as plt

if CSV_PATH.exists():
    # One box plot per vital-sign column, grouped by "risk".
    for vital in ("spo2", "heart_rate", "temperature"):
        # Skip vitals that are not present in the loaded dataset.
        if vital not in df.columns:
            continue
        df.boxplot(column=vital, by="risk", figsize=(6, 4))
        # Blank the figure-level title so only the per-plot title is shown.
        plt.suptitle("")
        plt.title(f"{vital} by risk")
        plt.show()