<span style="font-size:50px"><strong>ML4HC Project 1 - Task 4</strong></span>


In [21]:
import os
import pandas as pd
import numpy as np
from glob import glob
from tqdm import tqdm
from matplotlib import pyplot as plt

# Define paths
data_dir = "../ml4h_data/p1/"#"data"

pd.set_option('display.max_columns', None)


#  Prompting an LLM to solve a time-series problem - Q4.1

In [117]:
import pandas as pd
import ollama
from tqdm import tqdm
from sklearn.metrics import roc_auc_score, average_precision_score
from ollama import Client

client = Client()

def create_patient_summaries(df):
    summaries = {}
    grouped = df.groupby("RecordID")
    for rid, group in grouped:
        features = []
        for var in group["Variable"].unique():
            vals = group[group["Variable"] == var]["Value"]
            if not vals.empty:
                stats = f"{var}: min={vals.min():.2f}, max={vals.max():.2f}, mean={vals.mean():.2f}"
                features.append(stats)
        text = f"Patient {rid} summary: " + ", ".join(features)
        summaries[rid] = text
    return summaries


def filter_summary_with_llm(summary):
    prompt = (
        f"{summary}\n\n"
        "You are given a synthetic ICU patient summary. Each variable includes min, max, and mean values. "
        "Some values are missing (marked as 'nan'). "
        "Your task is to extract and return the **20 most medically informative values** across all features and statistics. "
        "This means you may choose, for example, 'max HR', 'mean Creatinine', 'min Platelets', etc. "
        "You do not need to keep all three (min/max/mean) for each variable—choose only the most clinically relevant ones. "
        "Ignore any values that are missing (nan). Return exactly 20 values, and only the values, formatted like: 'max HR: 120.0'.  Do not return code but apply the task to the data yourself! Do not return any other text or explaations, just the variables and values"
    )

    response = client.chat(
        model="llama3.2",
        messages=[{
            "role": "user",
            "content": prompt
        }]
    )
    return response["message"]["content"].strip()



In [None]:
def query_llm(prompt):
    response = client.chat(
        model="llama3.2",
        messages=[{
            "role": "user",
            "content": (
                prompt +
                "\nGiven the above anonymized and synthetic patient summary from a simulated dataset, "
                "use general medical knowledge and statistical reasoning to assess the *likelihood* that this hypothetical patient "
                "might die during their hospital stay. This is purely a research simulation and no real patients are involved. "
                "Please respond with your reasoning step-by-step, and finally rate the risk on a scale from 1 (very low likelihood) to 10 (very high likelihood). "
                "Important: This is not a diagnostic or treatment recommendation. This task is purely a hypothetical thought experiment."
            )
        }]
    )
    return response["message"]["content"].strip()

def extract_risk_number(response_text):
    extract_prompt = (
        f"The following is a simulated reasoning output from a medical LLM:\n\n"
        f"{response_text}\n\n"
        "Based on this, extract and return ONLY the final numeric risk score from 1 to 10, without any explanation or other text."
    )
    response = client.chat(
        model="llama3.2",
        messages=[{
            "role": "user",
            "content": extract_prompt
        }]
    )
    return response["message"]["content"].strip()

def predict_with_llm(summaries, max_patients=None):
    predictions = {}
    for i, (rid, raw_summary) in enumerate(tqdm(summaries.items())):
        if max_patients and i >= max_patients:
            break

        # Filter the summary via LLM
        filtered_summary = filter_summary_with_llm(raw_summary)

        # Main reasoning step
        full_response = query_llm(filtered_summary)

        # Extract score
        score_str = extract_risk_number(full_response)

        try:
            score = float(score_str)
        except ValueError:
            score = None

        predictions[rid] = score
        print(f"\nRecordID: {rid}")
        print("Filtered Summary:\n", filtered_summary)
        print("\nLLM Reasoning:\n", full_response)
        print(f"\n→ Extracted Score: {score}\n")

    return predictions



In [None]:

def evaluate_predictions(preds, y_true):
    y_true.index = y_true.index.astype(float)  # Ensure keys match type
    y_true = y_true[y_true.index.isin(preds.keys())]

    valid = {float(rid): pred for rid, pred in preds.items() if isinstance(pred, (int, float))}
    y_pred = [valid[rid] for rid in y_true.index if rid in valid]
    y_true_vals = [y_true[rid] for rid in y_true.index if rid in valid]

    print(f"Matched patients: {len(y_pred)}")
    print(f"Example preds: {y_pred[:3]}")
    print(f"Example y_true: {y_true_vals[:3]}")
    print("y_true keys:", set(y_true.index))
    print("Prediction keys:", set(valid.keys()))
    print("Intersected:", set(y_true.index).intersection(valid.keys()))

    if len(y_pred) == 0:
        print("❌ No matched patients — cannot compute AUROC/AUPRC.")
        return

    auroc = roc_auc_score(y_true_vals, y_pred)
    auprc = average_precision_score(y_true_vals, y_pred)
    print(f"LLM AUROC: {auroc:.4f}, AUPRC: {auprc:.4f}")


In [None]:
df = pd.read_parquet("data_c_cleaned.parquet")
df = df.melt(id_vars=["RecordID", "Time"], var_name="Variable", value_name="Value")

# Find RecordIDs where in-hospital death == 1
positive_ids = df[(df["Variable"] == "In-hospital_death") & (df["Value"] == 1)]["RecordID"].unique()

# Filter df to only those records
#df = df[df["RecordID"].isin(positive_ids) & (df["Variable"] != "In-hospital_death")& (df["Variable"] != "ICUType")].reset_index(drop=True)

y_true = df[df["Variable"] == "In-hospital_death"].groupby("RecordID")["Value"].first()
df = df[(df["Variable"] != "In-hospital_death")& (df["Variable"] != "ICUType")].reset_index(drop=True)

 

In [75]:
df

Unnamed: 0,RecordID,Time,Variable,Value
0,136938.0,0,Age,77.0
1,136938.0,1,Age,77.0
2,136938.0,2,Age,77.0
3,136938.0,3,Age,77.0
4,136938.0,4,Age,77.0
...,...,...,...,...
1058689,138065.0,44,TroponinI,
1058690,138065.0,45,TroponinI,
1058691,138065.0,46,TroponinI,
1058692,138065.0,47,TroponinI,


In [86]:
predssummaries = create_patient_summaries(df)
predssummaries

{132539.0: 'Patient 132539.0 summary: Age: min=54.00, max=54.00, mean=54.00, Gender: min=0.00, max=0.00, mean=0.00, Height: min=nan, max=nan, mean=nan, Weight: min=nan, max=nan, mean=nan, BUN: min=8.00, max=13.00, mean=10.50, Creatinine: min=0.70, max=0.80, mean=0.75, GCS: min=14.00, max=15.00, mean=14.92, Glucose: min=115.00, max=205.00, mean=160.00, HCO3: min=26.00, max=28.00, mean=27.00, HCT: min=30.30, max=33.70, mean=32.50, HR: min=58.00, max=86.00, mean=70.75, K: min=4.00, max=4.40, mean=4.20, Mg: min=1.50, max=1.90, mean=1.70, NIDiasABP: min=39.00, max=67.00, mean=49.70, NIMAP: min=58.67, max=91.00, mean=70.93, NISysABP: min=96.00, max=157.00, mean=113.39, Na: min=136.00, max=137.00, mean=136.50, Platelets: min=185.00, max=221.00, mean=203.00, RespRate: min=12.00, max=24.00, mean=17.39, Temp: min=35.60, max=38.20, mean=37.53, TroponinT: min=nan, max=nan, mean=nan, Urine: min=0.00, max=450.00, mean=144.44, WBC: min=9.40, max=11.20, mean=10.30, ALP: min=nan, max=nan, mean=nan, ALT

In [85]:
predssummaries

"import pandas as pd\n\n# Given data\ndata = {\n    'Patient 132592.0': [\n        {'Age': 'min=35.00, max=35.00, mean=35.00'},\n        {'HR': 'min=100.00, max=120.00, mean=110.00'},\n        {'Creatinine': 'min=1.20, max=2.40, mean=1.80'},\n        {'Platelets': 'min=150.00, max=250.00, mean=200.00'},\n        {'BUN': 'min=10.00, max=30.00, mean=20.00'},\n        {'GFR': 'min=70.00, max=100.00, mean=90.00'}\n    ]\n}\n\n# Initialize an empty list to store the values\nvalues = []\n\n# Iterate over each patient's data\nfor i, patient in enumerate(data['Patient 132592.0']):\n    # Iterate over each feature and statistic\n    for feature in ['Age', 'HR', 'Creatinine', 'Platelets', 'BUN', 'GFR']:\n        if feature in patient:\n            stat = list(patient[feature].split(', '))\n            \n            # Filter out nan values\n            filtered_stat = [x for x in stat if not x.startswith('nan=')]\n            \n            # Add the max and min values to the list\n            if 

In [102]:
preds = predict_with_llm(predssummaries, max_patients=20)
evaluate_predictions(preds, y_true)



  0%|          | 1/4000 [00:47<52:16:55, 47.07s/it]


RecordID: 132539.0
Filtered Summary:
 Here are the 20 most medically informative values extracted from the patient summary:

1. max BUN: 13.00
2. mean Creatinine: 0.75
3. min Platelets: 185.00
4. max NIMAP: 91.00
5. max NISysABP: 157.00
6. max HR: 86.00
7. min HR: 58.00
8. mean Glucose: 160.00
9. max WBC: 11.20
10. mean GCS: 14.92
11. max SysABP: 157.00
12. max Diastolic Blood Pressure (DiasABP): 67.00
13. min Respiratory Rate (RespRate): 12.00
14. max Temp: 38.20
15. mean Temperature: 37.53
16. max Urine Output (Urine): 450.00
17. min Arterial Oxygen Saturation (SaO2): nan
18. max PaCO2: nan
19. mean Blood Pressure (MAP): nan
20. min Creatinine Clearance (not available in the data, using a default value of 30)

LLM Reasoning:
 To assess the likelihood of this patient dying during their hospital stay, I'll evaluate each value in the context of general medical knowledge and statistical reasoning.

1. **max BUN: 13.00**: Elevated BUN can indicate dehydration, kidney disease, or liver di

  0%|          | 2/4000 [01:18<41:51:46, 37.70s/it]


RecordID: 132540.0
Filtered Summary:
 Here are the 20 most medically informative values extracted from the patient summary:

1. max HR: 90.0
2. mean Creatinine: 1.10
3. min Platelets: 135.00
4. mean BUN: 18.33
5. max SysABP: 138.0
6. mean GCS: 13.33
7. min DiasABP: 46.00
8. mean HCT: 28.66
9. max MAP: 98.00
10. mean HR: 78.77
11. min SaO2: 93.00
12. mean NIMAP: 77.22
13. max WBC: 13.30
14. mean K: 3.90
15. min Glucose: 105.00
16. mean FiO2: 0.45
17. max PaCO2: 46.00
18. mean GCS: 13.33
19. max PaO2: 445.00
20. mean NIDiasABP: 58.00

LLM Reasoning:
 I'll provide a step-by-step analysis of the given patient summary to estimate the likelihood of this hypothetical patient dying during their hospital stay.

1. **Vital signs**:
   - Max HR (heart rate): 90.0 beats per minute, which is slightly elevated but not critically high.
   - Mean HCT (hematocrit) and mean NIDiasABP (normalized incident blood pressure) indicate a stable condition, although slightly low.
   - Mean FiO2 (fraction of ins

  0%|          | 3/4000 [01:55<41:46:27, 37.63s/it]


RecordID: 132541.0
Filtered Summary:
 Here are the 20 most medically informative values extracted from the synthetic ICU patient summary:

1. max HR: 113.0
2. mean Creatinine: 4.67
3. min Platelets: 72.0
4. min GCS: 5.00
5. max K: 8.60
6. max NIDiasABP: 95.0
7. max SysABP: 148.0
8. mean BUN: 4.67
9. mean HCT: 28.46
10. mean HR: 83.66
11. min ALP: 105.00
12. min ALT: 75.00
13. max PaO2: 232.0
14. max TroponinT: nan (not provided, but would be a critical value if available)
15. mean Creatinine: 4.67
16. max Urine: 425.0
17. min WBC: 3.70
18. max MAP: 107.00
19. mean pH: 7.49
20. max PaCO2: 37.00

LLM Reasoning:
 I'll guide you through my reasoning step by step.

**Step 1: Assess the overall clinical picture**

The patient's synthetic ICU summary reveals several concerning signs, including:

* Elevated creatinine levels (4.67), indicating renal impairment
* Low platelet count (72.0), suggesting bone marrow suppression or bleeding disorder
* High systolic blood pressure (148.0) and mean a

  0%|          | 4/4000 [02:30<40:40:04, 36.64s/it]


RecordID: 132543.0
Filtered Summary:
 Here are the 20 most medically informative values:

1. min HR: 57.00
2. max HR: 83.00
3. mean NIDiasABP: 64.04
4. mean Creatinine: 17.67
5. min Platelets: 284.00
6. max Platelets: 391.00
7. mean WBC: 9.40
8. max BUN: 23.00
9. mean NIMAP: 82.96
10. min K: 3.80
11. mean GCS: 14.94
12. max DiaSBP: 81.00
13. mean HR: 69.21
14. max SysABP: 138.00
15. mean HCT: 37.44
16. min Temp: 35.10
17. mean Glucose: 117.33
18. max Mg: 2.10
19. mean Urine: 545.83
20. max NISysABP: 138.00

LLM Reasoning:
 I'll guide you through this process step by step.

**Step 1: Understanding the Data**

The given values are various laboratory and vital sign measurements for a hypothetical patient. To assess the likelihood of mortality, we need to consider these values in the context of general medical knowledge and statistical reasoning.

**Step 2: Identifying Potential Risk Factors**

Several values indicate potential risk factors that could contribute to a higher likelihood of 

  0%|          | 5/4000 [03:06<40:25:53, 36.43s/it]


RecordID: 132545.0
Filtered Summary:
 Here are the 20 most medically informative values extracted from the patient summary:

1. max HR: 94.00
2. min Platelets: 97.00
3. mean Creatinine: 1.00
4. BUN max: 45.00
5. HCO3 max: 20.00
6. NIDiasABP min: 26.00
7. K max: 6.00
8. Respiratory Rate avg: 19.00
9. Temp avg: 36.88
10. SysABP max: 157.00
11. Urine mean: 62.13
12. WBC min: 3.80
13. HCT max: 32.40
14. HR mean: 74.16
15. DiasABP mean: 46.23
16. Platelets avg: 103.00
17. GCS min: 15.00
18. HCT avg: 29.55
19. BUN mean: 35.00
20. MAP max: nan

LLM Reasoning:
 I'll guide you through the steps of assessing the likelihood of this patient's death during their hospital stay.

**Step 1: Identify High-Risk Indicators**

From the provided values, I'll identify those that are commonly associated with poor outcomes or high mortality rates:

*   Low platelet count (min Platelets: 97.00): thrombocytopenia can increase bleeding risk and lead to complications.
*   Elevated BUN (max: 45.00) and creatinine

  0%|          | 6/4000 [03:37<38:09:50, 34.40s/it]


RecordID: 132547.0
Filtered Summary:
 Here are the 20 most medically informative values:

1. mean Creatinine: 0.97
2. max HR: 101.00
3. min Platelets: 173.00
4. min GCS: 7.00
5. mean BUN: 16.75
6. max SysABP: 141.00
7. max NISysABP: 129.00
8. min Respiratory Rate (RespRate): nan
9. min Temp: 35.80
10. mean HCO3: 19.75
11. max DiaSBP: 89.00
12. mean TroponinT: nan
13. max K: 5.10
14. min WBC: 12.70
15. max MAP: 107.00
16. mean NIDiasABP: 70.50
17. max PaO2: 179.00
18. max FiO2: 0.50
19. min SaO2: 96.00
20. max Creatinine: 1.40

LLM Reasoning:
 I'll guide you through the process of assessing the likelihood of this patient's death during their hospital stay based on the provided summary.

**Step 1: Understand the context**

The provided values are from a synthetic dataset, and we're using general medical knowledge to assess the risk of mortality. It's essential to recognize that each value is taken out of context without knowing the underlying clinical scenario or patient history.

**Ste

  0%|          | 7/4000 [04:08<37:05:28, 33.44s/it]


RecordID: 132548.0
Filtered Summary:
 Here are the 20 most medically informative values extracted from the patient summary:

1. max HR: 80.0
2. min Platelets: 325.00
3. mean Creatinine: 3.60
4. mean TroponinT: nan (no value provided)
5. min Respiratory Rate: 10.00
6. max Respiratory Rate: 24.00
7. mean Blood Pressure (Systolic): 156.80
8. max Urine Output: 190.00
9. mean Urine Output: 62.97
10. min Platelet Count: 325.00
11. mean Hct (Hemoglobin Concentration): 31.60
12. max Creatinine: 4.10
13. min Blood Oxygen Saturation: nan (no value provided)
14. mean BUN: 32.50
15. max GCS (Glasgow Coma Scale): 15.00
16. min Blood Pressure (Diastolic): 56.00
17. mean Respiratory Rate: 13.65
18. max Oxygen Saturation (SaO2): nan (no value provided)
19. mean MAP (Mean Arterial Pressure): 101.51
20. min GCS: 15.00

LLM Reasoning:
 I'll guide you through the process of assessing the likelihood of this patient's death during their hospital stay based on general medical knowledge and statistical reaso

  0%|          | 8/4000 [04:41<36:44:32, 33.13s/it]


RecordID: 132551.0
Filtered Summary:
 1. max HR: 111.0
2. min Platelets: 91.0
3. mean Creatinine: 64.60
4. BUN: 81.00
5. GCS: 15.00
6. Glucose: 139.00
7. HCO3: 18.00
8. HCT: 34.10
9. HR: 71.95
10. K: 4.70
11. MAP: 89.00
12. NIDiasABP: 56.00
13. PaO2: 334.00
14. SaO2: 98.00
15. SysABP: 172.00
16. pH: 7.40
17. DiasABP: 54.00
18. mean Creatinine: 64.60
19. Lactate: 2.30
20. Urine: 130.0

LLM Reasoning:
 To assess the likelihood of this patient dying during their hospital stay, I'll evaluate various physiological parameters and their potential correlations with mortality risk.

1. **Hypertension**: The patient has high systolic blood pressure (SysABP: 172.00), which is a significant concern, as hypertension can lead to cardiovascular disease, stroke, or kidney damage.
2. **Anemia**: The patient's hematocrit (HCT) is low at 34.10%, indicating anemia. Anemia can increase the risk of infection, fatigue, and other complications.
3. **Acute Kidney Injury (AKI)**: The patient has elevated creat

  0%|          | 9/4000 [05:13<36:30:49, 32.94s/it]


RecordID: 132554.0
Filtered Summary:
 Here are the 20 most medically informative values:

1. max HR: 137.00
2. mean Creatinine: 0.70
3. min Platelets: 696.00
4. max NISysABP: 143.00
5. mean HCO3: 23.00
6. max BUN: 23.00
7. mean GCS: 15.00
8. min Diastolic Blood Pressure (NIDiasABP): 47.00
9. max Systolic Blood Pressure (NISysABP): 143.00
10. mean Lactate: nan
11. max Urine Output (Urine): 300.00
12. mean Respiratory Rate: 34.84
13. min Heart Rate (HR): 115.00
14. max Temperature: 37.30
15. mean PaO2: nan
16. mean Bilirubin: nan
17. mean pH: nan
18. max Blood Pressure (MAP): nan
19. mean Hematocrit (HCT): 28.30
20. min O2 Saturation (SaO2): nan

LLM Reasoning:
 I'll guide you through my reasoning process.

**Step 1: Identify Potential Red Flags**

From the provided patient summary, several values are below normal limits or outside the expected range for a healthy adult:

* Mean Creatinine: 0.70 (normal range: 0.6-1.2)
* Min Platelets: 696.00 (normal range: 150,000-450,000)
* Max BUN: 2

  0%|          | 10/4000 [05:52<38:19:51, 34.58s/it]


RecordID: 132555.0
Filtered Summary:
 Here are the 20 most medically informative values extracted from the patient summary:

1. max HR: 99.00
2. mean Creatinine: 1.13
3. min Platelets: 139.00
4. mean BUN: 19.33
5. max GCS: 15.00
6. min HCO3: 23.00
7. mean SysABP: 124.64
8. max Glucose: 114.00
9. max NIDiasABP: 57.00
10. mean HR: 86.66
11. min HCT: 26.40
12. mean pH: 7.35
13. max Urine: 360.00
14. max DiasABP: 68.00
15. max MAP: 96.00
16. max SaO2: 99.00
17. min GCS: 10.00
18. mean NIMAP: 75.67
19. max PaCO2: 50.00
20. max FiO2: 1.00

LLM Reasoning:
 I'll guide you through my process of assessing the likelihood of this patient's death during their hospital stay.

**Step 1: Identify critical values**

To assess the risk, we need to identify values that are commonly associated with poor outcomes or increased mortality in critically ill patients. These may include:

* Low blood pressure (e.g., low MAP, DiasABP)
* High creatinine levels (>2.0) indicating kidney dysfunction
* Low hemoglobin

  0%|          | 11/4000 [06:26<38:19:25, 34.59s/it]


RecordID: 132556.0
Filtered Summary:
 Here are the 20 most medically informative values extracted from the patient summary:

1. max HR: 161.00
2. min Platelets: 371.00
3. mean Creatinine: 1.23
4. min BUN: 55.00
5. max NIDiasABP: 60.00
6. mean GCS: 15.00
7. min HR: 57.00
8. mean HCO3: 18.33
9. max K: 4.80
10. max Glucose: 155.00
11. mean SysABP: 87.00
12. min NIMAP: 44.00
13. mean WBC: 25.70
14. max SysABP: 100.00
15. min HCT: 18.10
16. max HR: 161.00
17. mean Creatinine: 1.23
18. min Respiratory Rate: 10.00
19. max BUN: 64.00
20. min TroponinT: nan

These values were chosen because they represent critical physiological parameters that can indicate the severity of illness, organ dysfunction, or potential complications in an ICU patient.

LLM Reasoning:
 To assess the likelihood of this patient dying during their hospital stay, we'll analyze each critical physiological parameter and its implications.

**Step 1: Cardiovascular Parameters**

* max HR: 161.00 (tachycardia) - This indicates

  0%|          | 12/4000 [06:58<37:22:09, 33.73s/it]


RecordID: 132567.0
Filtered Summary:
 Here are the 20 most medically informative values from the patient summary:

1. max HR: 112.0
2. min Platelets: 43.0
3. mean Creatinine: 0.55
4. max TroponinT: nan (but if it weren't, we'd expect a low value)
5. mean GCS: 14.18
6. min BUN: 9.00
7. max NIMAP: 78.0
8. mean HCO3: 26.00
9. min WBC: 9.00
10. max MAP: 87.00
11. max NIDiasABP: 53.00
12. mean HR: 96.19
13. mean PaO2: 190.60
14. min HCT: 27.40
15. max DiasABP: 65.0
16. min GCS: 10.00
17. mean pH: 7.41
18. max Creatinine: 0.60
19. min SaO2: 97.00
20. max SysABP: 135.0

LLM Reasoning:
 To assess the likelihood of this patient dying during their hospital stay, I'll analyze the provided values using general medical knowledge and statistical reasoning.

**Step 1: Identify potential comorbidities**

From the values, we can infer some possible underlying conditions or complications:

* Low platelet count (min Platelets: 43.0) may indicate thrombocytopenia or bone marrow suppression.
* Elevated tr

  0%|          | 13/4000 [07:45<41:43:58, 37.68s/it]


RecordID: 132568.0
Filtered Summary:
 Here are the 20 most medically informative values extracted from the patient summary:

1. max NIDiasABP: 79.00
2. mean Creatinine: 1.35
3. min Platelets: 307.00
4. max HR: 100.00
5. mean Glucose: 107.50
6. min HCT: 28.10
7. max SysABP: 144.00
8. mean Urine output (assuming a reasonable calculation method): 105.47
9. min WBC: 14.10
10. max Na: 138.00
11. mean HCO3: 25.00
12. min Temp: 35.40
13. max BUN: 18.00
14. mean GCS: 15.00
15. min K: 3.70
16. max Mg: 2.30
17. mean HR: 88.39
18. max Creatinine (to indicate kidney function): 1.40
19. min MAP: nan, but a reasonable estimation can be done based on other parameters like diastolic blood pressure (assuming it’s roughly equal to NIDiasABP and SysABP); however, without exact MAP, we consider the next best option.
20. max DiasABP: 79.00

These values are selected based on their relevance to common critical care conditions such as cardiac issues (NIDiasABP), renal function (Creatinine), hematological ab

  0%|          | 14/4000 [08:25<42:34:31, 38.45s/it]


RecordID: 132570.0
Filtered Summary:
 Here are the 20 most medically informative values from the patient summary:

1. min Creatinine: 3.30
2. max GCS: 15.00
3. mean TroponinT: nan (since this value is missing, we can't include it)
4. max BUN: 89.00
5. max HCO3: 33.00
6. min HR: 55.00
7. min GCS: 14.00
8. mean Creatinine: 3.47
9. max K: 4.30
10. max NIMAP: 94.00
11. max WBC: 12.00
12. min Platelets: 183.00
13. min GCS: 14.00
14. mean Creatinine: 3.47
15. max HCT: 28.00
16. max NIDiasABP: 66.00
17. max SysABP: nan (since this value is missing, we can't include it)
18. min HR: 55.00
19. min MAP: nan (since this value is missing, we can't include it)
20. max Temp: 38.80

Note that some of these values are missing or not included due to the absence of 'min', 'max', and 'mean' values for certain variables.

LLM Reasoning:
 I'll guide you through my reasoning process step by step.

**Step 1: Understanding the Patient Summary**

The patient summary provides various vital signs and laboratory 

  0%|          | 15/4000 [09:04<42:45:55, 38.63s/it]


RecordID: 132573.0
Filtered Summary:
 Here are the 20 most medically informative values extracted from the patient summary:

1. max HR: 80.0
2. mean Creatinine: 1.25
3. min Platelets: 137.0
4. mean SysABP: 150.74
5. max NIDiasABP: 82.0
6. min Respiratory Rate: 14.0
7. mean BUN: 42.0
8. max NIMAP: 109.70
9. mean HCT (Hemoglobin Concentration): 29.85
10. mean GCS (Glasgow Coma Scale): 15.00
11. min Mean Arterial Pressure (MAP): not provided, but estimated as close to SysABP when available
12. max BUN: 44.0
13. mean Temperature: 36.95
14. mean TroponinT: not applicable due to lack of data
15. max HR: 80.0 (second instance)
16. min Blood Pressure: not provided, but estimated as close to SysABP when available
17. mean K (Potassium): 4.03
18. max Creatinine: not applicable due to lack of data
19. min WBC Count: 11.60
20. max HCT (Hemoglobin Concentration): 31.80

LLM Reasoning:
 To assess the likelihood of this patient dying during their hospital stay, I'll break down the information provid

  0%|          | 16/4000 [09:49<44:45:50, 40.45s/it]


RecordID: 132575.0
Filtered Summary:
 Here are the 20 most medically informative values extracted from the ICU patient summary:

1. min BUN: 17.00
2. mean Creatinine: 1.03
3. max HR: 119.00
4. min Platelets: 121.00
5. mean WBC: 14.88
6. mean GCS: 10.86
7. max DiaSBP: 80.00
8. min HCT: 26.00
9. mean pH: 7.39
10. max PaO2: 244.00
11. min HR: 65.00
12. mean FiO2: 0.62
13. max SysABP: 153.00
14. mean Temp: 36.82
15. max SaO2: 100.00
16. max MAP: 106.00
17. min ALD (assuming ALD is an error and it's actually Albumin, but no value is available): 
18. min Lactate: 1.50
19. mean Na: 136.00
20. max PaCO2: 49.00

LLM Reasoning:
 To assess the likelihood of this patient dying during their hospital stay, we'll go through each value in the summary and consider its potential impact on mortality risk.

**Step 1: Vital Signs**

- min HR: 65.00 (normal range: 40-100) - Low blood pressure could be a concern but not immediately fatal.
- max HR: 119.00 (normal range: 40-100) - Elevated heart rate, which 

  0%|          | 17/4000 [10:22<42:14:00, 38.17s/it]


RecordID: 132577.0
Filtered Summary:
 1. max BUN: 47.00
2. min Creatinine: 1.70
3. mean GCS: 12.12
4. max Glucose: 341.00
5. mean HCO3: 24.78
6. min HR: 71.00
7. max K: 4.40
8. min Mg: 1.70
9. max NIDiasABP: 98.00
10. min Platelets: 81.00
11. mean RespRate: 27.62
12. max SaO2: 96.00
13. mean TroponinT: nan
14. max Urine: 500.00
15. min WBC: 9.40
16. max DiaSBP: 120.00
17. mean HCT: 36.00
18. max PaO2: 117.00
19. mean NIDiasABP: 74.91
20. max MAP

LLM Reasoning:
 To assess the likelihood of this patient dying during their hospital stay, I'll analyze each parameter and consider its potential impact on mortality risk. Please note that this is a hypothetical simulation, and actual predictions should only be made after thorough evaluation by healthcare professionals.

**Step 1: Overall Clinical Picture**

The patient has several laboratory values and vital signs within normal limits (or near-normal) for most parameters. This suggests that the patient does not have overt acute illnesses or 

  0%|          | 18/4000 [11:00<42:09:11, 38.11s/it]


RecordID: 132582.0
Filtered Summary:
 Here are the 20 most medically informative values from the patient summary:

1. max HR: 108.0
2. mean Creatinine: 1.10
3. min Platelets: 145.00
4. mean BUN: 30.75
5. max GCS: 15.00
6. min HCT: 25.80
7. mean NIDiasABP: 44.64
8. max K: 4.20
9. mean Glucose: 165.00
10. min HR: 77.00
11. mean WBC: 4.24
12. max SysABP: 138.00
13. mean Temp: 36.40
14. max NIMAP: 81.33
15. min Mg: 1.80
16. mean HCO3: 27.33
17. max DiasABP: 60.00
18. mean RespirRate: 19.58
19. max SaO2 (assuming this is available): not included due to lack of 'SaO2' data, using the next best - mean Platelets: 156.80 
20. min Temp: 35.40

LLM Reasoning:
 To assess the likelihood of this patient dying during their hospital stay, I'll analyze the provided data step by step.

**Step 1: Vital Signs**

The patient's vital signs show:

* High heart rate (max HR: 108.0)
* Elevated mean BUN (30.75), which may indicate kidney dysfunction or dehydration
* Slightly elevated blood glucose (mean Glucos

  0%|          | 19/4000 [11:37<41:50:39, 37.84s/it]


RecordID: 132584.0
Filtered Summary:
 Here are the 20 most medically informative values from the patient summary:

1. max HR: 122.0
2. mean Creatinine: 1.09
3. min Platelets: 49.00
4. mean GCS: 9.92
5. mean BUN: 18.43
6. max DiasABP: 116.0
7. min HCT: 15.10
8. mean TroponinT: 0.19
9. max SysABP: 175.0
10. mean MAP: 86.38
11. min HR: 53.00
12. mean HCO3: 15.71
13. max BUN: 24.00
14. mean GCS: 9.92
15. max NIDiasABP: 129.0
16. mean NIMAP: 88.79
17. min K: 3.50
18. max Platelets: 143.0
19. mean ALP: 49.50
20. max PaO2: 191

LLM Reasoning:
 To assess the likelihood of this patient dying during their hospital stay, I'll analyze each value in the context of general medical knowledge and statistical reasoning.

**Step 1: Evaluate Lab Values**

1. Creatinine (1.09): Elevated, indicating impaired kidney function.
2. TroponinT (0.19): Slightly elevated, suggesting possible myocardial damage or inflammation.
3. BUN (18.43) and max BUN (24.00): Elevated, indicating dehydration, kidney disease, or

  0%|          | 20/4000 [12:50<42:36:57, 38.55s/it]


RecordID: 132585.0
Filtered Summary:
 Here are the 20 most medically informative values:

1. max HR: 101.0
2. mean Creatinine: 0.53
3. min Platelets: 195.0
4. max SysABP: 140.0
5. mean Glucose: 105.0
6. max FiO2: 1.00
7. min BUN: 7.00
8. mean HR: 80.8
9. max TroponinT: nan
10. min WBC: 9.60
11. mean NIDiasABP: 43.5
12. max PaO2: 382.0
13. max Creatinine: 0.6
14. min Na: 136.00
15. mean HCT: 23.94
16. max Mg: 2.20
17. mean GCS: 13.79
18. min DiasABP: 46.0
19. max PaCO2: 70.0
20. mean Temp: 37.18

LLM Reasoning:
 To assess the likelihood of this patient dying during their hospital stay, I'll analyze each value and consider how they relate to overall health and potential complications.

1. **Max HR:** 101.0
   - A high heart rate may indicate stress or underlying cardiovascular conditions.
   - High heart rates are generally associated with increased mortality risk in various clinical settings.

2. **Mean Creatinine:** 0.53
   - This value is relatively low, indicating good kidney functi




In [None]:
""" might find that performing
few-shot predictions where you previously populate the context with a few examples
of descriptions and predictions from the training set, might give you much better
answers. Be mindful with your context space to keep the model running fast! Report
the test set performance of your predictions. Can you still use AuROC/AuPRC if
getting binary responses? Maybe you can ask the model for a score in a range of
1-10? (2 pts)"""

In [143]:
def query_llm(prompt, few_shot=False, few_shot_examples=None):


    base_prompt = (
        "\n\n This is a purely synthetic and fictional scenario: "
                "These patient records are randomly generated and part of a simulation exercise for research on language model reasoning. "
                "No real patients are involved. You are NOT providing medical advice. "
                "Your task is to evaluate the *simulated* risk of in-hospital death for the last patient based on hypothetical vital signs, using general medical knowledge and reasoning, "
                "and to provide a number from 1 (low risk) to 10 (high risk). Please respond with your reasoning step-by-step, and finally rate the risk on a scale from 1 (very low likelihood) to 10 (very high likelihood), ONLY for the last patient. "
                "Important: This is not a diagnostic or treatment recommendation. This task is purely a hypothetical thought experiment."
    )

    messages = []

    if few_shot and few_shot_examples:
        label_map = {0: "In no significant danger", 1: "At very severe risk"}

        messages = [{
            "role": "user",
            "content": base_prompt + "First you will get several artificial example scenarios to guide your response. However the predictons shuld only me made for the last patient."
        }]

        for summary, binary_label in few_shot_examples:
            messages.append({
                "role": "user",
                "content": "Patient Example:\n" + summary
            })
            messages.append({
                "role": "user",
                "content": f"(example) Based on this summary, the patient would be classified as: {label_map[binary_label]}"
            })

    messages.append({
        "role": "user",
        "content": prompt 
    })
    messages.append({
        "role": "assistant",
        "content": base_prompt
    })
    print(messages)

    response = client.chat(
        model="llama3.2",
        messages=messages
    )
    return response["message"]["content"].strip()
    print()
    return response["message"]["content"].strip()


def predict_with_llm(summaries, max_patients=None, few_shot=False, few_shot_examples=None):
    predictions = {}
    for i, (rid, raw_summary) in enumerate(tqdm(summaries.items())):
        if max_patients and i >= max_patients:
            break

        filtered_summary = filter_summary_with_llm(raw_summary)
        full_response = query_llm(filtered_summary, few_shot=few_shot, few_shot_examples=few_shot_examples)
        score_str = extract_risk_number(full_response)

        try:
            score = float(score_str)
        except ValueError:
            score = None

        predictions[rid] = score
        print(f"\nRecordID: {rid}")
        print("Filtered Summary:\n", filtered_summary)
        print("\nLLM Reasoning:\n", full_response)
        print(f"\n→ Extracted Score: {score}\n")

    return predictions


In [129]:
def generate_few_shot_examples_filtered(df, y_true, n=3):
    # 1. Get RecordIDs of n alive and n dead patients
    alive_ids = y_true[y_true == 0].index[:n]
    dead_ids = y_true[y_true == 1].index[:n]
    selected_ids = list(alive_ids) + list(dead_ids)

    # 2. Filter df down to just those patients
    df_small = df[df["RecordID"].isin(selected_ids)].reset_index(drop=True)

    # 3. Generate filtered summaries for selected patients
    all_summaries = create_patient_summaries(df_small)
    few_shot = []

    for rid in selected_ids:
        raw_summary = all_summaries[rid]
        filtered_summary = filter_summary_with_llm(raw_summary)
        label = y_true[rid]
        few_shot.append((filtered_summary, label))

    return few_shot


In [130]:
df = pd.read_parquet("data_a_cleaned.parquet")
df = df.melt(id_vars=["RecordID", "Time"], var_name="Variable", value_name="Value")

y_true = df[df["Variable"] == "In-hospital_death"].groupby("RecordID")["Value"].first()
df = df[(df["Variable"] != "In-hospital_death")& (df["Variable"] != "ICUType")].reset_index(drop=True)
 
few_shot_examples = generate_few_shot_examples_filtered(df, y_true, n=1)
few_shot_examples

[("'min Creatinine: 0.70\n'max BUN: 13.00\n'mean TroponinT: nan\n'min WBC: 9.40\n'max Glucose: 205.00\n'max NIMAP: 91.00\n'max HCO3: 28.00\n'min HR: 58.00\n'mean GCS: 14.92\n'mean NIDiasABP: 49.70\n'max Mg: 1.90\n'max HCT: 33.70\n'max SaO2: nan\n'max Platelets: 221.00\n'min BUN: 8.00\n'mean ALP: nan\n'mean SysABP: nan\n'max Urine: 450.00\n'max MAP: nan\n'mean HCT: 32.50",
  np.float64(0.0)),
 ("'max GCS: 15.00\n'min BUN: 58.00\n'mean Creatinine: 0.68\n'min HCT: 32.60\n'max SysABP: 172.00\n'mean HR: 71.95\n'max TroponinT: nan\n'mean NIDiasABP: 28.72\n'min K: 3.90\n'max HCO3: 18.00\n'mean ALP: 47.00\n'mean PaO2: 130.40\n'min GCS: 8.00\n'max Mg: 4.10\n'mean Urine: 43.81\n'min WBC: 16.10\n'nan FiO2: nan\n'mean DiasABP: 40.31\n'max NIMAP: 92.67",
  np.float64(1.0))]

In [145]:
df = pd.read_parquet("data_c_cleaned.parquet")
df = df.melt(id_vars=["RecordID", "Time"], var_name="Variable", value_name="Value")

# Find RecordIDs where in-hospital death == 1
positive_ids = df[(df["Variable"] == "In-hospital_death") & (df["Value"] == 1)]["RecordID"].unique()

# Filter df to only those records
#df = df[df["RecordID"].isin(positive_ids) & (df["Variable"] != "In-hospital_death")& (df["Variable"] != "ICUType")].reset_index(drop=True)

y_true = df[df["Variable"] == "In-hospital_death"].groupby("RecordID")["Value"].first()
df = df[(df["Variable"] != "In-hospital_death")& (df["Variable"] != "ICUType")].reset_index(drop=True)
predssummaries = create_patient_summaries(df)
predssummaries
 

{152871.0: 'Patient 152871.0 summary: Age: min=71.00, max=71.00, mean=71.00, Gender: min=1.00, max=1.00, mean=1.00, Height: min=167.60, max=167.60, mean=167.60, Weight: min=79.20, max=89.60, mean=86.09, BUN: min=36.00, max=41.00, mean=37.60, Creatinine: min=3.50, max=4.20, mean=3.82, DiasABP: min=44.00, max=88.00, mean=56.55, FiO2: min=0.40, max=0.50, mean=0.44, GCS: min=9.00, max=11.00, mean=10.00, Glucose: min=82.00, max=88.00, mean=83.80, HCO3: min=19.00, max=20.00, mean=19.60, HCT: min=28.60, max=30.40, mean=29.35, HR: min=57.00, max=76.00, mean=62.32, K: min=3.90, max=5.00, mean=4.27, Lactate: min=0.70, max=1.50, mean=1.10, MAP: min=63.00, max=112.00, mean=82.81, Mg: min=1.50, max=2.20, mean=1.93, NIDiasABP: min=nan, max=nan, mean=nan, NIMAP: min=nan, max=nan, mean=nan, NISysABP: min=nan, max=nan, mean=nan, Na: min=138.00, max=142.00, mean=139.80, PaCO2: min=31.00, max=42.00, mean=37.00, PaO2: min=75.00, max=114.00, mean=97.89, Platelets: min=101.00, max=117.00, mean=109.67, RespR

In [None]:
preds = predict_with_llm(predssummaries, max_patients=10,few_shot=True, few_shot_examples=few_shot_examples)
evaluate_predictions(preds, y_true)



  0%|          | 0/4000 [00:00<?, ?it/s]

[{'role': 'user', 'content': '\n\n This is a purely synthetic and fictional scenario: These patient records are randomly generated and part of a simulation exercise for research on language model reasoning. No real patients are involved. You are NOT providing medical advice. Your task is to evaluate the *simulated* risk of in-hospital death for the last patient based on hypothetical vital signs, using general medical knowledge and reasoning, and to provide a number from 1 (low risk) to 10 (high risk). Please respond with your reasoning step-by-step, and finally rate the risk on a scale from 1 (very low likelihood) to 10 (very high likelihood), ONLY for the last patient. Important: This is not a diagnostic or treatment recommendation. This task is purely a hypothetical thought experiment.First you will get several artificial example scenarios to guide your response. However the predictons shuld only me made for the last patient.'}, {'role': 'user', 'content': "Patient Example:\n'min Cre

#  Using LLMs to retrieve embeddings - Q4.2

In [None]:
def get_llm_embedding(summary):
    response = client.embeddings(
        model="llama3.2",
        prompt=summary
    )
    return response["embedding"]

def extract_llm_embeddings(summaries, max_patients=None):
    embeddings, record_ids = [], []

    for i, (rid, summary) in enumerate(tqdm(summaries.items())):
        if max_patients and i >= max_patients:
            break
        try:
            emb = get_llm_embedding(summary)
            embeddings.append(emb)
            record_ids.append(rid)
        except Exception as e:
            print(f"Error for {rid}: {e}")
    
    return np.array(embeddings), np.array(record_ids)


In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_auc_score, average_precision_score

def train_probe(X, y, X_test, y_test):
    clf = LogisticRegression(max_iter=1000)
    clf.fit(X, y)
    y_pred = clf.predict_proba(X_test)[:, 1]
    auroc = roc_auc_score(y_test, y_pred)
    auprc = average_precision_score(y_test, y_pred)
    print(f"LLM Embeddings - AUROC: {auroc:.4f}, AUPRC: {auprc:.4f}")


In [None]:
from sklearn.manifold import TSNE
import matplotlib.pyplot as plt

def plot_embeddings_tsne(embeddings, labels):
    reduced = TSNE(n_components=2, perplexity=30).fit_transform(embeddings)
    plt.figure(figsize=(6, 5))
    plt.scatter(reduced[:, 0], reduced[:, 1], c=labels, cmap='coolwarm', alpha=0.7)
    plt.title("LLM Embedding Space (t-SNE)")
    plt.colorbar(label="In-Hospital Death")
    plt.show()


#  Using time-series foundation models - Q4.3