In [1]:
!pip install bitsandbytes accelerate

Collecting bitsandbytes
  Downloading bitsandbytes-0.49.2-py3-none-manylinux_2_24_x86_64.whl.metadata (10 kB)
Downloading bitsandbytes-0.49.2-py3-none-manylinux_2_24_x86_64.whl (60.7 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m60.7/60.7 MB[0m [31m11.4 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: bitsandbytes
Successfully installed bitsandbytes-0.49.2


In [2]:
# Read the Hugging Face token from Colab's secret store when available.
# `token` is left as None when the notebook is not running on Colab, or when
# the HF_TOKEN secret is missing / not shared with this notebook, so the login
# cell can fall back to an interactive prompt instead of failing here.
try:
    from google.colab import userdata
except ImportError:
    token = None
else:
    try:
        token = userdata.get('HF_TOKEN')
    except (userdata.SecretNotFoundError, userdata.NotebookAccessError):
        token = None

In [3]:
import getpass
from huggingface_hub import login

# Hugging Face login.
# Ask for the token interactively when none was loaded earlier. An empty
# string is treated like a missing token so it is never handed to login().
if not token:
    token = getpass.getpass("Enter HF token (with write access): ")
login(token=token)

In [4]:
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
import torch

# 1. 4-bit NF4 quantization settings; float16 is used as the compute dtype.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
)

# Model identifier passed to from_pretrained (replace with your own repo id or path).
model_id = "Ismailea04/medgemma-4b-cardiology-merged"

# 2. Load the tokenizer and the quantized model.
print("Loading model...")
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=bnb_config,  # apply the 4-bit settings defined above
    device_map="auto",               # automatic device placement
    dtype=torch.float32,             # same value as before; `torch_dtype` is the deprecated spelling
)

Loading model...


config.json: 0.00B [00:00, ?B/s]

tokenizer_config.json: 0.00B [00:00, ?B/s]

tokenizer.json:   0%|          | 0.00/33.4M [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/35.0 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/670 [00:00<?, ?B/s]

chat_template.jinja: 0.00B [00:00, ?B/s]

`torch_dtype` is deprecated! Use `dtype` instead!


model.safetensors.index.json: 0.00B [00:00, ?B/s]

Downloading (incomplete total...): 0.00B [00:00, ?B/s]

Fetching 2 files:   0%|          | 0/2 [00:00<?, ?it/s]

Loading weights:   0%|          | 0/883 [00:00<?, ?it/s]

In [21]:
# 3. Define the Inference Function
def generate_response(prompt, max_new_tokens=512, temperature=0.7, top_p=0.9):
    """Run the notebook-level model on `prompt` and return the decoded text.

    The prompt is embedded in the Alpaca-style template below, tokenized with
    the notebook-level `tokenizer`, and completed by the notebook-level
    `model` using sampling (`do_sample=True`).

    Args:
        prompt: Text placed in the "### Input:" section of the template.
        max_new_tokens: Upper bound on the number of generated tokens.
        temperature: Sampling temperature forwarded to `model.generate`.
        top_p: Nucleus-sampling threshold forwarded to `model.generate`.

    Returns:
        list[str]: The decoded sequence (template plus completion) split on
        the literal marker '### Response:'. Callers read the generated answer
        from the last element (`result[-1]`).
    """
    # Format the prompt exactly like the training data (Alpaca style)
    formatted_prompt = f"""Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
You are a cardiology expert. Answer the following question truthfully.

### Input:
{prompt}

### Response:
"""

    # Reuse EOS as the pad token when the tokenizer has no dedicated pad token.
    # If it has neither, pad_token_id stays None and is passed through as-is.
    if tokenizer.pad_token is None and tokenizer.eos_token is not None:
        tokenizer.pad_token = tokenizer.eos_token

    # Move the inputs to wherever the model was placed (the model is loaded
    # with device_map="auto") instead of assuming a CUDA device exists.
    inputs = tokenizer(formatted_prompt, return_tensors="pt").to(model.device)

    with torch.inference_mode():
        outputs = model.generate(
            **inputs,
            max_new_tokens=max_new_tokens,
            temperature=temperature,
            top_p=top_p,
            do_sample=True,
            pad_token_id=tokenizer.pad_token_id,
        )

    # outputs[0] holds the prompt tokens followed by the newly generated ones,
    # so the decoded text still contains the template.
    decoded = tokenizer.decode(outputs[0], skip_special_tokens=True)

    # Split on the response marker; the piece after the last marker is the answer.
    return decoded.split('### Response:')

In [67]:
def create_patient_specific_cardio_prompt(patient_id, current_vitals_window):
    """Build the patient-steered monitoring prompt as a single string.

    The patient profile and the vitals readings are fixed sample values
    defined inside this function; `patient_id` and `current_vitals_window`
    are accepted but not read yet.

    The returned text starts with a newline, every line (including the blank
    ones) is indented by four spaces, and the text ends with a four-space
    indent after the last newline.
    """
    indent = "    "

    # Sample patient profile standing in for the stored baseline record.
    profile = {
        "id": "P-402",
        "age": 65,
        "history": "Hypertension, no prior myocardial infarction.",
        "baseline_hr": "Resting HR typically 62-68 bpm.",
        "baseline_bp": "Average 130/85 mmHg with medication.",
        "known_anomalies": "Occasional benign PVCs (Premature Ventricular Contractions).",
    }

    # Sample readings for the current monitoring window.
    readings = (
        "Time: 02:00 AM - HR: 65, SpO2: 98%",
        "Time: 02:05 AM - HR: 66, SpO2: 98%",
        "Time: 02:10 AM - HR: 85, SpO2: 97%",
        "Time: 02:15 AM - HR: 110, SpO2: 94%",
    )
    # Leading newline, one indented line per reading, trailing indent.
    vitals_block = "\n" + "".join(f"{indent}{reading}\n" for reading in readings) + indent

    prompt_lines = [
        "You are a Cardiology Monitor AI. Interpret the following vital signs.",
        "",
        "PATIENT BASELINE (NORMAL STATE):",
        f"- Age: {profile['age']}",
        f"- History: {profile['history']}",
        f"- Normal Heart Rate: {profile['baseline_hr']}",
        f"- Known Behaviors: {profile['known_anomalies']}",
        "",
        "CURRENT SENSOR DATA (LAST 15 MIN):",
        vitals_block,
        "",
        "TASK:",
        "1. Compare the current data to the patient's specific baseline (Name it : ##Comparaison).",
        "2. Identify any clinical anomalies (Name it : ##Detection).",
        "3. Provide a short clinical interpretation of what might be happening (Name it : ##Interpretation).",
    ]

    return "\n" + "".join(f"{indent}{line}\n" for line in prompt_lines) + indent

# Example usage. Both arguments are placeholders here: the function body does
# not read patient_id or current_vitals_window.
prompt = create_patient_specific_cardio_prompt(1,1)

In [68]:
# Display the raw prompt string; the repr makes the embedded newlines and
# indentation visible.
prompt

"\n    You are a Cardiology Monitor AI. Interpret the following vital signs.\n    \n    PATIENT BASELINE (NORMAL STATE):\n    - Age: 65\n    - History: Hypertension, no prior myocardial infarction.\n    - Normal Heart Rate: Resting HR typically 62-68 bpm.\n    - Known Behaviors: Occasional benign PVCs (Premature Ventricular Contractions).\n    \n    CURRENT SENSOR DATA (LAST 15 MIN):\n    \n    Time: 02:00 AM - HR: 65, SpO2: 98%\n    Time: 02:05 AM - HR: 66, SpO2: 98%\n    Time: 02:10 AM - HR: 85, SpO2: 97%\n    Time: 02:15 AM - HR: 110, SpO2: 94%\n    \n    \n    TASK:\n    1. Compare the current data to the patient's specific baseline (Name it : ##Comparaison).\n    2. Identify any clinical anomalies (Name it : ##Detection).\n    3. Provide a short clinical interpretation of what might be happening (Name it : ##Interpretation).\n    "

In [69]:
# 4. Test it: run the model on the prompt built above.
# generate_response returns the decoded text split on '### Response:', so
# `response` is a list of strings rather than a single string.
response = generate_response(prompt)

In [70]:
# Print the whole list returned by generate_response (template part included).
print(response)

["Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.\n\n### Instruction:\nYou are a cardiology expert. Answer the following question truthfully.\n\n### Input:\n\n    You are a Cardiology Monitor AI. Interpret the following vital signs.\n    \n    PATIENT BASELINE (NORMAL STATE):\n    - Age: 65\n    - History: Hypertension, no prior myocardial infarction.\n    - Normal Heart Rate: Resting HR typically 62-68 bpm.\n    - Known Behaviors: Occasional benign PVCs (Premature Ventricular Contractions).\n    \n    CURRENT SENSOR DATA (LAST 15 MIN):\n    \n    Time: 02:00 AM - HR: 65, SpO2: 98%\n    Time: 02:05 AM - HR: 66, SpO2: 98%\n    Time: 02:10 AM - HR: 85, SpO2: 97%\n    Time: 02:15 AM - HR: 110, SpO2: 94%\n    \n    \n    TASK:\n    1. Compare the current data to the patient's specific baseline (Name it : ##Comparaison).\n    2. Identify any clinical anomalies (Name it : ##Detection

In [71]:
# Print only the last piece, i.e. the text after the final '### Response:' marker.
print(response[-1])


##Comparaison:
The patient's current heart rate (HR) is 110 bpm, while their baseline HR is 62-68 bpm. The SpO2 has decreased to 94% from 98%.

##Detection:
There is a notable increase in heart rate (HR) and decrease in SpO2.

##Interpretation:
The current elevated heart rate and low SpO2 could be a sign of hypoxemia or the presence of an acute event such as arrhythmia, pulmonary embolism, or even cardiac ischemia. Given the patient's baseline of benign PVCs, it is possible that they are experiencing an arrhythmia.
Given the context, it is likely that the patient is experiencing a cardiac event.
```
```
```
```
```
```
```
```
```
```
```
```
```
```
```
```
```
```
```
```
```
```
```
```
```
```
```
```
```
```
```
```
```
```
```
```
```
```
```
```
```
```
```
```
```
```
```
```
```
```
```
```
```
```
```
```
```
```
```
```
```
```
```
```
```
```
```
```
```
```
```
```
```
```
```
```
```
```
```
```
```
```
```
```
```
```
```
```
```
```
```
```
```
```
```
```
```
```
```


In [73]:
# Trim runaway generation: keep only the text before the first '###' or '```'
# marker, whichever comes first. When neither marker is present the whole
# answer is printed instead of nothing.
answer = response[-1]
for stop_marker in ('###', '```'):
    answer = answer.split(stop_marker)[0]
print(answer)


##Comparaison:
The patient's current heart rate (HR) is 110 bpm, while their baseline HR is 62-68 bpm. The SpO2 has decreased to 94% from 98%.

##Detection:
There is a notable increase in heart rate (HR) and decrease in SpO2.

##Interpretation:
The current elevated heart rate and low SpO2 could be a sign of hypoxemia or the presence of an acute event such as arrhythmia, pulmonary embolism, or even cardiac ischemia. Given the patient's baseline of benign PVCs, it is possible that they are experiencing an arrhythmia.
Given the context, it is likely that the patient is experiencing a cardiac event.

