# CardioSense — Component 4
## LLM Chatbot for Mental Health Assessment & Monitoring (CVD Patients)

This notebook fine-tunes a ~1B chat model using QLoRA (4-bit) on lightweight datasets:
- ShenLab/MentalChat16K
- Empathetic Dialogues
- MedQuAD (filtered to cardio + adherence + lifestyle)
- Optional: CounselChat (Kaggle)

Outputs: LoRA adapter, optional merged model, inference helper, ZIP exporter.


In [1]:
# GPU check
import torch, sys, platform
print("Python:", sys.version.split()[0])
print("Platform:", platform.platform())
print("Torch:", torch.__version__)
print("CUDA available:", torch.cuda.is_available())
if torch.cuda.is_available():
    print("GPU:", torch.cuda.get_device_name(0))
else:
    raise RuntimeError("No GPU detected. Colab: Runtime -> Change runtime type -> GPU")


Python: 3.12.12
Platform: Linux-6.6.105+-x86_64-with-glibc2.35
Torch: 2.9.0+cu126
CUDA available: True
GPU: NVIDIA A100-SXM4-80GB


## Install dependencies

In [2]:
!pip -q install -U "transformers>=4.41.0" "datasets>=2.19.0" "accelerate>=0.31.0" "peft>=0.11.1" "trl>=0.9.6" "bitsandbytes>=0.43.0" "sentencepiece" "evaluate" "scikit-learn"
!pip -q install -U kaggle kagglehub


[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/511.6 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m511.6/511.6 kB[0m [31m20.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m517.4/517.4 kB[0m [31m46.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m59.1/59.1 MB[0m [31m45.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m84.1/84.1 kB[0m [31m8.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m8.9/8.9 MB[0m [31m72.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m47.7/47.7 MB[0m [31m55.5 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m86.4/86.4 kB[0m [31m5.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

## Auth (Hugging Face + Kaggle)
- Add `HF_TOKEN` in Colab Secrets (recommended)
- For Kaggle optional dataset: upload `kaggle.json` into `/content`

In [29]:
# Kaggle auth (supports NEW KAGGLE_API_TOKEN + legacy kaggle.json)
import os, json
from pathlib import Path

def setup_kaggle_auth():
    """
    Priority:
      1) /content/kaggle.json upload (legacy)
      2) Colab Secret KAGGLE_API_TOKEN (new)
      3) Colab Secrets KAGGLE_USERNAME + KAGGLE_KEY (legacy)
      4) Existing env vars
    Returns: dict(mode=..., ok=bool, details=str)
    """
    # ---- legacy file upload ----
    src = Path("/content/kaggle.json")
    dst = Path("/root/.kaggle/kaggle.json")
    if src.exists():
        dst.parent.mkdir(parents=True, exist_ok=True)
        dst.write_text(src.read_text())
        os.chmod(dst, 0o600)
        return {"mode": "legacy_json", "ok": True, "details": f"Using {dst}"}

    # ---- read secrets if running on Colab ----
    token = None
    username = None
    key = None
    try:
        from google.colab import userdata  # type: ignore
        token = userdata.get("KAGGLE_API_TOKEN")
        username = userdata.get("KAGGLE_USERNAME")
        key = userdata.get("KAGGLE_KEY")
    except Exception:
        pass

    # ---- fallback to env ----
    token = token or os.environ.get("KAGGLE_API_TOKEN")
    username = username or os.environ.get("KAGGLE_USERNAME")
    key = key or os.environ.get("KAGGLE_KEY")

    # ---- new token mode ----
    if token:
        os.environ["KAGGLE_API_TOKEN"] = token
        return {
            "mode": "api_token",
            "ok": True,
            "details": "KAGGLE_API_TOKEN is set. Prefer kagglehub for downloads."
        }

    # ---- legacy env mode ----
    if username and key:
        dst.parent.mkdir(parents=True, exist_ok=True)
        dst.write_text(json.dumps({"username": username, "key": key}))
        os.chmod(dst, 0o600)
        os.environ["KAGGLE_USERNAME"] = username
        os.environ["KAGGLE_KEY"] = key
        return {"mode": "legacy_env", "ok": True, "details": "Wrote legacy kaggle.json from env/secrets."}

    return {
        "mode": "none",
        "ok": False,
        "details": "No Kaggle credentials found. Add KAGGLE_API_TOKEN in Colab Secrets or upload kaggle.json."
    }

info = setup_kaggle_auth()
print("Kaggle auth mode:", info["mode"])
print(info["details"])


Kaggle auth mode: api_token
KAGGLE_API_TOKEN is set. Prefer kagglehub for downloads.


## Config

In [30]:
MODEL_ID = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
OUT_DIR = "/content/cardiosense_component4_llm"
import os
os.makedirs(OUT_DIR, exist_ok=True)
print("MODEL_ID:", MODEL_ID)
print("OUT_DIR:", OUT_DIR)


MODEL_ID: TinyLlama/TinyLlama-1.1B-Chat-v1.0
OUT_DIR: /content/cardiosense_component4_llm


## Load datasets

In [32]:
import os
import re
import pandas as pd
from pathlib import Path
from datasets import load_dataset, Dataset, concatenate_datasets

RNG_SEED = 42

# -------------------------
# Kaggle auth (NEW token)
# -------------------------
def setup_kaggle_token():
    """
    Recommended (secure): Colab -> Runtime -> Secrets -> add:
      - KAGGLE_API_TOKEN = <your token>

    This function loads it into os.environ so kagglehub can use it.
    Also supports legacy kaggle.json if you uploaded it to /content.
    """
    # 1) legacy kaggle.json upload support
    src = Path("/content/kaggle.json")
    if src.exists():
        dst = Path("/root/.kaggle/kaggle.json")
        dst.parent.mkdir(parents=True, exist_ok=True)
        dst.write_text(src.read_text())
        os.chmod(dst, 0o600)
        return True, "Using legacy /content/kaggle.json"

    # 2) Colab Secrets (preferred)
    token = None
    try:
        from google.colab import userdata  # type: ignore
        token = userdata.get("KAGGLE_API_TOKEN")
    except Exception:
        token = None

    # 3) Environment fallback
    token = token or os.environ.get("KAGGLE_API_TOKEN")

    if token:
        os.environ["KAGGLE_API_TOKEN"] = token
        return True, "Using KAGGLE_API_TOKEN (new token mode)"

    return False, "No Kaggle credentials found (set KAGGLE_API_TOKEN in Secrets or upload kaggle.json)."

k_ok, k_msg = setup_kaggle_token()
print("Kaggle auth:", k_ok, "-", k_msg)

# -------------------------
# Helpers
# -------------------------
def _clean_text(x):
    x = str(x).strip()
    x = re.sub(r"\s+", " ", x)
    return x

# -------------------------
# Hugging Face datasets
# -------------------------
def load_mentalchat16k():
    ds = load_dataset("ShenLab/MentalChat16K", split="train")
    cols = set(ds.column_names)

    q_cols = [c for c in ["question", "prompt", "input", "instruction", "patient", "query"] if c in cols]
    a_cols = [c for c in ["answer", "response", "output", "assistant", "completion"] if c in cols]

    if q_cols and a_cols:
        q, a = q_cols[0], a_cols[0]
        return ds.map(
            lambda ex: {"user": _clean_text(ex[q]), "assistant": _clean_text(ex[a])},
            remove_columns=ds.column_names
        )

    text_col = next((c for c in ["text", "conversation", "dialogue"] if c in cols), None)
    if text_col is None:
        joined = ds.map(
            lambda ex: {"_joined": _clean_text(" ".join([str(ex[c]) for c in ds.column_names]))},
            remove_columns=ds.column_names
        )
        return joined.map(
            lambda ex: {"user": ex["_joined"][:800], "assistant": ex["_joined"][800:1600]},
            remove_columns=joined.column_names
        )

    def split_dialogue(t):
        t = _clean_text(t)
        parts = [p for p in re.split(r"(?:\r\n|\n|\r)+", t) if p.strip()]
        if len(parts) >= 2:
            return parts[0], " ".join(parts[1:])
        return t, "I hear you. Tell me more."

    return ds.map(
        lambda ex: {"user": split_dialogue(ex[text_col])[0], "assistant": split_dialogue(ex[text_col])[1]},
        remove_columns=ds.column_names
    )

def load_empathetic_dialogues():
    ds = load_dataset("Estwld/empathetic_dialogues_llm", split="train")
    cols = set(ds.column_names)

    q = next((c for c in ["prompt", "input", "user"] if c in cols), None)
    a = next((c for c in ["response", "output", "assistant"] if c in cols), None)

    if q and a:
        return ds.map(
            lambda ex: {"user": _clean_text(ex[q]), "assistant": _clean_text(ex[a])},
            remove_columns=ds.column_names
        )

    # fallback: first 2 columns
    q, a = ds.column_names[0], ds.column_names[1]
    return ds.map(
        lambda ex: {"user": _clean_text(ex[q]), "assistant": _clean_text(ex[a])},
        remove_columns=ds.column_names
    )

def load_medquad_cardiac(max_rows=8000):
    ds = load_dataset("lavita/MedQuAD", split="train")
    cols = set(ds.column_names)

    q_col = "question" if "question" in cols else ds.column_names[0]
    a_col = "answer" if "answer" in cols else ds.column_names[1]

    keywords = [
        "heart", "cardiac", "cardiovascular", "hypertension", "blood pressure", "cholesterol", "statin", "stroke",
        "angina", "myocardial", "infarction", "arrhythmia", "atrial fibrillation", "heart failure", "coronary",
        "chest pain", "palpitations", "tachycardia", "bradycardia", "diet", "exercise", "smoking", "alcohol",
        "stress", "depression", "anxiety", "adherence", "medication", "beta blocker", "aspirin"
    ]
    pat = re.compile("|".join([re.escape(k) for k in keywords]), re.IGNORECASE)

    ds = ds.filter(lambda ex: bool(pat.search(str(ex.get(q_col, ""))) or pat.search(str(ex.get(a_col, "")))))

    if len(ds) > max_rows:
        ds = ds.shuffle(seed=RNG_SEED).select(range(max_rows))

    return ds.map(
        lambda ex: {"user": _clean_text(ex[q_col]), "assistant": _clean_text(ex[a_col])},
        remove_columns=ds.column_names
    )

# -------------------------
# Kaggle dataset (optional)
# -------------------------
def load_counselchat_kaggle(optional=True, max_rows=12000):
    if not optional:
        return None
    if not k_ok:
        print("Skipping CounselChat (no Kaggle credentials).")
        return None

    outp = Path("/content/kaggle_counselchat")
    outp.mkdir(parents=True, exist_ok=True)

    # Prefer kagglehub (works with new token mode in many setups)
    try:
        import kagglehub
        dpath = Path(kagglehub.dataset_download("weiting016/counselchat-data"))
        # copy files into /content folder
        for f in dpath.glob("**/*"):
            if f.is_file():
                rel = f.relative_to(dpath)
                (outp / rel.parent).mkdir(parents=True, exist_ok=True)
                (outp / rel).write_bytes(f.read_bytes())
        print("Downloaded CounselChat via kagglehub:", dpath)
    except Exception as e:
        # Fallback to legacy CLI only if kaggle.json exists
        if Path("/root/.kaggle/kaggle.json").exists():
            import subprocess
            print("kagglehub failed; trying Kaggle CLI. Reason:", repr(e))
            subprocess.check_call([
                "kaggle", "datasets", "download",
                "-d", "weiting016/counselchat-data",
                "-p", str(outp),
                "--unzip"
            ])
        else:
            print("CounselChat download failed (token mode not supported by this environment).")
            print("Fix: upload kaggle.json OR install a kagglehub version that supports KAGGLE_API_TOKEN.")
            print("Reason:", repr(e))
            return None

    csvs = sorted(outp.glob("**/*.csv"), key=lambda p: p.stat().st_size, reverse=True)
    if not csvs:
        raise FileNotFoundError("No CSV found for CounselChat.")
    df = pd.read_csv(csvs[0])
    df.columns = [c.lower() for c in df.columns]

    q = next((c for c in ["question", "title", "text"] if c in df.columns), None)
    a = next((c for c in ["answer", "response", "counselor_answer"] if c in df.columns), None)

    if not q or not a:
        obj_cols = [c for c in df.columns if df[c].dtype == "object"]
        if len(obj_cols) < 2:
            return None
        q, a = obj_cols[0], obj_cols[1]

    df = df[[q, a]].dropna()
    df[q] = df[q].map(_clean_text)
    df[a] = df[a].map(_clean_text)
    df = df[(df[q].str.len() >= 10) & (df[a].str.len() >= 20)]

    if len(df) > max_rows:
        df = df.sample(n=max_rows, random_state=RNG_SEED)

    return Dataset.from_pandas(pd.DataFrame({"user": df[q], "assistant": df[a]}), preserve_index=False)

# -------------------------
# Load everything
# -------------------------
ds_mental = load_mentalchat16k()
ds_empathy = load_empathetic_dialogues()
ds_medquad = load_medquad_cardiac(max_rows=8000)
ds_kaggle = load_counselchat_kaggle(optional=True, max_rows=12000)

print("MentalChat16K:", len(ds_mental))
print("Empathy:", len(ds_empathy))
print("MedQuAD filtered:", len(ds_medquad))
print("CounselChat:", (len(ds_kaggle) if ds_kaggle is not None else 0))


Kaggle auth: True - Using KAGGLE_API_TOKEN (new token mode)


Map:   0%|          | 0/16084 [00:00<?, ? examples/s]

Map:   0%|          | 0/19533 [00:00<?, ? examples/s]

Filter:   0%|          | 0/47441 [00:00<?, ? examples/s]

Map:   0%|          | 0/6990 [00:00<?, ? examples/s]

Downloading from https://www.kaggle.com/api/v1/datasets/download/weiting016/counselchat-data?dataset_version_number=1...


100%|██████████| 1.02M/1.02M [00:00<00:00, 2.04MB/s]

Extracting files...
Downloaded CounselChat via kagglehub: /root/.cache/kagglehub/datasets/weiting016/counselchat-data/versions/1
MentalChat16K: 16084
Empathy: 19533
MedQuAD filtered: 6990
CounselChat: 1449





## Build chat-format dataset

In [8]:
from datasets import DatasetDict

SYSTEM_PROMPT = (
    "You are CardioSense, a supportive mental health chatbot for people living with cardiovascular disease. "
    "You respond with empathy and practical steps. "
    "You do not diagnose. You encourage professional care when needed. "
    "If the user mentions self-harm, suicide, or immediate danger, you urge them to seek urgent help and contact local emergency services."
)

def add_cvd_context(u: str) -> str:
    return "I have cardiovascular disease. " + u

def to_messages(ex):
    import random
    user = ex["user"]; assistant = ex["assistant"]
    if random.random() < 0.25:
        user = add_cvd_context(user)
    return {"messages":[
        {"role":"system","content":SYSTEM_PROMPT},
        {"role":"user","content":user},
        {"role":"assistant","content":assistant}
    ]}

def prep(ds):
    ds2 = ds.map(lambda ex: {"user": _clean_text(ex["user"]), "assistant": _clean_text(ex["assistant"])})
    ds2 = ds2.filter(lambda ex: len(ex["user"])>=10 and len(ex["assistant"])>=20)
    return ds2.map(to_messages, remove_columns=ds2.column_names)

ds_list = [prep(ds_mental), prep(ds_empathy), prep(ds_medquad)]
if ds_kaggle is not None:
    ds_list.append(prep(ds_kaggle))

ds_all = concatenate_datasets(ds_list).shuffle(seed=RNG_SEED)
split = ds_all.train_test_split(test_size=0.05, seed=RNG_SEED)
train_ds, eval_ds = split["train"], split["test"]

print("Train:", len(train_ds), "Eval:", len(eval_ds))
print(train_ds[0]["messages"])


Map:   0%|          | 0/16084 [00:00<?, ? examples/s]

Filter:   0%|          | 0/16084 [00:00<?, ? examples/s]

Map:   0%|          | 0/16055 [00:00<?, ? examples/s]

Map:   0%|          | 0/19533 [00:00<?, ? examples/s]

Filter:   0%|          | 0/19533 [00:00<?, ? examples/s]

Map:   0%|          | 0/19342 [00:00<?, ? examples/s]

Map:   0%|          | 0/6990 [00:00<?, ? examples/s]

Filter:   0%|          | 0/6990 [00:00<?, ? examples/s]

Map:   0%|          | 0/4866 [00:00<?, ? examples/s]

Train: 38249 Eval: 2014
[{'content': 'You are CardioSense, a supportive mental health chatbot for people living with cardiovascular disease. You respond with empathy and practical steps. You do not diagnose. You encourage professional care when needed. If the user mentions self-harm, suicide, or immediate danger, you urge them to seek urgent help and contact local emergency services.', 'role': 'system'}, {'content': "I have cardiovascular disease. I've always been a worrywart but recently my anxiety has skyrocketed. Even simple tasks like going to the grocery store or answering phone calls make me extremely nervous. It's become so intense that I end up avoiding activities altogether. I long for guidance on how to navigate through my anxiety and regain control over my life.", 'role': 'user'}, {'content': "It sounds like you're experiencing a significant increase in anxiety lately, especially when it comes to everyday tasks and activities. I understand that this has made things difficult

## Load model (4-bit) + QLoRA setup

In [23]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from peft import LoraConfig, prepare_model_for_kbit_training

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float32 # Changed to float32 to avoid BFloat16 issues
)

tokenizer = AutoTokenizer.from_pretrained(
    MODEL_ID,
    use_fast=True,
    token=os.environ.get("HF_TOKEN") or os.environ.get("HUGGINGFACE_HUB_TOKEN")
)
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    quantization_config=bnb_config,
    device_map="auto",
    torch_dtype=torch.float32, # Explicitly load model in float32 to prevent BFloat16 issues
    token=os.environ.get("HF_TOKEN") or os.environ.get("HUGGINGFACE_HUB_TOKEN")
)
model.config.use_cache = False
model = prepare_model_for_kbit_training(model)

lora_config = LoraConfig(
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
    target_modules=["q_proj","k_proj","v_proj","o_proj","gate_proj","up_proj","down_proj"]
)

print("Tokenizer chat_template:", bool(getattr(tokenizer, "chat_template", None)))


`torch_dtype` is deprecated! Use `dtype` instead!


Tokenizer chat_template: True


## Convert messages -> text for SFT training

In [10]:
def to_text(ex):
    msgs = ex["messages"]
    if getattr(tokenizer, "chat_template", None):
        return {"text": tokenizer.apply_chat_template(msgs, tokenize=False, add_generation_prompt=False)}
    sys = msgs[0]["content"]; u = msgs[1]["content"]; a = msgs[2]["content"]
    return {"text": f"<|system|>\n{sys}\n<|user|>\n{u}\n<|assistant|>\n{a}{tokenizer.eos_token}"}

train_txt = train_ds.map(to_text, remove_columns=train_ds.column_names)
eval_txt  = eval_ds.map(to_text, remove_columns=eval_ds.column_names)
print(train_txt[0]["text"][:400])


Map:   0%|          | 0/38249 [00:00<?, ? examples/s]

Map:   0%|          | 0/2014 [00:00<?, ? examples/s]

<|system|>
You are CardioSense, a supportive mental health chatbot for people living with cardiovascular disease. You respond with empathy and practical steps. You do not diagnose. You encourage professional care when needed. If the user mentions self-harm, suicide, or immediate danger, you urge them to seek urgent help and contact local emergency services.</s>
<|user|>
I have cardiovascular disea


## Train (SFTTrainer + QLoRA)

In [33]:
from transformers import TrainingArguments
from trl import SFTTrainer

MAX_SEQ_LEN = 1024

args = TrainingArguments(
    output_dir=f"{OUT_DIR}/runs",
    per_device_train_batch_size=2,
    per_device_eval_batch_size=2,
    gradient_accumulation_steps=8,
    learning_rate=2e-4,
    num_train_epochs=1,
    lr_scheduler_type="cosine",
    warmup_ratio=0.03,
    logging_steps=25,
    eval_strategy="steps",
    eval_steps=200,
    save_steps=200,
    save_total_limit=2,
    fp16=False, # Disabling fp16 to avoid BFloat16 NotImplementedError
    bf16=False,
    gradient_checkpointing=False, # Disabling gradient checkpointing
    optim="adamw_torch", # Changed optimizer to a valid option
    report_to="none"
)

trainer = SFTTrainer(
    model=model,
    args=args,
    train_dataset=train_txt,
    eval_dataset=eval_txt,
    peft_config=lora_config
)

trainer.train()


  return fn(*args, **kwargs)


Step,Training Loss,Validation Loss,Entropy,Num Tokens,Mean Token Accuracy
200,0.8649,0.780067,0.83532,1274046.0,0.791339
400,0.8233,0.747838,0.783647,2544453.0,0.79803
600,0.7973,0.730504,0.764337,3804543.0,0.802312
800,0.7729,0.719952,0.758869,5087000.0,0.804141
1000,0.7961,0.711132,0.751601,6371046.0,0.805616
1200,0.7711,0.703871,0.727496,7645690.0,0.806983
1400,0.7533,0.697817,0.724551,8934700.0,0.809146
1600,0.7481,0.694209,0.733953,10213237.0,0.81009
1800,0.76,0.692132,0.724019,11497192.0,0.80999
2000,0.7495,0.691285,0.72686,12767883.0,0.810819


  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)


TrainOutput(global_step=2391, training_loss=0.7983838033097801, metrics={'train_runtime': 9362.8105, 'train_samples_per_second': 4.085, 'train_steps_per_second': 0.255, 'total_flos': 1.3359496491454464e+17, 'train_loss': 0.7983838033097801, 'entropy': 0.7184731817245483, 'num_tokens': 15271232.0, 'mean_token_accuracy': 0.811301818370819, 'epoch': 1.0})

## Save adapter + optional merged model

In [34]:
from transformers import AutoModelForCausalLM
from peft import PeftModel

ADAPTER_DIR = f"{OUT_DIR}/adapter"
MERGED_DIR  = f"{OUT_DIR}/merged"

trainer.model.save_pretrained(ADAPTER_DIR)
tokenizer.save_pretrained(ADAPTER_DIR)
print("Saved adapter:", ADAPTER_DIR)

# Merge (optional). Comment out if you only want adapter.
base_fp16 = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    torch_dtype=torch.float16,
    device_map="auto",
    token=os.environ.get("HF_TOKEN") or os.environ.get("HUGGINGFACE_HUB_TOKEN")
)
peft_model = PeftModel.from_pretrained(base_fp16, ADAPTER_DIR)
merged = peft_model.merge_and_unload()
import os
os.makedirs(MERGED_DIR, exist_ok=True)
merged.save_pretrained(MERGED_DIR, safe_serialization=True)
tokenizer.save_pretrained(MERGED_DIR)
print("Saved merged model:", MERGED_DIR)


Saved adapter: /content/cardiosense_component4_llm/adapter
Saved merged model: /content/cardiosense_component4_llm/merged


## Inference helper (real-world usage)

In [43]:
from transformers import pipeline

gen = pipeline("text-generation", model=MERGED_DIR, tokenizer=tokenizer, device_map="auto")

def chat_once(user_text: str, max_new_tokens=220, temperature=0.7, top_p=0.9):
    msgs = [{"role":"system","content":SYSTEM_PROMPT},{"role":"user","content":user_text}]
    if getattr(tokenizer, "chat_template", None):
        prompt = tokenizer.apply_chat_template(msgs, tokenize=False, add_generation_prompt=True)
    else:
        prompt = f"<|system|>\n{SYSTEM_PROMPT}\n<|user|>\n{user_text}\n<|assistant|>\n"
    out = gen(prompt, max_new_tokens=max_new_tokens, do_sample=True, temperature=temperature, top_p=top_p,
              pad_token_id=tokenizer.eos_token_id)[0]["generated_text"]
    if "<|assistant|>" in out:
        return out.split("<|assistant|>")[-1].strip()
    return out[len(prompt):].strip()

print(chat_once(" "))


Device set to use cuda:0


I've been feeling really overwhelmed lately. My husband and I have been going through a difficult time in our relationship, and I'm finding it hard to find the motivation to do the things I used to enjoy. I've been having trouble sleeping, and I've been feeling anxious and irritable. I've been trying to stay positive, but it's hard when everything feels so uncertain.


## Zip artifacts (Colab download)

In [36]:
import zipfile
from pathlib import Path

zip_path = Path("/content/CardioSense_Component4_LLMChatbot.zip")
root = Path(OUT_DIR)

with zipfile.ZipFile(zip_path, "w", zipfile.ZIP_DEFLATED) as z:
    for p in root.rglob("*"):
        if p.is_file():
            z.write(p, arcname=str(p.relative_to(root)))

print("ZIP created:", zip_path)

try:
    from google.colab import files  # type: ignore
    files.download(str(zip_path))
except Exception:
    print("Not in Colab. ZIP path:", zip_path)


ZIP created: /content/CardioSense_Component4_LLMChatbot.zip


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [27]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from peft import LoraConfig, prepare_model_for_kbit_training

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16 # Changed to bfloat16 for A100 compatibility
)

tokenizer = AutoTokenizer.from_pretrained(
    MODEL_ID,
    use_fast=True,
    token=os.environ.get("HF_TOKEN") or os.environ.get("HUGGINGFACE_HUB_TOKEN")
)
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    quantization_config=bnb_config,
    device_map="auto",
    torch_dtype=torch.float32, # Explicitly load model in float32 to prevent BFloat16 issues unless explicitly handled by bnb
    token=os.environ.get("HF_TOKEN") or os.environ.get("HUGGINGFACE_HUB_TOKEN")
)
model.config.use_cache = False
model = prepare_model_for_kbit_training(model)

lora_config = LoraConfig(
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
    target_modules=["q_proj","k_proj","v_proj","o_proj","gate_proj","up_proj","down_proj"]
)

print("Tokenizer chat_template:", bool(getattr(tokenizer, "chat_template", None)))

Tokenizer chat_template: True


In [39]:
from transformers import TrainingArguments
from trl import SFTTrainer

MAX_SEQ_LEN = 1024

args = TrainingArguments(
    output_dir=f"{OUT_DIR}/runs",
    per_device_train_batch_size=2,
    per_device_eval_batch_size=2,
    gradient_accumulation_steps=8,
    learning_rate=2e-4,
    num_train_epochs=1,
    lr_scheduler_type="cosine",
    warmup_ratio=0.03,
    logging_steps=25,
    eval_strategy="steps",
    eval_steps=200,
    save_steps=200,
    save_total_limit=2,
    fp16=False, # Keeping fp16=False as we are using bf16
    bf16=True,  # Enabled bf16 for A100 compatibility
    gradient_checkpointing=False,
    optim="adamw_torch",
    report_to="none"
)

trainer = SFTTrainer(
    model=model,
    args=args,
    train_dataset=train_txt,
    eval_dataset=eval_txt,
    peft_config=lora_config
)

trainer.train()

  return fn(*args, **kwargs)


Step,Training Loss,Validation Loss,Entropy,Num Tokens,Mean Token Accuracy
200,0.8649,0.779599,0.828585,1274046.0,0.792105
400,0.8232,0.747969,0.78437,2544453.0,0.798056
600,0.7974,0.730393,0.765045,3804543.0,0.802449
800,0.7727,0.719951,0.760525,5087000.0,0.804186
1000,0.7956,0.71105,0.752874,6371046.0,0.805671
1200,0.7711,0.703761,0.728166,7645690.0,0.80714
1400,0.7535,0.697831,0.723934,8934700.0,0.809238
1600,0.7482,0.694173,0.735239,10213237.0,0.810011
1800,0.76,0.691967,0.725272,11497192.0,0.809926
2000,0.7498,0.691108,0.726891,12767883.0,0.810833


  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)


TrainOutput(global_step=2391, training_loss=0.7983908483743767, metrics={'train_runtime': 9501.0409, 'train_samples_per_second': 4.026, 'train_steps_per_second': 0.252, 'total_flos': 1.3359496491454464e+17, 'train_loss': 0.7983908483743767, 'entropy': 0.7181888191699982, 'num_tokens': 15271232.0, 'mean_token_accuracy': 0.8120470662117004, 'epoch': 1.0})