In [1]:
!pip install -q transformers datasets peft accelerate bitsandbytes safetensors huggingface_hub sentencepiece

import torch, sys
print("Python:", sys.version.split()[0])
print("Torch:", torch.__version__)
print("CUDA available:", torch.cuda.is_available())

!nvidia-smi -L || true


[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m60.1/60.1 MB[0m [31m13.5 MB/s[0m eta [36m0:00:00[0m
[?25hPython: 3.12.11
Torch: 2.8.0+cu126
CUDA available: True
GPU 0: Tesla T4 (UUID: GPU-e29d2dec-5519-4b5c-9ab7-0dbb29dccb92)


In [44]:
import os, torch
from pathlib import Path
from huggingface_hub import notebook_login

In [45]:
notebook_login()
MODEL_NAME = "tiiuae/falcon-rw-1b"
OUTPUT_DIR = "lora_adapter"
RESULTS_DIR = "results"

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [46]:
train_samples = 1000
eval_samples = 100
per_device_train_batch_size = 1
per_device_eval_batch_size = 1
num_train_epochs = 1
learning_rate = 3e-4
max_length = 512
seed = 42

In [47]:
os.makedirs(OUTPUT_DIR, exist_ok=True)
os.makedirs(RESULTS_DIR, exist_ok=True)
torch.manual_seed(seed)

<torch._C.Generator at 0x7c94e5707870>

In [48]:
print("Config done! Model:", MODEL_NAME)

Config done! Model: tiiuae/falcon-rw-1b


In [49]:
from datasets import load_dataset

In [50]:
dataset = load_dataset("lavita/AlpaCare-MedInstruct-52k")

In [51]:
print(dataset)

DatasetDict({
    train: Dataset({
        features: ['output', 'input', 'instruction'],
        num_rows: 52002
    })
})


In [52]:
for i in range(3):
    print(f"\n--- Sample {i+1} ---")
    print("Instruction:", dataset["train"][i]["instruction"])
    print("Response:", dataset["train"][i]["output"][:200], "...")


--- Sample 1 ---
Instruction: Explain why a mass in the lung could cause shortness of breath.
Response: A mass in the lung could cause shortness of breath due to several reasons. First, the mass can physically obstruct the air passages, causing difficulty in airflow and leading to breathing difficulties ...

--- Sample 2 ---
Instruction: Write a brief reflection on what you learned from today's lecture on diabetes.
Response: Today's lecture on diabetes provided a comprehensive overview of the condition, its causes, risk factors, and management strategies. 

I learned that diabetes is a chronic disease characterized by hig ...

--- Sample 3 ---
Instruction: Provide a condensed summary about this prescription medication.
Response: Lexapro (Escitalopram) is a prescription medication that belongs to a class of drugs called selective serotonin reuptake inhibitors (SSRIs). It is primarily used for the treatment of depression and ge ...


In [53]:
def format_example(example):
    instr = example.get("instruction", "")
    resp = example.get("output", "")
    instr = instr.strip() if isinstance(instr, str) else ""
    resp = resp.strip() if isinstance(resp, str) else ""
    text = f"### Instruction:\n{instr}\n\n### Response:\n{resp}\n"
    return {"text": text}

In [54]:
dataset = dataset.map(format_example, remove_columns=dataset["train"].column_names)

In [55]:
print(dataset["train"][0]["text"][:500])

### Instruction:
Explain why a mass in the lung could cause shortness of breath.

### Response:
A mass in the lung could cause shortness of breath due to several reasons. First, the mass can physically obstruct the air passages, causing difficulty in airflow and leading to breathing difficulties. Second, if the mass is cancerous or infected, it can cause inflammation and damage to lung tissue, reducing its functional capacity and compromising normal breathing. Additionally, a lung mass can compr


In [56]:
from datasets import Dataset

In [57]:
dataset["train"] = dataset["train"].shuffle(seed=42).select(range(min(1000, len(dataset["train"]))))

In [58]:
split_data = dataset["train"].train_test_split(test_size=0.1, seed=42)
train_ds = split_data["train"]
val_ds = split_data["test"]

print("Train samples:", len(train_ds))
print("Validation samples:", len(val_ds))
print("\nExample training text:\n", train_ds[0]["text"][:300])

Train samples: 900
Validation samples: 100

Example training text:
 ### Instruction:
As a patient, ask questions about basic precautions to be taken during the flu season.

### Response:
To prevent catching the flu during the winter season, there are several basic precautions you can take:

1. Get vaccinated: The flu vaccine is the most effective way to protect your


In [59]:
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from peft import prepare_model_for_kbit_training

In [60]:
model_name = "tiiuae/falcon-rw-1b"

In [61]:
tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=False)
if tokenizer.pad_token is None:
    tokenizer.add_special_tokens({"pad_token": "</s>"})

In [62]:
bnb_config = BitsAndBytesConfig(load_in_8bit=True)

In [63]:
print("Loading model...")
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",
    quantization_config=bnb_config,
    trust_remote_code=True
)

Loading model...


In [64]:
model = prepare_model_for_kbit_training(model)
model.resize_token_embeddings(len(tokenizer))
print("Model loaded and prepared for training.")

You are using an old version of the checkpointing format that is deprecated (We will also silently ignore `gradient_checkpointing_kwargs` in case you passed it).Please update to the new format on your modeling file. To use the new format, you need to completely remove the definition of the method `_set_gradient_checkpointing` in your model.


Model loaded and prepared for training.


In [65]:
from peft import LoraConfig, get_peft_model

lora_config = LoraConfig(
    r=8,
    lora_alpha=32,
    target_modules=["query_key_value"],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM"
)

In [66]:
model = get_peft_model(model, lora_config)
model.print_trainable_parameters()

trainable params: 1,572,864 || all params: 1,313,103,872 || trainable%: 0.1198


In [67]:
def tokenize_function(example):
    tokens = tokenizer(
        example["text"],
        truncation=True,
        max_length=512,
        padding="max_length"
    )
    tokens["labels"] = tokens["input_ids"].copy()
    return tokens

In [68]:
tokenized_train = train_ds.map(tokenize_function, batched=True, remove_columns=["text"])
tokenized_val = val_ds.map(tokenize_function, batched=True, remove_columns=["text"])

In [69]:
tokenized_train.set_format(type="torch", columns=["input_ids", "attention_mask", "labels"])
tokenized_val.set_format(type="torch", columns=["input_ids", "attention_mask", "labels"])

In [70]:
print("Tokenization complete.")
print("Example token IDs:", tokenized_train[0]["input_ids"][:30])

Tokenization complete.
Example token IDs: tensor([21017, 46486,    25,   198,  1722,   257,  5827,    11,  1265,  2683,
          546,  4096, 31320,   284,   307,  2077,  1141,   262,  6562,  1622,
           13,   198,   198, 21017, 18261,    25,   198,  2514,  2948, 16508])


In [71]:
import torch
from torch.utils.data import DataLoader
from tqdm import tqdm

model.config.use_cache = False
model.gradient_checkpointing_enable()

train_loader = DataLoader(tokenized_train, batch_size=1, shuffle=True)
optimizer = torch.optim.AdamW(model.parameters(), lr=3e-4)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
model.train()

epochs = 1
print(f" Starting stable manual training for {epochs} epoch(s)...")

for epoch in range(epochs):
    loop = tqdm(train_loader, desc=f"Epoch {epoch+1}/{epochs}")
    for batch in loop:
        optimizer.zero_grad()
        batch = {k: v.to(device) for k, v in batch.items()}
        outputs = model(**batch, use_cache=False)
        loss = outputs.loss
        loss.backward()
        optimizer.step()
        loop.set_postfix(loss=loss.item())

print(" Stable manual training complete! Model fine-tuned successfully.")


You are using an old version of the checkpointing format that is deprecated (We will also silently ignore `gradient_checkpointing_kwargs` in case you passed it).Please update to the new format on your modeling file. To use the new format, you need to completely remove the definition of the method `_set_gradient_checkpointing` in your model.


 Starting stable manual training for 1 epoch(s)...


  return fn(*args, **kwargs)
Epoch 1/1: 100%|██████████| 900/900 [13:23<00:00,  1.12it/s, loss=1.74]

 Stable manual training complete! Model fine-tuned successfully.





In [72]:
import os

OUTPUT_DIR = "lora_adapter"
os.makedirs(OUTPUT_DIR, exist_ok=True)

print("Saving adapter to:", OUTPUT_DIR)
model.save_pretrained(OUTPUT_DIR)
tokenizer.save_pretrained(OUTPUT_DIR)

print("Adapter and tokenizer saved successfully!")


Saving adapter to: lora_adapter




Adapter and tokenizer saved successfully!


In [41]:
#inference test
from transformers import AutoModelForCausalLM
from peft import PeftModel

model_name = "tiiuae/falcon-rw-1b"

print("Loading base model for inference")
base_model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",
    trust_remote_code=True
)
base_model.resize_token_embeddings(len(tokenizer))

Loading base model for inference


Embedding(50258, 2048)

In [74]:
#inference test
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

model_name = "tiiuae/falcon-rw-1b"

# Load tokenizer here to ensure it's defined
tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=False)
if tokenizer.pad_token is None:
    tokenizer.add_special_tokens({"pad_token": "</s>"})


print("Loading base model for inference")
base_model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",
    trust_remote_code=True
)
base_model.resize_token_embeddings(len(tokenizer))

model = PeftModel.from_pretrained(base_model, "lora_adapter")
model.eval()

def generate_with_disclaimer(prompt, max_new_tokens=150):
    input_text = f"### Instruction:\n{prompt}\n\n### Response:\n"
    inputs = tokenizer(input_text, return_tensors="pt").to(model.device)
    outputs = model.generate(**inputs, max_new_tokens=max_new_tokens, use_cache=False)
    text = tokenizer.decode(outputs[0], skip_special_tokens=True)
    if "### Response:" in text:
        text = text.split("### Response:")[-1].strip()
    disclaimer = (
        "\n\n---\n *This response is for educational purposes only "
        "and is NOT medical advice. Consult a qualified healthcare professional.*"
    )
    return text + disclaimer

Loading base model for inference


In [75]:
print("Model test output:\n")
print(generate_with_disclaimer("Explain how to treat a mild fever at home safely."))

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Model test output:

To treat a mild fever at home safely, follow these steps:

1. Assess the patient's condition:
- Check the temperature: Take the patient's temperature with a digital thermometer. It should be taken in the morning and after a period of rest.
- Check for other symptoms: Check for other symptoms such as chills, headache, muscle aches, and sore throat.
- Monitor the patient's condition: Monitor the patient's condition for any changes in symptoms or signs.
2. Consider other causes of fever:
- Check for other possible causes of fever such as infection, medication, or stress.
- Consult a healthcare professional: Consult a healthcare professional if the patient's fever persists or worsens.

---
 *This response is for educational purposes only and is NOT medical advice. Consult a qualified healthcare professional.*


In [78]:
import shutil
from google.colab import files

shutil.make_archive("lora_adapter", "zip", "lora_adapter")
print("Adapter zipped successfully as lora_adapter.zip")

files.download("lora_adapter.zip")


Adapter zipped successfully as lora_adapter.zip


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>