#SAT 5114
#AI in HEALTHCARE PROJECT


##Install and load libraries

In [1]:
!pip install -qqq "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git" --progress-bar off
!pip install -qqq xformers trl peft accelerate bitsandbytes triton --progress-bar off
!pip install -qqq unsloth transformers accelerate datasets peft bitsandbytes wandb evaluate bert-score

  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
  Building wheel for unsloth (pyproject.toml) ... [?25l[?25hdone
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
gcsfs 2025.3.2 requires fsspec==2025.3.2, but you have fsspec 2024.12.0 which is incompatible.
tensorflow-metadata 1.17.1 requires protobuf<6.0.0,>=4.25.2; python_version >= "3.11", but you have protobuf 3.20.3 which is incompatible.
grpcio-status 1.71.0 requires protobuf<6.0dev,>=5.26.1, but you have protobuf 3.20.3 which is incompatible.
ydf 0.11.0 requires protobuf<6.0.0,>=5.29.1, but you have protobuf 3.20.3 which is incompatible.[0m[31m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m84.0/84.0 kB[0m [31m2.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━

##Import Libraries

In [2]:
# Import libraries
import torch
import unsloth
import numpy as np
from tqdm import tqdm
from datasets import load_dataset
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    TrainingArguments,
    Trainer,
    EarlyStoppingCallback,
    TextStreamer,
    LogitsProcessor,
    LogitsProcessorList,
    BitsAndBytesConfig,
)
from peft import LoraConfig, get_peft_model
from sklearn.metrics import f1_score
import evaluate

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
Unsloth: Failed to patch Gemma3ForConditionalGeneration.
🦥 Unsloth Zoo will now patch everything to make training faster!


##Install and define evaluation

In [3]:
!pip install rouge_score
bleu_metric = evaluate.load("bleu")
rouge_metric = evaluate.load("rouge")
bertscore_metric = evaluate.load("bertscore")

Collecting rouge_score
  Downloading rouge_score-0.1.2.tar.gz (17 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: rouge_score
  Building wheel for rouge_score (setup.py) ... [?25l[?25hdone
  Created wheel for rouge_score: filename=rouge_score-0.1.2-py3-none-any.whl size=24934 sha256=8b15b77f151f7057b1fba1daa2b39b6e5a9e9489eba189be4cfd2adbbe7e8f48
  Stored in directory: /root/.cache/pip/wheels/1e/19/43/8a442dc83660ca25e163e1bd1f89919284ab0d0c1475475148
Successfully built rouge_score
Installing collected packages: rouge_score
Successfully installed rouge_score-0.1.2


Downloading builder script:   0%|          | 0.00/5.94k [00:00<?, ?B/s]

Downloading extra modules:   0%|          | 0.00/1.55k [00:00<?, ?B/s]

Downloading extra modules:   0%|          | 0.00/3.34k [00:00<?, ?B/s]

Downloading builder script:   0%|          | 0.00/6.27k [00:00<?, ?B/s]

Downloading builder script:   0%|          | 0.00/7.95k [00:00<?, ?B/s]

##Load and prepare the dataset

In [9]:
dataset = load_dataset("Shekswess/gemma_medquad_instruct_dataset", split="train[:400]")
dataset = dataset.train_test_split(test_size=0.1)

#lavita/ChatDoctor-HealthCareMagic-100k
#Shekswess/gemma_medquad_instruct_dataset

README.md:   0%|          | 0.00/1.64k [00:00<?, ?B/s]

train-00000-of-00001.parquet:   0%|          | 0.00/17.9M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/16359 [00:00<?, ? examples/s]

##Format the dataset

In [10]:
# Formatting function
def format_instruction(example):
    return {
        "prompt": f"### Instruction:\n{example['instruction']}\n\n### Input:\n{example['input']}\n\n### Response:\n",
        "response": example["output"]
    }

dataset = dataset.map(format_instruction)

Map:   0%|          | 0/360 [00:00<?, ? examples/s]

Map:   0%|          | 0/40 [00:00<?, ? examples/s]

##Model setup and quantization

In [11]:
# Model setup with 4-bit quantization
model_name = "unsloth/llama-3-8b"
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=True,
)

tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer.pad_token = tokenizer.eos_token

model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=quantization_config,
    device_map="auto",
    use_cache=False,
)

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

#LoRA configuration and PEFT

In [12]:
# Improved LoRA configuration
peft_config = LoraConfig(
    r=4,
    lora_alpha=8,
    target_modules=["q_proj", "v_proj", "k_proj", "o_proj"],
    lora_dropout=0.05,
    bias="lora_only",
    task_type="CAUSAL_LM",
    inference_mode=False,
)
model = get_peft_model(model, peft_config)
model.enable_input_require_grads()

##Data Collation and Tokenization

In [13]:
# Add data collator
from transformers import DataCollatorForSeq2Seq

data_collator = DataCollatorForSeq2Seq(
    tokenizer,
    pad_to_multiple_of=8,
    padding=True,
    return_tensors="pt",
)

# Update tokenize function
def tokenize_function(examples):
    texts = [p + r for p, r in zip(examples["prompt"], examples["response"])]
    tokenized = tokenizer(
        texts,
        max_length=512,
        truncation=True,
        padding="max_length",
        add_special_tokens=False
    )
    return {
        "input_ids": tokenized["input_ids"],
        "attention_mask": tokenized["attention_mask"],
        "labels": tokenized["input_ids"].copy()
    }

tokenized_dataset = dataset.map(tokenize_function, batched=True)

Map:   0%|          | 0/360 [00:00<?, ? examples/s]

Map:   0%|          | 0/40 [00:00<?, ? examples/s]

##Metrics calculation

In [14]:
# Metrics calculation (updated)
def compute_metrics(eval_pred):
    preds, labels = eval_pred
    labels = np.where(labels != -100, labels, tokenizer.pad_token_id)

    # Convert logits to token IDs (shape: [batch_size, seq_length])
    preds = np.argmax(preds, axis=-1)  # Add this line

    pred_texts = tokenizer.batch_decode(preds, skip_special_tokens=True)
    label_texts = tokenizer.batch_decode(labels, skip_special_tokens=True)

    results = {}

    # BLEU
    results["bleu"] = bleu_metric.compute(
        predictions=pred_texts,
        references=[[text] for text in label_texts]
    )["bleu"]

    # ROUGE
    results.update(rouge_metric.compute(
        predictions=pred_texts,
        references=label_texts
    ))

    # BERTScore
    bert_results = bertscore_metric.compute(
        predictions=pred_texts,
        references=label_texts,
        lang="en"
    )
    results["bert_score"] = np.mean(bert_results["f1"])

    return results

##Setup Training arguments and trainer

In [15]:

# Modified TrainingArguments with evaluation strategy
training_args = TrainingArguments(
    output_dir="./llama3_healthcare",
    per_device_train_batch_size=2,
    per_device_eval_batch_size=4,
    gradient_accumulation_steps=1,
    save_strategy ='epoch',      # Changed to "epoch" to enable saving
    logging_strategy="no",     # Disable logging
    learning_rate=1e-5,
    weight_decay=1,
    num_train_epochs=4,
    lr_scheduler_type="cosine",
    warmup_ratio=0.1,
    logging_steps=50,
    load_best_model_at_end=True,
    fp16=True,
    gradient_checkpointing=True,
    report_to="none",        # Disabled all reporting
    remove_unused_columns=False,
    # Add evaluation strategy for EarlyStoppingCallback
    eval_strategy = "epoch"  # or "steps" with logging_steps defined
)

# Trainer remains the same
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset["train"],
    eval_dataset=tokenized_dataset["test"],
    compute_metrics=compute_metrics,
    callbacks=[EarlyStoppingCallback(early_stopping_patience=3)]
)

# Training will now only log to console
trainer.train()

Epoch,Training Loss,Validation Loss,Bleu,Rouge1,Rouge2,Rougel,Rougelsum,Bert Score
1,No log,0.809389,0.377964,0.685381,0.374715,0.575169,0.605812,0.88358
2,No log,0.66585,0.452822,0.738861,0.45531,0.651004,0.665704,0.90527
3,No log,0.642137,0.459082,0.738626,0.456576,0.652674,0.66594,0.905385
4,No log,0.639166,0.459274,0.739225,0.457579,0.652887,0.66685,0.905519


tokenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/482 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/1.42G [00:00<?, ?B/s]

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


TrainOutput(global_step=720, training_loss=1.4614861382378472, metrics={'train_runtime': 392.4554, 'train_samples_per_second': 3.669, 'train_steps_per_second': 1.835, 'total_flos': 3.321446050824192e+16, 'train_loss': 1.4614861382378472, 'epoch': 4.0})

##Model saving

In [16]:
# Save and generation code remains unchanged
model.save_pretrained("./llama3_healthcare_finetuned")
tokenizer.save_pretrained("./llama3_healthcare_finetuned")

('./llama3_healthcare_finetuned/tokenizer_config.json',
 './llama3_healthcare_finetuned/special_tokens_map.json',
 './llama3_healthcare_finetuned/tokenizer.json')

##Text generation

In [35]:
from transformers import pipeline # Import the pipeline function
generator = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,

    max_new_tokens=256,
    do_sample=True,
    temperature=0.7,
    top_p=0.9,
    repetition_penalty=1.2,
)

test_question = "What is the management for hypertension?"
result = generator(test_question, num_return_sequences=1)
print("\nGenerated Response:")
print(result[0]['generated_text'])

Device set to use cuda:0
The model 'PeftModelForCausalLM' is not supported for text-generation. Supported models are ['AriaTextForCausalLM', 'BambaForCausalLM', 'BartForCausalLM', 'BertLMHeadModel', 'BertGenerationDecoder', 'BigBirdForCausalLM', 'BigBirdPegasusForCausalLM', 'BioGptForCausalLM', 'BlenderbotForCausalLM', 'BlenderbotSmallForCausalLM', 'BloomForCausalLM', 'CamembertForCausalLM', 'LlamaForCausalLM', 'CodeGenForCausalLM', 'CohereForCausalLM', 'Cohere2ForCausalLM', 'CpmAntForCausalLM', 'CTRLLMHeadModel', 'Data2VecTextForCausalLM', 'DbrxForCausalLM', 'DeepseekV3ForCausalLM', 'DiffLlamaForCausalLM', 'ElectraForCausalLM', 'Emu3ForCausalLM', 'ErnieForCausalLM', 'FalconForCausalLM', 'FalconMambaForCausalLM', 'FuyuForCausalLM', 'GemmaForCausalLM', 'Gemma2ForCausalLM', 'Gemma3ForConditionalGeneration', 'Gemma3ForCausalLM', 'GitForCausalLM', 'GlmForCausalLM', 'Glm4ForCausalLM', 'GotOcr2ForConditionalGeneration', 'GPT2LMHeadModel', 'GPT2LMHeadModel', 'GPTBigCodeForCausalLM', 'GPTNeoFo


Generated Response:
What is the management for hypertension? How to control it?
Hypertension can be controlled by a combination of lifestyle changes and medications. It’s important that you make these modifications because they’re less expensive than medication, cause fewer side effects and may even help reduce your need for medicine.
Lifestyle Changes – The first line treatment should always include dietary modification (i.e., reduction in salt intake), physical activity, weight loss if needed, moderation of alcohol consumption, stress reduction, and smoking cessation if applicable. These measures are inexpensive or free, but require commitment on part of patients; however, adherence rates have been reported at about 50%.
Medications – If blood pressure remains uncontrolled after three months despite following all advice above then doctors usually recommend starting with one type of drug called an “anti-hypertensive”. Thereafter another class(es) could also added depending upon indiv

#Add Graphical User Interface for input and output texts

##Install GUI library gradio

In [36]:
!pip install gradio

Collecting gradio
  Downloading gradio-5.25.2-py3-none-any.whl.metadata (16 kB)
Collecting aiofiles<25.0,>=22.0 (from gradio)
  Downloading aiofiles-24.1.0-py3-none-any.whl.metadata (10 kB)
Collecting fastapi<1.0,>=0.115.2 (from gradio)
  Downloading fastapi-0.115.12-py3-none-any.whl.metadata (27 kB)
Collecting ffmpy (from gradio)
  Downloading ffmpy-0.5.0-py3-none-any.whl.metadata (3.0 kB)
Collecting gradio-client==1.8.0 (from gradio)
  Downloading gradio_client-1.8.0-py3-none-any.whl.metadata (7.1 kB)
Collecting groovy~=0.1 (from gradio)
  Downloading groovy-0.1.2-py3-none-any.whl.metadata (6.1 kB)
Collecting pydub (from gradio)
  Downloading pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)
Collecting python-multipart>=0.0.18 (from gradio)
  Downloading python_multipart-0.0.20-py3-none-any.whl.metadata (1.8 kB)
Collecting ruff>=0.9.3 (from gradio)
  Downloading ruff-0.11.6-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (25 kB)
Collecting safehttpx<0.2.0,>=0.1.6 (

##Define predicition Function

In [37]:
import gradio as gr

def predict(input_text):
  result = generator(input_text, num_return_sequences=1)
  return result[0]['generated_text']

#Create the user interface

In [40]:
iface = gr.Interface(
    fn=predict,
    inputs=gr.Textbox(lines=2, placeholder="Enter your question here..."),
    outputs="text",
    title="Medical Question Answering",
    description="Ask questions about medical topics and get answers from our AI model."
)



In [39]:
iface.launch()

Rerunning server... use `close()` to stop if you need to change `launch()` parameters.
----
It looks like you are running Gradio on a hosted a Jupyter notebook. For the Gradio app to work, sharing must be enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://ecefae7ece90590802.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


