In [None]:
!pip install transformers datasets peft trl bitsandbytes accelerate

Collecting trl
  Downloading trl-0.23.0-py3-none-any.whl.metadata (11 kB)
Collecting bitsandbytes
  Downloading bitsandbytes-0.47.0-py3-none-manylinux_2_24_x86_64.whl.metadata (11 kB)
Downloading trl-0.23.0-py3-none-any.whl (564 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m564.7/564.7 kB[0m [31m16.9 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading bitsandbytes-0.47.0-py3-none-manylinux_2_24_x86_64.whl (61.3 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.3/61.3 MB[0m [31m15.2 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: bitsandbytes, trl
Successfully installed bitsandbytes-0.47.0 trl-0.23.0


In [None]:
from datasets import load_dataset

# Fetch the medical Q&A dataset from the Hugging Face Hub.
ds = load_dataset("eswardivi/medical_qa")

# Drop the existing "input" column, then expose "instruction" under that name.
ds = ds.remove_columns(["input"])
ds = ds.rename_column("instruction", "input")

# Seeded, shuffled 6000/307 partition of the "train" split.
train_test_split = ds["train"].train_test_split(
    train_size=6000,
    test_size=307,
    shuffle=True,
    seed=42,
)
new_ds = {split: train_test_split[split] for split in ("train", "test")}

# Verify the changes
print(new_ds)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


README.md:   0%|          | 0.00/21.0 [00:00<?, ?B/s]

data_merged.json: 0.00B [00:00, ?B/s]

Generating train split:   0%|          | 0/6307 [00:00<?, ? examples/s]

{'train': Dataset({
    features: ['input', 'output'],
    num_rows: 6000
}), 'test': Dataset({
    features: ['input', 'output'],
    num_rows: 307
})}


In [None]:
# Show one training record to sanity-check the "input"/"output" columns.
new_ds["train"][2]

{'input': "I'm aware that people are recommended to stay away from taking ibuprofen, cortisone, and other anti-inflammatory drugs. But does this also apply to supplements? For example, St. John's Wort, green tea, and turmeric are all supposed to have anti-inflammatory properties. Would it be better to avoid or limit their consumption at this time?",
 'output': "Most health care groups have not made any recommendations regarding ibuprofen or other NSAIDs. We simply don't have the information to be able to recommend one way or another. If you have been taking these supplements, it should be fine to continue taking them."}

In [None]:
#!pip install bitsandbytes==0.47.0 --force-reinstall

# Initializing the model and tokenizer:

In [None]:
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

model_name = "Qwen/Qwen2.5-0.5B"  # Consider upgrading to "Qwen/Qwen2.5-1.5B" for better performance if VRAM allows

# Qwen2.5 is implemented natively in transformers, so `trust_remote_code=True`
# is not needed. Leaving it off avoids executing arbitrary Python shipped in a
# Hub repository.
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",  # GPU if available
    # float16 when CUDA is present, float32 otherwise.
    dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
)

tokenizer_config.json: 0.00B [00:00, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

config.json:   0%|          | 0.00/681 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/988M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/138 [00:00<?, ?B/s]

In [None]:
#pip install transformers datasets peft trl bitsandbytes accelerate

In [None]:
from peft import LoraConfig, get_peft_model
from trl import SFTTrainer, SFTConfig

# Defining the prompt template:

In [None]:
def prompt_instruction_format(sample):
    """Render one dataset record as a single training prompt string.

    The prompt has three sections separated by blank lines: a fixed
    "### Instruction:" block, an "### Input:" block holding
    ``sample['input']``, and a "### Response:" block holding
    ``sample['output']``.

    Args:
        sample: Mapping with ``'input'`` and ``'output'`` keys.

    Returns:
        The assembled prompt as a ``str``.
    """
    instruction_section = (
        "### Instruction:\n"
        "You are a medical expert. Analyze the patient's query and provide a "
        "detailed, accurate diagnosis, including recommended tests and treatments."
    )
    input_section = f"### Input:\n{sample['input']}"
    response_section = f"### Response:\n{sample['output']}"
    return "\n\n".join((instruction_section, input_section, response_section))

# Finetuning the model with LoRA:

In [None]:
# LoRA hyperparameters, collected in one place and then handed to LoraConfig.
lora_settings = dict(
    r=8,                    # adapter rank
    lora_alpha=16,          # scaling factor
    lora_dropout=0.1,       # dropout applied on the adapter path
    bias="none",            # no bias adaptation
    task_type="CAUSAL_LM",  # causal language modeling
    # Names of the modules that receive an adapter.
    target_modules=[
        "q_proj",
        "k_proj",
        "v_proj",
        "o_proj",
        "gate_proj",
        "up_proj",
        "down_proj",
    ],
)
peft_config = LoraConfig(**lora_settings)

In [None]:
from peft import PeftModel

# Attach the LoRA adapters to the base model. The isinstance guard keeps this
# cell idempotent: without it, re-running the cell would wrap an
# already-wrapped model a second time.
if not isinstance(model, PeftModel):
    model = get_peft_model(model, peft_config)

In [None]:
from trl import SFTTrainer, SFTConfig

# Training hyperparameters for supervised fine-tuning.
sft_args = SFTConfig(
    output_dir="./output",
    num_train_epochs=3,
    per_device_train_batch_size=4,  # adjust based on VRAM
    per_device_eval_batch_size=4,
    gradient_accumulation_steps=4,  # effective train batch size: 4 * 4 = 16
    learning_rate=1e-4,
    warmup_steps=10,
    logging_steps=10,
    save_strategy="steps",
    save_steps=100,
    eval_strategy="steps",
    eval_steps=100,  # evaluate every 100 steps
    fp16=True,  # mixed precision
    optim="adamw_8bit",  # 8-bit AdamW optimizer
    gradient_checkpointing=True,  # save VRAM
    packing=True,  # concatenate examples for efficiency
    max_length=1024,
    report_to="none",
)

# `model` already carries the LoRA adapters (it was wrapped with
# get_peft_model earlier), so `peft_config` is deliberately NOT passed here.
# Supplying both would let the trainer build its own adapter wrapper, so the
# notebook-level `model` would no longer be guaranteed to be the trained object.
trainer = SFTTrainer(
    model=model,
    processing_class=tokenizer,  # `processing_class` is the current name of the former `tokenizer` argument
    train_dataset=new_ds["train"],
    eval_dataset=new_ds["test"],
    formatting_func=prompt_instruction_format,
    args=sft_args,
)



Applying formatting function to train dataset:   0%|          | 0/6000 [00:00<?, ? examples/s]

Adding EOS to train dataset:   0%|          | 0/6000 [00:00<?, ? examples/s]

Tokenizing train dataset:   0%|          | 0/6000 [00:00<?, ? examples/s]

Packing train dataset:   0%|          | 0/6000 [00:00<?, ? examples/s]

Applying formatting function to eval dataset:   0%|          | 0/307 [00:00<?, ? examples/s]

Adding EOS to eval dataset:   0%|          | 0/307 [00:00<?, ? examples/s]

Tokenizing eval dataset:   0%|          | 0/307 [00:00<?, ? examples/s]

Packing eval dataset:   0%|          | 0/307 [00:00<?, ? examples/s]

In [None]:
# Run fine-tuning (saving happens in later cells). The returned TrainOutput is
# echoed as the cell's result.
train_result = trainer.train()
train_result



The tokenizer has new PAD/BOS/EOS tokens that differ from the model config and generation config. The model config and generation config were aligned accordingly, being updated with the tokenizer's values. Updated tokens: {'bos_token_id': None, 'pad_token_id': 151643}.


Step,Training Loss,Validation Loss,Entropy,Num Tokens,Mean Token Accuracy
100,1.9007,2.040404,2.027679,1606613.0,0.589336


TrainOutput(global_step=156, training_loss=2.052690408168695, metrics={'train_runtime': 3461.953, 'train_samples_per_second': 0.715, 'train_steps_per_second': 0.045, 'total_flos': 5426519583403008.0, 'train_loss': 2.052690408168695, 'entropy': 1.9654763574185579, 'num_tokens': 2496348.0, 'mean_token_accuracy': 0.5922537795875383, 'epoch': 3.0})

In [None]:
# Step 8: Save the fine-tuned model and tokenizer
# Save through `trainer.model`: that is the instance the trainer optimized,
# and it is not guaranteed to be the same object as the notebook-level `model`.
trainer.model.save_pretrained("./fine_tuned_qwen_medical_qa")
tokenizer.save_pretrained("./fine_tuned_qwen_medical_qa")

('./fine_tuned_qwen_medical_qa/tokenizer_config.json',
 './fine_tuned_qwen_medical_qa/special_tokens_map.json',
 './fine_tuned_qwen_medical_qa/chat_template.jinja',
 './fine_tuned_qwen_medical_qa/vocab.json',
 './fine_tuned_qwen_medical_qa/merges.txt',
 './fine_tuned_qwen_medical_qa/added_tokens.json',
 './fine_tuned_qwen_medical_qa/tokenizer.json')

In [None]:
from google.colab import drive

# Mount Google Drive and save a copy of the model and tokenizer there.
drive.mount('/content/drive')
output_dir = "/content/drive/MyDrive/fine_tuned_qwen_medical_qa"
# Save through `trainer.model`: that is the instance the trainer optimized,
# and it is not guaranteed to be the same object as the notebook-level `model`.
trainer.model.save_pretrained(output_dir)
tokenizer.save_pretrained(output_dir)

Mounted at /content/drive


('/content/drive/MyDrive/fine_tuned_qwen_medical_qa/tokenizer_config.json',
 '/content/drive/MyDrive/fine_tuned_qwen_medical_qa/special_tokens_map.json',
 '/content/drive/MyDrive/fine_tuned_qwen_medical_qa/chat_template.jinja',
 '/content/drive/MyDrive/fine_tuned_qwen_medical_qa/vocab.json',
 '/content/drive/MyDrive/fine_tuned_qwen_medical_qa/merges.txt',
 '/content/drive/MyDrive/fine_tuned_qwen_medical_qa/added_tokens.json',
 '/content/drive/MyDrive/fine_tuned_qwen_medical_qa/tokenizer.json')

In [None]:
#!pip install huggingface_hub

In [None]:

from huggingface_hub import notebook_login

# Authenticate this session with the Hugging Face Hub via the interactive
# login widget, so the upload cell can write to the Hub.
notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [None]:
import os
from huggingface_hub import HfApi

# 1. Write the model and tokenizer to a fresh local directory
local_dir = "./fine_tuned_qwen_medical_qa_updated"
os.makedirs(local_dir, exist_ok=True)

# Save through `trainer.model`: that is the instance the trainer optimized,
# and it is not guaranteed to be the same object as the notebook-level `model`.
trainer.model.save_pretrained(local_dir)
tokenizer.save_pretrained(local_dir)

# 2. Create/ensure the Hugging Face repo exists
api = HfApi()
repo_id = "PavansaiGundaram/fine_tuned_qwen_medical_qa_LoRA_version"  # your HF username/repo
api.create_repo(repo_id=repo_id, repo_type="model", exist_ok=True)

# 3. Push the new local directory to HF Hub
api.upload_folder(
    folder_path=local_dir,
    repo_id=repo_id,
    repo_type="model",
    commit_message="Upload fine-tuned Qwen2.5-0.5B (updated version)"
)

print(f"✅ Model pushed to https://huggingface.co/{repo_id}")


Processing Files (0 / 0)                : |          |  0.00B /  0.00B            

New Data Upload                         : |          |  0.00B /  0.00B            

  ...n_medical_qa_updated/tokenizer.json: 100%|##########| 11.4MB / 11.4MB            

  ...a_updated/adapter_model.safetensors:   6%|6         | 1.11MB / 17.6MB            

✅ Model pushed to https://huggingface.co/PavansaiGundaram/fine_tuned_qwen_medical_qa_LoRA_version
