# LoRA Fine-Tuning of Qwen2.5-3B-Instruct for a Medical Study Assistant
This notebook demonstrates parameter-efficient fine-tuning of Qwen2.5-3B-Instruct on a medical dataset using LoRA and SFTTrainer, with validation, early stopping, and TensorBoard monitoring.

In [None]:
# Section 1: Install Required Libraries
# IPython shell escape: runs pip and upgrades (-U) the fine-tuning stack used by
# the cells below (transformers, trl, peft, accelerate, bitsandbytes).
!pip install -U transformers trl peft accelerate bitsandbytes


In [None]:
# Section 2: Import Libraries and Check Environment
import torch
import transformers
import trl
import peft
import bitsandbytes as bnb
import os

# Report GPU visibility first, then the version (and install location where
# useful) of each library the notebook depends on.
print(f'Torch CUDA available: {torch.cuda.is_available()}')
print(f'CUDA device count: {torch.cuda.device_count()}')
print(f'Transformers version: {transformers.__version__}')
print(f'Transformers path: {transformers.__file__}')
print(f'trl version: {trl.__version__}')
print(f'peft version: {peft.__version__}')
print(f'bitsandbytes version: {bnb.__version__}')
print(f'bitsandbytes path: {bnb.__file__}')


In [None]:
# Section 3: Load and Inspect Dataset
# json is imported here so the cell works when the notebook is run top to bottom
# (the only other import of json sits in the very last cell).
import json

dataset_path = '/kaggle/input/data01/data/medical_dataset_kaggle.jsonl'  # Update path as needed

# JSONL: one JSON object per line. The context manager closes the file handle,
# and blank lines (e.g. a trailing newline at EOF) are skipped instead of
# crashing json.loads.
with open(dataset_path, 'r', encoding='utf-8') as dataset_file:
    data = [json.loads(line) for line in dataset_file if line.strip()]

# Fail with a clear message rather than an IndexError on data[0] below.
if not data:
    raise ValueError(f'No examples found in {dataset_path}')

print(f'Loaded {len(data)} examples')
print('Sample:', data[0])


In [None]:
# Section 4: Load Model and Tokenizer with Quantization
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

model_name = 'Qwen/Qwen2.5-3B-Instruct'

# bitsandbytes configuration requesting 8-bit weight loading.
quant_config = BitsAndBytesConfig(load_in_8bit=True)

tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)

# Keyword arguments for the model load, kept together so they are easy to tweak.
model_load_kwargs = {
    'trust_remote_code': True,
    'quantization_config': quant_config,
    'device_map': 'auto',
}
model = AutoModelForCausalLM.from_pretrained(model_name, **model_load_kwargs)

# Device of the first parameter, as a quick check of where the model landed.
print(f'Model device: {next(model.parameters()).device}')


In [None]:
# Section 5: Optimize GPU/CPU Performance
import torch

# cuDNN switches: keep cuDNN enabled and turn on its benchmark mode.
torch.backends.cudnn.enabled = True
torch.backends.cudnn.benchmark = True

# Summarise where the model lives and which CUDA device is active.
print(f'Model device: {next(model.parameters()).device}')
print(f'CUDA available: {torch.cuda.is_available()}')
print(f'CUDA device count: {torch.cuda.device_count()}')
print(f'Current CUDA device: {torch.cuda.current_device()}')
active_device = torch.cuda.current_device()
print(f'Device name: {torch.cuda.get_device_name(active_device)}')

def print_gpu_utilization():
    """Print whatever text running ``nvidia-smi`` in a shell produces."""
    from subprocess import getoutput

    smi_report = getoutput('nvidia-smi')
    print(smi_report)


print_gpu_utilization()


In [None]:
# Section 6: Format and Tokenize Data
from datasets import Dataset

def format_example(example):
    """Convert one raw record into a prompt/completion pair.

    Args:
        example: Mapping with 'instruction' and 'output' keys and an optional
            'input' key.

    Returns:
        A dict with 'prompt' (the instruction, followed by a newline and the
        input when the input is present and truthy) and 'completion' (the
        record's output).
    """
    instruction = example['instruction']
    extra_context = example.get('input')
    if extra_context:
        prompt = instruction + '\n' + extra_context
    else:
        prompt = instruction
    return {'prompt': prompt, 'completion': example['output']}

# Normalise every raw record into a prompt/completion pair.
train_data = list(map(format_example, data))

# Fall back to the EOS token for padding when the tokenizer defines no pad token.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

def tokenize_function(example):
    """Tokenize one prompt/completion pair into fixed-length training features.

    Args:
        example: Mapping with string values under 'prompt' and 'completion'.

    Returns:
        The tokenizer output (truncated/padded to 1024 tokens) with an added
        'labels' entry: a copy of 'input_ids' in which padded positions are
        replaced by -100 so they are ignored by the loss.
    """
    # Terminate the sample with EOS so the model still learns where an answer
    # ends once padding no longer contributes to the loss.
    eos = tokenizer.eos_token or ''
    text = example['prompt'] + '\n' + example['completion'] + eos
    result = tokenizer(
        text,
        truncation=True,
        max_length=1024,
        padding='max_length'
    )
    attention_mask = result.get('attention_mask')
    if attention_mask is None:
        # No mask available: keep the plain copy of the token ids.
        result['labels'] = list(result['input_ids'])
    else:
        # Every sample is padded to max_length; without masking, the loss would
        # be dominated by pad tokens instead of the actual answer text.
        result['labels'] = [
            token_id if keep else -100
            for token_id, keep in zip(result['input_ids'], attention_mask)
        ]
    return result

# Wrap the formatted records in a Dataset and run the tokenizer over each row.
train_dataset = Dataset.from_list(train_data)
tokenized_dataset = train_dataset.map(tokenize_function)

# Show the first tokenized row as a sanity check.
first_tokenized_row = tokenized_dataset[0]
print(first_tokenized_row)


In [None]:
# Section 7: Apply LoRA Configuration
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training

# The base model was loaded with 8-bit quantization. PEFT's quantization guide
# asks for the model to be passed through prepare_model_for_kbit_training before
# adapters are attached (it freezes the base weights and prepares the remaining
# non-quantized parameters for stable training). Gradient checkpointing is left
# off here so the notebook's memory/speed profile is otherwise unchanged.
model = prepare_model_for_kbit_training(model, use_gradient_checkpointing=False)

# Rank-8 adapters on the attention query/value projections only.
lora_config = LoraConfig(
    r=8,
    lora_alpha=16,
    target_modules=['q_proj', 'v_proj'],
    lora_dropout=0.05,
    bias='none',
    task_type='CAUSAL_LM'
)
model = get_peft_model(model, lora_config)


In [None]:
# Section 8: Split Dataset and Set Training Arguments
from transformers import TrainingArguments, EarlyStoppingCallback

# Hold out 10% of the tokenized examples for validation (fixed seed so the
# split is the same on every run).
split = tokenized_dataset.train_test_split(test_size=0.1, seed=42)
train_dataset = split['train']
eval_dataset = split['test']

# Logging, evaluation and checkpointing all happen once per epoch.
# - load_best_model_at_end requires the save strategy to match the evaluation
#   strategy; with the default step-based saving TrainingArguments raises a
#   ValueError, so save_strategy is set to 'epoch' explicitly.
# - eval_strategy is the current name of the former evaluation_strategy
#   argument (Section 1 installs the latest transformers release).
# - logging_steps / save_steps are omitted because they have no effect once the
#   corresponding strategies are 'epoch'.
training_args = TrainingArguments(
    output_dir='./results',
    per_device_train_batch_size=1,
    gradient_accumulation_steps=2,
    num_train_epochs=10,
    learning_rate=2e-4,
    fp16=True,
    logging_strategy='epoch',
    eval_strategy='epoch',
    save_strategy='epoch',
    report_to='tensorboard',
    load_best_model_at_end=True,
    metric_for_best_model='eval_loss',
    greater_is_better=False
)

# Stop when eval_loss has not improved for 2 consecutive evaluations.
early_stopping = EarlyStoppingCallback(early_stopping_patience=2)


In [None]:
# Section 9: Trainer Setup and Training (with Validation and Early Stopping)
import os  # not needed by this cell itself; kept because the export cell uses os.path
from trl import SFTTrainer

trainer = SFTTrainer(
    model=model,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    args=training_args,
    callbacks=[early_stopping],
)

# TensorBoard instructions: printed before training starts so the hint is
# visible while the run is still in progress.
print('To monitor progress, run: %load_ext tensorboard; %tensorboard --logdir ./results')

trainer.train()

# Actually write the trained model to the output directory before reporting it
# as saved (previously the message was printed without anything being saved).
trainer.save_model(training_args.output_dir)

if trainer.is_world_process_zero():
    tokenizer.save_pretrained(training_args.output_dir)
    print(f'Model and tokenizer saved to {training_args.output_dir}')


In [None]:
# Section 10: Save Fine-Tuned Model and Tokenizer
export_dir = '/kaggle/working/finetuned-qwen-medassist'

# Write the model first, then the tokenizer, into the same export directory.
for artifact in (model, tokenizer):
    artifact.save_pretrained(export_dir)
print(f'Model saved to {export_dir}')

# Confirm that the export directory is actually present on disk.
export_status = (
    'Exported model directory exists and is ready for download or further use.'
    if os.path.exists(export_dir)
    else 'Warning: Model directory not found. Check for errors above.'
)
print(export_status)


In [None]:
# Section 11: Test Inference with Fine-Tuned Model
prompt = 'Summarize the key diagnostic criteria for tuberculosis.'

# Put the model in evaluation mode so dropout layers (including the LoRA
# dropout configured in Section 7) are inactive while generating.
model.eval()

# Tokenize onto the model's device, generate up to 256 new tokens, and decode
# the full sequence (prompt plus continuation) without special tokens.
inputs = tokenizer(prompt, return_tensors='pt').to(model.device)
outputs = model.generate(**inputs, max_new_tokens=256)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))


In [None]:
import json