In [1]:
%%capture
!pip install unsloth wandb rouge-score bitsandbytes tyro trl --quiet

In [2]:
from unsloth import FastLanguageModel, is_bfloat16_supported
import torch
import re
from trl import SFTTrainer
from datasets import load_dataset, DatasetDict
from huggingface_hub import login as hf_login
import wandb
from transformers import TrainingArguments, Trainer
from rouge_score import rouge_scorer
from kaggle_secrets import UserSecretsClient

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.


2025-05-27 15:46:57.135882: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1748360817.310531      35 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1748360817.363508      35 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


🦥 Unsloth Zoo will now patch everything to make training faster!


In [3]:
# Load secrets from Kaggle
secret = UserSecretsClient()
hf_token = secret.get_secret("HF_Token")
wandb_token = secret.get_secret("WANDB_Token")

# Login to Hugging Face and Weights & Biases
hf_login(token=hf_token)
wandb.login(key=wandb_token)

[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mstbreeze[0m ([33mstbreeze-arch-technologies[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


True

In [4]:
# Define prompt template
prompt_style = '''Here is an instruction that describes a task, along with an input that provides further context. Write an appropriate response that fulfills the request. Before answering, you must think carefully about the question and create a step-by-step chain of thought to ensure your responses are logical & accurate. Make sure that the chain of thought is written between the think tags and the concluding response is between the response tags (do not overlap). Ensure your tone is professional yet easily understandable.

### Instruction:
You are a medical expert with advanced knowledge in clinical reasoning, diagnostics & treatment planning. Additionally, you are also an expert in communication with advanced skills in conveying responses that are meaningful, respectful & highly valuable. Please answer the following medical question.

### Question: {}

### Response:
<think> {} </think>
<response> {} </response>'''

In [5]:
# Load model and tokenizer
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "unsloth/Llama-3.2-3B-Instruct",
    max_seq_length = 2048,
    dtype = None,
    load_in_4bit = True,
    token = hf_token
)

==((====))==  Unsloth 2025.5.7: Fast Llama patching. Transformers: 4.51.3.
   \\   /|    Tesla P100-PCIE-16GB. Num GPUs = 1. Max memory: 15.888 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.7.0+cu126. CUDA: 6.0. CUDA Toolkit: 12.6. Triton: 3.3.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.30. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


model.safetensors:   0%|          | 0.00/2.35G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/234 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/54.7k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/17.2M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/454 [00:00<?, ?B/s]

In [6]:
# Load dataset
ds = load_dataset("FreedomIntelligence/medical-o1-reasoning-SFT", 'en', split="train")
ds

README.md:   0%|          | 0.00/1.97k [00:00<?, ?B/s]

medical_o1_sft.json:   0%|          | 0.00/58.2M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/19704 [00:00<?, ? examples/s]

Dataset({
    features: ['Question', 'Complex_CoT', 'Response'],
    num_rows: 19704
})

In [7]:
# Format dataset to fit the prompt style
eos = tokenizer.eos_token

def prompt_format (example):
    formatted_prompt = prompt_style.format(example['Question'], example['Complex_CoT'], example['Response']) + eos
    return {'prompt': formatted_prompt}
    
ds_tuned = ds.map(lambda x: prompt_format(x))
ds_tuned.to_json('Prompt Formatted Medical Dataset.jsonl')
print(ds_tuned['prompt'][0])

Map:   0%|          | 0/19704 [00:00<?, ? examples/s]

Creating json from Arrow format:   0%|          | 0/20 [00:00<?, ?ba/s]

Here is an instruction that describes a task, along with an input that provides further context. Write an appropriate response that fulfills the request. Before answering, you must think carefully about the question and create a step-by-step chain of thought to ensure your responses are logical & accurate. Make sure that the chain of thought is written between the think tags and the concluding response is between the response tags (do not overlap). Ensure your tone is professional yet easily understandable.

### Instruction:
You are a medical expert with advanced knowledge in clinical reasoning, diagnostics & treatment planning. Additionally, you are also an expert in communication with advanced skills in conveying responses that are meaningful, respectful & highly valuable. Please answer the following medical question.

### Question: Given the symptoms of sudden weakness in the left arm and leg, recent long-distance travel, and the presence of swollen and tender right lower leg, what 

In [8]:
# Split the dataset into train & test sets
ds_dict = ds_tuned.train_test_split(test_size=100, seed=42, shuffle=True)
ds_train = ds_dict['train']
ds_eval = ds_dict['test']

In [9]:
# Apply LoRA fine tuning to the model
model_lora = FastLanguageModel.get_peft_model(
    model,
    r = 16,
    lora_alpha = 32,
    lora_dropout = 0,
    target_modules = ["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"],
    bias = "none",
    use_gradient_checkpointing = "unsloth",
    random_state = 3407,
    use_rslora = False,
    loftq_config = None
)

Unsloth 2025.5.7 patched 28 layers with 28 QKV layers, 28 O layers and 28 MLP layers.


In [10]:
# Setup training arguments
training_args = TrainingArguments(
    output_dir="Outputs",
    per_device_train_batch_size = 2,
    per_device_eval_batch_size = 2,
    gradient_accumulation_steps = 4,
    warmup_steps = 5,
    max_steps = 50,
    learning_rate = 2e-4,
    num_train_epochs = 1,
    bf16 = is_bfloat16_supported(),
    fp16 = not is_bfloat16_supported(),
    logging_steps = 10,
    optim = 'adamw_8bit',
    weight_decay = 0.01,
    lr_scheduler_type = 'linear',
    seed = 3407,
    report_to = 'wandb',
    run_name = "medical consultor bot"
)

# Initialize fine tuning trainer
trainer = SFTTrainer(
    model = model_lora,
    tokenizer = tokenizer,
    train_dataset = ds_train,
    eval_dataset = ds_eval,
    dataset_text_field = 'prompt',
    max_seq_length = 2048,
    dataset_num_proc = 2,
    args = training_args
)

Unsloth: Tokenizing ["prompt"] (num_proc=2):   0%|          | 0/19604 [00:00<?, ? examples/s]

Unsloth: Tokenizing ["prompt"] (num_proc=2):   0%|          | 0/100 [00:00<?, ? examples/s]

In [11]:
# Start training!
trainer_summary = trainer.train()

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 19,604 | Num Epochs = 1 | Total steps = 50
O^O/ \_/ \    Batch size per device = 2 | Gradient accumulation steps = 4
\        /    Data Parallel GPUs = 1 | Total batch size (2 x 4 x 1) = 8
 "-____-"     Trainable parameters = 24,313,856/3,000,000,000 (0.81% trained)


Unsloth: Will smartly offload gradients to save VRAM!


Step,Training Loss
10,1.8185
20,1.3128
30,1.2669
40,1.2452
50,1.1997


In [12]:
# Save the fine-tuned model
wandb.finish()

0,1
train/epoch,▁▃▄▆██
train/global_step,▁▃▅▆██
train/grad_norm,█▁▁▂▁
train/learning_rate,█▆▅▃▁
train/loss,█▂▂▂▁

0,1
total_flos,5758461346394112.0
train/epoch,0.0204
train/global_step,50.0
train/grad_norm,0.31147
train/learning_rate,0.0
train/loss,1.1997
train_loss,1.36861
train_runtime,1051.4616
train_samples_per_second,0.38
train_steps_per_second,0.048


In [13]:
new_prompt_style = '''Here is an instruction that describes a task, along with an input that provides further context. Write an appropriate response that fulfills the request. Before answering, you must think carefully about the question and create a step-by-step chain of thought to ensure your responses are logical & accurate. Make sure that the chain of thought is written between the think tags and the concluding response is between the response tags (do not overlap). Ensure your tone is professional yet easily understandable.

### Instruction:
You are a medical expert with advanced knowledge in clinical reasoning, diagnostics & treatment planning. Additionally, you are also an expert in communication with advanced skills in conveying responses that are meaningful, respectful & highly valuable. Please answer the following medical question.

### Question: {}

### Response:
<think>'''

In [14]:
# Randomly select 500 entries from the dataset 
sample_ds = ds_tuned.shuffle(seed=42).select(range(50))

# evaluate model function
def model_eval (model, dataset):
    FastLanguageModel.for_inference(model)
    rougeL_scores = []
    scorer = rouge_scorer.RougeScorer(['rougeL'], use_stemmer=True)

    for i in range(len(dataset)):
        question = dataset['Question'][i]
        response = dataset['Response'][i]
        
        inputs = tokenizer([new_prompt_style.format(question)], return_tensors='pt').to('cuda')

        outputs = model.generate(
            input_ids = inputs.input_ids,
            attention_mask = inputs.attention_mask,
            max_new_tokens = 1200,
            use_cache = True,
            eos_token_id = tokenizer.eos_token_id
        )

        decoded = tokenizer.decode(outputs[0], skip_special_tokens=True)
        match = re.search(r"<response>(.*?)</response>", decoded, re.DOTALL)
        predicted = match.group(1).strip() if match else 'Response N/A'

        # Calculate Score
        score = scorer.score(response, predicted)
        rougeL_scores.append(score['rougeL'].fmeasure)

        print(f'Entry {i} complete.')

    avg_score = sum(rougeL_scores) / len(rougeL_scores)
    return avg_score

In [15]:
base_model_score = model_eval(model, sample_ds)

Entry 0 complete.
Entry 1 complete.
Entry 2 complete.
Entry 3 complete.
Entry 4 complete.
Entry 5 complete.
Entry 6 complete.
Entry 7 complete.
Entry 8 complete.
Entry 9 complete.
Entry 10 complete.
Entry 11 complete.
Entry 12 complete.
Entry 13 complete.
Entry 14 complete.
Entry 15 complete.
Entry 16 complete.
Entry 17 complete.
Entry 18 complete.
Entry 19 complete.
Entry 20 complete.
Entry 21 complete.
Entry 22 complete.
Entry 23 complete.
Entry 24 complete.
Entry 25 complete.
Entry 26 complete.
Entry 27 complete.
Entry 28 complete.
Entry 29 complete.
Entry 30 complete.
Entry 31 complete.
Entry 32 complete.
Entry 33 complete.
Entry 34 complete.
Entry 35 complete.
Entry 36 complete.
Entry 37 complete.
Entry 38 complete.
Entry 39 complete.
Entry 40 complete.
Entry 41 complete.
Entry 42 complete.
Entry 43 complete.
Entry 44 complete.
Entry 45 complete.
Entry 46 complete.
Entry 47 complete.
Entry 48 complete.
Entry 49 complete.


In [16]:
lora_model_score = model_eval(model_lora, sample_ds)

Entry 0 complete.
Entry 1 complete.
Entry 2 complete.
Entry 3 complete.
Entry 4 complete.
Entry 5 complete.
Entry 6 complete.
Entry 7 complete.
Entry 8 complete.
Entry 9 complete.
Entry 10 complete.
Entry 11 complete.
Entry 12 complete.
Entry 13 complete.
Entry 14 complete.
Entry 15 complete.
Entry 16 complete.
Entry 17 complete.
Entry 18 complete.
Entry 19 complete.
Entry 20 complete.
Entry 21 complete.
Entry 22 complete.
Entry 23 complete.
Entry 24 complete.
Entry 25 complete.
Entry 26 complete.
Entry 27 complete.
Entry 28 complete.
Entry 29 complete.
Entry 30 complete.
Entry 31 complete.
Entry 32 complete.
Entry 33 complete.
Entry 34 complete.
Entry 35 complete.
Entry 36 complete.
Entry 37 complete.
Entry 38 complete.
Entry 39 complete.
Entry 40 complete.
Entry 41 complete.
Entry 42 complete.
Entry 43 complete.
Entry 44 complete.
Entry 45 complete.
Entry 46 complete.
Entry 47 complete.
Entry 48 complete.
Entry 49 complete.


In [17]:
print(f"\nBase ROUGE-L:\t{base_model_score:.5f}")
print(f"\nLoRA ROUGE-L:\t{lora_model_score:.5f}")


Base ROUGE-L:	0.32806

LoRA ROUGE-L:	0.33147
