In [3]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [4]:
%%capture

!pip install unsloth
!pip install --force-reinstall --no-cache-dir --no-deps git+https://github.com/unslothai/unsloth.git # Also get the latest version Unsloth!

# Import Libraries

In [5]:
# Modules for fine-tuning
from unsloth import FastLanguageModel
import torch # Import PyTorch
from trl import SFTTrainer # Trainer for supervised fine-tuning (SFT)
from unsloth import is_bfloat16_supported # Checks if the hardware supports bfloat16 precision
# Hugging Face modules
from huggingface_hub import login # Lets you login to API
from transformers import TrainingArguments # Defines training hyperparameters
from datasets import load_dataset # Lets you load fine-tuning datasets
# Import weights and biases
import wandb
# Import kaggle secrets
from kaggle_secrets import UserSecretsClient

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.


2025-04-21 13:55:30.192326: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1745243730.474514      31 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1745243730.554257      31 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


Unsloth: Failed to patch Gemma3ForConditionalGeneration.
🦥 Unsloth Zoo will now patch everything to make training faster!


In [7]:
# Initialize Hugging Face & WnB tokens
user_secrets = UserSecretsClient() # from kaggle_secrets import UserSecretsClient
hugging_face_token = user_secrets.get_secret("HUGGINGFACE_TOKEN")
wnb_token = user_secrets.get_secret("?Api_Key")

# Login to Hugging Face
login(hugging_face_token) # from huggingface_hub import login

# Login to WnB
wandb.login(key=wnb_token) # import wandb
run = wandb.init(
    project='Fine-tune-DeepSeek-R1-Distill-Llama-8B on Medical COT Dataset_YouTube Walkthrough', 
    job_type="training", 
    anonymous="allow"
)

[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mlolowaelmo456[0m ([33mlolowaelmo456-cairo-university[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.


# Load the Model

In [8]:
import torch
from unsloth import FastLanguageModel

model,tokenizer=FastLanguageModel.from_pretrained(
    model_name='unsloth/DeepSeek-R1-Distill-Llama-8B',
    max_seq_length=2048,
    dtype=None,
    load_in_4bit=True
)

==((====))==  Unsloth 2025.3.19: Fast Llama patching. Transformers: 4.51.1.
   \\   /|    Tesla T4. Num GPUs = 2. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.6.0+cu124. CUDA: 7.5. CUDA Toolkit: 12.4. Triton: 3.2.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.29.post3. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


model.safetensors:   0%|          | 0.00/5.96G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/236 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/53.0k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/17.2M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/483 [00:00<?, ?B/s]

In [9]:
print(model)

LlamaForCausalLM(
  (model): LlamaModel(
    (embed_tokens): Embedding(128256, 4096, padding_idx=128004)
    (layers): ModuleList(
      (0): LlamaDecoderLayer(
        (self_attn): LlamaAttention(
          (q_proj): Linear4bit(in_features=4096, out_features=4096, bias=False)
          (k_proj): Linear4bit(in_features=4096, out_features=1024, bias=False)
          (v_proj): Linear4bit(in_features=4096, out_features=1024, bias=False)
          (o_proj): Linear4bit(in_features=4096, out_features=4096, bias=False)
          (rotary_emb): LlamaRotaryEmbedding()
        )
        (mlp): LlamaMLP(
          (gate_proj): Linear4bit(in_features=4096, out_features=14336, bias=False)
          (up_proj): Linear4bit(in_features=4096, out_features=14336, bias=False)
          (down_proj): Linear4bit(in_features=14336, out_features=4096, bias=False)
          (act_fn): SiLU()
        )
        (input_layernorm): LlamaRMSNorm((4096,), eps=1e-05)
        (post_attention_layernorm): LlamaRMSNorm((409

# Test Model before Finetune

In [13]:
from unsloth import apply_chat_template

# Define a system prompt under prompt_style 
prompt_style = """Below is an instruction that describes a task, paired with an input that provides further context. 
Write a response that appropriately completes the request. 
Before answering, think carefully about the question and create a step-by-step chain of thoughts to ensure a logical and accurate response.

### Instruction:
You are a medical expert with advanced knowledge in clinical reasoning, diagnostics, and treatment planning. 
Please answer the following medical question. 

### Question:
{}

### Response:
<think>{}"""

In [17]:
question = """A 61-year-old woman with a long history of involuntary urine loss during activities like coughing or 
              sneezing but no leakage at night undergoes a gynecological exam and Q-tip test. Based on these findings, 
              what would cystometry most likely reveal about her residual volume and detrusor contractions?"""

question=prompt_style.format(question,"")
print(question)

Below is an instruction that describes a task, paired with an input that provides further context. 
Write a response that appropriately completes the request. 
Before answering, think carefully about the question and create a step-by-step chain of thoughts to ensure a logical and accurate response.

### Instruction:
You are a medical expert with advanced knowledge in clinical reasoning, diagnostics, and treatment planning. 
Please answer the following medical question. 

### Question:
A 61-year-old woman with a long history of involuntary urine loss during activities like coughing or 
              sneezing but no leakage at night undergoes a gynecological exam and Q-tip test. Based on these findings, 
              what would cystometry most likely reveal about her residual volume and detrusor contractions?

### Response:
<think>


In [26]:
FastLanguageModel.for_inference(model) #improves speed and efficiency

input_tokens=tokenizer([question],return_tensors='pt').to('cuda')
output=model.generate(
    input_ids=input_tokens.input_ids,
    attention_mask=input_tokens.attention_mask,
    max_new_tokens=300,
    use_cache=True,
)

response=tokenizer.batch_decode(output)
print(response[0].split('### Response:')[1])


<think>
Okay, so I need to figure out what cystometry would show for this 61-year-old woman. Let me start by going through the information given.

She has a history of involuntary urine loss when she coughs or sneezes, but she doesn't leak at night. That makes me think about possible causes. Involuntary leakage during these activities is often related to stress incontinence, which usually involves the urethra and bladderneck. But since she doesn't leak at night, it's less likely to be related to genuine urinary retention or overactive bladder, which typically affects nighttime.

She underwent a gynecological exam and a Q-tip test. I'm not exactly sure what the Q-tip test entails, but I think it's used to check for urethral obstruction. If the Q-tip remains in the urethra after coughing, it might indicate obstruction, which could be due to a urethral stricture, tumor, or other narrowing. But if it falls out easily, it suggests there's no obstruction.

Now, considering her symptoms, if 

# Data Setup

In [27]:
# Updated training prompt style to add </think> tag 
train_prompt_style = """Below is an instruction that describes a task, paired with an input that provides further context. 
Write a response that appropriately completes the request. 
Before answering, think carefully about the question and create a step-by-step chain of thoughts to ensure a logical and accurate response.

### Instruction:
You are a medical expert with advanced knowledge in clinical reasoning, diagnostics, and treatment planning. 
Please answer the following medical question. 

### Question:
{}

### Response:
<think>
{}
</think>
{}"""


In [29]:
data= load_dataset("FreedomIntelligence/medical-o1-reasoning-SFT","en", split = "train[0:500]",trust_remote_code=True) # Keep only first 500 rows

In [30]:
data

Dataset({
    features: ['Question', 'Complex_CoT', 'Response'],
    num_rows: 500
})

In [35]:
print(data['Question'][0],'\n\n\n',data['Complex_CoT'][0],'\n\n\n',data['Response'][0])

A 61-year-old woman with a long history of involuntary urine loss during activities like coughing or sneezing but no leakage at night undergoes a gynecological exam and Q-tip test. Based on these findings, what would cystometry most likely reveal about her residual volume and detrusor contractions? 


 Okay, let's think about this step by step. There's a 61-year-old woman here who's been dealing with involuntary urine leakages whenever she's doing something that ups her abdominal pressure like coughing or sneezing. This sounds a lot like stress urinary incontinence to me. Now, it's interesting that she doesn't have any issues at night; she isn't experiencing leakage while sleeping. This likely means her bladder's ability to hold urine is fine when she isn't under physical stress. Hmm, that's a clue that we're dealing with something related to pressure rather than a bladder muscle problem. 

The fact that she underwent a Q-tip test is intriguing too. This test is usually done to assess 

>**Next step is to structure the fine-tuning dataset according to train prompt style—why?**
>
> - Each question is paired with chain-of-thought reasoning and the final response.
> - Ensures every training example follows a consistent pattern.
> - Prevents the model from continuing beyond the expected response lengt by adding the EOS token.

In [36]:
eos_token=tokenizer.eos_token
eos_token

'<｜end▁of▁sentence｜>'

In [39]:
def create_prompt(thinking,question,answer):
  return train_prompt_style.format(question,thinking,answer)+eos_token

In [62]:
from datasets import Dataset

prompts = []
for row in data:
    question = row["Question"]
    thinking = row["Complex_CoT"]
    answer = row["Response"]
    prompts.append(create_prompt(thinking, question, answer))

# Create a new dataset dictionary or save as needed
dataset = {"text": prompts}

# Assuming `dataset` is currently a dict with a "text" key
dataset = Dataset.from_dict(dataset)

In [63]:
print(dataset['text'][0])

Below is an instruction that describes a task, paired with an input that provides further context. 
Write a response that appropriately completes the request. 
Before answering, think carefully about the question and create a step-by-step chain of thoughts to ensure a logical and accurate response.

### Instruction:
You are a medical expert with advanced knowledge in clinical reasoning, diagnostics, and treatment planning. 
Please answer the following medical question. 

### Question:
A 61-year-old woman with a long history of involuntary urine loss during activities like coughing or sneezing but no leakage at night undergoes a gynecological exam and Q-tip test. Based on these findings, what would cystometry most likely reveal about her residual volume and detrusor contractions?

### Response:
<think>
Okay, let's think about this step by step. There's a 61-year-old woman here who's been dealing with involuntary urine leakages whenever she's doing something that ups her abdominal pressu

# PEFT setup

In [58]:
from peft import LoraConfig, get_peft_model, TaskType

config=LoraConfig(
    r=16,
    lora_alpha=32,
    lora_dropout=0,
    bias=None,
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj",'gate_proj','up_proj','down_proj'],
)

In [59]:
model=FastLanguageModel.get_peft_model(
    model,
    loftq_config =config,
    use_gradient_checkpointing='unsloth',
    use_rslora = False,
    random_state=42,
)

TypeError: Unsloth: Your model already has LoRA adapters. Your new parameters are different.

In [60]:
model.print_trainable_parameters()

trainable params: 41,943,040 || all params: 8,072,204,288 || trainable%: 0.5196


# Train the model

🧠 Summary
* Use Trainer if you're training classification models, regressors, token classifiers, or anything more general.

* Use SFTTrainer when you're fine-tuning instruction-following LLMs (like LLaMA, Mistral, etc.) on text-to-text datasets.



In [64]:
args=TrainingArguments(
    per_device_train_batch_size=2,
    gradient_accumulation_steps=4,
    num_train_epochs=1,
    max_steps=60,
    warmup_steps=5,
    learning_rate=2e-4,
    fp16 = not is_bfloat16_supported(),
    bf16 = is_bfloat16_supported(),
    logging_steps = 10,
    optim = "adamw_8bit",
    weight_decay = 0.01,
    lr_scheduler_type = "linear",
    seed = 3407,
    output_dir = "outputs",
    report_to = "none", # Use this for WandB etc    
)

trainer=SFTTrainer(
    model=model,
    train_dataset=dataset,
    tokenizer=tokenizer,
    dataset_text_field='text',
    max_seq_length = 2048,
    dataset_num_proc = 2, # Uses 2 CPU threads to speed up data preprocessing
    args=args,
)

Unsloth: Tokenizing ["text"] (num_proc=2):   0%|          | 0/500 [00:00<?, ? examples/s]

In [65]:
history=trainer.train()

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 500 | Num Epochs = 2 | Total steps = 60
O^O/ \_/ \    Batch size per device = 4 | Gradient accumulation steps = 4
\        /    Data Parallel GPUs = 1 | Total batch size (4 x 4 x 1) = 16
 "-____-"     Trainable parameters = 41,943,040/8,000,000,000 (0.52% trained)


Unsloth: Will smartly offload gradients to save VRAM!


Step,Training Loss
10,1.9153
20,1.4213
30,1.368
40,1.3308
50,1.2954
60,1.3274


In [66]:
# Save the fine-tuned model
wandb.finish()

# Run model after fine-tuning

In [71]:
question="""A 61-year-old woman with a long history of involuntary urine loss during activities like coughing or 
              sneezing but no leakage at night undergoes a gynecological exam and Q-tip test. Based on these findings, 
              what would cystometry most likely reveal about her residual volume and detrusor contractions?"""
question=prompt_style.format(question,"")

input_tokens=tokenizer([question],return_tensors='pt').to('cuda')
output=model.generate(
    input_ids=input_tokens.input_ids,
    attention_mask=input_tokens.attention_mask,
    max_new_tokens=1000,
    use_cache=True,
)

response=tokenizer.batch_decode(output)
print(response[0].split('### Response:')[1])


<think>
Okay, let's think about this. We've got a 61-year-old woman with a history of involuntary urine loss. That usually means she's experiencing some kind of bladder control issue, like a weak pelvic floor or maybe some nerve damage affecting her bladder control. But she's not losing urine at night, which is interesting. It suggests that she's not having any leakage when she's lying down, maybe because her bladder doesn't get full during the night.

Now, she's undergoing a gynecological exam and a Q-tip test. The Q-tip test is a standard way to check for urethral obstruction. It involves inserting a catheter and then pulling it back to see how far it can be withdrawn without resistance. If it's easy to pull out, it usually means there's no obstruction. If it's hard, we might suspect some kind of obstruction or blockage.

Given that she's got a history of involuntary loss, we're looking for a bladder that might not be contracting well, especially when she coughs or sneezes. That sou

In [72]:
question="""A 21-year-old woman with coughing and sneezing"""
question=prompt_style.format(question,"")

input_tokens=tokenizer([question],return_tensors='pt').to('cuda')
output=model.generate(
    input_ids=input_tokens.input_ids,
    attention_mask=input_tokens.attention_mask,
    max_new_tokens=1000,
    use_cache=True,
)

response=tokenizer.batch_decode(output)
print(response[0].split('### Response:')[1])


<think>
Alright, so we've got a 21-year-old woman who's been coughing and sneezing. That sounds like it could be a respiratory infection, like maybe a cold or flu. She's coughing, which is pretty common with those kinds of infections. Sneezing also fits right in with respiratory issues, especially if it's something like the flu or rhinosinusitis.

Hmm, I should think about what's going on with her. The sneezing and coughing could mean she's dealing with something like rhinosinusitis, which can happen when the nasal passages get infected. That can spread down to the throat and cause coughing. It's not uncommon for people to have both sneezing and coughing when they have a sinus infection.

Now, let's think about her symptoms. She's coughing and sneezing, which are pretty standard for a respiratory infection. But there are other things we need to consider. Are there any other symptoms that could help us figure out what's going on? Maybe she's also having a fever or a sore throat? That w