# ***Install Relevant Packages***

In [1]:
# install relevant packages

# Install the Unsloth library using pip
!pip install unsloth
# Reinstall Unsloth from GitHub with fresh files
!pip install --force-reinstall --no-cache-dir --no-deps git+https://github.com/unslothai/unsloth.git

# Explanation of flags used in the second command:
# --force-reinstall   → Removes the old version and installs a fresh copy.
# --no-cache-dir      → Prevents using old cached files, ensuring a clean install.
# --no-deps           → Skips installing extra dependencies (only installs Unsloth itself).
# git+https://...     → Downloads and installs the latest version directly from GitHub.

Collecting unsloth
  Downloading unsloth-2025.3.18-py3-none-any.whl.metadata (46 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m46.2/46.2 kB[0m [31m2.0 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting unsloth_zoo>=2025.3.14 (from unsloth)
  Downloading unsloth_zoo-2025.3.16-py3-none-any.whl.metadata (8.0 kB)
Collecting xformers>=0.0.27.post2 (from unsloth)
  Downloading xformers-0.0.29.post3-cp310-cp310-manylinux_2_28_x86_64.whl.metadata (1.0 kB)
Collecting bitsandbytes (from unsloth)
  Downloading bitsandbytes-0.45.3-py3-none-manylinux_2_24_x86_64.whl.metadata (5.0 kB)
Collecting triton>=3.0.0 (from unsloth)
  Downloading triton-3.2.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (1.4 kB)
Collecting tyro (from unsloth)
  Downloading tyro-0.9.17-py3-none-any.whl.metadata (9.5 kB)
Collecting transformers!=4.47.0,>=4.46.1 (from unsloth)
  Downloading transformers-4.50.0-py3-none-any.whl.metadata (39 kB)
Collecting trl!=0.15.0,!=0.

# ***Import all relevant libraries***

In [2]:
#import all relevant libraries
from unsloth import FastLanguageModel
import torch
from trl import SFTTrainer
from unsloth import is_bfloat16_supported

from huggingface_hub import login
from transformers import TrainingArguments
from datasets import load_dataset

import wandb
from kaggle_secrets import UserSecretsClient

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
🦥 Unsloth Zoo will now patch everything to make training faster!


# ***Initialize Hugging Face and wandb tokens***

In [3]:
# Read the API tokens from Kaggle secrets instead of hard-coding them in the notebook.
user_secrets = UserSecretsClient()

# Look up the Hugging Face and Weights & Biases tokens by their secret labels.
hugging_face_token = user_secrets.get_secret("hugging_face")
wnb_token = user_secrets.get_secret("wnb")

In [4]:
# Authenticate against the Hugging Face Hub with the token read from Kaggle secrets.
login(token=hugging_face_token)

In [5]:
# Authenticate with Weights & Biases using the token read from Kaggle secrets.
wandb.login(key=wnb_token)

# Start the tracking run for this fine-tuning session; the handle is kept in `run`.
run = wandb.init(project="Fine-tune-DeepSeek-R1", job_type="training", anonymous="allow")

[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: Currently logged in as: [33msainkan001[0m ([33msainkan001-bymind-solutions[0m). Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Tracking run with wandb version 0.19.1
[34m[1mwandb[0m: Run data is saved locally in [35m[1m/kaggle/working/wandb/run-20250324_180826-917pvwyt[0m
[34m[1mwandb[0m: Run [1m`wandb offline`[0m to turn off syncing.
[34m[1mwandb[0m: Syncing run [33mrare-leaf-12[0m
[34m[1mwandb[0m: ⭐️ View project at [34m[4mhttps://wandb.ai/sainkan001-bymind-solutions/Fine-tune-DeepSeek-R1[0m
[34m[1mwandb[0m: 🚀 View run at [34m[4mhttps://wandb.ai/sainkan001-bymind-solutions/Fine-tune-DeepSeek-R1/runs/917pvwyt[0m


# ***Load the Model and Tokenizer***

In [6]:
# Key parameters shared by model loading and training.
max_seq_length = 2048  # passed to FastLanguageModel.from_pretrained and to SFTTrainer
dtype = None  # NOTE(review): presumably lets Unsloth pick the dtype automatically — confirm
load_in_4bit = True  # forwarded to from_pretrained as `load_in_4bit`

In [7]:
# Load the DeepSeek-R1 distilled Llama 8B model and its tokenizer through Unsloth,
# using the key parameters defined above and the Hugging Face token for access.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name="unsloth/DeepSeek-R1-Distill-Llama-8B",
    max_seq_length=max_seq_length,
    dtype=dtype,
    load_in_4bit=load_in_4bit,
    token=hugging_face_token,
)

==((====))==  Unsloth 2025.3.18: Fast Llama patching. Transformers: 4.50.0.
   \\   /|    Tesla T4. Num GPUs = 2. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.6.0+cu124. CUDA: 7.5. CUDA Toolkit: 12.4. Triton: 3.2.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.29.post3. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


model.safetensors:   0%|          | 0.00/5.96G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/236 [00:00<?, ?B/s]

tokenizer_config.json: 0.00B [00:00, ?B/s]

tokenizer.json:   0%|          | 0.00/17.2M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/483 [00:00<?, ?B/s]

# ***Test the Model before fine-tuning***

In [8]:
# Inference prompt template. The first `{}` receives the question; the second `{}`
# sits right after the opening `<think>` tag and is filled with "" at inference
# time so that the model continues with its own reasoning.
prompt_style = """Below is an instruction that describes a task, paired with an input that provides further context. 
Write a response that appropriately completes the request. 
Before answering, think carefully about the question and create a step-by-step chain of thoughts to ensure a logical and accurate response.

### Instruction:
You are a medical expert with advanced knowledge in clinical reasoning, diagnostics, and treatment planning. 
Please answer the following medical question. 

### Question:
{}

### Response:
<think>{}"""

In [9]:
# Sample clinical question used to probe the base model before fine-tuning.
# Built from adjacent string literals so that no newlines or source-layout
# indentation leak into the prompt (the later test questions are single-line too).
question = (
    "A 50-year-old man with a history of chronic alcohol use presents with confusion, ataxia, "
    "and ophthalmoplegia. His wife reports that he has been forgetful "
    "and has difficulty recalling recent events. What is the most likely diagnosis, "
    "and what immediate treatment should be administered?"
)

In [10]:
# Switch the Unsloth model into its optimized inference mode.
FastLanguageModel.for_inference(model)

# Fill the prompt template with the question (reasoning slot left empty),
# tokenize it as a batch of one and move the tensors to the GPU.
inputs = tokenizer(
    [prompt_style.format(question, "")],
    return_tensors="pt",
).to("cuda")

# Generate up to 1200 new tokens from the prompt.
outputs = model.generate(
    input_ids=inputs.input_ids,
    attention_mask=inputs.attention_mask,
    max_new_tokens=1200,
    use_cache=True,
)

# Turn the generated token ids back into text.
response = tokenizer.batch_decode(outputs)

# Print only the part that follows the "### Response:" marker.
print(response[0].split("### Response:")[1])


<think>
Okay, so I've got this medical case to think through. Let me start by breaking down the information given. The patient is a 50-year-old man with a history of chronic alcohol use. He's presenting with confusion, ataxia, and ophthalmoplegia. His wife mentions he's been forgetful and has trouble remembering recent events.

First, I need to consider the possible causes for these symptoms. I know that chronic alcohol use can lead to various neurological issues. The confusion and forgetfulness make me think of Wernicke-Korsakoff syndrome, which is a condition often seen in alcoholics. It's characterized by both cognitive deficits and oculovestibular issues.

Wait, the patient has ophthalmoplegia, which is the inability to move the eyes properly. In Wernicke's, there's usually nystagmus (involuntary eye movements) and ophthalmoplegia, especially the lateral gaze palsy. That fits here. Also, the ataxia is a sign of incoordination, which is present in Wernicke-Korsakoff.

Another thoug

# ***Finetune Step by Step***

In [11]:
# Step 1: training prompt template.
# It has three `{}` slots, filled in order by the formatting function with the
# question, the chain of thought (placed between the <think> tags) and the final response.
train_prompt_style = """Below is an instruction that describes a task, paired with an input that provides further context. 
Write a response that appropriately completes the request. 
Before answering, think carefully about the question and create a step-by-step chain of thoughts to ensure a logical and accurate response.

### Instruction:
You are a medical expert with advanced knowledge in clinical reasoning, diagnostics, and treatment planning. 
Please answer the following medical question. 

### Question:
{}

### Response:
<think>
{}
</think>
{}"""

In [12]:
# Step 2: download the first 1,000 training rows of the English ("en") configuration
# from the Hugging Face Hub; they are formatted for fine-tuning further below.
dataset = load_dataset(
    "FreedomIntelligence/medical-o1-reasoning-SFT",
    "en",
    split="train[0:1000]",
    trust_remote_code=True,
)

README.md: 0.00B [00:00, ?B/s]

medical_o1_sft.json:   0%|          | 0.00/74.1M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/25371 [00:00<?, ? examples/s]

In [13]:
# Show the dataset summary (column names and row count).
dataset

Dataset({
    features: ['Question', 'Complex_CoT', 'Response'],
    num_rows: 1000
})

In [14]:
# Inspect the first example.
dataset[0]

{'Question': 'A 61-year-old woman with a long history of involuntary urine loss during activities like coughing or sneezing but no leakage at night undergoes a gynecological exam and Q-tip test. Based on these findings, what would cystometry most likely reveal about her residual volume and detrusor contractions?',
 'Complex_CoT': "Okay, let's think about this step by step. There's a 61-year-old woman here who's been dealing with involuntary urine leakages whenever she's doing something that ups her abdominal pressure like coughing or sneezing. This sounds a lot like stress urinary incontinence to me. Now, it's interesting that she doesn't have any issues at night; she isn't experiencing leakage while sleeping. This likely means her bladder's ability to hold urine is fine when she isn't under physical stress. Hmm, that's a clue that we're dealing with something related to pressure rather than a bladder muscle problem. \n\nThe fact that she underwent a Q-tip test is intriguing too. This 

In [15]:
# The dataset has to be formatted to fit the training prompt style.
# The tokenizer's end-of-sequence token is appended to every formatted training example.
end_of_sentence_token = tokenizer.eos_token
end_of_sentence_token

'<｜end▁of▁sentence｜>'

In [16]:
# Prompt-formatting function
def formatting_prompts_func(examples):
    """Render a batch of dataset rows into training prompts.

    Args:
        examples: Batch mapping that provides the "Question", "Complex_CoT"
            and "Response" columns as parallel sequences.

    Returns:
        A dict with a single "text" key holding one string per row:
        `train_prompt_style` filled with the question, chain of thought and
        response, followed by `end_of_sentence_token`.
    """
    rows = zip(examples["Question"], examples["Complex_CoT"], examples["Response"])
    return {
        "text": [
            train_prompt_style.format(question_text, reasoning, answer) + end_of_sentence_token
            for question_text, reasoning, answer in rows
        ]
    }

In [17]:
# Apply the formatting function batch-wise; the result gains a "text" column.
dataset_finetune = dataset.map(formatting_prompts_func, batched=True)

# Preview the first formatted training prompt.
dataset_finetune["text"][0]

Map:   0%|          | 0/1000 [00:00<?, ? examples/s]

"Below is an instruction that describes a task, paired with an input that provides further context. \nWrite a response that appropriately completes the request. \nBefore answering, think carefully about the question and create a step-by-step chain of thoughts to ensure a logical and accurate response.\n\n### Instruction:\nYou are a medical expert with advanced knowledge in clinical reasoning, diagnostics, and treatment planning. \nPlease answer the following medical question. \n\n### Question:\nA 61-year-old woman with a long history of involuntary urine loss during activities like coughing or sneezing but no leakage at night undergoes a gynecological exam and Q-tip test. Based on these findings, what would cystometry most likely reveal about her residual volume and detrusor contractions?\n\n### Response:\n<think>\nOkay, let's think about this step by step. There's a 61-year-old woman here who's been dealing with involuntary urine leakages whenever she's doing something that ups her ab

In [18]:
# Attach LoRA adapters to the loaded model. The targets are the attention
# projections (q/k/v/o) and the MLP projections (gate/up/down).
model_lora = FastLanguageModel.get_peft_model(
    model,
    r=16,
    lora_alpha=16,
    lora_dropout=0,
    bias="none",
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"],
    use_gradient_checkpointing="unsloth",
    random_state=3407,
    use_rslora=False,
    loftq_config=None,
)

Unsloth 2025.3.18 patched 32 layers with 32 QKV layers, 32 O layers and 32 MLP layers.


In [19]:
# Build the supervised fine-tuning trainer around the LoRA-wrapped model
# and the formatted dataset (prompts live in the "text" column).
trainer = SFTTrainer(
    model=model_lora,
    tokenizer=tokenizer,
    train_dataset=dataset_finetune,
    dataset_text_field="text",
    max_seq_length=max_seq_length,
    dataset_num_proc=2,
    # Training arguments
    args=TrainingArguments(
        output_dir="outputs",
        seed=3407,
        per_device_train_batch_size=2,
        gradient_accumulation_steps=4,
        # Short run capped at 60 training steps.
        # Use num_train_epochs = 1, warmup_ratio for full training runs!
        max_steps=60,
        warmup_steps=5,
        learning_rate=2e-4,
        lr_scheduler_type="linear",
        optim="adamw_8bit",
        weight_decay=0.01,
        # Mixed precision: bf16 when the GPU supports it, otherwise fp16.
        bf16=is_bfloat16_supported(),
        fp16=not is_bfloat16_supported(),
        logging_steps=10,
    ),
)

Unsloth: Tokenizing ["text"] (num_proc=2):   0%|          | 0/1000 [00:00<?, ? examples/s]

In [20]:
# Run the fine-tuning loop; the value returned by `train()` is kept in `trainer_stats`.
trainer_stats = trainer.train()

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 1,000 | Num Epochs = 1 | Total steps = 60
O^O/ \_/ \    Batch size per device = 4 | Gradient accumulation steps = 4
\        /    Data Parallel GPUs = 1 | Total batch size (4 x 4 x 1) = 16
 "-____-"     Trainable parameters = 41,943,040/8,000,000,000 (0.52% trained)


Unsloth: Will smartly offload gradients to save VRAM!


Step,Training Loss
10,1.8735
20,1.4355
30,1.3791
40,1.3259
50,1.3587
60,1.3434


In [21]:
# Close the Weights & Biases run started earlier in the notebook.
# `wandb` is already imported in the notebook's import cell, so it is not re-imported here.
# Note: this only ends experiment tracking; it does not save the model.
wandb.finish()

[34m[1mwandb[0m:                                                                                
[34m[1mwandb[0m: 
[34m[1mwandb[0m: Run history:
[34m[1mwandb[0m:         train/epoch ▁▂▄▅▇██
[34m[1mwandb[0m:   train/global_step ▁▂▄▅▇██
[34m[1mwandb[0m:     train/grad_norm █▂▂▁▁▁
[34m[1mwandb[0m: train/learning_rate █▇▅▄▂▁
[34m[1mwandb[0m:          train/loss █▂▂▁▁▁
[34m[1mwandb[0m: 
[34m[1mwandb[0m: Run summary:
[34m[1mwandb[0m:               total_flos 4.017529745276928e+16
[34m[1mwandb[0m:              train/epoch 0.96
[34m[1mwandb[0m:        train/global_step 60
[34m[1mwandb[0m:          train/grad_norm 0.18423
[34m[1mwandb[0m:      train/learning_rate 0
[34m[1mwandb[0m:               train/loss 1.3434
[34m[1mwandb[0m:               train_loss 1.45268
[34m[1mwandb[0m:            train_runtime 2692.2537
[34m[1mwandb[0m: train_samples_per_second 0.357
[34m[1mwandb[0m:   train_steps_per_second 0.022
[34m[1mwandb[0m: 
[34m[

In [22]:
# Post-fine-tuning check: a new question, formatted with the same inference
# template (`prompt_style`) that was used before training.
question = "A 25-year-old woman presents with unintentional weight loss, heat intolerance, and frequent episodes of palpitations. On examination, she has a diffusely enlarged, non-tender thyroid gland and exophthalmos. Her laboratory results show low TSH and elevated free T3 and T4. What is the most likely diagnosis, and what are the treatment options?"


# NOTE(review): inference runs on `model`, not on the `model_lora` object returned by
# get_peft_model — confirm the trained adapters are active on this object.
FastLanguageModel.for_inference(model)  # switch to Unsloth's inference mode
inputs = tokenizer([prompt_style.format(question, "")], return_tensors="pt").to("cuda")

# Generate up to 1200 new tokens from the prompt.
outputs = model.generate(
    input_ids=inputs.input_ids,
    attention_mask=inputs.attention_mask,
    max_new_tokens=1200,
    use_cache=True,
)
# Decode the generated ids and print only the text after the "### Response:" marker.
response = tokenizer.batch_decode(outputs)
print(response[0].split("### Response:")[1])


<think>
Alright, let's see what we're dealing with here. We've got a 25-year-old woman who's losing weight unintentionally, feeling hot all the time, and having these sudden palpitations. That's a pretty unusual combination of symptoms. Let's break it down piece by piece.

First off, the palpitations. That's interesting because it's not just a single episode, but rather frequent episodes. I'm thinking there's something causing her heart rate to spike. Maybe a hyperthyroidism? Because I've heard that in hyperthyroidism, the heart can beat really fast, especially in a condition like Graves' disease.

Now, let's look at the thyroid aspect. The thyroid gland is enlarged and diffusely so. I've read that in Graves' disease, the thyroid can grow quite a bit. Plus, the exophthalmos is a big clue. Graves' disease is known for causing the eyes to bulge out because of the pressure from the enlarged thyroid gland.

Oh, and what about the lab results? Low TSH and high free T3 and T4. That definite

In [23]:
# Second post-fine-tuning check with another question.
# This cell does not call FastLanguageModel.for_inference itself; it relies on the
# call made in an earlier cell.
question = "A 60-year-old man with a history of smoking presents with persistent cough, hemoptysis, and unintentional weight loss. Imaging reveals a lung mass, and laboratory tests show hypercalcemia. What is the most likely underlying cause of his hypercalcemia, and what is the probable diagnosis?"

inputs = tokenizer([prompt_style.format(question, "")], return_tensors="pt").to("cuda")

# Generate up to 1200 new tokens from the prompt.
outputs = model.generate(
    input_ids=inputs.input_ids,
    attention_mask=inputs.attention_mask,
    max_new_tokens=1200,
    use_cache=True,
)
# Decode the generated ids and print only the text after the "### Response:" marker.
response = tokenizer.batch_decode(outputs)
print(response[0].split("### Response:")[1])


<think>
Okay, let's think about this. We've got a 60-year-old man who's been smoking, which is a red flag because smoking is linked to lung problems. He's got a cough, coughing up blood, and he's losing weight without intending to. These symptoms make me think of lung cancer, especially given the smoking history.

Now, there's a lung mass on the imaging, which fits with this idea. But then there's the hypercalcemia, which is high levels of calcium in the blood. That's puzzling because lung cancer doesn't usually cause hypercalcemia. It makes me think there's something else going on.

Hypercalcemia often happens when there's a problem with the parathyroid glands. These glands are responsible for regulating calcium levels, so if they're not working right, you could end up with high calcium. The most common cause of hypercalcemia is hyperparathyroidism, which is when the parathyroid glands produce too much parathyroid hormone, leading to excess calcium.

So, considering all this, the man

In [24]:
# Hub repository id for the fine-tuned model (not used in this cell) and the local output directory.
new_model_online = "kingabzpro/DeepSeek-R1-Medical-COT"
new_model_local = "DeepSeek-R1-Medical-COT"

# Save through `model_lora`, the PEFT wrapper returned by get_peft_model, so that the
# trained LoRA adapter (weights + adapter config) is what gets written to disk.
# Saving the object that was passed *into* get_peft_model does not go through the PEFT wrapper.
model_lora.save_pretrained(new_model_local)  # Local saving
tokenizer.save_pretrained(new_model_local)

('DeepSeek-R1-Medical-COT/tokenizer_config.json',
 'DeepSeek-R1-Medical-COT/special_tokens_map.json',
 'DeepSeek-R1-Medical-COT/tokenizer.json')