In [1]:
# install relevant packages

# Install the Unsloth library using pip
!pip install unsloth
# Reinstall Unsloth from GitHub with fresh files
!pip install --force-reinstall --no-cache-dir --no-deps git+https://github.com/unslothai/unsloth.git

# Explanation of flags used in the second command:
# --force-reinstall   → Removes the old version and installs a fresh copy.
# --no-cache-dir      → Prevents using old cached files, ensuring a clean install.
# --no-deps           → Skips installing extra dependencies (only installs Unsloth itself).
# git+https://...     → Downloads and installs the latest version directly from GitHub.

Collecting git+https://github.com/unslothai/unsloth.git
  Cloning https://github.com/unslothai/unsloth.git to /tmp/pip-req-build-3y4tdx45
  Running command git clone --filter=blob:none --quiet https://github.com/unslothai/unsloth.git /tmp/pip-req-build-3y4tdx45
  Resolved https://github.com/unslothai/unsloth.git to commit 9390bd528d4126840b142d5c354b8c1d7461f41e
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Building wheels for collected packages: unsloth
  Building wheel for unsloth (pyproject.toml) ... [?25l[?25hdone
  Created wheel for unsloth: filename=unsloth-2025.4.8-py3-none-any.whl size=263211 sha256=ef3600c32875b34a0ad29f5443c86e3438bba50a1a815946c7d3a9067530ff29
  Stored in directory: /tmp/pip-ephem-wheel-cache-p3i4qgua/wheels/d1/17/05/850ab10c33284a4763b0595cd8ea9d01fce6e221cac24b3c01
Successfully built unsloth
Installing collected packages: unsloth
 

In [2]:
!pip install unsloth



In [3]:
!pip install --force-reinstall --no-cache-dir --no-deps git+https://github.com/unslothai/unsloth.git

Collecting git+https://github.com/unslothai/unsloth.git
  Cloning https://github.com/unslothai/unsloth.git to /tmp/pip-req-build-xt9962pa
  Running command git clone --filter=blob:none --quiet https://github.com/unslothai/unsloth.git /tmp/pip-req-build-xt9962pa
  Resolved https://github.com/unslothai/unsloth.git to commit 9390bd528d4126840b142d5c354b8c1d7461f41e
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Building wheels for collected packages: unsloth
  Building wheel for unsloth (pyproject.toml) ... [?25l[?25hdone
  Created wheel for unsloth: filename=unsloth-2025.4.8-py3-none-any.whl size=263211 sha256=2fc01c7ce6503daf312ffd62edd26de14fd2316dc1cfa452933df28ec222dff3
  Stored in directory: /tmp/pip-ephem-wheel-cache-zoqz3b6t/wheels/d1/17/05/850ab10c33284a4763b0595cd8ea9d01fce6e221cac24b3c01
Successfully built unsloth
Installing collected packages: unsloth
 

In [4]:
#import all relevant libraries
from unsloth import FastLanguageModel
import torch
from trl import SFTTrainer
from unsloth import is_bfloat16_supported

from huggingface_hub import login
from transformers import TrainingArguments
from datasets import load_dataset

import wandb
# from kaggle_secrets import UserSecretsClient


🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
🦥 Unsloth Zoo will now patch everything to make training faster!


**Intialize huggingface and wandb tokens**

In [None]:
import wandb
from huggingface_hub import login  # Make sure this is installed: pip install huggingface_hub

# Option 1: Enter your secrets manually (recommended for Colab)
hugging_face_token = input("Enter your Hugging Face token: ")
wnb_token = input("Enter your Weights & Biases token: ")

# Option 2: Upload a JSON file with tokens (if you prefer not typing them each time)
# from google.colab import files
# uploaded = files.upload()  # Upload a file like `secrets.json` with keys "hugging_face" and "wnb"
# import json
# with open("secrets.json") as f:
#     secrets = json.load(f)
# hugging_face_token = secrets["hugging_face"]
# wnb_token = secrets["wnb"]

# Login to Hugging Face and W&B
login(hugging_face_token)
wandb.login(key=wnb_token)

# Initialize W&B run
run = wandb.init(
    project='Fine-tune-DeepSeek-R1',
    job_type="training",
    anonymous="allow"
)


In [None]:
#5b4b47ce1ec09902bfc8e341294ed96bde73e13c
#hf_tok=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

In [7]:
import os

# Disable Flash Attention, Triton-based ops
os.environ["FLASH_ATTENTION_FORCE_DISABLED"] = "1"
os.environ["DISABLE_TRITON"] = "1"
os.environ["USE_FLASH_ATTENTION"] = "0"

In [8]:
# key parameters
max_seq_length = 2048
dtype = None
load_in_4bit = True

In [9]:
#load the Deepseek R1 model and tokenizer using unsolth
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "unsloth/DeepSeek-R1-Distill-Llama-8B",
    max_seq_length = max_seq_length,
    dtype = dtype,
    load_in_4bit = load_in_4bit,
    token = hugging_face_token,
)

==((====))==  Unsloth 2025.4.8: Fast Llama patching. Transformers: 4.51.3.
   \\   /|    Tesla T4. Num GPUs = 1. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.7.0+cu126. CUDA: 7.5. CUDA Toolkit: 12.6. Triton: 3.3.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.30. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


In [10]:
prompt_style = """Below is an instruction that describes a task, paired with an input that provides further context.
Write a response that appropriately completes the request.
Before answering, think carefully about the question and create a step-by-step chain of thoughts to ensure a logical and accurate response.

### Instruction:
You are a medical expert with advanced knowledge in clinical reasoning, diagnostics, and treatment planning.
Please answer the following medical question.

### Question:
{}

### Response:
<think>{}"""

In [11]:
question = """A 50-year-old man with a history of chronic alcohol use presents with confusion, ataxia,
              and ophthalmoplegia. His wife reports that he has been forgetful
              and has difficulty recalling recent events. What is the most likely diagnosis,
              and what immediate treatment should be administered?"""

In [12]:
#Enable Optimized inference mode for unsloth models
FastLanguageModel.for_inference(model)

# Format the question using the structured prompt
inputs = tokenizer([prompt_style.format(question, "")], return_tensors = "pt").to("cuda")

#Generate the response using the model
outputs = model.generate(
    input_ids = inputs.input_ids,
    attention_mask = inputs.attention_mask,
    max_new_tokens = 1200,
    use_cache = True
)

#Decode the output response in the readable text
response = tokenizer.batch_decode(outputs)

#Show and print only relevant response
print(response[0].split("### Response:")[1])


<think>
Alright, so I'm trying to figure out the most likely diagnosis for this 50-year-old man with a history of chronic alcohol use. He's presenting with confusion, ataxia, and ophthalmoplegia. His wife also mentioned he's been forgetful and has trouble remembering recent events. Okay, let's break this down step by step.

First, I know that chronic alcohol use can lead to various neurological issues. The symptoms he's showing—confusion, ataxia (which is a loss of coordination and balance), and ophthalmoplegia (paralysis or weakness of the eye muscles, often affecting the eyes' movement)—seem to point towards something related to his alcohol use.

I remember that when the brain's cerebellum is affected, it can cause coordination problems like ataxia. The cerebellum is responsible for coordination and balance. Chronic alcohol consumption can damage this area, leading to a condition called cerebellar atrophy. This atrophy can result in ataxia and other signs like unsteady gait.

Ophtha

In [13]:
# step 1:
train_prompt_style = """Below is an instruction that describes a task, paired with an input that provides further context.
Write a response that appropriately completes the request.
Before answering, think carefully about the question and create a step-by-step chain of thoughts to ensure a logical and accurate response.

### Instruction:
You are a medical expert with advanced knowledge in clinical reasoning, diagnostics, and treatment planning.
Please answer the following medical question.

### Question:
{}

### Response:
<think>
{}
</think>
{}"""

In [14]:
# step 2: Download the dataset using huggingface and format it for tuning
dataset = load_dataset("FreedomIntelligence/medical-o1-reasoning-SFT", "en", split = "train[0:1000]", trust_remote_code = True)

In [15]:
dataset

Dataset({
    features: ['Question', 'Complex_CoT', 'Response'],
    num_rows: 1000
})

In [16]:
dataset[0]

{'Question': 'Given the symptoms of sudden weakness in the left arm and leg, recent long-distance travel, and the presence of swollen and tender right lower leg, what specific cardiac abnormality is most likely to be found upon further evaluation that could explain these findings?',
 'Complex_CoT': "Okay, let's see what's going on here. We've got sudden weakness in the person's left arm and leg - and that screams something neuro-related, maybe a stroke?\n\nBut wait, there's more. The right lower leg is swollen and tender, which is like waving a big flag for deep vein thrombosis, especially after a long flight or sitting around a lot.\n\nSo, now I'm thinking, how could a clot in the leg end up causing issues like weakness or stroke symptoms?\n\nOh, right! There's this thing called a paradoxical embolism. It can happen if there's some kind of short circuit in the heart - like a hole that shouldn't be there.\n\nLet's put this together: if a blood clot from the leg somehow travels to the l

In [17]:
# we need to format the dataset to fit our prompt style
end_of_sentence_token = tokenizer.eos_token
end_of_sentence_token

'<｜end▁of▁sentence｜>'

In [18]:
#formitting prompt function
def formatting_prompts_func(examples):
    inputs = examples["Question"]
    cots = examples["Complex_CoT"]
    outputs = examples["Response"]
    texts = []
    for input, cot, output in zip(inputs, cots, outputs):
        text = train_prompt_style.format(input, cot, output) + end_of_sentence_token
        texts.append(text)
    return {
        "text": texts,
    }

In [19]:
# update dataset formatting
dataset_finetune = dataset.map(formatting_prompts_func, batched = True,)
dataset_finetune["text"][0]

"Below is an instruction that describes a task, paired with an input that provides further context. \nWrite a response that appropriately completes the request. \nBefore answering, think carefully about the question and create a step-by-step chain of thoughts to ensure a logical and accurate response.\n\n### Instruction:\nYou are a medical expert with advanced knowledge in clinical reasoning, diagnostics, and treatment planning. \nPlease answer the following medical question. \n\n### Question:\nGiven the symptoms of sudden weakness in the left arm and leg, recent long-distance travel, and the presence of swollen and tender right lower leg, what specific cardiac abnormality is most likely to be found upon further evaluation that could explain these findings?\n\n### Response:\n<think>\nOkay, let's see what's going on here. We've got sudden weakness in the person's left arm and leg - and that screams something neuro-related, maybe a stroke?\n\nBut wait, there's more. The right lower leg i

In [20]:
model_lora = FastLanguageModel.get_peft_model(
    model,
    r=16,
    target_modules=[
        "q_proj",
        "k_proj",
        "v_proj",
        "o_proj",
        "gate_proj",
        "up_proj",
        "down_proj",
    ],
    lora_alpha=16,
    lora_dropout=0,
    bias="none",
    use_gradient_checkpointing="unsloth",
    random_state=3407,
    use_rslora=False,
    loftq_config=None,
)

Unsloth 2025.4.8 patched 32 layers with 32 QKV layers, 32 O layers and 32 MLP layers.


In [21]:
!pip install -U bitsandbytes triton



In [24]:
# Initialize the finetune trainer
trainer = SFTTrainer(
    model = model_lora,
    tokenizer = tokenizer,
    train_dataset = dataset_finetune,
    dataset_text_field = "text",
    max_seq_length = max_seq_length,
    dataset_num_proc = 1,  # Reduced from 2 to lower memory usage

    #training arguments
     args=TrainingArguments(
        per_device_train_batch_size=1,  # Reduced from 2
        gradient_accumulation_steps=8,  # Increased from 4 to compensate for smaller batch size
        # Use num_train_epochs = 1, warmup_ratio for full training runs!
        warmup_steps=5,
        max_steps=60,
        learning_rate=2e-4,
        fp16=not is_bfloat16_supported(),
        bf16=is_bfloat16_supported(),
        logging_steps=10,
        optim="adamw_8bit",
        weight_decay=0.01,
        lr_scheduler_type="linear",
        seed=3407,
        output_dir="outputs",
        # Add gradient checkpointing to save memory
        gradient_checkpointing=True,
    ),
    # Add packing option to False if you're having issues
    packing=False,
)

# Run training with explicit error catching
try:
    trainer_stats = trainer.train()
except RuntimeError as e:
    print(f"Error details: {e}")
    # If it's CUDA OOM, you might see this in the error
    if "CUDA out of memory" in str(e):
        print("This is a CUDA memory error. Try further reducing batch size or model size.")
    elif "PassManager::run failed" in str(e):
        print("This might be related to PyTorch/CUDA optimization passes failing.")

Unsloth: Tokenizing ["text"]:   0%|          | 0/1000 [00:00<?, ? examples/s]

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 1,000 | Num Epochs = 1 | Total steps = 60
O^O/ \_/ \    Batch size per device = 1 | Gradient accumulation steps = 8
\        /    Data Parallel GPUs = 1 | Total batch size (1 x 8 x 1) = 8
 "-____-"     Trainable parameters = 41,943,040/8,000,000,000 (0.52% trained)


Unsloth: Will smartly offload gradients to save VRAM!


Step,Training Loss


Error details: PassManager::run failed
This might be related to PyTorch/CUDA optimization passes failing.


In [25]:
import wandb
# Save the fine-tuned model
wandb.finish()

In [26]:
question = "A 25-year-old woman presents with unintentional weight loss, heat intolerance, and frequent episodes of palpitations. On examination, she has a diffusely enlarged, non-tender thyroid gland and exophthalmos. Her laboratory results show low TSH and elevated free T3 and T4. What is the most likely diagnosis, and what are the treatment options?"


FastLanguageModel.for_inference(model)  # Unsloth has 2x faster inference!
inputs = tokenizer([prompt_style.format(question, "")], return_tensors="pt").to("cuda")

outputs = model.generate(
    input_ids=inputs.input_ids,
    attention_mask=inputs.attention_mask,
    max_new_tokens=1200,
    use_cache=True,
)
response = tokenizer.batch_decode(outputs)
print(response[0].split("### Response:")[1])


<think>
Okay, so I'm trying to figure out the diagnosis and treatment for this 25-year-old woman who's presenting with unintentional weight loss, heat intolerance, and frequent palpitations. She has an enlarged thyroid gland that's not tender and exophthalmos. The labs show low TSH and high free T3 and T4. Hmm, let me break this down.

First, the symptoms: weight loss, heat intolerance, and palpitations. Those are classic signs of hyperthyroidism, but I should consider other possibilities too. She's young, so it's not as common as in older adults. Let's think about the thyroid function tests. Low TSH and elevated free T3 and T4. That points towards hyperthyroidism because TSH is low when the thyroid is overactive, and free T4/T3 levels are high.

Now, the thyroid exam: diffusely enlarged, non-tender. That suggests it's not a nodular goiter but a more generalized enlargement. Exophthalmos is present, which is when the eyes bulge out because the thyroid is pushing forward. That's a sign

In [27]:
question = "A 60-year-old man with a history of smoking presents with persistent cough, hemoptysis, and unintentional weight loss. Imaging reveals a lung mass, and laboratory tests show hypercalcemia. What is the most likely underlying cause of his hypercalcemia, and what is the probable diagnosis?"

inputs = tokenizer([prompt_style.format(question, "")], return_tensors="pt").to("cuda")

outputs = model.generate(
    input_ids=inputs.input_ids,
    attention_mask=inputs.attention_mask,
    max_new_tokens=1200,
    use_cache=True,
)
response = tokenizer.batch_decode(outputs)
print(response[0].split("### Response:")[1])


<think>
Okay, let's tackle this medical case. So, we have a 60-year-old man with a history of smoking. He's presenting with a persistent cough, hemoptysis, and unintentional weight loss. Imaging shows a lung mass, and his lab results show hypercalcemia. The question is asking for the most likely underlying cause of his hypercalcemia and the probable diagnosis.

First, I'll start by breaking down the key symptoms and findings. Persistent cough and hemoptysis are classic signs of lung disease, especially in someone who smokes. Smoking is a major risk factor for both chronic obstructive pulmonary disease (COPD) and lung cancer. The unintentional weight loss could be due to various factors, including the underlying lung disease itself, inflammation, or perhaps something more systemic.

Now, the hypercalcemia is interesting. High calcium levels can be caused by several conditions. The primary types are primary hyperparathyroidism (PHPT), secondary hyperparathyroidism, tertiary hyperparathy

In [28]:
new_model_online = "kingabzpro/DeepSeek-R1-Medical-COT"
new_model_local = "DeepSeek-R1-Medical-COT"
model.save_pretrained(new_model_local) # Local saving
tokenizer.save_pretrained(new_model_local)

('DeepSeek-R1-Medical-COT/tokenizer_config.json',
 'DeepSeek-R1-Medical-COT/special_tokens_map.json',
 'DeepSeek-R1-Medical-COT/tokenizer.json')