In [None]:
# source: https://www.datacamp.com/tutorial/fine-tuning-deepseek-r1-reasoning-model
# Install Unsloth from PyPI, then reinstall the package itself from the GitHub
# repository (forced reinstall, pip cache bypassed, dependencies left untouched).
# NOTE(review): neither install is version-pinned, so reruns may pick up different code.
!pip install unsloth
!pip install --force-reinstall --no-cache-dir --no-deps git+https://github.com/unslothai/unsloth.git

In [None]:
# !pip install -U huggingface_hub
import getpass
import os

from huggingface_hub import login

# Authenticate with the Hugging Face Hub without storing the token in the notebook.
# A token already exported as HUGGINGFACE_HUB_TOKEN is reused as-is; otherwise it
# is requested through a hidden prompt and kept in the environment, because the
# model-loading cell reads the same variable.
if not os.environ.get("HUGGINGFACE_HUB_TOKEN"):
    os.environ["HUGGINGFACE_HUB_TOKEN"] = getpass.getpass("Hugging Face token: ")

# Login using the environment variable
login(token=os.environ["HUGGINGFACE_HUB_TOKEN"])

In [None]:
# 1. Install wandb if not already installed
# !pip install -q wandb

# 2. Import
import getpass
import os

import wandb

# 3. Resolve the API key without writing it into the notebook.
# An already-exported WANDB_API_KEY is reused; otherwise the key is requested
# through a hidden prompt. A blank answer leaves the environment untouched.
wandb_api_key = os.environ.get("WANDB_API_KEY") or getpass.getpass(
    "Weights & Biases API key (leave blank to skip): "
)
if wandb_api_key:
    os.environ["WANDB_API_KEY"] = wandb_api_key

# 4. Login; when no key was supplied, key=None leaves credential discovery to wandb
wandb.login(key=wandb_api_key or None)

# 5. Initialize your run
run = wandb.init(
    project='Fine-tune-DeepSeek-R1-Distill-Llama-8B on Medical COT Dataset',
    job_type="training",
    anonymous="allow"
)

In [None]:
# if dtype is None:
#     dtype = torch.float16  # or whatever is optimal

# If using 4-bit quantization, override dtype to match quant settings
# if load_in_4bit:
#     dtype = torch.float16

In [None]:
from unsloth import FastLanguageModel

# Loader settings; max_seq_length is reused later when the trainer is built.
max_seq_length = 2048  # input limit
dtype = None
load_in_4bit = True

# Gather the keyword arguments first so the loading call itself stays short.
pretrained_kwargs = {
    "model_name": "unsloth/DeepSeek-R1-Distill-Llama-8B",
    "max_seq_length": max_seq_length,
    "dtype": dtype,
    "load_in_4bit": load_in_4bit,
    "token": os.environ["HUGGINGFACE_HUB_TOKEN"],
}
model, tokenizer = FastLanguageModel.from_pretrained(**pretrained_kwargs)

In [None]:
# Inference prompt template with two positional placeholders: the medical
# question, and text placed directly after the opening <think> tag (the
# inference cell passes an empty string so the model continues from there).
prompt_style = """Below is an instruction that describes a task, paired with an input that provides further context.
Write a response that appropriately completes the request.
Before answering, think carefully about the question and create a step-by-step chain of thoughts to ensure a logical and accurate response.

### Instruction:
You are a medical expert with advanced knowledge in clinical reasoning, diagnostics, and treatment planning.
Please answer the following medical question.

### Question:
{}

### Response:
<think>{}"""

In [None]:
question = "A 61-year-old woman with a long history of involuntary urine loss during activities like coughing or sneezing but no leakage at night undergoes a gynecological exam and Q-tip test. Based on these findings, what would cystometry most likely reveal about her residual volume and detrusor contractions?"

# Put the model into Unsloth's inference mode before generating.
FastLanguageModel.for_inference(model)

# Fill the template (empty text after <think>), tokenize, and move the batch to the GPU.
filled_prompt = prompt_style.format(question, "")
inputs = tokenizer([filled_prompt], return_tensors="pt").to("cuda")

generation_kwargs = {
    "input_ids": inputs.input_ids,
    "attention_mask": inputs.attention_mask,
    "max_new_tokens": 1200,
    "use_cache": True,
}
outputs = model.generate(**generation_kwargs)

# Decode the batch and show only the text that follows the response marker.
response = tokenizer.batch_decode(outputs)
first_decoded = response[0]
print(first_decoded.split("### Response:")[1])

In [None]:
# Loading and processing the dataset
# The training prompt extends the inference prompt to three positional
# placeholders, filled in this order: the question, the complex chain of
# thought (wrapped in <think> ... </think>), and the final response.

train_prompt_style = """Below is an instruction that describes a task, paired with an input that provides further context.
Write a response that appropriately completes the request.
Before answering, think carefully about the question and create a step-by-step chain of thoughts to ensure a logical and accurate response.

### Instruction:
You are a medical expert with advanced knowledge in clinical reasoning, diagnostics, and treatment planning.
Please answer the following medical question.

### Question:
{}

### Response:
<think>
{}
</think>
{}"""

In [None]:
# End-of-sequence marker taken from the tokenizer; formatting_prompts_func
# appends it to every formatted training example.
EOS_TOKEN = tokenizer.eos_token  # Must add EOS_TOKEN

def formatting_prompts_func(examples):
    """Build the training text for a batch of dataset rows.

    Reads the "Question", "Complex_CoT" and "Response" columns of ``examples``,
    fills ``train_prompt_style`` with each row's three values (in that order)
    and appends ``EOS_TOKEN`` to the result.

    Returns:
        A dict with a single "text" key holding the list of formatted strings,
        one per row.
    """
    rows = zip(examples["Question"], examples["Complex_CoT"], examples["Response"])
    return {
        "text": [
            train_prompt_style.format(question, reasoning, answer) + EOS_TOKEN
            for question, reasoning, answer in rows
        ],
    }

In [None]:
# Load the first 500 samples of the "en" configuration of the
# FreedomIntelligence/medical-o1-reasoning-SFT dataset from the Hugging Face Hub,
# then add a "text" column built by formatting_prompts_func.
from datasets import load_dataset

# trust_remote_code is deliberately not passed: it lets Python code shipped with
# a dataset repository run locally and is only needed for datasets that rely on
# a loading script.
# NOTE(review): confirm this dataset loads without it.
dataset = load_dataset(
    "FreedomIntelligence/medical-o1-reasoning-SFT",
    "en",
    split="train[0:500]",
)
dataset = dataset.map(formatting_prompts_func, batched=True)

# Show the first formatted example; selecting the row before the column avoids
# reading the whole "text" column just to display one entry.
dataset[0]["text"]

In [None]:
from datasets import load_dataset

def _load_medical_split(split):
    """Load one slice of the "en" configuration of the medical reasoning dataset.

    Args:
        split: split expression passed to ``load_dataset``, e.g. "train[0:500]".

    Returns:
        Whatever ``load_dataset`` returns for that slice.

    trust_remote_code is deliberately not passed: it lets Python code shipped
    with a dataset repository run locally and is only needed for datasets that
    rely on a loading script.
    NOTE(review): confirm this dataset loads without it.
    """
    return load_dataset(
        "FreedomIntelligence/medical-o1-reasoning-SFT",
        "en",
        split=split,
    )


# Training split: first 500
train_dataset = _load_medical_split("train[0:500]")

# Validation split: next 100
val_dataset = _load_medical_split("train[500:600]")

# Test split: next 100
test_dataset = _load_medical_split("train[600:700]")

In [None]:
def _with_text_column(split):
    """Return ``split`` after a batched map through formatting_prompts_func."""
    return split.map(formatting_prompts_func, batched=True)


# Apply the same formatting to each split, in the original order.
train_dataset = _with_text_column(train_dataset)
val_dataset = _with_text_column(val_dataset)
test_dataset = _with_text_column(test_dataset)

In [None]:
# Display the validation split as the cell output to inspect it after mapping.
val_dataset

In [None]:
# Print the formatted "text" of the first validation row as a sanity check.
print(val_dataset[0]["text"])

In [None]:
# Projection layers that receive LoRA adapters.
lora_target_modules = [
    "q_proj",
    "k_proj",
    "v_proj",
    "o_proj",
    "gate_proj",
    "up_proj",
    "down_proj",
]

# Wrap the loaded model with LoRA adapters. Note that `model` is rebound to the
# wrapped model, so this cell is meant to run once per freshly loaded model.
model = FastLanguageModel.get_peft_model(
    model,
    r=16,
    target_modules=lora_target_modules,
    lora_alpha=16,
    lora_dropout=0,
    bias="none",
    use_gradient_checkpointing="unsloth",  # True or "unsloth" for very long context
    random_state=3407,
    use_rslora=False,
    loftq_config=None,
)

In [None]:
from trl import SFTTrainer
from transformers import TrainingArguments
from unsloth import is_bfloat16_supported

# Hyperparameters for the supervised fine-tuning run. Evaluation, checkpointing
# and logging are all configured per step, at most two checkpoints are kept, and
# the checkpoint with the lowest eval_loss is reloaded when training ends.
# NOTE(review): eval_steps=1 / save_steps=1 evaluates on the full validation
# split and writes a checkpoint after every step — confirm this cost is intended.
training_config = TrainingArguments(
        per_device_train_batch_size=4,
        gradient_accumulation_steps=1,
        num_train_epochs = 20,
        # max_steps=60,
        warmup_steps=5,  # NOTE(review): presumably has no effect with lr_scheduler_type="constant" — confirm, or use "constant_with_warmup"
        learning_rate=2e-4,
        evaluation_strategy="steps",  # or "epoch"; NOTE(review): newer transformers releases may expect `eval_strategy` — confirm against the installed version
        save_strategy="steps",        # or "epoch"
        eval_steps=1,
        save_steps=1,                 # (Only used with save_strategy="steps")
        logging_steps=1,
        optim="adamw_torch_fused",
        weight_decay=0,               #0.01,
        lr_scheduler_type="constant",     # "cosine", "constant"
        seed=3407,
        output_dir="outputs",
        save_total_limit=2,           # 🧹 Keep only last 2 checkpoints
        # Use bf16 when the hardware supports it, otherwise fall back to fp16.
        fp16=not is_bfloat16_supported(),
        bf16=is_bfloat16_supported(),
        metric_for_best_model="eval_loss",   # 👈 Use validation loss as criterion
        load_best_model_at_end=True,
        greater_is_better=False,             # 👈 Lower loss = better
    )

# Trainer over the formatted "text" column.
# NOTE(review): training uses only indices 0-19 of `dataset` (the exploratory
# load), not `train_dataset` — confirm this small subset is deliberate.
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=dataset.select(range(20)),
    eval_dataset=val_dataset,
    dataset_text_field="text",
    max_seq_length=max_seq_length,
    dataset_num_proc=os.cpu_count(),  # set number of CPU cores to use when tokenizing and mapping
    args=training_config,
)

In [None]:
# Run fine-tuning from the start and keep the returned training statistics.
trainer_stats = trainer.train()

In [None]:
# Continue training from a saved checkpoint instead of starting over.
# NOTE(review): presumably needs an existing checkpoint under output_dir
# ("outputs") and is meant for interrupted runs only — confirm before Run All.
trainer.train(resume_from_checkpoint=True)

In [None]:
# Write the trainer's current model and the tokenizer to the "best_model" directory.
# With load_best_model_at_end=True in the training config, this is expected to be
# the lowest-eval_loss checkpoint — NOTE(review): confirm.
trainer.save_model("best_model")
tokenizer.save_pretrained("best_model")

In [None]:
from IPython.display import display, Javascript

# This just disconnects the runtime by rendering JavaScript that calls
# google.colab.kernel.disconnect() in the notebook front end.
# NOTE(review): presumably only works inside Google Colab, and cells below this
# one will not run afterwards — confirm placement.
display(Javascript('google.colab.kernel.disconnect()'))

In [None]:
# Values to pass to TrainingArguments when the best checkpoint should be chosen
# by F1 instead of eval_loss. (The trailing comma previously turned the metric
# name into a tuple, and greater_is_better was a string rather than a bool.)
metric_for_best_model = "eval_f1"
greater_is_better = True


def compute_metrics(eval_preds):
    """Compute macro-averaged F1 from an evaluation prediction pair.

    Args:
        eval_preds: a ``(logits, labels)`` pair. ``logits`` carries the class
            (or vocabulary) scores on its last axis; ``labels`` holds the
            integer targets for the remaining axes.

    Returns:
        ``{"f1": <macro F1>}`` computed over every position whose label is not
        -100.

    Predictions and labels are flattened because ``f1_score`` expects 1-D
    targets, and positions labelled -100 are dropped on the assumption that
    -100 marks ignored/padded positions.
    NOTE(review): for causal-LM logits the predictions may additionally need to
    be shifted by one position relative to the labels — confirm against the
    trainer in use.
    """
    import numpy as np
    from sklearn.metrics import f1_score

    logits, labels = eval_preds
    predictions = np.argmax(logits, axis=-1).reshape(-1)
    flat_labels = np.asarray(labels).reshape(-1)

    keep = flat_labels != -100
    return {
        "f1": f1_score(flat_labels[keep], predictions[keep], average="macro")
    }

# Then pass compute_metrics=compute_metrics to the trainer