In [None]:
!pip install unsloth # install unsloth
!pip install --force-reinstall --no-cache-dir --no-deps git+https://github.com/unslothai/unsloth.git # Also get the latest version Unsloth!


In [None]:
from unsloth import FastLanguageModel
import torch
from trl import SFTTrainer
from unsloth import is_bfloat16_supported
from huggingface_hub import login
from transformers import TrainingArguments
from datasets import load_dataset
import wandb

In [None]:
from google.colab import userdata

# Read the Hugging Face access token stored as the Colab secret "HF_Token"
# and authenticate this session with it. The token is reused later when the
# base model is downloaded.
hf_token = userdata.get("HF_Token")
login(token=hf_token)

In [None]:
# NOTE(review): `name` is not referenced anywhere in the visible notebook;
# this looks like an accidental auto-import. Confirm and remove.
from os import name

# Authenticate with Weights & Biases using the Colab secret "WNB_Token".
wnb_oken = userdata.get("WNB_Token")
wandb.login(key=wnb_oken)

# Start the run that will collect this notebook's training logs.
run = wandb.init(project="Fine tuning DeepSeek R1")

# **1. Set Up the Pretrained DeepSeek R1 Model**




In [None]:
# Checkpoint to fine-tune and the options used to load it.
model_name = "unsloth/DeepSeek-R1-Distill-Llama-8B"
max_seq_lenth = 2048   # also passed to the trainer as its max_seq_length
dtype = None           # forwarded unchanged to from_pretrained
load_in_4bit = True    # request 4-bit loading of the checkpoint

# Collect the keyword arguments first so the load call itself stays short.
load_options = dict(
    model_name=model_name,
    max_seq_length=max_seq_lenth,
    dtype=dtype,
    load_in_4bit=load_in_4bit,
    token=hf_token,
)

# Returns the base model together with its tokenizer.
model, tokenizer = FastLanguageModel.from_pretrained(**load_options)


# **2. Setting up System Prompt**

In [None]:
# Prompt template used to render each example as a single string.
# It contains three positional `{}` placeholders, filled in this order:
#   1. the medical question,
#   2. the chain-of-thought text placed between <think> and </think>,
#   3. the final response placed after </think>.
train_prompt_style  = """Below is an instruction that describes a task, paired with an input that provides further context.
Write a response that appropriately completes the request.
Before answering, think carefully about the question and create a step-by-step chain of thoughts to ensure a logical and accurate response.

### Instruction:
You are a medical expert with advanced knowledge in clinical reasoning, diagnostics, and treatment planning.
Please answer the following medical question.

### Question:
{}

### Response:
<think>
{}
</think>
{}"""

# **3. Prepare the data for fine-tuning**




In [None]:
# End-of-sequence token of the loaded tokenizer; it is appended to every
# formatted training example by formatting_prompt_func.
EOS_token = tokenizer.eos_token

def formatting_prompt_func(examples):
    """Render a batch of dataset rows into training strings.

    Args:
        examples: Batched mapping with the columns "Question",
            "Complex_CoT" and "Response", each a sequence of strings.

    Returns:
        A dict with a single key "text" holding one string per row: the
        row's fields substituted into ``train_prompt_style`` followed by
        ``EOS_token``.
    """
    rows = zip(examples["Question"], examples["Complex_CoT"], examples["Response"])
    return {
        "text": [
            train_prompt_style.format(question, chain_of_thought, answer) + EOS_token
            for question, chain_of_thought, answer in rows
        ]
    }

# **4. Load the Dataset**

In [None]:
# Load the first 400 rows of the English ("en") configuration.
# trust_remote_code is intentionally not passed: this cell only needs the
# dataset's data files, and the flag would opt in to executing code shipped
# with the repository.
raw_dataset = load_dataset(
    "FreedomIntelligence/medical-o1-reasoning-SFT",
    "en",
    split="train[:400]",
)

# Add a "text" column holding the fully rendered training prompt for each row.
dataset = raw_dataset.map(formatting_prompt_func, batched=True)

# Show the first rendered example; indexing the row first avoids
# materialising the whole "text" column just to display one entry.
dataset[0]["text"]

# **5. Adding the low-rank adapter (LoRA) to the model**

In [None]:
# Names of the projection layers that receive LoRA adapters.
lora_target_modules = [
    "q_proj", "k_proj", "v_proj", "o_proj",
    "gate_proj", "up_proj", "down_proj",
]

# Wrap the loaded base model with LoRA adapters (rank 16, alpha 16,
# no dropout, no bias terms).
lora_model = FastLanguageModel.get_peft_model(
    model,
    r=16,
    lora_alpha=16,
    lora_dropout=0,
    bias="none",
    target_modules=lora_target_modules,
    use_gradient_checkpointing="unsloth",
    use_rslora=False,
    loftq_config=None,
    random_state=3407,
)

# **6. Training**

In [None]:
# Pick the mixed-precision mode once: bf16 when supported, fp16 otherwise.
use_bf16 = is_bfloat16_supported()

# Optimisation and logging settings for the run.
# Effective batch size is 2 * 4 = 8 sequences per optimizer step.
training_args = TrainingArguments(
    output_dir="outputs",
    seed=3407,
    per_device_train_batch_size=2,
    gradient_accumulation_steps=4,
    num_train_epochs=1,
    max_steps=60,  # a positive max_steps takes precedence over num_train_epochs
    warmup_steps=5,
    learning_rate=2e-4,
    lr_scheduler_type="linear",
    weight_decay=0.01,
    optim="adamw_8bit",
    bf16=use_bf16,
    fp16=not use_bf16,
    logging_steps=10,
)

# Supervised fine-tuning on the rendered "text" column of the dataset.
trainer = SFTTrainer(
    model=lora_model,
    tokenizer=tokenizer,
    train_dataset=dataset,
    dataset_text_field="text",
    max_seq_length=max_seq_lenth,
    dataset_num_proc=1,
    args=training_args,
)

In [None]:
# Run the fine-tuning loop; whatever train() returns is kept in
# trainer_stats for later inspection.
trainer_stats = trainer.train()

In [None]:
# Mark the Weights & Biases run started earlier in the notebook as finished.
wandb.finish()

In [None]:
# Sample question used to sanity-check the fine-tuned model.
question = """Given the symptoms of sudden weakness in the left arm and leg, recent long-distance travel, and the presence of swollen and tender right lower leg, what specific cardiac abnormality is most likely to be found upon further evaluation that could explain these findings?

"""

# Switch the model that generate() is actually called on (the LoRA-wrapped
# one) to inference mode, rather than the separate `model` handle.
FastLanguageModel.for_inference(lora_model)

# Build the inference prompt from the training template, cut off right after
# the opening "<think>\n". Formatting the full template with empty strings
# would hand the model an already closed, empty <think></think> block, so it
# would skip the reasoning it was fine-tuned to produce.
inference_prompt_style = train_prompt_style.partition("{}\n</think>")[0]

inputs = tokenizer(
    [inference_prompt_style.format(question)],
    return_tensors="pt",
).to("cuda")

outputs = lora_model.generate(
    input_ids=inputs.input_ids,
    attention_mask=inputs.attention_mask,
    max_new_tokens=1200,
    use_cache=True,
)

# Decode and print only what follows the "### Response:" marker.
response = tokenizer.batch_decode(outputs)
print(response[0].split("### Response:")[1])

In [None]:
# Target repository on the Hugging Face Hub.
new_model_online = "ABRm15/DeepSeek-R1-Fine-tuned-Medical"

# Upload through `lora_model`, the PEFT model that was trained and used for
# generation, instead of the base `model` handle, so the upload goes through
# the same object as the rest of the notebook.
lora_model.push_to_hub(new_model_online)
tokenizer.push_to_hub(new_model_online)

# Additionally upload the weights with the adapters merged in, saved as 16-bit.
lora_model.push_to_hub_merged(new_model_online, tokenizer, save_method = "merged_16bit")