<a href="https://colab.research.google.com/github/Su-Mo7743/DeepSeekR1_Fine_tuned/blob/main/Deepseek_R1_model_Finetuning.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
#install the  unsloth library
# covert runtime processor from cpu to  T4
%%capture
!pip install unsloth
!pip install --force-reinstall --no-cache-dir --no-deps git+https://github.com/unslothai/unsloth.git

In [2]:
#Install the packages and run below command
!pip install -q datasets trl transformers

In [3]:
#Then make sure you dowloaded the all packages otherwise it will make impact when pulling the model from HF
#pip show -q  datasets trl transformers unsloth

In [4]:
# Access the Hugingface token
from google.colab import userdata
from huggingface_hub import login
login(userdata.get('HF_TOKEN'))

SecretNotFoundError: Secret HF_TOKEN does not exist.

In [None]:
# Dowloading DeepSeek-R1-Distill-Llama-8B from huggingface using unsloth for faster finetuning

In [None]:
from unsloth import FastLanguageModel

max_seq_length = 2048
dtype = None
load_in_4bit = True

model,tokenizer = FastLanguageModel.from_pretrained(
    model_name = 'deepseek-ai/DeepSeek-R1-Distill-Llama-8B',
    max_seq_length = max_seq_length,
    dtype = dtype,
    load_in_4bit = load_in_4bit,
    token = userdata.get('HF_TOKEN')
)

In [None]:
#Config lora (Low-rank)

In [None]:
#Config lora (Low-rank)
model = FastLanguageModel.get_peft_model(
    model,
    r = 16,
    target_modules = [
        'q_proj',
        'k_proj',
        'v_proj',
        'o_proj',
        'gate_proj',
        'up_proj',
        'down_proj'
        ],
    lora_alpha = 16,
    lora_dropout = 0,
    bias = 'none',
    use_gradient_checkpointing = 'unsloth',
    random_state = 3407,
    use_rslora = False,
    loftq_config = None

    )

In [None]:
#This model template using for the training
train_prompt_style = """Below is an instruction that describes a task, paired with an input that provides further context.
Write a response that appropriately completes the request.
Before answering, think carefully about the question and create a step-by-step chain of thoughts to ensure a logical and accurate response.
### Instruction:
You are a medical expert with advanced knowledge in clinical reasoning, diagnostics, and treatment planning.
Please answer the following medical question.
### Question:
{}

### Response:
<think>
{}
</think>
{}"""

In [None]:
#format the data
EOS_token = tokenizer.eos_token

def formatting_propmts_func(examples):
  input = examples['Question']
  cots = examples['Complex_CoT']
  res = examples['Response']
  text = []
  for input,cots,res in zip(input,cots,res):
    prompt = train_prompt_style.format(input,cots,res)+ EOS_token
    text.append(prompt)
  return  {'text': text}

In [None]:
#Pull data from the HF
# im only taking 100 records
from datasets import load_dataset
data = load_dataset('FreedomIntelligence/medical-o1-reasoning-SFT','en',split='train[:100]', trust_remote_code=True)
dataset = data.map(formatting_propmts_func,batched = True)

In [None]:
# check the how the data looklike
#dataset['text']

In [None]:
# Do supervised-finetuning using the trl and config the parameter
from trl import SFTTrainer
from transformers import TrainingArguments
from unsloth import is_bfloat16_supported


trainer = SFTTrainer(
    model = model,
    tokenizer = tokenizer,
    train_dataset = dataset,
    dataset_text_field = "text",
    max_seq_length = max_seq_length,
    dataset_num_proc = 2,
    args=TrainingArguments(
        per_device_train_batch_size = 2,
        gradient_accumulation_steps = 4,# Use num_train_epochs = 1, warmup_ratio for full training runs!
        warmup_steps = 5,
        max_steps = 60,
        learning_rate=2e-4,
        fp16 = not is_bfloat16_supported(),
        bf16 = is_bfloat16_supported (),
        logging_steps = 10,
        optim = "adamw_8bit",
        weight_decay=0.01,
        lr_scheduler_type="linear",
        seed=3407,
        output_dir="outputs",
        report_to = "none", # Use this for WandB etc
    )
  )

In [None]:
# Here where we train over data with model. Don't panic it wil take more time around 20 minites
# if you training again it will reduce the loss

In [None]:

training_stats = trainer.train()

In [None]:
#sample question and prompt like the data we have
#Try different question which realted to our data
prompt_style = """Below is an instruction that describes a task, paired with an input that provides further context.
Write a response that appropriately completes the request.
Before answering, think carefully about the question and create a step-by-step chain of thoughts to ensure a logical and accurate response.
### Instruction:
You are a medical expert with advanced knowledge in clinical reasoning, diagnostics, and treatment planning.
Please answer the following medical question.
### Question:
{}

### Response:
<think>
{}
</think>"""
#sample question
question = "A 40-year-old female presents with fever, fatigue, and diffuse painful swelling in the midline of the neck. Fine needle aspiration cytology (FNAC) reveals epithelioid cells and giant cells. Based on these clinical and cytological findings, what is the most likely diagnosis?"

In [None]:
# Let's inference to see how our finetuned model looklike take up to max 1 minites
FastLanguageModel.for_inference(model)
input = tokenizer([prompt_style.format(question,"")],return_tensors='pt').to('cuda')
outputs = model.generate(
    input_ids = input.input_ids,
    attention_mask = input.attention_mask,
    max_new_tokens = 1200,
    eos_token_id = tokenizer.eos_token_id,
    use_cache = True
)
response = tokenizer.batch_decode(outputs,skip_special_tokens=True)
print(response[0].split("### Response:")[1])