In [69]:
import warnings
warnings.filterwarnings(action="ignore")

from dotenv import load_dotenv, find_dotenv
load_dotenv(find_dotenv())

import os
from IPython.display import Markdown, display

import re
import time
from uuid import uuid4
from bs4 import BeautifulSoup
from langchain.tools import Tool
from langchain_chroma import Chroma
from langchain_core.documents import Document
from pinecone import Pinecone, ServerlessSpec
from langchain_pinecone import PineconeVectorStore
from langchain_community.utilities import ApifyWrapper
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_core.tools.retriever import create_retriever_tool
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain.agents import create_tool_calling_agent, AgentExecutor
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder

In [65]:
# Credentials and tokens are read from environment variables.
DBclientId = os.getenv("DBclientId")
DBtoken = os.getenv("DBtoken")
OpenAIApiKey = os.getenv("OPENAI_API_KEY")
PineConeAPIKey = os.getenv("PineConeAPI")
ApifyToken = os.getenv("ApifyToken")

pc = Pinecone(api_key=PineConeAPIKey)

name = "agent-db-test"

# Create the index only when it is missing, so that re-running this cell
# (or Restart Kernel -> Run All) does not fail because the index already exists.
existing_index_names = [index_info["name"] for index_info in pc.list_indexes()]
if name not in existing_index_names:
    pc.create_index(
        name=name,
        dimension=3072,  # must equal the vector size produced by `embeddings` below
        metric="cosine",
        spec=ServerlessSpec(cloud="aws", region="us-east-1"))
    # A freshly created index is not usable immediately; poll until it is ready.
    while not pc.describe_index(name).status["ready"]:
        time.sleep(1)


index = pc.Index(name=name)

# Shared clients used by the cells below.
apify = ApifyWrapper(apify_api_token=ApifyToken)
llm_model = ChatOpenAI(model="gpt-4o-mini", api_key=OpenAIApiKey)
embeddings = OpenAIEmbeddings(model="text-embedding-3-large")
vector_store = PineconeVectorStore(index=index, 
                                   embedding=embeddings, 
                                   pinecone_api_key=PineConeAPIKey)

In [80]:
def CreateDocRetrievalTool(url:str, llm):
    """Crawl a web page, index it in the vector store, and return a retriever tool.

    Args:
        url: Address of the page to crawl with the Apify
            ``apify/website-content-crawler`` actor.
        llm: Not used by this function; kept so existing callers keep working.

    Returns:
        The tool built by ``create_retriever_tool`` (named
        ``database_retriever_tool``) on top of the module-level ``vector_store``.

    Side effects:
        Runs the Apify actor, adds the resulting chunks to ``vector_store``
        and prints ``"Done"``. Every call stores its chunks under fresh UUIDs,
        so calling it again for the same URL adds the content a second time.
    """

    def extract_code_blocks(html_content):
        """Return the text of every <code> element in the HTML, blank-line separated."""
        soup = BeautifulSoup(html_content, "html.parser")

        # Collects all <code> elements, whether or not they sit inside a <pre>.
        code_blocks = [code.get_text() for code in soup.find_all("code")]

        return "\n\n".join(code_blocks) if code_blocks else ""

    loader = apify.call_actor(
        actor_id="apify/website-content-crawler",
        run_input={
            # Crawl the page requested by the caller rather than a fixed URL.
            "startUrls": [{"url": url}]
        },
        # `or ""` guards against items whose "text"/"html" field is missing or None.
        dataset_mapping_function=lambda item: Document(
            page_content=(item.get("text") or "") + "\n\n" + extract_code_blocks(item.get("html") or ""),
            metadata={"source": item["url"]}
        )
    )

    loaded_doc = loader.load()

    txt_splitter = RecursiveCharacterTextSplitter(chunk_size=10000, 
                                              chunk_overlap=100,
                                              length_function=len,
                                              is_separator_regex=False)

    splitted_doc = txt_splitter.split_documents(loaded_doc)

    # One random id per chunk.
    uuids = [str(uuid4()) for _ in range(len(splitted_doc))]

    vector_store.add_documents(documents=splitted_doc, ids=uuids)

    retriever = vector_store.as_retriever(search_type="similarity")

    retriever_tool = create_retriever_tool(retriever=retriever,
                                           name="database_retriever_tool", 
                                           description="Retrieve relevant information from the document database")

    print("Done")

    return retriever_tool

In [82]:
# Crawl the tutorial page, index it, and keep the resulting retriever tool.
db_retrieval_tool = CreateDocRetrievalTool(
    url="https://www.datacamp.com/tutorial/fine-tuning-deepseek-r1-reasoning-model",
    llm=llm_model,
)

Done


In [83]:
# Prompt layout: fixed system instructions, optional earlier conversation,
# the user's question, then the agent's tool-call scratchpad.
bot_template = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "You are a strict retrieval-based assistant called Eddy. "
            "You **must** always retrieve the answer using the available tools and **must not** generate or modify the response. "
            "Return the answer **exactly as retrieved**, word for word, without paraphrasing or summarizing. "
            "If no answer is found, respond with: 'No matching answer found in the database.' "
            "Format the answer in Markdown exactly as retrieved.",
        ),
        MessagesPlaceholder(variable_name="chat_history", optional=True),
        ("user", "{input}"),
        MessagesPlaceholder(variable_name="agent_scratchpad"),
    ]
)

In [84]:
# Tool-calling agent: the LLM, the prompt, and the retriever tool it may call.
agent_with_tools = create_tool_calling_agent(
    llm=llm_model,
    tools=[db_retrieval_tool],
    prompt=bot_template,
)

# Executor that runs the agent loop; verbose=True prints the tool-call trace.
bot_agent = AgentExecutor(
    agent=agent_with_tools,
    tools=[db_retrieval_tool],
    handle_parsing_errors=True,
    verbose=True,
)

In [90]:
# Ask the agent, then render its "output" field as Markdown.
agent_result = bot_agent.invoke(
    {"input": "Steps in Fine-Tuning DeepSeek R1. Include the codes involved."}
)
display(Markdown(agent_result["output"]))



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `database_retriever_tool` with `{'query': 'Steps in Fine-Tuning DeepSeek R1. Include the codes involved.'}`


[0m[36;1m[1;3mAashi Dutt 
8 min
tutorial
DeepSeek R1 Demo Project With Gradio and EasyOCR
In this DeepSeek-R1 tutorial, you'll learn how to build a math puzzle solver app by integrating DeepSeek-R1 with EasyOCR and Gradio.
Aashi Dutt 
12 min
See MoreSee More
EnglishEspañolBetaPortuguêsBetaDeutschBetaFrançaisBeta
Found an Error?

from trl import SFTTrainer from transformers import TrainingArguments from unsloth import is_bfloat16_supported trainer = SFTTrainer( model=model, tokenizer=tokenizer, train_dataset=dataset, dataset_text_field="text", max_seq_length=max_seq_length, dataset_num_proc=2, args=TrainingArguments( per_device_train_batch_size=2, gradient_accumulation_steps=4, # Use num_train_epochs = 1, warmup_ratio for full training runs! warmup_steps=5, max_steps=60, learning_rate=2e-4, fp16=not is_bf

```markdown
To fine-tune the DeepSeek R1 model, you can follow the steps below:

1. **Setting up**
   For this project, we are using Kaggle as our Cloud IDE because it provides free access to GPUs, which are often more powerful than those available in Google Colab. To get started, launch a new Kaggle notebook and add your Hugging Face token and Weights & Biases token as secrets. You can add secrets by navigating to the Add-ons tab in the Kaggle notebook interface and selecting the Secrets option. After setting up the secrets, install the unsloth Python package. Unsloth is an open-source framework designed to make fine-tuning large language models (LLMs) 2X faster and more memory-efficient.

   ```python
   %%capture 
   !pip install unsloth 
   !pip install --force-reinstall --no-cache-dir --no-deps git+https://github.com/unslothai/unsloth.git
   ```

   Log in to the Hugging Face CLI using the Hugging Face API that we securely extracted from Kaggle Secrets.

   ```python
   from huggingface_hub import login 
   from kaggle_secrets import UserSecretsClient 
   user_secrets = UserSecretsClient() 
   hf_token = user_secrets.get_secret("HUGGINGFACE_TOKEN") 
   login(hf_token)
   ```

   Log in to Weights & Biases (wandb) using your API key and create a new project to track the experiments and fine-tuning progress.

   ```python
   import wandb 
   wb_token = user_secrets.get_secret("wandb") 
   wandb.login(key=wb_token) 
   run = wandb.init(project='Fine-tune-DeepSeek-R1-Distill-Llama-8B on Medical COT Dataset', job_type="training", anonymous="allow")
   ```

2. **Loading the model and tokenizer**
   For this project, we are loading the Unsloth version of DeepSeek-R1-Distill-Llama-8B. Additionally, we will load the model in 4-bit quantization to optimize memory usage and performance.

   ```python
   from unsloth import FastLanguageModel 
   max_seq_length = 2048 
   dtype = None 
   load_in_4bit = True 
   model, tokenizer = FastLanguageModel.from_pretrained(model_name="unsloth/DeepSeek-R1-Distill-Llama-8B", max_seq_length=max_seq_length, dtype=dtype, load_in_4bit=load_in_4bit, token=hf_token)
   ```

3. **Model inference before fine-tuning**
   To create a prompt style for the model, we will define a system prompt and include placeholders for the question and response generation.

   ```python
   prompt_style = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request. Before answering, think carefully about the question and create a step-by-step chain of thoughts to ensure a logical and accurate response. ### Instruction: You are a medical expert with advanced knowledge in clinical reasoning, diagnostics, and treatment planning. Please answer the following medical question. ### Question: {} ### Response: <think>{}"""
   ```

   In this example, we will provide a medical question to the prompt_style, convert it into tokens, and then pass the tokens to the model for response generation.

   ```python
   question = "A 61-year-old woman with a long history of involuntary urine loss during activities like coughing or sneezing but no leakage at night undergoes a gynecological exam and Q-tip test. Based on these findings, what would cystometry most likely reveal about her residual volume and detrusor contractions?" 
   FastLanguageModel.for_inference(model) 
   inputs = tokenizer([prompt_style.format(question, "")], return_tensors="pt").to("cuda") 
   outputs = model.generate(input_ids=inputs.input_ids, attention_mask=inputs.attention_mask, max_new_tokens=1200, use_cache=True) 
   response = tokenizer.batch_decode(outputs) 
   print(response[0].split("### Response:")[1])
   ```

4. **Loading and processing the dataset**
   We will slightly change the prompt style for processing the dataset by adding the third placeholder for the complex chain of thought column.

   ```python
   train_prompt_style = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request. Before answering, think carefully about the question and create a step-by-step chain of thoughts to ensure a logical and accurate response. ### Instruction: You are a medical expert with advanced knowledge in clinical reasoning, diagnostics, and treatment planning. Please answer the following medical question. ### Question: {} ### Response: <think> {} </think> {}"""
   ```

   Write the Python function that will create a "text" column in the dataset, which consists of the train prompt style. Fill the placeholders with questions, chains of text, and answers.

   ```python
   EOS_TOKEN = tokenizer.eos_token # Must add EOS_TOKEN 

   def formatting_prompts_func(examples): 
       inputs = examples["Question"] 
       cots = examples["Complex_CoT"] 
       outputs = examples["Response"] 
       texts = [] 
       for input, cot, output in zip(inputs, cots, outputs): 
           text = train_prompt_style.format(input, cot, output) + EOS_TOKEN 
           texts.append(text) 
       return { "text": texts, }
   ```

   We will load the first 500 samples from the FreedomIntelligence/medical-o1-reasoning-SFT dataset, which is available on the Hugging Face hub. After that, we will map the text column using the formatting_prompts_func function.

   ```python
   from datasets import load_dataset 
   dataset = load_dataset("FreedomIntelligence/medical-o1-reasoning-SFT","en", split="train[0:500]",trust_remote_code=True) 
   dataset = dataset.map(formatting_prompts_func, batched=True,) 
   dataset["text"][0]
   ```

5. **Setting up the model**
   Using the target modules, we will set up the model by adding the low-rank adopter to the model.

   ```python
   model = FastLanguageModel.get_peft_model(model, r=16, target_modules=[ "q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"], lora_alpha=16, lora_dropout=0, bias="none", use_gradient_checkpointing="unsloth", random_state=3407, use_rslora=False, loftq_config=None)
   ```

   Next, we will set up the training arguments and the trainer by providing the model, tokenizers, dataset, and other important training parameters that will optimize our fine-tuning process.
```
