In [1]:
%%capture
# Install Unsloth from PyPI, then force-reinstall only the unsloth package itself
# (--no-deps) from the project's GitHub repository, bypassing the pip cache.
# NOTE(review): neither install is version-pinned, so a fresh run may pull a
# different Unsloth build than the one that produced the saved outputs.
!pip install unsloth
!pip install --force-reinstall --no-cache-dir --no-deps git+https://github.com/unslothai/unsloth.git

In [2]:
# Modules for fine-tuning
from unsloth import FastLanguageModel
import torch # Import PyTorch
from trl import SFTTrainer # Trainer for supervised fine-tuning (SFT)
from unsloth import is_bfloat16_supported # Checks if the hardware supports bfloat16 precision
# Hugging Face modules
from huggingface_hub import login # Lets you login to API
from transformers import TrainingArguments # Defines training hyperparameters
from datasets import load_dataset # Lets you load fine-tuning datasets
# Import weights and biases
import wandb
# Import kaggle secrets
from kaggle_secrets import UserSecretsClient

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
🦥 Unsloth Zoo will now patch everything to make training faster!


# Load keys for W&B and HuggingFace

In [4]:
# Read the Hugging Face and Weights & Biases credentials from Kaggle secrets
user_secrets = UserSecretsClient()
hugging_face_token = user_secrets.get_secret("Hugging_Face_Token")
wnb_token = user_secrets.get_secret("wnb")

# Authenticate against the Hugging Face Hub
login(hugging_face_token)

# Authenticate against Weights & Biases and open the run used for this training job
wandb.login(key=wnb_token)
run = wandb.init(
    job_type="training",
    anonymous="allow",
    project='Fine-tune-DeepSeek-R1-Distill-Llama-8B on Clarity Dataset for Clarity-AI-Agent-2',
)

[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: Currently logged in as: [33moriol_palacios[0m ([33moriol_palacios-universidad-nacional-de-san-antonio-abad-[0m). Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


# Loading Deepseek and the tokenizer

In [5]:
# Loading configuration
load_in_4bit = True  # Load the weights with 4-bit quantization to reduce memory use
dtype = None  # No explicit dtype is requested; the loader falls back to its default
max_seq_length = 2048  # Maximum number of tokens per sequence (also reused by the trainer)

# Fetch the DeepSeek R1 distilled Llama 8B checkpoint and its tokenizer through Unsloth
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name="unsloth/DeepSeek-R1-Distill-Llama-8B",
    token=hugging_face_token,  # Hugging Face access token read from Kaggle secrets
    load_in_4bit=load_in_4bit,
    dtype=dtype,
    max_seq_length=max_seq_length,
)

==((====))==  Unsloth 2025.3.14: Fast Llama patching. Transformers: 4.49.0.
   \\   /|    Tesla T4. Num GPUs = 2. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.6.0+cu124. CUDA: 7.5. CUDA Toolkit: 12.4. Triton: 3.2.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.29.post3. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


model.safetensors:   0%|          | 0.00/5.96G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/236 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/53.0k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/17.2M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/483 [00:00<?, ?B/s]

# Fine tuning the model

## Step 1: Set a system prompt

```python
train_prompt_style = """Below is an instruction describing a task. First analyze the technical requirements, then provide a secure Clarity-specific solution.
Before answering, think carefully about the question and create a step-by-step chain of thoughts to ensure a logical and accurate response.

### Instruction:
You are a Clarity blockchain engineer with 5+ years experience. Your answers must, follow Clarity's deterministic principles, prevent common vulnerabilities, use official documentation

### Question:
{}

### Response:
<think>
{}
</think>
{}"""
```

In [6]:
train_prompt_style = """Below is an instruction describing a task. First analyze the technical requirements, then provide a secure Clarity-specific solution.
Before answering, think carefully about the question and create a step-by-step chain of thoughts to ensure a logical and accurate response.

### Instruction:
You are a Clarity blockchain engineer with 5+ years experience. Your answers must, follow Clarity's deterministic principles, prevent common vulnerabilities, use official documentation

### Question:
{}

### Response:
<think>
{}
</think>
{}"""

## Step 2: Load the fine-tuning dataset

In [7]:
import os
# List the entries under /kaggle/input/ to find the dataset directory name
os.listdir('/kaggle/input/')

['dataset-v4-augmented']

In [8]:
import pandas as pd
# Read the fine-tuning CSV into a DataFrame; later cells use its
# "Question", "CoT" and "Response" columns to build the training text.
dataset = pd.read_csv('/kaggle/input/dataset-v4-augmented/Dataset-v4-augmented.csv')

In [9]:
EOS_TOKEN = tokenizer.eos_token  # End-of-sequence token; appended to every training example so the model learns when to stop generating
EOS_TOKEN

'<｜end▁of▁sentence｜>'

In [10]:
def format_row(row):
    """Build the full training text for one dataset row.

    The row's "Question", "CoT" and "Response" values are inserted, in that
    order, into ``train_prompt_style`` and ``EOS_TOKEN`` is appended to the end.

    Args:
        row: Mapping-like row (e.g. a pandas Series) exposing the keys
            "Question", "CoT" and "Response".

    Returns:
        The formatted prompt string terminated by the EOS token.
    """
    question, reasoning, answer = row["Question"], row["CoT"], row["Response"]
    prompt = train_prompt_style.format(question, reasoning, answer)
    return prompt + EOS_TOKEN

In [11]:
from datasets import Dataset

# Derive a new frame (the frame read from CSV is left untouched) that carries the
# fully formatted training prompt of every row in an extra "text" column.
dataset_df = dataset.assign(text=dataset.apply(format_row, axis=1))

# Convert the pandas frame into a Hugging Face Dataset for the trainer
dataset_finetune = Dataset.from_pandas(dataset_df)

In [12]:
# Show the formatted training text of the first example
dataset_finetune[0]["text"]

'Below is an instruction describing a task. First analyze the technical requirements, then provide a secure Clarity-specific solution.\nBefore answering, think carefully about the question and create a step-by-step chain of thoughts to ensure a logical and accurate response.\n\n### Instruction:\nYou are a Clarity blockchain engineer with 5+ years experience. Your answers must, follow Clarity\'s deterministic principles, prevent common vulnerabilities, use official documentation\n\n### Question:\nCreate a detailed Clarity smart contract for the Stacks blockchain that implements a decentralized lending protocol allowing users to deposit STX tokens, borrow against their collateral, and earn yield. The contract should include functionality for managing deposits with user-specific balances, implementing a loan system with dynamic interest calculations based on block height, and distributing yield to depositors. Incorporate a 50% loan-to-value ratio limit, a 10% interest rate, and proper err

## Step 3: Setting up the model using LoRA

In [13]:
# Attach LoRA (Low-Rank Adaptation) adapters to the loaded model
model_lora = FastLanguageModel.get_peft_model(
    model,
    # --- Adapter size and scaling ---
    r=16,  # LoRA rank
    lora_alpha=16,  # LoRA scaling factor
    lora_dropout=0,  # No dropout on the LoRA layers
    bias="none",  # No bias terms are trained
    use_rslora=False,  # Rank-Stabilized LoRA is turned off
    loftq_config=None,  # No LoftQ configuration is supplied
    # --- Projection layers that receive adapters ---
    target_modules=[
        # Self-attention projections: query, key, value, output
        "q_proj",
        "k_proj",
        "v_proj",
        "o_proj",
        # Feed-forward (MLP) projections
        "gate_proj",
        "up_proj",
        "down_proj",
    ],
    # --- Memory and reproducibility ---
    use_gradient_checkpointing="unsloth",  # Unsloth's gradient-checkpointing mode
    random_state=3407,  # Fixed seed for reproducibility
)

Unsloth 2025.3.14 patched 32 layers with 32 QKV layers, 32 O layers and 32 MLP layers.


In [14]:
# Initialize the fine-tuning trainer — Imported using from trl import SFTTrainer
trainer = SFTTrainer(
    model=model_lora,  # The LoRA-wrapped model to be fine-tuned
    tokenizer=tokenizer,  # Tokenizer to process text inputs
    train_dataset=dataset_finetune,  # Dataset used for training
    dataset_text_field="text",  # Dataset column that holds the formatted training text
    max_seq_length=max_seq_length,  # Maximum sequence length for inputs
    dataset_num_proc=2,  # Number of worker processes used for dataset preprocessing

    # Define training arguments
    args=TrainingArguments(
        per_device_train_batch_size=2,  # Number of examples processed per device (GPU) at a time
        gradient_accumulation_steps=4,  # Accumulate gradients over 4 steps before updating weights
        # Training length is controlled by max_steps alone. A positive max_steps
        # overrides num_train_epochs, so the previous num_train_epochs=1 had no
        # effect and was removed to avoid the contradictory setting.
        max_steps=60,  # Total number of optimizer steps; raise this for a longer run
        warmup_steps=5,  # Gradually increases learning rate for the first 5 steps
        learning_rate=2e-4,  # Learning rate for weight updates
        fp16=not is_bfloat16_supported(),  # Use FP16 when BF16 is not supported
        bf16=is_bfloat16_supported(),  # Use BF16 when the hardware supports it
        logging_steps=10,  # Logs training progress every 10 steps
        optim="adamw_8bit",  # 8-bit AdamW optimizer
        weight_decay=0.01,  # Weight-decay regularization
        lr_scheduler_type="linear",  # Linear learning rate schedule
        seed=3407,  # Fixed seed for reproducibility
        output_dir="outputs",  # Directory where checkpoints will be saved
        # Report metrics to the W&B run opened with wandb.init(), explicitly rather
        # than relying on the library's default choice of reporting integrations.
        report_to="wandb",
    ),
)

Unsloth: Tokenizing ["text"] (num_proc=2):   0%|          | 0/97 [00:00<?, ? examples/s]

## Step 4: Train the model

In [15]:
# Start the fine-tuning process; the value returned by train() is kept in trainer_stats
trainer_stats = trainer.train()

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 97 | Num Epochs = 10 | Total steps = 60
O^O/ \_/ \    Batch size per device = 4 | Gradient accumulation steps = 4
\        /    Data Parallel GPUs = 1 | Total batch size (4 x 4 x 1) = 16
 "-____-"     Trainable parameters = 41,943,040/8,000,000,000 (0.52% trained)


Unsloth: Will smartly offload gradients to save VRAM!


Step,Training Loss
10,1.9286
20,1.3285
30,1.024
40,0.8809
50,0.7744
60,0.7257


In [16]:
# Mark the W&B run started with wandb.init() as finished
wandb.finish()

0,1
train/epoch,▁▂▄▅▇██
train/global_step,▁▂▄▅▇██
train/grad_norm,█▂▂▁▁▁
train/learning_rate,█▇▅▄▂▁
train/loss,█▅▃▂▁▁

0,1
total_flos,7.683926376517632e+16
train/epoch,8.64
train/global_step,60.0
train/grad_norm,0.25255
train/learning_rate,0.0
train/loss,0.7257
train_loss,1.11034
train_runtime,5241.7225
train_samples_per_second,0.183
train_steps_per_second,0.011


In [17]:
# Hub repository id (used by the push cells) and local output directory
new_model_online = "OriolPalacios/DeepSeek-R1-Clarity-AI-Agent-2"
new_model_local = "DeepSeek-R1-Clarity-AI-Agent-2"
# Save the LoRA model and the tokenizer files into the same local directory
model_lora.save_pretrained(new_model_local) # Local saving
tokenizer.save_pretrained(new_model_local)

('DeepSeek-R1-Clarity-AI-Agent-2/tokenizer_config.json',
 'DeepSeek-R1-Clarity-AI-Agent-2/special_tokens_map.json',
 'DeepSeek-R1-Clarity-AI-Agent-2/tokenizer.json')

# Testing the fine tuned model

In [70]:
# Inference prompt template. It mirrors train_prompt_style up to the opening
# <think> tag so the model is prompted with the same prefix it was fine-tuned on,
# including the "Before answering, ..." line that was previously missing here.
# Two positional slots: the question, and any text to place after <think>
# (pass "" to let the model write its own reasoning).
prompt_style = """Below is an instruction describing a task. First analyze the technical requirements, then provide a secure Clarity-specific solution.
Before answering, think carefully about the question and create a step-by-step chain of thoughts to ensure a logical and accurate response.

### Instruction:
You are a Clarity blockchain engineer with 5+ years experience. Your answers must, follow Clarity's deterministic principles, prevent common vulnerabilities, use official documentation

### Question:
{}

### Response:
<think>
{}"""

In [72]:
import torch

question = """Create a Clarity smart contract on Stacks that retrieves real-time price data for a specific asset (e.g., BTC/USD) from the Pyth Network using the Stacks-Pyth Bridge. Include deployment and testing instructions."""
# Switch the fine-tuned LoRA model into Unsloth's inference mode
FastLanguageModel.for_inference(model_lora)  # Unsloth has 2x faster inference!

# Set the torch dtype correctly
model_lora.config.torch_dtype = torch.float16

# Fill the inference prompt with the question (empty <think> slot), tokenize it
# and move the tensors to the GPU
inputs = tokenizer([prompt_style.format(question, "")], return_tensors="pt").to("cuda")

# Generate a response using LoRA fine-tuned model with specific parameters
outputs = model_lora.generate(
    input_ids=inputs.input_ids,          # Tokenized input IDs
    attention_mask=inputs.attention_mask, # Attention mask for padding handling
    max_new_tokens=1200,                  # Maximum length for generated response
    use_cache=True,                       # Enable cache for efficient generation
)

# Decode the generated response from tokenized format to readable text
response = tokenizer.batch_decode(outputs)
# Print everything after the FIRST "### Response:" marker. maxsplit=1 matters:
# without it, a generation that repeats the marker would be cut off at the
# second occurrence and the rest of the answer silently dropped.
print(response[0].split("### Response:", 1)[1])



<think>
The objective is to create a Clarity smart contract on Stacks that fetches real-time price data for Bitcoin (BTC) against USD using the Pyth Network's oracle system. This entails interacting with the Pyth Oracle contract on Stacks and the Pyth Network's BTC/USD feed contract. The contract must handle the process of querying live price data, updating the contract's internal state, and allowing users to retrieve the price via a read-only function. Error handling is crucial, so both the oracle query and feed contract interaction must include error handling. The contract should also store the last price retrieved and the block height at which it was last updated. Security is paramount, so the contract must restrict access to specific addresses. The implementation should use traits like `pyth-trait` for the Pyth Oracle and `pyth-token-v1` for the BTC/USD feed. The `get-price` function will attempt to retrieve the price from the Pyth Oracle, then from the Pyth feed contract, and ret

# Pushing the model to HuggingFace

In [18]:
# Upload the LoRA model and the tokenizer to the Hub repository named by new_model_online
model_lora.push_to_hub(new_model_online) # Online saving
tokenizer.push_to_hub(new_model_online) # Online saving 

README.md:   0%|          | 0.00/632 [00:00<?, ?B/s]

  0%|          | 0/1 [00:00<?, ?it/s]

adapter_model.safetensors:   0%|          | 0.00/168M [00:00<?, ?B/s]

Saved model to https://huggingface.co/OriolPalacios/DeepSeek-R1-Clarity-AI-Agent-2


  0%|          | 0/1 [00:00<?, ?it/s]

tokenizer.json:   0%|          | 0.00/17.2M [00:00<?, ?B/s]

# Merging the model and publishing to Hugging Face

In [19]:
# Merge the LoRA weights into the base model as 16-bit weights, save the merged
# model locally, then push a merged copy to the Hub.
# NOTE(review): both calls reuse new_model_local / new_model_online, i.e. the same
# directory and Hub repository the LoRA adapter was saved and pushed to earlier.
# Confirm that keeping adapter files and merged weights in one location is intended.
model_lora.save_pretrained_merged(new_model_local, tokenizer, save_method = "merged_16bit",)
model_lora.push_to_hub_merged(new_model_online, tokenizer, save_method = "merged_16bit")

Unsloth: You have 2 CPUs. Using `safe_serialization` is 10x slower.
We shall switch to Pytorch saving, which might take 3 minutes and not 30 minutes.
To force `safe_serialization`, set it to `None` instead.
Unsloth: Kaggle/Colab has limited disk space. We need to delete the downloaded
model which will save 4-16GB of disk space, allowing you to save on Kaggle/Colab.
Unsloth: Will remove a cached repo with size 6.0G


Unsloth: Merging 4bit and LoRA weights to 16bit...
Unsloth: Will use up to 18.4 out of 31.35 RAM for saving.
Unsloth: Saving model... This might take 5 minutes ...


 34%|███▍      | 11/32 [00:00<00:01, 14.53it/s]
We will save to Disk and not RAM now.
100%|██████████| 32/32 [00:23<00:00,  1.34it/s]


Unsloth: Saving tokenizer... Done.
Unsloth: Saving DeepSeek-R1-Clarity-AI-Agent-2/pytorch_model-00001-of-00004.bin...
Unsloth: Saving DeepSeek-R1-Clarity-AI-Agent-2/pytorch_model-00002-of-00004.bin...
Unsloth: Saving DeepSeek-R1-Clarity-AI-Agent-2/pytorch_model-00003-of-00004.bin...
Unsloth: Saving DeepSeek-R1-Clarity-AI-Agent-2/pytorch_model-00004-of-00004.bin...
Done.


Unsloth: You are pushing to hub in Kaggle environment.
To save memory, we shall move OriolPalacios/DeepSeek-R1-Clarity-AI-Agent-2 to /tmp/DeepSeek-R1-Clarity-AI-Agent-2


Unsloth: Merging 4bit and LoRA weights to 16bit...
Unsloth: Will use up to 17.15 out of 31.35 RAM for saving.
Unsloth: Saving model... This might take 5 minutes ...


100%|██████████| 32/32 [00:23<00:00,  1.39it/s]


Unsloth: Saving tokenizer... Done.
Unsloth: Saving /tmp/DeepSeek-R1-Clarity-AI-Agent-2/pytorch_model-00001-of-00004.bin...
Unsloth: Saving /tmp/DeepSeek-R1-Clarity-AI-Agent-2/pytorch_model-00002-of-00004.bin...
Unsloth: Saving /tmp/DeepSeek-R1-Clarity-AI-Agent-2/pytorch_model-00003-of-00004.bin...
Unsloth: Saving /tmp/DeepSeek-R1-Clarity-AI-Agent-2/pytorch_model-00004-of-00004.bin...


  0%|          | 0/4 [00:00<?, ?it/s]

pytorch_model-00002-of-00004.bin:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

pytorch_model-00001-of-00004.bin:   0%|          | 0.00/4.98G [00:00<?, ?B/s]

pytorch_model-00003-of-00004.bin:   0%|          | 0.00/4.92G [00:00<?, ?B/s]

pytorch_model-00004-of-00004.bin:   0%|          | 0.00/1.17G [00:00<?, ?B/s]

Done.
Saved merged model to https://huggingface.co/OriolPalacios/DeepSeek-R1-Clarity-AI-Agent-2
