In [1]:
%pip install unsloth
%pip install --force-reinstall --no-cache-dir --no-deps git+https://github.com/unslothai/unsloth.git

Note: you may need to restart the kernel to use updated packages.
Collecting git+https://github.com/unslothai/unsloth.git
  Cloning https://github.com/unslothai/unsloth.git to /tmp/pip-req-build-q_73nvwa
  Running command git clone --filter=blob:none --quiet https://github.com/unslothai/unsloth.git /tmp/pip-req-build-q_73nvwa
  Resolved https://github.com/unslothai/unsloth.git to commit 2ff5dc1a8de1614994a275785b7b64fb4db8cb5d
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Building wheels for collected packages: unsloth
  Building wheel for unsloth (pyproject.toml) ... [?25l[?25hdone
  Created wheel for unsloth: filename=unsloth-2025.3.19-py3-none-any.whl size=192249 sha256=443cebfccb887677a3cd923a2d3644b86656dc1e228a75aca6ff7fe120a191a1
  Stored in directory: /tmp/pip-ephem-wheel-cache-0bqpyl0g/wheels/ed/d4/e9/76fb290ee3df0a5fc21ce5c2c788e29e9607a2353d8342fd0d


In [37]:
!pip install git+https://github.com/huggingface/transformers.git
!pip install accelerate

Collecting git+https://github.com/huggingface/transformers.git
  Cloning https://github.com/huggingface/transformers.git to /tmp/pip-req-build-35roxt4l
  Running command git clone --filter=blob:none --quiet https://github.com/huggingface/transformers.git /tmp/pip-req-build-35roxt4l
  Resolved https://github.com/huggingface/transformers.git to commit 348f3285c5114159d2ff4933b4b8ae36866d01a7
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Building wheels for collected packages: transformers
  Building wheel for transformers (pyproject.toml) ... [?25l[?25hdone
  Created wheel for transformers: filename=transformers-4.51.0.dev0-py3-none-any.whl size=11069858 sha256=1cbdd2340bc177a99b0d9eeae838393049a3858378c737140da4727a4800e6e5
  Stored in directory: /tmp/pip-ephem-wheel-cache-0ma6h558/wheels/e7/9c/5b/e1a9c8007c343041e61cc484433d512ea9274272e3fcbe7c16
Successfully b

In [30]:
import torch
from huggingface_hub import login, create_repo, upload_folder
from transformers import TrainingArguments, AutoTokenizer, AutoModelForCausalLM
from datasets import load_dataset

from kaggle_secrets import UserSecretsClient

from trl import SFTTrainer

import wandb

from unsloth import FastLanguageModel, is_bfloat16_supported

In [9]:
wandb.login(key = UserSecretsClient().get_secret("wnb_key"))
run = wandb.init(
    project = 'fine_tune_deepseek_for_customersupport',
    job_type = 'training',
    anonymous = 'allow'
)

[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: Currently logged in as: [33mtobias-pfeiffer[0m ([33mtobias-pfeiffer-capgemini[0m). Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


### Model Config

In [11]:
MAX_SEQ_LENGTH = 2048
LOAD_IN_4BIT = True # reduce size to enable local development
DTYPE = None

In [12]:
model, tokenizer = (
    FastLanguageModel
    .from_pretrained(
        model_name = 'unsloth/DeepSeek-R1-Distill-Llama-8B',
        max_seq_length = MAX_SEQ_LENGTH,
        dtype = DTYPE,
        load_in_4bit = LOAD_IN_4BIT,
        token = UserSecretsClient().get_secret("hf_key")
    )
)

==((====))==  Unsloth 2025.3.19: Fast Llama patching. Transformers: 4.50.2.
   \\   /|    Tesla T4. Num GPUs = 2. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.6.0+cu124. CUDA: 7.5. CUDA Toolkit: 12.4. Triton: 3.2.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.29.post3. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


model.safetensors:   0%|          | 0.00/5.96G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/236 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/53.0k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/17.2M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/483 [00:00<?, ?B/s]

In [17]:
system_prompt = """
You are a helpful customer support assistant. Read the inquiry and generate a professional response.

Customer: "{}"

Response:
<think>Understand the request and generate a courteous, helpful reply.
<response>"{}"
"""

load dataset for finetuning

In [14]:
dataset = load_dataset("AabirDey/job-queries-and-customer-service")
EOS_TOKEN = tokenizer.eos_token

README.md:   0%|          | 0.00/36.0 [00:00<?, ?B/s]

train.csv:   0%|          | 0.00/15.0M [00:00<?, ?B/s]

test.csv:   0%|          | 0.00/3.77M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/23551 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/5888 [00:00<?, ? examples/s]

In [15]:
# adjust data for system prompt

def format_for_finetuning(example):
    _inputs = example['instruction']
    _outputs = example['output']
    formatted = []
    for i, j in zip(_inputs, _outputs):
        formatted_text = system_prompt.format(i, j) + EOS_TOKEN
        formatted.append(formatted_text)
    return {"text": formatted}

In [18]:
finetune_dataset = dataset.map(format_for_finetuning, batched=True)

# check the output of format_for_finetuning()
print(finetune_dataset['train']['text'][0])

Map:   0%|          | 0/23551 [00:00<?, ? examples/s]

Map:   0%|          | 0/5888 [00:00<?, ? examples/s]


You are a helpful customer support assistant. Read the inquiry and generate a professional response.

Customer: "would it be possible to file a damn customer reclamation?"

Response:
<think>Understand the request and generate a courteous, helpful reply.
<response>"I apologize for any frustration or inconvenience you may have experienced. I understand that you would like to file a customer reclamation. Our company takes customer complaints seriously, and I assure you that I will do my best to assist you with this matter. To better understand your situation, could you please provide me with more details about the issue you are facing? This will help me guide you through the process and ensure that your concerns are addressed appropriately. Thank you for reaching out to us, and we appreciate your patience as we work towards a resolution."
<｜end▁of▁sentence｜>


### Finetuning & LoRA

https://huggingface.co/blog/Andyrasika/finetune-unsloth-qlora

In [19]:
model_lora = (
    FastLanguageModel
    .get_peft_model(
        model,
        r = 16,
        target_modules = [
            "q_proj",
            "k_proj",
            "v_proj",
            "o_proj",
            "gate_proj",
            "up_proj",
            "down_proj"
        ],
        lora_alpha = 16,
        lora_dropout = 0,
        bias = "none",
        use_gradient_checkpointing = 'unsloth',
        random_state = 420,
        use_rslora = False,
        loftq_config = None
    )
)

Unsloth 2025.3.19 patched 32 layers with 32 QKV layers, 32 O layers and 32 MLP layers.


In [20]:
from trl import SFTTrainer

In [21]:
trainer = SFTTrainer(
    model = model_lora,
    tokenizer = tokenizer,
    train_dataset = finetune_dataset["train"],
    dataset_text_field = "text",
    max_seq_length = MAX_SEQ_LENGTH,
    dataset_num_proc = 2,

    args = TrainingArguments(
        per_device_train_batch_size = 2,
        gradient_accumulation_steps = 4,
        num_train_epochs = 1,
        warmup_steps = 5,
        max_steps = 50,
        learning_rate = 2e-4,
        fp16 = not is_bfloat16_supported(),
        bf16 = is_bfloat16_supported(),
        logging_steps = 10,
        optim = "adamw_8bit",
        weight_decay = 0.01,
        lr_scheduler_type = "linear",
        seed = 420,
        output_dir = "outputs"
    )
)

Unsloth: Tokenizing ["text"] (num_proc=2):   0%|          | 0/23551 [00:00<?, ? examples/s]

In [22]:
trainer_stats = trainer.train()

wandb.finish()

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 23,551 | Num Epochs = 1 | Total steps = 50
O^O/ \_/ \    Batch size per device = 4 | Gradient accumulation steps = 4
\        /    Data Parallel GPUs = 1 | Total batch size (4 x 4 x 1) = 16
 "-____-"     Trainable parameters = 41,943,040/8,000,000,000 (0.52% trained)


Unsloth: Will smartly offload gradients to save VRAM!


Step,Training Loss
10,2.1613
20,1.2264
30,1.0439
40,0.9684
50,0.9376


0,1
train/epoch,▁▃▅▆██
train/global_step,▁▃▅▆██
train/grad_norm,█▃▁▃▂
train/learning_rate,█▆▅▃▁
train/loss,█▃▂▁▁

0,1
total_flos,9450549998616576.0
train/epoch,0.03397
train/global_step,50.0
train/grad_norm,0.47531
train/learning_rate,0.0
train/loss,0.9376
train_loss,1.26749
train_runtime,748.5969
train_samples_per_second,1.069
train_steps_per_second,0.067


### Saving Model

In [25]:
login(UserSecretsClient().get_secret("hf_key"))

repo_name = "tobinho1234/deep-seek-lora-customer-support"
local_model_path = "/kaggle/working/outputs/checkpoint-50"

create_repo(repo_name, exist_ok = True)
upload_folder(
    repo_id = repo_name,
    folder_path = local_model_path,
    path_in_repo = ".",
    commit_message = "Upload LoRA fine-tuned adapter and tokenizer"
)

scaler.pt:   0%|          | 0.00/988 [00:00<?, ?B/s]

optimizer.pt:   0%|          | 0.00/85.7M [00:00<?, ?B/s]

Upload 7 LFS files:   0%|          | 0/7 [00:00<?, ?it/s]

rng_state.pth:   0%|          | 0.00/14.2k [00:00<?, ?B/s]

adapter_model.safetensors:   0%|          | 0.00/168M [00:00<?, ?B/s]

scheduler.pt:   0%|          | 0.00/1.06k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/17.2M [00:00<?, ?B/s]

training_args.bin:   0%|          | 0.00/5.62k [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/tobinho1234/deep-seek-lora-customer-support/commit/571efcaed4ae5db5b3225c519a58779c9b3ffb94', commit_message='Upload LoRA fine-tuned adapter and tokenizer', commit_description='', oid='571efcaed4ae5db5b3225c519a58779c9b3ffb94', pr_url=None, repo_url=RepoUrl('https://huggingface.co/tobinho1234/deep-seek-lora-customer-support', endpoint='https://huggingface.co', repo_type='model', repo_id='tobinho1234/deep-seek-lora-customer-support'), pr_revision=None, pr_num=None)