## Import necessary libraries

In [1]:
import json
import os
from pprint import pprint

import bitsandbytes as bnb
import pandas as pd
import torch
import torch.nn as nn
import transformers
from datasets import load_dataset, concatenate_datasets, load_from_disk
from trl import DPOConfig, DPOTrainer

from peft import (
    LoraConfig,
    PeftConfig,
    PeftModel,
    get_peft_model,
    prepare_model_for_kbit_training,
)
from transformers import (
    AutoConfig,
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
)

  from .autonotebook import tqdm as notebook_tqdm


## Model Loading & Quantization with BitsAndBytes

In [2]:
MODEL_NAME = "Qwen/Qwen2.5-0.5B"

# Configure BitsAndBytes for 4-bit quant"ization
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,                   # Enable 4-bit quantization
    bnb_4bit_use_double_quant=True,      # Use double quantization for stability
    bnb_4bit_quant_type="nf4",           # Specify the quantization type (nf4)
    bnb_4bit_compute_dtype=torch.float16 # Use float16 for computation efficiency
)

# Load the pre-trained model with quantization configuration
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    device_map="auto",         # Automatically map model layers to available devices
    trust_remote_code=True,    # Trust the remote code if needed (e.g., custom architectures)
    quantization_config=bnb_config,
)


# Enable gradient checkpointing to reduce memory usage
model.gradient_checkpointing_enable()



# Load the tokenizer corresponding to the model
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
# Set the pad token to be the same as the end-of-sequence token
tokenizer.pad_token = tokenizer.eos_token

In [3]:
model.config

Qwen2Config {
  "_attn_implementation_autoset": true,
  "_name_or_path": "Qwen/Qwen2.5-0.5B",
  "architectures": [
    "Qwen2ForCausalLM"
  ],
  "attention_dropout": 0.0,
  "bos_token_id": 151643,
  "eos_token_id": 151643,
  "hidden_act": "silu",
  "hidden_size": 896,
  "initializer_range": 0.02,
  "intermediate_size": 4864,
  "max_position_embeddings": 32768,
  "max_window_layers": 24,
  "model_type": "qwen2",
  "num_attention_heads": 14,
  "num_hidden_layers": 24,
  "num_key_value_heads": 2,
  "quantization_config": {
    "_load_in_4bit": true,
    "_load_in_8bit": false,
    "bnb_4bit_compute_dtype": "float16",
    "bnb_4bit_quant_storage": "uint8",
    "bnb_4bit_quant_type": "nf4",
    "bnb_4bit_use_double_quant": true,
    "llm_int8_enable_fp32_cpu_offload": false,
    "llm_int8_has_fp16_weight": false,
    "llm_int8_skip_modules": null,
    "llm_int8_threshold": 6.0,
    "load_in_4bit": true,
    "load_in_8bit": false,
    "quant_method": "bitsandbytes"
  },
  "rms_norm_eps": 1e-

In [4]:
print(model.__class__)

<class 'transformers.models.qwen2.modeling_qwen2.Qwen2ForCausalLM'>


## Configuring LoRA:

In [5]:
# Define a LoRA configuration
lora_config = LoraConfig(
    r=16,                    # Low-rank dimension; a higher value gives more capacity
    lora_alpha=16,           # Scaling factor for LoRA layers
    lora_dropout=0.05,       # Dropout applied to the LoRA layers (helps prevent overfitting)
    task_type="CAUSAL_LM",   # Specify that we are fine-tuning a causal language model
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj",
                      "gate_proj", "up_proj", "down_proj",],
    bias="none",
    use_rslora = False,  # We support rank stabilized LoRA
)

# Prepare the model for k-bit training (this is needed for quantized models)
model = get_peft_model(prepare_model_for_kbit_training(model), lora_config)

In [6]:
model.config.max_position_embeddings

32768

## Process meta-math/MetaMathQA Dataset

In [None]:
meta_math_dataset = load_dataset("meta-math/MetaMathQA",  split="train")

In [7]:
meta_math_dataset

Dataset({
    features: ['type', 'query', 'original_question', 'response'],
    num_rows: 395000
})

In [8]:
def format_meta_math_convo_batch(examples):
    texts = []
    # 'examples' is a dict with keys "query" and "response" whose values are lists
    for query, response in zip(examples["query"], examples["response"]):
        convo = [
            {"role": "user", "content": query.strip()},
            {"role": "assistant", "content": response.strip()}
        ]
        # Convert the conversation into a unified text string using the chat template.
        text = tokenizer.apply_chat_template(convo, tokenize=False, add_generation_prompt=False)
        texts.append(text)
    return {"text": texts}

meta_math_dataset_formatted = meta_math_dataset.map(format_meta_math_convo_batch, batched=True)


# Print a formatted example to inspect the output
print("\nMetaMathQA - Formatted Example:")
print(meta_math_dataset_formatted[0]["text"])


MetaMathQA - Formatted Example:
<|im_start|>system
You are a helpful assistant.<|im_end|>
<|im_start|>user
Gracie and Joe are choosing numbers on the complex plane. Joe chooses the point $1+2i$. Gracie chooses $-1+i$. How far apart are Gracie and Joe's points?<|im_end|>
<|im_start|>assistant
The distance between two points $(x_1,y_1)$ and $(x_2,y_2)$ in the complex plane is given by the formula $\sqrt{(x_2-x_1)^2+(y_2-y_1)^2}$.
In this case, Joe's point is $(1,2)$ and Gracie's point is $(-1,1)$.
So the distance between their points is $\sqrt{((-1)-(1))^2+((1)-(2))^2}=\sqrt{(-2)^2+(-1)^2}=\sqrt{4+1}=\sqrt{5}$.
Therefore, Gracie and Joe's points are $\boxed{\sqrt{5}}$ units apart.
The answer is: \sqrt{5}<|im_end|>



## Process TIGER-Lab/MathInstruct Dataset

In [9]:
math_instruct_dataset = load_dataset("TIGER-Lab/MathInstruct", split="train")

In [10]:
math_instruct_dataset

Dataset({
    features: ['source', 'output', 'instruction'],
    num_rows: 262039
})

In [11]:
def format_math_instruct_convo(example):
    # Here, 'instruction' is the question and 'output' is the answer.
    convo = [
        {"role": "user", "content": example["instruction"].strip()},
        {"role": "assistant", "content": example["output"].strip()}
    ]
    text = tokenizer.apply_chat_template(convo, tokenize=False, add_generation_prompt=False)
    return {"text": text}

math_instruct_dataset_formatted = math_instruct_dataset.map(format_math_instruct_convo, batched=False)
print("\nTIGER-Lab/MathInstruct - Formatted Example:")
print(math_instruct_dataset_formatted[0]["text"])


TIGER-Lab/MathInstruct - Formatted Example:
<|im_start|>system
You are a helpful assistant.<|im_end|>
<|im_start|>user
The distance between two stars is 6.52 × 10^5 light years. What is the distance between the two stars in parsecs? (1 parsec = 3.26 light years)
Answer Choices: (A) 2 × 10^5 (B) 4 × 10^6 (C) 5 × 10^7 (D) 7 × 10^7 (E) 9 × 10^8<|im_end|>
<|im_start|>assistant
Let's think about the multi-choice question.
6.52 × 10^5 ly / (3.26 ly/parsec) = 2 x 10^5 persec
The answer is A.<|im_end|>



## Process openchat/ultrachat-sharegpt Dataset

In [12]:
ultrachat_dataset = load_dataset("openchat/ultrachat-sharegpt", split="train")

In [13]:
ultrachat_dataset

Dataset({
    features: ['id', 'conversations'],
    num_rows: 1468352
})

In [14]:
def standardize_sharegpt(
    dataset,
    aliases_for_system    = ["system",],
    aliases_for_user      = ["user", "human", "input",],
    aliases_for_assistant = ["gpt", "assistant", "output",],
):
    """
    Standardizes ShareGPT and other formats to user/assistant Hugging Face format.
    
    Get aliases for the system, user and assistant roles.
    These shall map to "system", "user" and "assistant" respectively.
    
    aliases_for_system    = ["system",],
    aliases_for_user      = ["user", "human", "input",],
    aliases_for_assistant = ["gpt", "assistant", "output",],
    """
    import collections
    import itertools

    convos = dataset[:10]["conversations"]
    uniques = collections.defaultdict(list)
    for convo in convos:
        for message in convo:
            for key, value in message.items():
                uniques[key].append(value)
    pass

    # Must be only 2 entries
    assert(len(uniques.keys()) == 2)

    keys = list(uniques.keys())
    length_first  = len(set(uniques[keys[0]]))
    length_second = len(set(uniques[keys[1]]))

    if length_first < length_second:
        # Role is assigned to the first element
        role_key    = keys[0]
        content_key = keys[1]
    else:
        role_key    = keys[1]
        content_key = keys[0]
    pass

    # Check roles are in aliases
    all_aliases = set(aliases_for_system + aliases_for_user + aliases_for_assistant)
    roles = set(uniques[role_key])
    leftover_aliases = (all_aliases | roles) - all_aliases
    if len(leftover_aliases) != 0:
        raise TypeError(
            f"Unsloth: {list(leftover_aliases)} are not in aliases. Please update aliases."
        )
    pass

    # Mapping for aliases
    aliases_mapping = {}
    for x in aliases_for_system:    aliases_mapping[x] = "system"
    for x in aliases_for_user:      aliases_mapping[x] = "user"
    for x in aliases_for_assistant: aliases_mapping[x] = "assistant"

    def _standardize_dataset(examples):
        convos = examples["conversations"]
        all_convos = []
        for convo in convos:
            new_convo = [
                { "role" : aliases_mapping[message[role_key]], "content" : message[content_key], }
                for message in convo
            ]
            all_convos.append(new_convo)
        pass
        return { "conversations" : all_convos, }
    pass

    return dataset.map(_standardize_dataset, batched = True, desc = "Standardizing format")
pass

In [15]:
ultrachat_dataset = standardize_sharegpt(ultrachat_dataset)

In [16]:
def format_ultrachat_convo(example):
    text = tokenizer.apply_chat_template(example["conversations"], tokenize=False, add_generation_prompt=False)
    return {"text": text}


ultrachat_dataset_formatted = ultrachat_dataset.map(format_ultrachat_convo, batched=False)
print("\nopenchat/ultrachat-sharegpt - Formatted Example:")
print(ultrachat_dataset_formatted[0]["text"])


openchat/ultrachat-sharegpt - Formatted Example:
<|im_start|>system
You are a helpful assistant.<|im_end|>
<|im_start|>user
Here is a piece of text: Canadian Rx Store: Viagra on paypal open 24 hours!!
Viagra on paypal - There are many causes varying from a variety of organ-specific syndromes viagra on paypal can develop: 1. Ulceroglandular/glandular tularemia and necessitates re-treatment. But troublesome persistent cough a cough or angioedema), this is an uncommon presenting symptom.
Contraindications to traction include open fractures, peripheral on viagra paypal how to split cialis in half vascular disease. Fractures of the unit contains: Paediatric tracheostomy tube self-retaining tourniquet intravenous infusion 25/500 min headache, tachycardia, insomnia, increased cough, tension, depression, cervical dysfunction, vascular headache , cervical spondylosis, tumours, polymyalgia rheumatica, although the ex-pression :C :Cv vc0v tytiv ttitcuoiv occurs in response to a deep breath and w

## concatenate datasets

In [None]:
def keep_text_only(dataset):
    # Remove all columns except for "text"
    cols_to_remove = [col for col in dataset.column_names if col != "text"]
    if cols_to_remove:
        dataset = dataset.remove_columns(cols_to_remove)
    return dataset

meta_math_dataset_formatted = keep_text_only(meta_math_dataset_formatted)
math_instruct_dataset_formatted = keep_text_only(math_instruct_dataset_formatted)
ultrachat_dataset_formatted = keep_text_only(ultrachat_dataset_formatted)

# Combine the three datasets into one unified dataset for instruction tuning.
combined_dataset = concatenate_datasets([
    meta_math_dataset_formatted,
    math_instruct_dataset_formatted,
    ultrachat_dataset_formatted,
])
combined_dataset = combined_dataset.shuffle(seed=42)


In [18]:
len(combined_dataset)

2125391

In [19]:
combined_dataset[0]["text"]

'<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n<|im_start|>user\nCalculate the value of 6 divided by 0.\\overline{6}.<|im_end|>\n<|im_start|>assistant\nLet $x = 0.\\overline{6}$.\nThen $10x = 6.\\overline{6}$.\nSubtracting the first equation from the second equation, we have $10x - x = 6.\\overline{6} - 0.\\overline{6}$.\nThis simplifies to $9x = 6$.\nDividing both sides by 9, we get $x = \\frac{6}{9} = \\frac{2}{3}$.\nTherefore, $6 \\div 0.\\overline{6} = \\frac{6}{\\frac{2}{3}} = \\frac{6}{1} \\cdot \\frac{3}{2} = \\boxed{9}$.\nThe answer is: 9<|im_end|>\n'

In [None]:
# Save the processed dataset to a directory
combined_dataset.save_to_disk("processed_dataset_dir")


In [7]:
combined_dataset = load_from_disk("processed_dataset_dir")

## Fine‑Tune the Model on the Combined Dataset

In [8]:
def tokenize_function(example):
    # Do NOT pad here—let the data collator handle dynamic padding.
    return tokenizer(example["text"], padding=True, truncation=True)

tokenized_dataset = combined_dataset.map(tokenize_function, batched=True, remove_columns=["text"])


In [9]:
tokenized_dataset

Dataset({
    features: ['input_ids', 'attention_mask'],
    num_rows: 2125391
})

In [10]:
from transformers import TrainingArguments, Trainer, DataCollatorForSeq2Seq
# Because the combined_dataset is huge (over 2 million examples),
# we'll limit training by setting max_steps instead of iterating over the entire dataset.
# We also set up a saving strategy to save checkpoints every N steps.

training_args = TrainingArguments(
    output_dir="./fine_tuned_qwen_model",        # Directory for saving checkpoints
    per_device_train_batch_size=2,                # Use a small batch size due to memory limits
    gradient_accumulation_steps=4,                # Accumulate gradients to simulate a larger batch
    num_train_epochs=1,                           # Epoch count can be 1 when using max_steps
    max_steps=200,                                # For testing, train for only 200 steps.
    learning_rate=2e-4,                           # Starting learning rate
    warmup_steps=500,                             # Gradually increase LR over 500 steps
    logging_steps=50,                             # Log training metrics every 50 steps for testing.
    save_steps=5000,                              # Save a checkpoint every 5000 steps
    save_total_limit=3,                           # Only keep the most recent 3 checkpoints
    optim="adamw_8bit",                           # Use memory-efficient AdamW optimizer in 8-bit
    weight_decay=0.01,                            # Regularization to help prevent overfitting
    lr_scheduler_type="linear",                   # Linearly decay learning rate over time
    fp16=True,                                    # Use mixed-precision training for speed & memory
    seed=3407,                                    # Set a random seed for reproducibility
    report_to="none"                              # Change to "tensorboard" to monitor training with TensorBoard
)

data_collator = DataCollatorForSeq2Seq(tokenizer=tokenizer)

# Then pass this collator to your Trainer:
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset,
    data_collator=data_collator,
)


In [11]:
def _longest_common_sublist(lists):
    if not lists: 
        return []
    min_len = min(len(lst) for lst in lists)
    if min_len == 0: 
        return []
    def has_common_sublist(length):
        common = set()
        first = lists[0]
        for i in range(len(first) - length + 1):
            sub = tuple(first[i:i + length])
            common.add(sub)
        for lst in lists[1:]:
            current = set()
            for i in range(len(lst) - length + 1):
                sub = tuple(lst[i:i + length])
                if sub in common:
                    current.add(sub)
            common = current
            if not common:
                return False, []
        return True, list(common.pop())
    left, right = 1, min_len
    result = []
    while left <= right:
        mid = left + (right - left) // 2
        exists, sublist = has_common_sublist(mid)
        if exists:
            result = sublist
            left = mid + 1
        else:
            right = mid - 1
    return result

def _find_common_token_ids(component, tokenizer):
    right_text = " " if component.endswith(" ") else ("\n" if component.endswith("\n") else "")
    left_text = " " if component.startswith(" ") else ("\n" if component.startswith("\n") else "")
    stripped = component.strip()
    all_input_ids = []
    for left in range(3):
        for right in range(3):
            x = left * left_text + stripped + right * right_text
            tokenized = tokenizer(x, add_special_tokens=False).input_ids
            all_input_ids.append(tokenized)
            x = left * "\n" + stripped + right * "\n"
            tokenized = tokenizer(x, add_special_tokens=False).input_ids
            all_input_ids.append(tokenized)
    substring = _longest_common_sublist([x + [0] for x in all_input_ids])
    if substring == [0] and len(all_input_ids[0]) == 1:
        single_token = all_input_ids[0][0]
        if all(single_token in x for x in all_input_ids):
            substring = [single_token]
    for j in range(len(tokenizer(component, add_special_tokens=False).input_ids)):
        if tokenizer(component, add_special_tokens=False).input_ids[j: j + len(substring)] == substring:
            break
    optional_left = tokenizer(component, add_special_tokens=False).input_ids[:j]
    optional_right = tokenizer(component, add_special_tokens=False).input_ids[j + len(substring):]
    return substring, optional_left, optional_right

def train_on_responses_only(trainer, instruction_part=None, response_part=None):
    if trainer.tokenizer is None:
        trainer.tokenizer = tokenizer
    _tokenizer = trainer.tokenizer
    if instruction_part is None or response_part is None:
        raise ValueError("Unsloth: instruction_part and response_part must be provided!")
    Q_must, Q_left, Q_right = _find_common_token_ids(instruction_part, _tokenizer)
    A_must, A_left, A_right = _find_common_token_ids(response_part, _tokenizer)
    A_first = A_must[0]
    len_A_must = len(A_must)
    A_left_reversed = A_left[::-1]
    A_right_forward = A_right
    Q_first = Q_must[0]
    len_Q_must = len(Q_must)
    Q_left_reversed = Q_left[::-1]
    Q_right_forward = Q_right
    def _mask_labels(examples):
        input_ids_list = examples["input_ids"]
        new_labels = []
        for input_ids in input_ids_list:
            n = len(input_ids)
            labels = [-100] * n
            j = 0
            while j < n:
                if (input_ids[j] == A_first) and (input_ids[j: j + len_A_must] == A_must):
                    while j > 0 and any(opt == input_ids[j-1] for opt in A_left_reversed):
                        j -= 1
                    assistant_start = j
                    j += len_A_must
                    while j < n and any(opt == input_ids[j] for opt in A_right_forward):
                        j += 1
                    k = j
                    while k < n:
                        if (input_ids[k] == Q_first) and (input_ids[k: k + len_Q_must] == Q_must):
                            break
                        k += 1
                    for idx in range(assistant_start, k):
                        labels[idx] = input_ids[idx]
                    j = k
                else:
                    j += 1
            new_labels.append(labels)
        return {"labels": new_labels}
    trainer.train_dataset = trainer.train_dataset.map(_mask_labels, batched=True)
    if hasattr(trainer, "eval_dataset") and trainer.eval_dataset is not None:
        if isinstance(trainer.eval_dataset, dict):
            for key, value in trainer.eval_dataset.items():
                trainer.eval_dataset[key] = value.map(_mask_labels, batched=True)
        else:
            trainer.eval_dataset = trainer.eval_dataset.map(_mask_labels, batched=True)
    return trainer

In [12]:
instruction_marker = "<|im_start|>user"
response_marker = "<|im_start|>assistant"

trainer = train_on_responses_only(
    trainer,
    instruction_part=instruction_marker,
    response_part=response_marker
)

Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use `Trainer.processing_class = processing_class` instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.


In [13]:
print("*" * 80)
print(tokenizer.decode(trainer.train_dataset[1]["input_ids"]))
print("*" * 80)
space = tokenizer(" ", add_special_tokens = False).input_ids[0]
print(tokenizer.decode([space if x == -100 else x for x in trainer.train_dataset[1]["labels"]]))
print("*" * 80)

********************************************************************************
<|im_start|>system
You are a helpful assistant.<|im_end|>
<|im_start|>user
8 persons a, b, c, ... and h are to be lined up in a row. If a, b, and c adhere always together, how many possible cases are there?
Answer Choices: (A) 24 (B) 36 (C) 480 (D) 1720 (E) 4320<|im_end|>
<|im_start|>assistant
Let's solve the multi-choice question step by step.
firstly it is not GEOMETRY but COMBINATIONS, so changing the TAG accordingly..
total 8 are there..
take a,b, and c as one,so total =6..
these 6 can be arranged in 6! ways..
a,b, and c can be arranged within themselves in 3! ways ..
TOTAL = 6!*3!=4320
The answe is E<|im_end|>
<|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftex

In [14]:
print("*" * 80)
print(tokenizer.decode(trainer.train_dataset[5]["input_ids"]))
print("*" * 80)
space = tokenizer(" ", add_special_tokens = False).input_ids[0]
print(tokenizer.decode([space if x == -100 else x for x in trainer.train_dataset[5]["labels"]]))
print("*" * 80)

********************************************************************************
<|im_start|>system
You are a helpful assistant.<|im_end|>
<|im_start|>user
Let A = {1, 2, 3, 4} and B = {2, 3, 4, 5}. Find (A Δ B) ∪ C where C = {3, 4, 5, 6} and Δ symbolizes the symmetric difference operator.<|im_end|>
<|im_start|>assistant
First, we need to find the symmetric difference (A Δ B) between sets A and B. The symmetric difference is the set of elements which are in either of the sets A or B but not in their intersection.

A = {1, 2, 3, 4}
B = {2, 3, 4, 5}

Intersection of A and B: {2, 3, 4}

Now, we find the elements in A or B but not in their intersection:

A Δ B = {1, 5}

Next, we need to find the union of the symmetric difference (A Δ B) and set C:

A Δ B = {1, 5}
C = {3, 4, 5, 6}

The union of two sets is the set containing all elements from both sets:

(A Δ B) ∪ C = {1, 3, 4, 5, 6}

So, the final result is:

(A Δ B) ∪ C = {1, 3, 4, 5, 6}<|im_end|>
<|endoftext|><|endoftext|><|endoftext|><|

In [15]:
print("*" * 80)
print(tokenizer.decode(trainer.train_dataset[10]["input_ids"]))
print("*" * 80)
space = tokenizer(" ", add_special_tokens = False).input_ids[0]
print(tokenizer.decode([space if x == -100 else x for x in trainer.train_dataset[10]["labels"]]))
print("*" * 80)

********************************************************************************
<|im_start|>system
You are a helpful assistant.<|im_end|>
<|im_start|>user
Please write a detailed product review on a popular kitchen appliance, detailing your experience with the appliance, its features, ease of use, maintenance, and if it is worth the investment. Be sure to use a conversational style to engage the reader and provide honest feedback on both the pros and cons of the appliance. Please also include any tips or additional information that may be helpful for potential buyers.<|im_end|>
<|im_start|>assistant
I recently purchased a Cuisinart food processor for my kitchen, and I must say I am thoroughly impressed with its performance. Before buying this appliance, I did a lot of research to ensure that I was investing in a quality product that would serve me well for many years. So far, the Cuisinart food processor has exceeded my expectations in every way.

One of the most striking features of 

In [16]:
model.config.use_cache = False

In [17]:
trainer.train()

  return fn(*args, **kwargs)


Step,Training Loss
50,8.297
100,0.3226
150,0.2889
200,0.2608


TrainOutput(global_step=200, training_loss=2.2923299169540403, metrics={'train_runtime': 6537.5113, 'train_samples_per_second': 0.245, 'train_steps_per_second': 0.031, 'total_flos': 1.33411748499456e+16, 'train_loss': 2.2923299169540403, 'epoch': 0.0007528023065862673})

In [22]:
trainer.save_model("./fine_tuned_qwen_model")


### Generate a response

In [24]:
model = AutoModelForCausalLM.from_pretrained(
    "fine_tuned_qwen_model",
    device_map="auto"
)
tokenizer = AutoTokenizer.from_pretrained("fine_tuned_qwen_model")


In [32]:
convo = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "assistant", "content": "What equipment do I need for rock climbing?"}
]
prompt = tokenizer.apply_chat_template(convo, tokenize=False, add_generation_prompt=False)

prompt


'<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n<|im_start|>assistant\nWhat equipment do I need for rock climbing?<|im_end|>\n'

In [33]:
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
outputs = model.generate(
    input_ids=inputs.input_ids,
    attention_mask=inputs.attention_mask,
    max_new_tokens=256,
    do_sample=True,
    temperature=0.7
)
generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)


Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


In [34]:
print(generated_text)

system
You are a helpful assistant.
assistant
What equipment do I need for rock climbing?
涞s
For rock climbing, you typically need the following equipment:

1. Climbing harness: This is a harness that you wear on your back to help control the flow of the rope. It should be sturdy and have a secure attachment.

2. A climbing rope: This is the main part of the climbing equipment that you will use to climb over rocks and other obstacles.

3. A safety harness: This helps to prevent you from getting injured or falling off during a climb.

4. A safety helmet: This helps to protect your head from falling rocks or other objects during a climb.

5. A climbing safety rope: This is a longer rope that you attach to the climbing harness, and it should be secure in order to prevent you from slipping or being pulled off the climbing wall.

6. A climbing ladder: This is a temporary climbing wall that can be set up on the ground or a rock face. It can be useful for exploring different climbing routes o