In [None]:
#sft dataset download

from huggingface_hub import snapshot_download

snapshot_download(repo_id="deepmind/code_contests", repo_type="dataset")


Fetching 44 files: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 44/44 [01:20<00:00,  1.84s/it]


'/home/bart/.cache/huggingface/hub/datasets--deepmind--code_contests/snapshots/802411c3010cb00d1b05bad57ca77365a3c699d6'

In [1]:
from transformers import AutoTokenizer
from datasets import load_dataset

dataset = load_dataset("deepmind/code_contests")
tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2.5-3b-Instruct")

def is_python(example):
    return 3 in example["solutions"]["language"]

python_dataset = dataset["train"].filter(is_python)

def preprocessing_sft(example):
    langs = example["solutions"]["language"]
    py_idx = langs.index(3)

    target_code = example["solutions"]["solution"][py_idx]
    messages = [
        {"role": "system", "content": "You are a competitive programming expert."},
        {"role": "user", "content": f"Solve this: {example['description']}"},
        {"role": "assistant", "content": target_code}
    ]
    text = tokenizer.apply_chat_template(messages, tokenize=False)
    return {"text": text}

final_dataset = python_dataset.map(preprocessing_sft)

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
#dataset check
#print(final_dataset[0]['text'])
print(final_dataset[0].keys())

dict_keys(['name', 'description', 'public_tests', 'private_tests', 'generated_tests', 'source', 'difficulty', 'solutions', 'incorrect_solutions', 'cf_contest_id', 'cf_index', 'cf_points', 'cf_rating', 'cf_tags', 'is_description_translated', 'untranslated_description', 'time_limit', 'memory_limit_bytes', 'input_file', 'output_file', 'text'])


In [3]:
#tests tomorrow!!
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from peft import LoraConfig
from trl import SFTTrainer, SFTConfig

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype="float16",
    bnb_4bit_use_double_quant=True,
)

model = AutoModelForCausalLM.from_pretrained(
    "Qwen/Qwen2.5-3b-Instruct",
    quantization_config=bnb_config,
    device_map="auto"
)

peft_config = LoraConfig(
    r=16,
    lora_alpha=32,
    target_modules=["q_proj", "v_proj", "up_proj", "gate_proj", "down_proj"],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM"
)



training_config = SFTConfig(
    output_dir="./qwen_sft_results",
    report_to="wandb",
    logging_steps=10,
    per_device_train_batch_size=4,
    learning_rate=1e-4,
    dataset_text_field="text",
    gradient_accumulation_steps=1,
    max_length=512,
    gradient_checkpointing=True,
    bf16=True,
    fp16=False
)

trainer = SFTTrainer(
    model=model,
    train_dataset=final_dataset, 
    args=training_config,
    peft_config=peft_config,
    processing_class=tokenizer    
)

trainer.train()
trainer.save_model("./final_qwen_model")

Loading checkpoint shards: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 2/2 [00:01<00:00,  1.09it/s]
The tokenizer has new PAD/BOS/EOS tokens that differ from the model config and generation config. The model config and generation config were aligned accordingly, being updated with the tokenizer's values. Updated tokens: {'bos_token_id': None, 'pad_token_id': 151643}.
[34m[1mwandb[0m: Currently logged in as: [33mj0hny0xx[0m ([33mj0hny0xx-bt[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


Step,Training Loss
10,1.2171
20,0.7939
30,0.7412
40,0.7545
50,0.691
60,0.7134
70,0.6955
80,0.7087
90,0.7726
100,0.6745


In [25]:
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from peft import PeftModel
import torch

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype="float16",
    bnb_4bit_use_double_quant=True,
)

model_id = "Qwen/Qwen2.5-3b-Instruct"
checkpoint_path = "../datasets_sft/qwen_sft_results/checkpoint-24417/"

tokenizer = AutoTokenizer.from_pretrained(model_id)
base_model = AutoModelForCausalLM.from_pretrained(
    model_id, 
    torch_dtype=torch.bfloat16,
    device_map="auto"
)

model = PeftModel.from_pretrained(base_model, checkpoint_path)
model = model.to("cuda")
model.eval()

Loading checkpoint shards: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 2/2 [00:00<00:00,  3.41it/s]


PeftModelForCausalLM(
  (base_model): LoraModel(
    (model): Qwen2ForCausalLM(
      (model): Qwen2Model(
        (embed_tokens): Embedding(151936, 2048)
        (layers): ModuleList(
          (0-35): 36 x Qwen2DecoderLayer(
            (self_attn): Qwen2Attention(
              (q_proj): lora.Linear(
                (base_layer): Linear(in_features=2048, out_features=2048, bias=True)
                (lora_dropout): ModuleDict(
                  (default): Dropout(p=0.05, inplace=False)
                )
                (lora_A): ModuleDict(
                  (default): Linear(in_features=2048, out_features=8, bias=False)
                )
                (lora_B): ModuleDict(
                  (default): Linear(in_features=8, out_features=2048, bias=False)
                )
                (lora_embedding_A): ParameterDict()
                (lora_embedding_B): ParameterDict()
                (lora_magnitude_vector): ModuleDict()
              )
              (k_proj): Linear(in_feat

In [35]:
messages = [
    {
        "role": "system",
        "content": "You are a professional Python developer. Write clean, efficient, and well-documented code."     
    },
    {
        "role": "user",
        "content": "Write a Python script that uses a custom decorator to log the execution time of a function, but the decorator must handle both synchronous and asynchronous functions."
    }
]

text = tokenizer.apply_chat_template(
    messages, 
    tokenize=False,
    add_generation_prompt=True
)

model_inputs = tokenizer([text], return_tensors="pt").to("cuda")

generated_ids = model.generate(
    **model_inputs,
    max_new_tokens=10000, 
    temperature=0.1,
    do_sample=True, 
    pad_token_id=tokenizer.eos_token_id
)

response_ids = [
    output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)   
]


response = tokenizer.batch_decode(response_ids, skip_special_tokens=True)[0]
print("output: ")
print(response)

output: 
Certainly! To create a Python script that logs the execution time of both synchronous and asynchronous functions using a custom decorator, we can use the `time` module for timing and the `functools` module to handle decorators. We'll also use the `asyncio` library for handling asynchronous functions.

Here's a step-by-step guide:

1. **Install asyncio if not already installed**:
   ```bash
   pip install asyncio
   ```

2. **Create a Python script with the custom decorator**:

```python
import functools
import time
import asyncio

# Custom decorator to measure execution time
def log_execution_time(func):
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        start_time = time.time()
        
        # Call the function
        result = func(*args, **kwargs)
        
        end_time = time.time()
        elapsed_time = end_time - start_time
        
        print(f"Function '{func.__name__}' executed in {elapsed_time:.4f} seconds")
        
        return result


In [36]:
model.print_trainable_parameters()
print(f" Active adapter: {model.active_adapter}")
print(f"ðŸ“‚ Available adapters: {list(model.peft_config.keys())}")

trainable params: 1,843,200 || all params: 3,087,781,888 || trainable%: 0.0597
 Active adapter: default
ðŸ“‚ Available adapters: ['default']


In [37]:
# --- TEST 1: WITHOUT YOUR TRAINING ---
with model.disable_adapter():
    output_base = model.generate(**model_inputs, max_new_tokens=100)
    print("BASE QWEN OUTPUT:", tokenizer.decode(output_base[0], skip_special_tokens=True))

print("-" * 30)

# --- TEST 2: WITH YOUR TRAINING ---
# This ensures the 'default' adapter is active
model.set_adapter("default") 
output_ft = model.generate(**model_inputs, max_new_tokens=100)
print("YOUR MODEL OUTPUT:", tokenizer.decode(output_ft[0], skip_special_tokens=True))

BASE QWEN OUTPUT: system
You are a professional Python developer. Write clean, efficient, and well-documented code.
user
Write a Python script that uses a custom decorator to log the execution time of a function, but the decorator must handle both synchronous and asynchronous functions.
assistant
To create a Python script with a custom decorator that logs the execution time of both synchronous and asynchronous functions, we can use Python's `functools.wraps` for function metadata preservation and `time` module to measure the time. We will also utilize the `asyncio` library to handle asynchronous functions.

First, ensure you have Python 3.7 or later installed as it includes the `functools.update_wrapper` which is used in the example below. If you're using an older
------------------------------
YOUR MODEL OUTPUT: system
You are a professional Python developer. Write clean, efficient, and well-documented code.
user
Write a Python script that uses a custom decorator to log the execution 

In [40]:
# Create inputs
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to("cuda")
input_len = model_inputs.input_ids.shape[1]

# 1. Generate with Base Model
with model.disable_adapter():
    base_ids = model.generate(**model_inputs, max_new_tokens=30, do_sample=False)
    base_response = tokenizer.decode(base_ids[0][input_len:], skip_special_tokens=True)

# 2. Generate with Your Model
model.set_adapter("default")
ft_ids = model.generate(**model_inputs, max_new_tokens=30, do_sample=False)
ft_response = tokenizer.decode(ft_ids[0][input_len:], skip_special_tokens=True)

print(f"BASE MODEL: {base_response.strip()}")
print("-" * 20)
print(f"YOUR MODEL: {ft_response.strip()}")

BASE MODEL: To create a Python script that logs the execution time of both synchronous and asynchronous functions using a custom decorator, we can use the `time` module for
--------------------
YOUR MODEL: Certainly! To create a Python script that logs the execution time of both synchronous and asynchronous functions using a custom decorator, we can use the `time`
