In [None]:
! pip install trl



In [None]:
! pip install -U bitsandbytes



# **Loading Datasets**

# **2. iamtarun/python_code_instructions_18k_alpaca**

* code + explanation

In [None]:
from datasets import load_dataset
alpaca_python = load_dataset(
    "iamtarun/python_code_instructions_18k_alpaca",
    split="train"
)


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


In [None]:
alpaca_python

Dataset({
    features: ['instruction', 'input', 'output', 'prompt'],
    num_rows: 18612
})

In [None]:
alpaca_python  = alpaca_python.shuffle(seed=42).select(range(4000))

In [None]:
def process_iamtarun_alpaca(example):
    # When batched=True, example['instruction'] and example['input'] are lists
    combined_inputs = [
        instr + '\n' + inp for instr, inp in zip(example['instruction'], example['input'])
    ]
    return {
        "input": combined_inputs,
        "output": example["output"]
    }
alpaca_python = alpaca_python.map(process_iamtarun_alpaca, batched=True, remove_columns=alpaca_python.column_names)

In [None]:
alpaca_python

Dataset({
    features: ['input', 'output'],
    num_rows: 4000
})

# **teknium/OpenHermes-2.5**

In [None]:
hermes_dataset = load_dataset(
    "teknium/OpenHermes-2.5",
    split="train"
)

In [None]:
hermes_dataset = hermes_dataset.shuffle(seed=42).select(range(2000))

In [None]:
def convert_openhermes_to_input_output(examples):
    inputs = []
    outputs = []

    for conv in examples["conversations"]:
        user_parts = []
        assistant_reply = None

        for turn in conv:
            if turn["from"] == "human":
                user_parts.append(turn["value"].strip())
            elif turn["from"] == "gpt":
                assistant_reply = turn["value"].strip()

        # Join multiple human turns into a single string, separated by newlines
        inputs.append('\n'.join(user_parts))
        outputs.append(assistant_reply)

    return {
        "input": inputs,
        "output": outputs
    }


hermes_dataset = hermes_dataset.map(convert_openhermes_to_input_output,batched=True, remove_columns=hermes_dataset.column_names)

In [None]:
hermes_dataset['output'][0]

"After each iteration, the perimeter of the Sierpinski arrowhead curve increases by a factor of 2. Let's calculate the perimeter after the third iteration:\n\n1st iteration:\nPerimeter = 6 cm * 2 = 12 cm\n\n2nd iteration:\nPerimeter = 12 cm * 2 = 24 cm\n\n3rd iteration:\nPerimeter = 24 cm * 2 = 48 cm\n\nSo, the perimeter of the Sierpinski arrowhead curve after the third iteration is 48 cm."

# **Merging All Dataset**

In [None]:
from datasets import concatenate_datasets

final_dataset = concatenate_datasets(
    [hermes_dataset, alpaca_python]
).shuffle(seed=42)


In [None]:
final_dataset

Dataset({
    features: ['input', 'output'],
    num_rows: 6000
})

# **Train-Test Split**

In [None]:
dataset = final_dataset.train_test_split(test_size=0.2)

In [None]:
dataset

DatasetDict({
    train: Dataset({
        features: ['input', 'output'],
        num_rows: 4800
    })
    test: Dataset({
        features: ['input', 'output'],
        num_rows: 1200
    })
})

# **Applying Chat-Template**

In [None]:
def convert_to_chatml(examples):
    batch_messages = []
    for  input, output in zip(examples['input'],examples["output"]):
        batch_messages.append(

            [
                {"role": "user", "content": input},
                {"role": "assistant", "content": output},
            ]
        )
    return {"messages": batch_messages}

In [None]:
dataset = dataset.map(convert_to_chatml, batched=True, remove_columns=['output', 'input'])

Map:   0%|          | 0/4800 [00:00<?, ? examples/s]

Map:   0%|          | 0/1200 [00:00<?, ? examples/s]

In [None]:
dataset

DatasetDict({
    train: Dataset({
        features: ['messages'],
        num_rows: 4800
    })
    test: Dataset({
        features: ['messages'],
        num_rows: 1200
    })
})

# **Tokenizer**

In [None]:
from transformers import AutoTokenizer, AutoModelForCausalLM

model_name =  "deepseek-ai/deepseek-coder-1.3b-instruct"


tokenizer = AutoTokenizer.from_pretrained(model_name)



In [None]:
dataset['train']['messages'][0]

[{'content': 'Optimize the given code in Python to reduce its complexity\nmy_list = [1, 2, 3, 4, 5]\ntotal = 0\n\nfor x in my_list:\n    total = total + x',
  'role': 'user'},
 {'content': 'my_list = [1, 2, 3, 4, 5]\ntotal = sum(my_list)\n\nprint("Sum of all elements in given list:", total)',
  'role': 'assistant'}]

# **Quantizing Model using QLoRA**

In [None]:
from transformers import BitsAndBytesConfig
import torch

bnb_config = BitsAndBytesConfig(
    load_in_4bit = True,
    bnb_4bit_use_double_quant = True,
    bnb_4bit_quant_type = "nf4",
    bnb_4bit_compute_dtype = torch.bfloat16
)

In [None]:
model = AutoModelForCausalLM.from_pretrained(model_name, quantization_config=bnb_config, device_map="auto")

#device_map="auto" automatically spreads the model across available devices  to avoid memory issues.

In [None]:
#Reduces GPU memory usage during training by not storing intermediate activations.
model.gradient_checkpointing_enable()

In [None]:
from peft import prepare_model_for_kbit_training

model = prepare_model_for_kbit_training(model)

# **Fine-tuning using SFT & Lora**

In [None]:
from peft import LoraConfig

lora_config = LoraConfig(
    r=4,
    lora_alpha=8,
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
    target_modules=["q_proj", "v_proj", "o_proj"]
)

In [None]:
from trl import SFTConfig, SFTTrainer




training_args = SFTConfig(
    output_dir="./qlora_model",
    num_train_epochs = 1,
    per_device_train_batch_size=1,
    gradient_accumulation_steps = 16,

    learning_rate=2e-4,
    lr_scheduler_type="cosine",
    warmup_ratio=0.05,

    logging_steps=50,
    save_steps=50,      # or even 25
    save_total_limit=2,


    bf16 = True
)

In [None]:

trainer = SFTTrainer(
    model=model,
    args=training_args,
    peft_config=lora_config,
    train_dataset=dataset["train"],
    processing_class=tokenizer,
    formatting_func=None
)

Tokenizing train dataset:   0%|          | 0/4800 [00:00<?, ? examples/s]

Truncating train dataset:   0%|          | 0/4800 [00:00<?, ? examples/s]

In [None]:
trainer.train()

The tokenizer has new PAD/BOS/EOS tokens that differ from the model config and generation config. The model config and generation config were aligned accordingly, being updated with the tokenizer's values. Updated tokens: {'pad_token_id': 32014}.
  | |_| | '_ \/ _` / _` |  _/ -_)
[34m[1mwandb[0m: (1) Create a W&B account
[34m[1mwandb[0m: (2) Use an existing W&B account
[34m[1mwandb[0m: (3) Don't visualize my results
[34m[1mwandb[0m: Enter your choice:

 3


[34m[1mwandb[0m: You chose "Don't visualize my results"
[34m[1mwandb[0m: Using W&B in offline mode.
[34m[1mwandb[0m: W&B API key is configured. Use [1m`wandb login --relogin`[0m to force relogin


Step,Training Loss
50,1.4016
100,0.9492
150,0.8847
200,0.9069
250,0.8798
300,0.9257




TrainOutput(global_step=300, training_loss=0.9913159688313802, metrics={'train_runtime': 6202.6295, 'train_samples_per_second': 0.774, 'train_steps_per_second': 0.048, 'total_flos': 1.1328849257877504e+16, 'train_loss': 0.9913159688313802})

# **Merging LoRA Adapter & Model**

In [None]:
import torch
from transformers import AutoModelForCausalLM
from peft import PeftModel

# 1. Load the base model
base_model = AutoModelForCausalLM.from_pretrained(
    model_name, torch_dtype=torch.float16, device_map="auto"
)

# 2. Load the PEFT model with adapter from the correct checkpoint directory
peft_model = PeftModel.from_pretrained(
    base_model, "./qlora_model/checkpoint-300", torch_dtype=torch.float16
)

# 3. Merge adapter weights with base model
merged_model = peft_model.merge_and_unload()

# **Uploading on HF Hub**

In [None]:
from huggingface_hub import notebook_login

notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [None]:
from huggingface_hub import Repository

repo = Repository("PyCoder-QLoRA-v1", clone_from="Hhsjsnns/PyCoder-QLoRA-v1")

For more details, please read https://huggingface.co/docs/huggingface_hub/concepts/git_vs_http.
Cloning https://huggingface.co/Hhsjsnns/PyCoder-QLoRA-v1 into local empty directory.


In [None]:
# Save both model and tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_name)
merged_model.save_pretrained("PyCoder-QLoRA-v1")
tokenizer.save_pretrained("PyCoder-QLoRA-v1")

('PyCoder-QLoRA-v1/tokenizer_config.json',
 'PyCoder-QLoRA-v1/special_tokens_map.json',
 'PyCoder-QLoRA-v1/chat_template.jinja',
 'PyCoder-QLoRA-v1/tokenizer.json')

In [None]:
repo.git_add()
repo.git_commit("Add model and tokenizer files")
repo.git_push()

Upload file model.safetensors:   0%|          | 1.00/2.51G [00:00<?, ?B/s]

To https://huggingface.co/Hhsjsnns/PyCoder-QLoRA-v1
   e413e32..159fa7a  main -> main

   e413e32..159fa7a  main -> main



'https://huggingface.co/Hhsjsnns/PyCoder-QLoRA-v1/commit/159fa7a61f32d06c056bb7bd2a8a9f7f69454962'

# **Testing**

In [2]:
from transformers import AutoTokenizer, AutoModelForCausalLM
model_name="Hhsjsnns/PyCoder-QLoRA-v1"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/462 [00:00<?, ?B/s]

chat_template.jinja: 0.00B [00:00, ?B/s]

config.json:   0%|          | 0.00/746 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/2.69G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/119 [00:00<?, ?B/s]

In [7]:
prompt = "Write a function that calculates the sum of numbers in a list"

inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

outputs = model.generate(
    **inputs,
    max_new_tokens=150,
    temperature=0.7,

)

print(tokenizer.decode(outputs[0], skip_special_tokens=True))


Setting `pad_token_id` to `eos_token_id`:32021 for open-end generation.


Write a function that calculates the sum of numbers in a list.

def sum_numbers(numbers):
    total = 0
    for number in numbers:
        total += number
    return total

# Test the function
numbers = [1, 2, 3, 4, 5]
print(sum_numbers(numbers))  # Output: 15

# Test the function with a negative number
numbers = [1, -2, 3, -4, 5]
print(sum_numbers(numbers))  # Output: 3

# Test the function with a large number
numbers = [1] * 1000000
print


In [9]:
prompt = "What is python?"

inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

outputs = model.generate(
    **inputs,
    max_new_tokens=150,
    temperature=0.7,

)

print(tokenizer.decode(outputs[0], skip_special_tokens=True))

Setting `pad_token_id` to `eos_token_id`:32021 for open-end generation.


What is python?


A: Python is a high-level, interpreted programming language that is designed to be easy to read and write. It is developed by Guido van Rossum and released in 1991. Python is used in many areas of computing, including web development, data analysis, machine learning, artificial intelligence, and more.

