<a href="https://colab.research.google.com/github/GusdPaula/ML_interview/blob/main/LoRA_fine_tuning.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Queen LoRA Fine-Tunning

## Importing the model

In [None]:
!pip install trl

In [39]:
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments
from peft import LoraConfig, PeftModel
from trl import SFTTrainer
import json
import torch
from datasets import load_dataset

model_name = "Qwen/Qwen2.5-0.5B-Instruct"

model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype="auto",
    device_map="auto"
)
tokenizer = AutoTokenizer.from_pretrained(model_name)




In [40]:
# Detect device
device = "mps" if torch.backends.mps.is_available() else "cuda" if torch.cuda.is_available() else "cpu"

### Function to "talk" with the model

In [41]:
def ask(prompt):
  messages = [
      {"role": "system", "content": "You are Qwen, created by Alibaba Cloud. You are a helpful assistant."},
      {"role": "user", "content": prompt}
  ]
  text = tokenizer.apply_chat_template(
      messages,
      tokenize=False,
      add_generation_prompt=True
  )
  model_inputs = tokenizer([text], return_tensors="pt").to(model.device)

  generated_ids = model.generate(
      **model_inputs,
      max_new_tokens=512
  )
  generated_ids = [
      output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
  ]

  response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]

  return response

### Checking if the model know something about the singer

In [42]:
prompt = "Who was Freddie Mercury?"

In [43]:
response = ask(prompt)

In [44]:
print(response)

Freddie Mercury (1946–2005) was the lead singer of the English rock band Queen and one of the most famous figures in popular music history. He was born Freddie William "Fred" Mercury on January 26, 1946, in Manchester, England.

Mercury rose to fame as a child with his father's love for pop music. At age 17, he began performing at local clubs and schools. By the time he was 18, he had already become a successful solo artist and an international star.

In 1963, Mercury formed Queen with Brian May and John Deacon. The band's debut album, "A Night at the Opera," became a commercial success, and it led to Mercury's rise to superstardom. Throughout their career, Mercury released over 40 studio albums and numerous singles, including hits like "Bohemian Rhapsody."

Despite his immense popularity, Mercury struggled with personal issues throughout his life. In 1991, he ended his relationship with May after 18 years of marriage, citing mental health problems. Mercury's death occurred on August 1

## Fine-tunning the model

In [7]:
# The model needs to add padding so the shape matches
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "left"

In [None]:
# Reading the data
train_dataset = load_dataset('json', data_files='train_data.json')
test_dataset = load_dataset('json', data_files='test_data.json')

import json

with open('train_data.json', 'r') as f:
    train_data_content = json.load(f)

with open('test_data.json', 'r') as f:
    test_data_content = json.load(f)

print("Train data content:")
print(json.dumps(train_data_content, indent=2))

print("\nTest data content:")
print(json.dumps(test_data_content, indent=2))

In [None]:
from transformers import DataCollatorForLanguageModeling

# Tokenize data using chat template
def preprocess_function(example):
    messages = example["messages"]
    prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=False)
    tokenized_output = tokenizer(prompt, truncation=True, padding="max_length", max_length=512)
    return {"input_ids": tokenized_output["input_ids"], "attention_mask": tokenized_output["attention_mask"]}


tokenized_train_dataset = train_dataset.map(preprocess_function, batched=False)
tokenized_eval_dataset = test_dataset.map(preprocess_function, batched=False)

In [None]:
# Define LoRA config
lora_config = LoraConfig(
    r=16,
    lora_alpha=64,
    target_modules=["q_proj", "v_proj"],
    lora_dropout=0.1,
    bias="none",
    task_type="CAUSAL_LM"
)

# Training arguments
training_args = TrainingArguments(
    per_device_train_batch_size=2,
    gradient_accumulation_steps=16,
    warmup_steps=50,
    max_steps=200,
    learning_rate=5e-5,
    logging_steps=10,
    output_dir="./qwen2.5-0.5b-finetuned",
    logging_dir="./logs"
)

# Fine-tune using SFTTrainer
trainer = SFTTrainer(
    model=model,
    train_dataset=tokenized_train_dataset['train'], # Access the 'train' split of the dataset
    eval_dataset=tokenized_eval_dataset['train'],  # Access the 'train' split of the dataset for eval
    peft_config=lora_config,
    args=training_args,
    data_collator=DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False),
)

trainer.train()

# Save adapter weights (LoRA)
trainer.model.save_pretrained("./qwen2.5-0.5b-finetuned")

# Save tokenizer
tokenizer.save_pretrained("./qwen2.5-0.5b-finetuned")

In [23]:
# Inference function
def generate_response_with_model(prompt, model, tokenizer, max_new_tokens=100, temperature=0.7, top_p=0.9):
    messages = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": prompt}
    ]
    formatted = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    inputs = tokenizer(formatted, return_tensors="pt", padding=True, return_attention_mask=True)
    input_ids = inputs["input_ids"].to(device)
    attention_mask = inputs["attention_mask"].to(device)

    with torch.no_grad():
        output = model.generate(
            input_ids=input_ids,
            attention_mask=attention_mask,
            max_new_tokens=max_new_tokens,
            temperature=temperature,
            top_p=top_p,
            do_sample=True
        )
    return tokenizer.decode(output[0], skip_special_tokens=True)

# Load base model and tokenizer
model_name = "Qwen/Qwen2.5-0.5B-Instruct"
base_model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=True).to(device)
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "left"

# Load and merge LoRA adapter
lora_path = "./qwen2.5-0.5b-finetuned"
peft_model = PeftModel.from_pretrained(base_model, lora_path, is_trainable=False)
merged_model = peft_model.merge_and_unload()



In [45]:
prompt =  "Who was Freddie Mercury?"

print("\n🔸 Fine-tuned LoRA model output:")
print(generate_response_with_model(prompt=prompt, model=merged_model, tokenizer=tokenizer))


🔸 Fine-tuned LoRA model output:
system
You are a helpful assistant.
user
Who was Freddie Mercury?
assistant
Freddie Mercury was the lead singer of the British rock band Queen, known for his powerful voice, flamboyant stage presence, and songwriting. He was born in 1946 and passed away in 1991.
