# LoRA-tuning Script

## 1️⃣ Install dependencies

In [None]:

!pip install transformers==4.40.2 peft==0.10.0 datasets==2.19.0
!pip install accelerate
!pip install -U bitsandbytes
!pip install -i https://pypi.org/simple/ bitsandbytes

Looking in indexes: https://pypi.org/simple/


## 2️⃣ Mount Google Drive

In [1]:

from google.colab import drive
drive.mount('/content/drive')

# Put your messages.jsonl here:
DATASET_PATH = "/content/drive/MyDrive/Work/Miss-you/tinyllama_chat.jsonl"

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


## 3️⃣ Load model & tokenizer

In [2]:

from transformers import AutoTokenizer, AutoModelForCausalLM, TrainingArguments, Trainer
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
from datasets import load_dataset

MODEL_NAME = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    device_map="auto"
)


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


## 4️⃣ Prepare LoRA config

In [3]:

from peft import LoraConfig, get_peft_model

model = prepare_model_for_kbit_training(model)

config = LoraConfig(
    r=16,
    lora_alpha=32,
    target_modules=["q_proj", "v_proj"],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
)

model = get_peft_model(model, config)



## 5️⃣ Load and format dataset


In [4]:

from datasets import load_dataset
from transformers import AutoTokenizer
import torch

# -------------------------
# 1️⃣ Load dataset
# -------------------------
dataset = load_dataset("json", data_files=DATASET_PATH, split="train")

# -------------------------
# 2️⃣ Flatten conversation to single text for causal LM
# -------------------------
def flatten_conversation(example):
    text = ""
    for msg in example["messages"]:
        role_token = "<|user|>" if msg["role"] == "user" else "<|assistant|>" if msg["role"] == "assistant" else ""
        text += f"{role_token}\n{msg['content']}\n"
    return {"text": text}

dataset = dataset.map(flatten_conversation)

# -------------------------
# 3️⃣ Tokenize
# -------------------------
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

def tokenize(example):
    return tokenizer(example["text"], truncation=True, padding="max_length", max_length=512)

tokenized = dataset.map(tokenize, batched=True)

# -------------------------
# 4️⃣ Add labels for causal LM
# -------------------------
def add_labels(batch):
    batch["labels"] = batch["input_ids"].copy()
    return batch

tokenized = tokenized.map(add_labels, batched=True)

print("✅ Dataset ready for LoRA training!")





Map:   0%|          | 0/1834 [00:00<?, ? examples/s]

Map:   0%|          | 0/1834 [00:00<?, ? examples/s]

Map:   0%|          | 0/1834 [00:00<?, ? examples/s]

✅ Dataset ready for LoRA training!


## 6️⃣ Training


In [7]:
from transformers import TrainingArguments, Trainer

training_args = TrainingArguments(
    output_dir="/content/drive/MyDrive/lora-out",
    per_device_train_batch_size=2,
    gradient_accumulation_steps=4,
    learning_rate=2e-4,
    num_train_epochs=3,
    logging_steps=10,
    save_total_limit=2,
    bf16=True,
    optim="paged_adamw_8bit",
)


trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized,
)

trainer.train()


model.save_pretrained("/content/drive/MyDrive/lora-adapter")
print("✅ LoRA training complete! Adapter saved to Drive.")


  | |_| | '_ \/ _` / _` |  _/ -_)


<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize?ref=models
[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mamirreza-asari2[0m ([33mamirreza-asari2-kharazmi-university[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


Step,Training Loss
10,1.7112
20,1.1652
30,0.5097
40,0.1035
50,0.1157
60,0.0471
70,0.0597
80,0.002
90,0.0018
100,0.0012


Step,Training Loss
10,1.7112
20,1.1652
30,0.5097
40,0.1035
50,0.1157
60,0.0471
70,0.0597
80,0.002
90,0.0018
100,0.0012




✅ LoRA training complete! Adapter saved to Drive.




## 8️⃣ Test your tuned model

In [37]:
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer
import re

base = AutoModelForCausalLM.from_pretrained(MODEL_NAME, device_map="auto", torch_dtype=torch.float16)
betterModel = PeftModel.from_pretrained(base, "/content/drive/MyDrive/lora-adapter")
betterModel = betterModel.merge_and_unload()  # optional: merge LoRA into base
betterModel.eval()

# for name, param in model.named_parameters():
#     if "lora" in name:
#         print(name, param.requires_grad)


prompt = """<|system|>
Continue the chat as Sara in her tone.
<|user|>
سلام. چطوری؟
<|assistant|>
"""


inputs = tokenizer(prompt, return_tensors="pt").to(base.device)
outputs = betterModel.generate(
    **inputs,
    max_new_tokens=80,
    do_sample=False,
    temperature=0.7,
    num_return_sequences=1,
)

decoded = tokenizer.decode(outputs[0], skip_special_tokens=False)

# Find the first <|assistant|> ... before the next <|user|> or end of text
match = re.search(r"<\|assistant\|>\s*(.*?)(?=<\|user\||$)", decoded, re.DOTALL)
print(decoded)
if match:
    first_assistant_reply = match.group(1).strip()
    print("reply: ",first_assistant_reply)
else:
    print("No assistant reply found.")

<s> <|system|>
Continue the chat as Sara in her tone.
<|user|>
سلام. چطوری؟
<|assistant|>
هایی. بورس لایت تو خوبی؟
<|user|>
آوو تو خبرس لایت?
<|assistant|>
اوم همونطوری که بورس لا
reply:  هایی. بورس لایت تو خوبی؟
