<a href="https://colab.research.google.com/github/JhanviMistry/LoRA/blob/main/LoRA.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install -U torch datasets transformers peft bitsandbytes accelerate #pytorch, datasets and transformers from hugging face, peft -> parameter efficient fine tuning (LoRA), bitsandbytes to quantized the

In [None]:
import torch
from datasets import load_dataset
from peft import LoraConfig, TaskType, get_peft_model, prepare_model_for_kbit_training
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, Trainer, TrainingArguments

In [None]:
import bitsandbytes as bnb

In [None]:
# Hugging Face Hub id of the base checkpoint to fine-tune.
model_name = 'TinyLlama/TinyLlama-1.1B-Chat-v1.0'

# Load the base weights in 4-bit NF4 through bitsandbytes.
# The compute dtype is float16 so that it agrees with the fp16 mixed-precision
# setting (fp16=True) used by the TrainingArguments in this notebook; it also
# avoids depending on hardware bfloat16 support.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type='nf4',
    bnb_4bit_compute_dtype=torch.float16,
)

# trust_remote_code is intentionally left at its default (False): this checkpoint
# uses the standard Llama architecture that ships with transformers, so there is
# no need to allow execution of code downloaded from the model repository.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    device_map='auto',  # let accelerate place the weights on the available device(s)
)

tokenizer = AutoTokenizer.from_pretrained(model_name)

In [None]:
# LoRA hyper-parameters.
lora_config = LoraConfig(
    r=8,                                  # rank of the low-rank update matrices
    lora_alpha=16,                        # LoRA scaling factor
    target_modules=['q_proj', 'v_proj'],  # apply LoRA to the attention query and value projections
    lora_dropout=0.05,
    bias='none',                          # bias terms are not trained
    task_type=TaskType.CAUSAL_LM,
)

# The base model was loaded in 4-bit, so run PEFT's k-bit preparation step first:
# it freezes the base weights and up-casts the remaining half-precision parameters
# (e.g. norm layers) to float32 for numerically stable training.
# Gradient checkpointing is left off to keep the training speed/memory profile
# of this small model unchanged.
model = prepare_model_for_kbit_training(model, use_gradient_checkpointing=False)

# Wrap the prepared model so that it becomes a PEFT model with LoRA adapters attached.
model = get_peft_model(model, lora_config)

In [None]:
# Training subset: the first 200 rows of the train split of GSM8K ('main' configuration).
data = load_dataset(
    path='openai/gsm8k',
    name='main',
    split='train[:200]',
)

In [None]:
#tokenize a batch of texts
def tokenize(batch, max_length=256):
  """Turn a batch of question/answer pairs into padded causal-LM training tensors.

  Args:
    batch: mapping with parallel lists under the keys 'question' and 'answer'.
    max_length: every sequence is padded or truncated to this many tokens.

  Returns:
    The tokenizer output (including 'input_ids' and 'attention_mask') with an
    extra 'labels' entry: a copy of 'input_ids' in which every padding position
    is replaced by -100 so that the loss ignores it.
  """
  # Append an explicit end-of-sequence marker to each example. Padding is masked
  # out of the loss below, so this is the only place the model sees EOS as a target.
  eos = tokenizer.eos_token or ''
  texts = [
      f"### Instruction:\n{instruction} \n### Response:\n{out}{eos}"
      for instruction, out in zip(batch['question'], batch['answer'])
  ]

  tokens = tokenizer(
      texts,
      padding='max_length',
      max_length=max_length,
      truncation=True,
      return_tensors='pt',  # PyTorch tensors, needed for the masking below
  )

  # -100 is the ignore index of the cross-entropy loss: padding positions
  # (attention_mask == 0) must not contribute to the training signal.
  tokens['labels'] = tokens['input_ids'].masked_fill(tokens['attention_mask'] == 0, -100)

  return tokens


In [None]:
# Tokenize the dataset batch by batch; the original columns are removed so that
# only the fields returned by tokenize() remain.
tokenized_data = data.map(
    tokenize,
    batched=True,
    remove_columns=data.column_names,
)

In [None]:
# Training configuration consumed by the Trainer.
training_args = TrainingArguments(
    # Where checkpoints are written (user-chosen directory name).
    output_dir='./LoRA_tinyllama_output',

    # Optimisation
    num_train_epochs=30,
    learning_rate=2e-4,              # lower it if the loss becomes unstable
    per_device_train_batch_size=4,   # raise on a stronger GPU, drop to 1 on a weak one
    gradient_accumulation_steps=4,   # 4 x 4 = 16 examples per optimizer step on each device
    fp16=True,                       # half-precision mixed precision to save memory

    # Logging and checkpointing
    logging_steps=20,                # log every 20 steps instead of every step
    save_strategy='epoch',
    report_to='none',

    # Data handling
    remove_unused_columns=False,
    label_names=['labels'],
)

In [None]:
# Assemble the Trainer from the PEFT model, the training arguments,
# the tokenized dataset and the tokenizer.
trainer = Trainer(
    args=training_args,
    model=model,
    processing_class=tokenizer,
    train_dataset=tokenized_data,
)

In [None]:
# Run the fine-tuning loop with the Trainer built in the previous cell.
# The call is the last expression of the cell, so its return value is shown as the cell output.
trainer.train()

In [None]:
# Save the model and tokenizer to disk.
# `model` is the PEFT-wrapped model returned by get_peft_model, so this presumably
# writes only the LoRA adapter weights rather than the full base model - TODO confirm.
# NOTE(review): the tokenizer is written to './LoRA_tinyllama_', a different directory
# from the adapter and with a dangling trailing underscore - confirm this path is intended.
model.save_pretrained('./LoRA_tinyllama_tuned_adapter_model')
tokenizer.save_pretrained('./LoRA_tinyllama_')

This trained adapter can be used further...