## Finetuning a Large Language Model
##### Step-by-step guide to fine-tuning LLMs with Low-Rank Adaptation (LoRA)


In [1]:
!python3 -m pip install --upgrade pip
!pip install accelerate -U
!pip install peft
!pip install datasets
!pip install transformers[torch]
!pip install bitsandbytes
!pip install accelerate

Collecting pip
  Downloading pip-24.0-py3-none-any.whl (2.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.1/2.1 MB[0m [31m20.3 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pip
  Attempting uninstall: pip
    Found existing installation: pip 23.1.2
    Uninstalling pip-23.1.2:
      Successfully uninstalled pip-23.1.2
Successfully installed pip-24.0
Collecting accelerate
  Downloading accelerate-0.31.0-py3-none-any.whl.metadata (19 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch>=1.10.0->accelerate)
  Downloading nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.1.105 (from torch>=1.10.0->accelerate)
  Downloading nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.1.105 (from torch>=1.10.0->accelerate)
  Downloading nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl.metadata (1

In [2]:
import os
import random
import string
from datetime import datetime
from typing import List

import torch
import transformers
from datasets import Dataset
from peft import LoraConfig, PeftModel, get_peft_model
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TFAutoModelForSequenceClassification

### Import an LLM from Hugging Face

For more information about the model used here, see https://huggingface.co/Salesforce/codegen-350M-mono

In [3]:
# Hugging Face Hub id of the pretrained model that will be fine-tuned.
checkpoint = "Salesforce/codegen-350M-mono"

# Tokenizer and weights are both loaded from the same checkpoint.
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = AutoModelForCausalLM.from_pretrained(
    checkpoint,
    device_map="auto",  # let the library decide where the weights are placed
)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/999 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/797M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/240 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/798k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.11M [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/1.00k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/90.0 [00:00<?, ?B/s]

### Run an example prediction/inference

In [4]:
# Baseline check: let the pretrained model complete a prompt before any
# fine-tuning, so the result can be compared with the fine-tuned output later.
text = "def hello_world():"

# Move the encoded prompt to whatever device the model was loaded onto,
# instead of assuming a CUDA GPU is present.
inputs = tokenizer(text, return_tensors="pt").to(model.device)
completion = model.generate(**inputs)

print(tokenizer.decode(completion[0]))

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


def hello_world():
    print("Hello World")

hello_world()




### Prepare model for training

In [5]:
# Hyperparameters for the LoRA fine-tuning run; the training-related values
# are forwarded to `transformers.TrainingArguments` further down.
GRADIENT_CHECKPOINTING = False  # passed as `gradient_checkpointing`
PER_DEVICE_TRAIN_BATCH_SIZE = 1  # passed as `per_device_train_batch_size`
WARMUP_STEPS = 0  # passed as `warmup_steps`
EPOCHS = 100  # passed as `num_train_epochs`
LEARNING_RATE = 1e-5  # passed as `learning_rate`
R = 32  # LoRA rank, passed as `r` to LoraConfig
LORA_ALPHA = R  # LoRA alpha is kept equal to the rank
LORA_DROPOUT = 0.1  # dropout probability intended for the LoRA layers

In [6]:
# Print the module tree; the layer names shown here are the names that the
# LoRA target-module selection below works with.
print(model)

CodeGenForCausalLM(
  (transformer): CodeGenModel(
    (wte): Embedding(51200, 1024)
    (drop): Dropout(p=0.0, inplace=False)
    (h): ModuleList(
      (0-19): 20 x CodeGenBlock(
        (ln_1): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
        (attn): CodeGenAttention(
          (attn_dropout): Dropout(p=0.0, inplace=False)
          (resid_dropout): Dropout(p=0.0, inplace=False)
          (qkv_proj): Linear(in_features=1024, out_features=3072, bias=False)
          (out_proj): Linear(in_features=1024, out_features=1024, bias=False)
        )
        (mlp): CodeGenMLP(
          (fc_in): Linear(in_features=1024, out_features=4096, bias=True)
          (fc_out): Linear(in_features=4096, out_features=1024, bias=True)
          (act): NewGELUActivation()
          (dropout): Dropout(p=0.0, inplace=False)
        )
      )
    )
    (ln_f): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
  )
  (lm_head): Linear(in_features=1024, out_features=51200, bias=True)
)


In [6]:
def find_target_modules(model) -> List[str]:
    """
    Collect the attribute names of all linear layers in the model.

    Walks ``model.named_modules()`` and, for every ``torch.nn.Linear``
    instance (subclasses included), keeps the last component of its dotted
    name, e.g. ``"transformer.h.0.attn.qkv_proj"`` -> ``"qkv_proj"``.

    Args:
        model: The ``torch.nn.Module`` to inspect.

    Returns:
        The distinct layer names, sorted alphabetically so the result is
        the same from run to run. The unnamed root module is never included.
    """
    layer_names = {
        name.rsplit(".", 1)[-1]
        for name, module in model.named_modules()
        # Use a real type check: matching "Linear" against the string form of
        # the class would also select unrelated classes whose name merely
        # contains that word. `name` is empty only for the root module.
        if name and isinstance(module, torch.nn.Linear)
    }
    return sorted(layer_names)

# Collect the linear-layer names of the loaded model; they are handed to
# LoraConfig as `target_modules` below. The bare name displays the list.
target_modules = find_target_modules(model)
target_modules

['qkv_proj', 'lm_head', 'fc_in', 'out_proj', 'fc_out']

In [7]:
# LoRA adapter configuration built from the hyperparameter constants.
peft_config = LoraConfig(
    task_type="CAUSAL_LM",  # must equal PEFT's TaskType value exactly (case-sensitive)
    inference_mode=False,  # the adapters are going to be trained
    r=R,
    lora_alpha=LORA_ALPHA,
    lora_dropout=LORA_DROPOUT,  # use the configured constant, not a duplicate literal
    target_modules=target_modules,
)

In [8]:
# Wrap the base model with LoRA adapters exactly once. Without the guard,
# re-running this cell would stack a second PEFT wrapper on top of the first,
# because `model` is overwritten with its own wrapped version.
if not isinstance(model, PeftModel):
    model = get_peft_model(model, peft_config)

In [9]:
# Reload the tokenizer and reuse its end-of-sequence token as the padding
# token. NOTE(review): presumably needed because this tokenizer defines no
# pad token of its own — confirm.
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
tokenizer.pad_token = tokenizer.eos_token

In [10]:
# Toy training corpus: one fixed prompt/completion pair repeated, so the
# model is deliberately over-fitted to a single, easily checked target.
NUM_EXAMPLES = 3
prompts = ["def hello_world():" + "print('hello world')"] * NUM_EXAMPLES

# Wrap the texts in a `datasets.Dataset` and tokenize them in batched mode.
dataset = Dataset.from_dict({"text": prompts})
tokenized_dataset = dataset.map(lambda batch: tokenizer(batch["text"]), batched=True)

Map:   0%|          | 0/3 [00:00<?, ? examples/s]

In [11]:
# Display the raw training texts to confirm what the model will be fitted on.
dataset['text']

["def hello_world():print('hello world')",
 "def hello_world():print('hello world')",
 "def hello_world():print('hello world')"]

In [12]:
# Every run writes into its own timestamped directory below the working dir.
run_dir = os.path.join('.', datetime.now().strftime("%Y%m%d%H%M%S"))

# Training configuration assembled from the hyperparameter constants.
train_args = transformers.TrainingArguments(
    output_dir=run_dir,
    num_train_epochs=EPOCHS,
    per_device_train_batch_size=PER_DEVICE_TRAIN_BATCH_SIZE,
    learning_rate=LEARNING_RATE,
    warmup_steps=WARMUP_STEPS,
    gradient_checkpointing=GRADIENT_CHECKPOINTING,
    logging_steps=100,  # report the training loss every 100 steps
    save_total_limit=1,
)

In [13]:
# Language-modelling collator with masked-LM mode switched off (mlm=False).
collator = transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False)

# Bring the model, the tokenized data and the training arguments together.
trainer = transformers.Trainer(
    model=model,
    args=train_args,
    train_dataset=tokenized_dataset,
    data_collator=collator,
    callbacks=[],
)

### Start training

In [14]:
# Run the fine-tuning loop and keep whatever `trainer.train()` returns.
# NOTE(review): despite the variable name, the return value is presumably a
# training summary object rather than a bare loss number — confirm.
loss = trainer.train()

Step,Training Loss
100,0.9521
200,0.0008
300,0.0004


In [None]:
# Save the trained model via the trainer to the 'peft_model' path.
trainer.save_model('peft_model')

### Run an inference after fine-tuning

In [None]:
# torch.cuda.empty_cache()

In [16]:
# Repeat the prompt from the baseline check, now against the fine-tuned model.
text = "def hello_world():"

# Training leaves the model in train mode, where the LoRA dropout is still
# active; switch to eval mode so generation is not perturbed by dropout.
model.eval()

# Move the encoded prompt to the model's device instead of assuming CUDA.
inputs = tokenizer(text, return_tensors="pt").to(model.device)
completion = model.generate(**inputs, max_length=10)

print(tokenizer.decode(completion[0]))

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


def hello_world():print('hello world')
