In [1]:
!pip install peft bitsandBytes accelerate transformers datasets

Collecting bitsandBytes
  Downloading bitsandbytes-0.47.0-py3-none-manylinux_2_24_x86_64.whl.metadata (11 kB)
Downloading bitsandbytes-0.47.0-py3-none-manylinux_2_24_x86_64.whl (61.3 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.3/61.3 MB[0m [31m12.4 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: bitsandBytes
Successfully installed bitsandBytes-0.47.0


In [2]:
!pip install GPUtil

Collecting GPUtil
  Downloading GPUtil-1.4.0.tar.gz (5.5 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: GPUtil
  Building wheel for GPUtil (setup.py) ... [?25l[?25hdone
  Created wheel for GPUtil: filename=GPUtil-1.4.0-py3-none-any.whl size=7392 sha256=b4ad4f1d59c0b7475a015d83475a91517db0dcd48187e00581232ced6cae3e0d
  Stored in directory: /root/.cache/pip/wheels/92/a8/b7/d8a067c31a74de9ca252bbe53dea5f896faabd25d55f541037
Successfully built GPUtil
Installing collected packages: GPUtil
Successfully installed GPUtil-1.4.0


In [3]:
import torch
import GPUtil
import os
GPUtil.showUtilization()

if torch.cuda.is_available():
  print("GPU is available")
else:
  print("GPU is not available")

os.environ['CUDA_DEVICE_ORDER']='PCI_BUS_ID'
os.environ['CUDA_VISIBLE_DEVICES']='0'

| ID | GPU | MEM |
------------------
|  0 |  0% |  0% |
GPU is available


In [4]:
import transformers
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, LlamaTokenizer
from huggingface_hub import notebook_login
from datasets import load_dataset
from peft import prepare_model_for_kbit_training, LoraConfig, get_peft_model
if 'colab_gpu' in os.environ:
  from google.colab import output
  output.enable_custom_widget_manager()

In [6]:
base_model_id='meta-llama/llama-2-7b-chat-hf'
bnb_config=BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type='nf4',
    bnb_4bit_compute_dtype=torch.bfloat16
)
model=AutoModelForCausalLM.from_pretrained(
    base_model_id,
    quantization_config=bnb_config,
    token='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'
)

config.json:   0%|          | 0.00/614 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/26.8k [00:00<?, ?B/s]

Fetching 2 files:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/9.98G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/3.50G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/188 [00:00<?, ?B/s]

In [7]:
!git clone https://github.com/poloclub/Fine-tuning-LLMs.git

Cloning into 'Fine-tuning-LLMs'...
remote: Enumerating objects: 47, done.[K
remote: Counting objects: 100% (47/47), done.[K
remote: Compressing objects: 100% (42/42), done.[K
remote: Total 47 (delta 14), reused 29 (delta 4), pack-reused 0 (from 0)[K
Receiving objects: 100% (47/47), 9.34 MiB | 38.25 MiB/s, done.
Resolving deltas: 100% (14/14), done.


In [8]:
train_dataset=load_dataset(
    'text',
    data_files={'train':
                        ['/content/Fine-tuning-LLMs/data/hawaii_wf_1.txt','/content/Fine-tuning-LLMs/data/hawaii_wf_2.txt','/content/Fine-tuning-LLMs/data/hawaii_wf_3.txt']},split='train'
)

Generating train split: 0 examples [00:00, ? examples/s]

In [13]:
train_dataset['text'][11]

'There were multiple simultaneous fires on the island, with the most significant occurrence in Lahaina where 100 people would lose their lives.'

In [30]:
tokenizer=LlamaTokenizer.from_pretrained(
    base_model_id,
    use_fast=False,
    trust_remote_code=True,
    add_eos_token=True
)
if tokenizer.pad_token is None:
  tokenizer.add_special_tokens({'pad_token':tokenizer.eos_token})

In [31]:
tokenized_train_dataset=[]
for phrase in train_dataset:
  tokenized_train_dataset.append(tokenizer(phrase['text']))

In [32]:
tokenized_train_dataset[1]

{'input_ids': [1, 21122, 29979, 8079, 14861, 3120, 2], 'attention_mask': [1, 1, 1, 1, 1, 1, 1]}

In [33]:
model.gradient_checkpointing_enable()
model=prepare_model_for_kbit_training(model)

config=LoraConfig(
    r=8,
    lora_alpha=64,
    target_modules=['q_proj','k_proj','v_proj','o_proj','gate_proj','down_proj','out_proj'],
    bias='none',
    lora_dropout=0.05,
    task_type='CAUSAL_LM'
)
model=get_peft_model(model,config)



In [34]:
trainer= transformers.Trainer(
    model=model,
    train_dataset=tokenized_train_dataset,
    args=transformers.TrainingArguments(
        output_dir='./finetunedModel',
        per_device_train_batch_size=2,
        gradient_accumulation_steps=2,
        num_train_epochs=3,
        learning_rate=1e-4,
        max_steps=100,
        bf16=False,
        optim='paged_adamw_8bit',
        logging_dir='./log',
        save_strategy='epoch',
        save_steps=50,
        logging_steps=10

    ),
    data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False),
)
model.config.use_cache=False

In [35]:
trainer.train()

  return fn(*args, **kwargs)


Step,Training Loss
10,3.4345
20,3.0031
30,2.6696
40,2.6256
50,2.5245
60,2.5042
70,2.3625
80,2.0631
90,2.0773
100,1.9169


  return fn(*args, **kwargs)


TrainOutput(global_step=100, training_loss=2.518130130767822, metrics={'train_runtime': 437.6847, 'train_samples_per_second': 0.914, 'train_steps_per_second': 0.228, 'total_flos': 438976797622272.0, 'train_loss': 2.518130130767822, 'epoch': 1.474074074074074})

In [36]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import BitsAndBytesConfig
from peft import PeftModel

base_model_id='meta-llama/Llama-2-7b-chat-hf'
nf4Config=BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type='nf4',
    bnb_4bit_compute_dtype=torch.bfloat16
)
tokenizer=LlamaTokenizer.from_pretrained(base_model_id,use_fast=False,trust_remote_code=True, add_eos_token=True)
base_model=AutoModelForCausalLM.from_pretrained(base_model_id,
                                                quantization_config=nf4Config,
                                                device_map='auto',
                                                trust_remote_code=True,
                                                use_auth_token=True)





Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [37]:
modelfinetuned=PeftModel.from_pretrained(base_model,'finetunedModel/checkpoint-100')



In [39]:
user_question='According to the most recent census, what is the population of Lahaina?'
eval_prompt=f'question: {user_question} Just answer this question accuratly and concisely'
prompt_tokenized=tokenizer(eval_prompt,return_tensors='pt').to('cuda')
modelfinetuned.eval()
with torch.no_grad():
  print(tokenizer.decode(modelfinetuned.generate(**prompt_tokenized,max_new_tokens=1024)[0],skip_special_tokens=True))
  torch.cuda.empty_cache()

question: According to the most recent census, what is the population of Lahaina? Just answer this question accuratly and concisely...and the winner is...
The population of Lahaina, according to the 2020 census, is approximately 27,000 people.
