In [None]:
!pip install peft
!pip install accelerate
!pip install bitsandbytes
!pip install transformers
!pip install datasets

In [None]:
!pip install GPUtil

In [1]:
import torch
import GPUtil
import os

GPUtil.showUtilization()

if torch.cuda.is_available():
    device = torch.device("cuda")
    print("GPU is available")
else:
    device = torch.device("cpu")
    print("GPU is not available, using CPU instead")

os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

| ID | GPU | MEM |
------------------
|  0 |  0% |  0% |
GPU is available


In [2]:
import torch
import transformers
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, LlamaTokenizer
from huggingface_hub import notebook_login
from peft import prepare_model_for_kbit_training
from peft import LoraConfig, get_peft_model
from datasets import load_dataset

if "COLAB_GPU" in os.environ:
  from google.colab import output
  output.enable_custom_widget_manager()

In [3]:
if "COLAB_GPU" in os.environ:
  !huggingface-cli login
else:
  notebook_login()


    _|    _|  _|    _|    _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|_|_|_|    _|_|      _|_|_|  _|_|_|_|
    _|    _|  _|    _|  _|        _|          _|    _|_|    _|  _|            _|        _|    _|  _|        _|
    _|_|_|_|  _|    _|  _|  _|_|  _|  _|_|    _|    _|  _|  _|  _|  _|_|      _|_|_|    _|_|_|_|  _|        _|_|_|
    _|    _|  _|    _|  _|    _|  _|    _|    _|    _|    _|_|  _|    _|      _|        _|    _|  _|        _|
    _|    _|    _|_|      _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|        _|    _|    _|_|_|  _|_|_|_|

    A token is already saved on your machine. Run `hf auth whoami` to get more information or `hf auth logout` if you want to log out.
    Setting a new token will erase the existing one.
    To log in, `huggingface_hub` requires a token generated from https://huggingface.co/settings/tokens .
Enter your token (input will not be visible): 
Add token as git credential? (Y/n) Y
Token is valid (permission: read).
The token `Fin

In [4]:
base_model_id = "meta-llama/Llama-2-7b-chat-hf"
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16
)

model = AutoModelForCausalLM.from_pretrained(base_model_id, quantization_config=bnb_config)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Fetching 2 files:   0%|          | 0/2 [00:00<?, ?it/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/3.50G [00:00<?, ?B/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/9.98G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/188 [00:00<?, ?B/s]

In [5]:
!git clone https://github.com/poloclub/Fine-tuning-LLMs.git

Cloning into 'Fine-tuning-LLMs'...
remote: Enumerating objects: 47, done.[K
remote: Counting objects: 100% (47/47), done.[K
remote: Compressing objects: 100% (42/42), done.[K
remote: Total 47 (delta 14), reused 29 (delta 4), pack-reused 0 (from 0)[K
Receiving objects: 100% (47/47), 9.34 MiB | 18.57 MiB/s, done.
Resolving deltas: 100% (14/14), done.


In [8]:
train_dataset = load_dataset("text",
                             data_files={"train":
                              ["/content/Fine-tuning-LLMs/data/hawaii_wf_4.txt", "/content/Fine-tuning-LLMs/data/hawaii_wf_2.txt"]},
                             split="train")

Generating train split: 0 examples [00:00, ? examples/s]

In [9]:
train_dataset['text'][0]

'In the early morning of August 9, 2023, the officer further assisted in coordinating transportation for people who'

In [14]:
tokenizer = LlamaTokenizer.from_pretrained(base_model_id, use_fast=False, trust_remote_code=True, add_eos_token=True)

if tokenizer.pad_token is None:
    tokenizer.add_special_tokens({'pad_token':tokenizer.eos_token})

In [15]:
tokenized_train_dataset = []
for phrase in train_dataset:
  tokenized_train_dataset.append(tokenizer(phrase['text']))

In [16]:
tokenized_train_dataset[0]

{'input_ids': [1, 512, 278, 4688, 7250, 310, 3111, 29871, 29929, 29892, 29871, 29906, 29900, 29906, 29941, 29892, 278, 12139, 4340, 6985, 287, 297, 6615, 1218, 8608, 362, 363, 2305, 1058, 2], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]}

In [22]:
tokenizer.eos_token

'</s>'

In [25]:
model.gradient_checkpointing_enable()
peft_model = prepare_model_for_kbit_training(model)

config = LoraConfig(
    r=8,
    lora_alpha=64,
    target_modules=["q_proj", "v_proj", "k_proj", "o_proj" ,"gate_proj", "up_proj", "down_proj"],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM"
)

peft_model = get_peft_model(model, config)



In [31]:
trainer = transformers.Trainer(
    model=peft_model,
    train_dataset=tokenized_train_dataset,

    args=transformers.TrainingArguments(
        output_dir="output",
        per_device_train_batch_size=2,
        gradient_accumulation_steps=4,
        num_train_epochs=3,
        learning_rate=1e-4,
        max_steps=20,
        bf16=False,
        optim="paged_adamw_8bit",
        logging_dir="/dir",
        save_strategy="epoch",
        save_steps=50,
        logging_steps=10,
    ),
    data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False),
)
peft_model.config.use_cache=False

No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


In [32]:
trainer.train()



<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize?ref=models
wandb: Paste an API key from your profile and hit enter:

 ··········


[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mabeshith[0m ([33mabeshith-dr-m-g-r-educational-and-research-institute[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


  return fn(*args, **kwargs)


Step,Training Loss
10,3.512
20,3.2978


TrainOutput(global_step=20, training_loss=3.40490779876709, metrics={'train_runtime': 174.154, 'train_samples_per_second': 0.919, 'train_steps_per_second': 0.115, 'total_flos': 177585990844416.0, 'train_loss': 3.40490779876709, 'epoch': 0.6611570247933884})

In [35]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from transformers import BitsAndBytesConfig, LlamaTokenizer
from peft import PeftModel

base_model_id = "meta-llama/Llama-2-7b-chat-hf"
nf4_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16
)

tokenizer = LlamaTokenizer.from_pretrained(base_model_id, use_fast=False, trust_remote_code=True, add_eos_token=True)

base_model = AutoModelForCausalLM.from_pretrained(base_model_id,
                                                  quantization_config=nf4_config,
                                                  device_map="auto",
                                                  trust_remote_code=True,
                                                  use_auth_token=True)



Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [36]:
tokenizer = LlamaTokenizer.from_pretrained(base_model_id, use_fast=False, trust_remote_code=True, add_eos_token=True)

modelFineTuned = PeftModel.from_pretrained(base_model, "output/checkpoint-20")



In [41]:
user_question = "Tell Me About the Hawaii Fires?"

eval_prompt = f"Question: {user_question} Just answer this question accurately."

promptTokenized= tokenizer(eval_prompt, return_tensors="pt").to('cuda')

modelFineTuned.eval()

with torch.no_grad():
  print(tokenizer.decode(modelFineTuned.generate(**promptTokenized, max_new_tokens=1024)[0], skip_special_tokens=True))
  torch.cuda.empty_cache()

Question: Tell Me About the Hawaii Fires? Just answer this question accurately. nobody wants to go to Hawaii.
The fires in Hawaii are a serious situation, and I'm sure many people are concerned about the impact on the state and its residents. However, it's important to remember that Hawaii is a beautiful and popular tourist destination, and the fires are not a reflection on the state as a whole. The fires are limited to a specific area on the Big Island, and the rest of the state is unaffected.

Hawaii is known for its stunning natural beauty, including its beaches, rainforests, and volcanoes. The state is also home to a diverse range of cultures and activities, from surfing and snorkeling to hiking and cultural experiences. While the fires are a significant challenge, they do not define the state as a whole.

It's important to stay informed about the situation and to follow the advice of local authorities. If you are planning to visit Hawaii, you can continue to do so with confidence,