<a href="https://colab.research.google.com/github/JuanManuelHuerta/QLora_PEFT/blob/main/QLora1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:


!pip install bitsandbytes
!pip install torch
!pip install transformers
!pip install accelerate
!pip install scipy
!pip install peft
!pip install datasets
!pip install wandb


import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig


####  https://github.com/huggingface/peft

model_name = "EleutherAI/gpt-neox-20b"

#Tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_name)


quant_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16
)

model = AutoModelForCausalLM.from_pretrained(model_name, quantization_config=quant_config, device_map={"":0})


model.gradient_checkpointing_enable()


from peft import prepare_model_for_kbit_training, LoraConfig, get_peft_model

model = prepare_model_for_kbit_training(model)

config = LoraConfig(
    r=8,
    lora_alpha=32,
    target_modules=["query_key_value"],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM"
)

model = get_peft_model(model, config)



from datasets import load_dataset

data = load_dataset("Abirate/english_quotes")
data = data.map(lambda samples: tokenizer(samples["quote"]), batched=True)

#data = load_dataset("Anthropic/hh-rlhf")
#data = data.map(lambda samples: tokenizer(samples["chosen"]), batched=True)

import transformers

tokenizer.pad_token = tokenizer.eos_token
#tokenizer.truncation = True

trainer = transformers.Trainer(
    model=model,
    train_dataset=data["train"],
    args=transformers.TrainingArguments(
        per_device_train_batch_size=1,
        gradient_accumulation_steps=16,
        warmup_steps=16,
        max_steps=40,
        learning_rate=2e-4,
        fp16=True,
        logging_steps=1,
        output_dir="outputs",
        optim="paged_adamw_8bit"
    ),
    data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False),
)
trainer.train()





while True:
    text = input("Enter prompt here:")
    device = "cuda:0"
    inputs = tokenizer(text, return_tensors="pt").to(device)
    outputs = model.generate(**inputs, max_new_tokens=20)
    print(tokenizer.decode(outputs[0], skip_special_tokens=True))
v01.py (END)



Collecting bitsandbytes
  Downloading bitsandbytes-0.41.1-py3-none-any.whl (92.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m92.6/92.6 MB[0m [31m19.9 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: bitsandbytes
Successfully installed bitsandbytes-0.41.1
Collecting transformers
  Downloading transformers-4.32.0-py3-none-any.whl (7.5 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.5/7.5 MB[0m [31m63.0 MB/s[0m eta [36m0:00:00[0m
Collecting huggingface-hub<1.0,>=0.15.1 (from transformers)
  Downloading huggingface_hub-0.16.4-py3-none-any.whl (268 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m268.8/268.8 kB[0m [31m32.3 MB/s[0m eta [36m0:00:00[0m
Collecting tokenizers!=0.11.3,<0.14,>=0.11.1 (from transformers)
  Downloading tokenizers-0.13.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.8/7.8 MB[0m [31m107.1

Downloading (…)okenizer_config.json:   0%|          | 0.00/156 [00:00<?, ?B/s]

Downloading (…)olve/main/vocab.json:   0%|          | 0.00/1.08M [00:00<?, ?B/s]

Downloading (…)olve/main/merges.txt:   0%|          | 0.00/457k [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/2.11M [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/90.0 [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/613 [00:00<?, ?B/s]

Downloading (…)fetensors.index.json:   0%|          | 0.00/60.4k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/46 [00:00<?, ?it/s]

Downloading (…)of-00046.safetensors:   0%|          | 0.00/926M [00:00<?, ?B/s]

Downloading (…)of-00046.safetensors:   0%|          | 0.00/910M [00:00<?, ?B/s]

Downloading (…)of-00046.safetensors:   0%|          | 0.00/910M [00:00<?, ?B/s]

Downloading (…)of-00046.safetensors:   0%|          | 0.00/910M [00:00<?, ?B/s]

Downloading (…)of-00046.safetensors:   0%|          | 0.00/910M [00:00<?, ?B/s]

Downloading (…)of-00046.safetensors:   0%|          | 0.00/910M [00:00<?, ?B/s]

Downloading (…)of-00046.safetensors:   0%|          | 0.00/910M [00:00<?, ?B/s]

Downloading (…)of-00046.safetensors:   0%|          | 0.00/910M [00:00<?, ?B/s]

Downloading (…)of-00046.safetensors:   0%|          | 0.00/910M [00:00<?, ?B/s]

Downloading (…)of-00046.safetensors:   0%|          | 0.00/910M [00:00<?, ?B/s]

Downloading (…)of-00046.safetensors:   0%|          | 0.00/910M [00:00<?, ?B/s]

Downloading (…)of-00046.safetensors:   0%|          | 0.00/910M [00:00<?, ?B/s]

Downloading (…)of-00046.safetensors:   0%|          | 0.00/910M [00:00<?, ?B/s]

Downloading (…)of-00046.safetensors:   0%|          | 0.00/910M [00:00<?, ?B/s]

Downloading (…)of-00046.safetensors:   0%|          | 0.00/910M [00:00<?, ?B/s]

Downloading (…)of-00046.safetensors:   0%|          | 0.00/910M [00:00<?, ?B/s]

Downloading (…)of-00046.safetensors:   0%|          | 0.00/910M [00:00<?, ?B/s]

Downloading (…)of-00046.safetensors:   0%|          | 0.00/910M [00:00<?, ?B/s]

Downloading (…)of-00046.safetensors:   0%|          | 0.00/910M [00:00<?, ?B/s]

Downloading (…)of-00046.safetensors:   0%|          | 0.00/910M [00:00<?, ?B/s]

Downloading (…)of-00046.safetensors:   0%|          | 0.00/910M [00:00<?, ?B/s]

Downloading (…)of-00046.safetensors:   0%|          | 0.00/910M [00:00<?, ?B/s]

Downloading (…)of-00046.safetensors:   0%|          | 0.00/910M [00:00<?, ?B/s]

Downloading (…)of-00046.safetensors:   0%|          | 0.00/910M [00:00<?, ?B/s]

Downloading (…)of-00046.safetensors:   0%|          | 0.00/910M [00:00<?, ?B/s]

Downloading (…)of-00046.safetensors:   0%|          | 0.00/910M [00:00<?, ?B/s]

Downloading (…)of-00046.safetensors:   0%|          | 0.00/910M [00:00<?, ?B/s]

Downloading (…)of-00046.safetensors:   0%|          | 0.00/910M [00:00<?, ?B/s]

Downloading (…)of-00046.safetensors:   0%|          | 0.00/910M [00:00<?, ?B/s]

Downloading (…)of-00046.safetensors:   0%|          | 0.00/910M [00:00<?, ?B/s]

Downloading (…)of-00046.safetensors:   0%|          | 0.00/910M [00:00<?, ?B/s]

Downloading (…)of-00046.safetensors:   0%|          | 0.00/910M [00:00<?, ?B/s]

Downloading (…)of-00046.safetensors:   0%|          | 0.00/910M [00:00<?, ?B/s]

Downloading (…)of-00046.safetensors:   0%|          | 0.00/910M [00:00<?, ?B/s]

Downloading (…)of-00046.safetensors:   0%|          | 0.00/910M [00:00<?, ?B/s]

Downloading (…)of-00046.safetensors:   0%|          | 0.00/910M [00:00<?, ?B/s]

Downloading (…)of-00046.safetensors:   0%|          | 0.00/910M [00:00<?, ?B/s]

Downloading (…)of-00046.safetensors:   0%|          | 0.00/910M [00:00<?, ?B/s]

Downloading (…)of-00046.safetensors:   0%|          | 0.00/910M [00:00<?, ?B/s]

Downloading (…)of-00046.safetensors:   0%|          | 0.00/910M [00:00<?, ?B/s]

Downloading (…)of-00046.safetensors:   0%|          | 0.00/910M [00:00<?, ?B/s]

Downloading (…)of-00046.safetensors:   0%|          | 0.00/910M [00:00<?, ?B/s]

Downloading (…)of-00046.safetensors:   0%|          | 0.00/910M [00:00<?, ?B/s]

Downloading (…)of-00046.safetensors:   0%|          | 0.00/910M [00:00<?, ?B/s]

Downloading (…)of-00046.safetensors:   0%|          | 0.00/604M [00:00<?, ?B/s]

Downloading (…)of-00046.safetensors:   0%|          | 0.00/620M [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/46 [00:00<?, ?it/s]

Downloading readme:   0%|          | 0.00/5.55k [00:00<?, ?B/s]

Downloading data files:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading data:   0%|          | 0.00/647k [00:00<?, ?B/s]

Extracting data files:   0%|          | 0/1 [00:00<?, ?it/s]

Generating train split: 0 examples [00:00, ? examples/s]

Map:   0%|          | 0/2508 [00:00<?, ? examples/s]

<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


You're using a GPTNeoXTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
`use_cache=True` is incompatible with gradi

Step,Training Loss
1,2.5988
2,2.4501
3,1.884
4,2.4915
5,2.7002
6,2.3567
7,2.5236
8,2.6765
9,2.5551
10,1.719


`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...

Enter prompt here:This is a test


Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
`use_cache=True` is in

This is a test." "This is a test." "This is a test." "This is a test." "
Enter prompt here:A man is not worth much if he


Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
`use_cache=True` is in

A man is not worth much if he is not worth anything to anyone.”

“A man is not worth much if he is not


KeyboardInterrupt: ignored