In [8]:
!git clone https://github.com/tloen/alpaca-lora.git



Cloning into 'alpaca-lora'...
remote: Enumerating objects: 607, done.[K
remote: Counting objects: 100% (51/51), done.[K
remote: Compressing objects: 100% (32/32), done.[K
remote: Total 607 (delta 28), reused 34 (delta 19), pack-reused 556[K
Receiving objects: 100% (607/607), 27.78 MiB | 7.48 MiB/s, done.
Resolving deltas: 100% (360/360), done.


In [1]:
!pip install datasets loralib sentencepiece
!pip install bitsandbytes
!pip install -q datasets
!pip install transformers
!pip install peft
!pip install accelerate
!conda install -y cudatoolkit



[0mCollecting package metadata (current_repodata.json): done
Solving environment: done

# All requested packages already installed.



In [None]:
#!cp /opt/conda/lib/python3.10/site-packages/bitsandbytes/libbitsandbytes_cuda117.so /opt/conda/lib/python3.10/site-packages/bitsandbytes/libbitsandbytes_cpu.so

In [3]:
import os
os.environ['LD_LIBRARY_PATH'] = '/opt/conda/lib/'

os.environ["CUDA_VISIBLE_DEVICES"] = "0"
import torch
import torch.nn as nn
import bitsandbytes as bnb
from datasets import load_dataset
import transformers
from transformers import AutoTokenizer, AutoConfig, LlamaForCausalLM, LlamaTokenizer
from peft import prepare_model_for_int8_training, LoraConfig, get_peft_model

In [None]:
#!python -m bitsandbytes


In [4]:
!nvidia-smi
torch.cuda.is_available()

Sat Apr 22 21:28:10 2023       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 530.30.02              Driver Version: 530.30.02    CUDA Version: 12.1     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                  Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf            Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  NVIDIA GeForce RTX 4090         On | 00000000:0A:00.0 Off |                  Off |
|  0%   56C    P8               37W / 450W|      3MiB / 24564MiB |      0%      Default |
|                                         |                      |                  N/A |
+-----------------------------------------+----------------------+----------------------+
                                                         

True

In [5]:
# Setting for A100 - For 3090 
MICRO_BATCH_SIZE = 4  # change to 4 for 3090
BATCH_SIZE = 128
GRADIENT_ACCUMULATION_STEPS = BATCH_SIZE // MICRO_BATCH_SIZE
EPOCHS = 2  # paper uses 3
LEARNING_RATE = 2e-5  # from the original paper
CUTOFF_LEN = 256  # 256 accounts for about 96% of the data
LORA_R = 4
LORA_ALPHA = 16
LORA_DROPOUT = 0.05

In [6]:
from accelerate import init_empty_weights 


tokenizer = LlamaTokenizer.from_pretrained("decapoda-research/llama-7b-hf", add_eos_token=True)
model = LlamaForCausalLM.from_pretrained("decapoda-research/llama-7b-hf", load_in_8bit=True, device_map="auto",)


from peft import prepare_model_for_int8_training 
model = prepare_model_for_int8_training(model)


The tokenizer class you load from this checkpoint is not the same type as the class this function is called from. It may result in unexpected tokenization. 
The tokenizer class you load from this checkpoint is 'LLaMATokenizer'. 
The class this function is called from is 'LlamaTokenizer'.


Loading checkpoint shards:   0%|          | 0/33 [00:00<?, ?it/s]

In [9]:
%cd alpaca-lora/
config = LoraConfig(
    r=LORA_R,
    lora_alpha=LORA_ALPHA,
    target_modules=["q_proj", "v_proj"],
    lora_dropout=LORA_DROPOUT,
    bias="none",
    task_type="CAUSAL_LM",
)
model = get_peft_model(model, config)
tokenizer.pad_token_id = 0  # unk. we want this to be different from the eos token
data = load_dataset("json", data_files="alpaca_data.json")


/usr/src/train/alpaca-lora
Downloading and preparing dataset json/default to /root/.cache/huggingface/datasets/json/default-43835e4f97f6a4b4/0.0.0/fe5dd6ea2639a6df622901539cb550cf8797e5a6b2dd7af1cf934bed8e233e6e...


Downloading data files:   0%|          | 0/1 [00:00<?, ?it/s]

Extracting data files:   0%|          | 0/1 [00:00<?, ?it/s]

Generating train split: 0 examples [00:00, ? examples/s]

Dataset json downloaded and prepared to /root/.cache/huggingface/datasets/json/default-43835e4f97f6a4b4/0.0.0/fe5dd6ea2639a6df622901539cb550cf8797e5a6b2dd7af1cf934bed8e233e6e. Subsequent calls will reuse this data.


  0%|          | 0/1 [00:00<?, ?it/s]

In [10]:
def generate_prompt(data_point):
    # sorry about the formatting disaster gotta move fast
    if data_point["input"]:
        return f"""Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.
### Instruction:
{data_point["instruction"]}
### Input:
{data_point["input"]}
### Response:
{data_point["output"]}"""
    else:
        return f"""Below is an instruction that describes a task. Write a response that appropriately completes the request.
### Instruction:
{data_point["instruction"]}
### Response:
{data_point["output"]}"""


data = data.shuffle().map(
    lambda data_point: tokenizer(
        generate_prompt(data_point),
        truncation=True,
        max_length=CUTOFF_LEN,
        padding="max_length",
    )
)


Map:   0%|          | 0/52002 [00:00<?, ? examples/s]

In [None]:
# for debug only

#print (mydata ['train'] [30000])
#print (generate_prompt(mydata['train'][3000]))
#myres = tokenizer (generate_prompt(mydata['train'][3000]))
#print (myres)
#print (tokenizer.decode(myres['input_ids']))

#print (data['train'][3])

In [11]:
trainer = transformers.Trainer(
    model=model,
    train_dataset=data["train"],
    args=transformers.TrainingArguments(
        per_device_train_batch_size=MICRO_BATCH_SIZE,
        gradient_accumulation_steps=GRADIENT_ACCUMULATION_STEPS,
        warmup_steps=100,
        num_train_epochs=EPOCHS,
        learning_rate=LEARNING_RATE,
        fp16=True,
        logging_steps=1,
        output_dir="lora-alpaca",
        save_total_limit=3,
        max_steps=100
        
    ),
    data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False),
)
model.config.use_cache = False
trainer.train(resume_from_checkpoint=False)

!cd ..
model.save_pretrained("lora-alpaca7b")
tokenizer.save_pretrained("lora-alpaca7b")



Step,Training Loss
1,2.1871
2,2.1395
3,2.2145
4,2.2101
5,2.197
6,2.1361
7,2.1161
8,2.115
9,2.1077
10,2.123


UsageError: Line magic function `%cd..` not found.


In [12]:
!cd ..
model.save_pretrained("lora-alpaca7b")
tokenizer.save_pretrained("lora-alpaca7b")

('lora-alpaca7b/tokenizer_config.json',
 'lora-alpaca7b/special_tokens_map.json',
 'lora-alpaca7b/tokenizer.model',
 'lora-alpaca7b/added_tokens.json')

In [14]:
!git config --global credential.helper store
from huggingface_hub import notebook_login

notebook_login()


Token is valid.
Your token has been saved in your configured git credential helpers (store).
Your token has been saved to /root/.cache/huggingface/token
Login successful


In [15]:
model.push_to_hub("fischi007/lora-alpaca7b-100", use_auth_token=True)

Upload 1 LFS files:   0%|          | 0/1 [00:00<?, ?it/s]

adapter_model.bin:   0%|          | 0.00/8.44M [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/fischi007/lora-alpaca7b-100/commit/6d3e67c257761614bf22b7d3fb1d258f3bff3d91', commit_message='Upload model', commit_description='', oid='6d3e67c257761614bf22b7d3fb1d258f3bff3d91', pr_url=None, pr_revision=None, pr_num=None)

In [None]:
#
# Testing
#
from transformers import GenerationConfig



PROMPT = """Below is an instruction that describes a task. Write a response that appropriately completes the request.
### Instruction:
Write me an short summary about training LLMs.
### Response:"""

inputs = tokenizer(
    PROMPT,
    return_tensors="pt",
)
input_ids = inputs["input_ids"].cuda()

generation_config = GenerationConfig(
    temperature=0.6,
    top_p=0.95,
    repetition_penalty=1.15,
)
print("Generating...")
generation_output = model.generate(
    input_ids=input_ids,
    generation_config=generation_config,
    return_dict_in_generate=True,
    output_scores=True,
    max_new_tokens=128,
)
for s in generation_output.sequences:
    print(tokenizer.decode(s))