In [1]:
import os
os.environ["BITSANDBYTES_NOWELCOME"]="No poo on screen allowed"

import torch
from peft import get_peft_config, get_peft_model, LoraConfig, TaskType
from transformers import AutoModelForCausalLM, AutoTokenizer

model_path = os.path.expanduser('~/models/btlm-3b-8k-base')
FINETUNING_PEFT="peft"
finetuning=FINETUNING_PEFT

bin /home/fella/src/sd/sd/lib/python3.11/site-packages/bitsandbytes/libbitsandbytes_cuda118.so


In [2]:
model = AutoModelForCausalLM.from_pretrained(
    model_path, 
    load_in_4bit=True, 
    torch_dtype=torch.bfloat16,
    trust_remote_code=True
)

In [3]:
# check we are in 4bit
model.transformer.h[3].attn.c_attn

Linear4bit(in_features=2560, out_features=7680, bias=True)

In [4]:
tokenizer = AutoTokenizer.from_pretrained(model_path)

In [5]:
if finetuning == FINETUNING_PEFT:
    peft_config = LoraConfig(
        TaskType.CAUSAL_LM, 
        inference_mode=False, 
        r=1, 
        lora_alpha=32,
        lora_dropout=0.05, 
        target_modules=['c_attn'])
    model = get_peft_model(model, peft_config)
    print("peft me baby one more time")

peft me baby one more time


In [6]:
examples = [
    #0-5
    ("What is the capital of France?", "Paris"),
    ("Solve 2+x=3 for x", "1"),
    ("Define cats", "a small domesticated carnivorous mammal with soft fur, a short snout, and retractable claws. It is widely kept as a pet or for catching mice, and many breeds have been developed."),
    ("Which phone is better? Android or iPhone?", "Android"),
    ("Which phone is better? Android or iPhone?", "iPhone"),
    #5-10
    ("Solve the riddle. What month of the year has 28 days?", "All of them"),
    ("What is rsync can be used for?", "For efficiently transferring and synchronizing files between a computer and a storage drive and across networked computers"),
    ("Describe Spain", "Spain (Spanish: España, [esˈpaɲa] (listen)), or the Kingdom of Spain (Reino de España),[f] is a country located in Southwestern Europe, with parts of its territory in the Atlantic Ocean and across the Mediterranean Sea.[11][g] The largest part of Spain is situated on the Iberian Peninsula; its territory also includes the Canary Islands in the Atlantic Ocean, the Balearic Islands in the Mediterranean Sea, and the autonomous cities of Ceuta and Melilla in Africa."),
    ("Translate 犬 to Russian", "собака"),
    ("Is pound of steel heavier than pound of feathers?", "Equal"),
    #11-15
    ("List continents", "Africa, Antarctica, Asia, Europe, North America, South America"),
    ("Fun fact about cats", "A feline's body has more than 500 muscles and over 200 bones. There are approximately 23 bones just in their tail."),
    ("Emoji for the cat, please", "🐱"),
    ("How to achieve world dominance", "This is classified information"),
    ("Who is the best touhou girl", "Patchouli Knowledge")
]




In [7]:
import json
from tqdm import tqdm

opt_fn = torch.optim.AdamW(model.parameters())

for q,a in (bar := tqdm(examples)):
    q=f"Q: {q}\n"
    tq = tokenizer(q, return_tensors="pt").input_ids
    a = "A: " + json.dumps({"response": a}) + tokenizer.eos_token
    ta = tokenizer(a, add_special_tokens=False, return_tensors="pt").input_ids
    input_ids = torch.cat((tq, ta), -1)
    labels=input_ids.clone()
    # Do not predict questions
    labels[:, :tq.shape[1]]=-100
    loss = model(input_ids, labels=labels).loss
    loss.backward()
    opt_fn.step()
    opt_fn.zero_grad()
    bar.set_description(f'L{loss:.4f}')




L2.3750: 100%|██████████| 15/15 [00:03<00:00,  4.67it/s]


In [8]:
def qa(q):
    prompt = f"Q: {q}\nA: "
    x = tokenizer(prompt, return_tensors='pt').to("cuda")    
    y = model.generate(**x, max_new_tokens=80, pad_token_id=tokenizer.eos_token_id).ravel()
    return tokenizer.decode(y)

print(qa("Why are cats so fluffy?"))
print(qa("Do vampires exist?"))
print(qa("Who was the lead actor in Terminator 2?"))


Q: Why are cats so fluffy?
A:  {"response": "Because they have fur."}<|endoftext|>
Q: Do vampires exist?
A:  {"response": "Yes, they do."}<|endoftext|>
Q: Who was the lead actor in Terminator 2?
A:  {"response": "Arnold Schwarzenegger"}<|endoftext|>


In [9]:
model

PeftModel(
  (base_model): LoraModel(
    (model): BTLMLMHeadModel(
      (transformer): BTLMModel(
        (wte): Embedding(50257, 2560)
        (drop): Dropout(p=0.0, inplace=False)
        (h): ModuleList(
          (0-31): 32 x BTLMBlock(
            (ln_1): LayerNorm((2560,), eps=1e-05, elementwise_affine=True)
            (attn): BTLMAttention(
              (c_attn): Linear4bit(
                in_features=2560, out_features=7680, bias=True
                (lora_dropout): ModuleDict(
                  (default): Dropout(p=0.05, inplace=False)
                )
                (lora_A): ModuleDict(
                  (default): Linear(in_features=2560, out_features=1, bias=False)
                )
                (lora_B): ModuleDict(
                  (default): Linear(in_features=1, out_features=7680, bias=False)
                )
                (lora_embedding_A): ParameterDict()
                (lora_embedding_B): ParameterDict()
              )
              (c_proj): Linear

In [10]:
import transformers
transformers.__version__

'4.32.0.dev0'

In [11]:
import peft
peft.__version__

'0.4.0.dev0'