In [None]:
!pip install -q bitsandbytes datasets accelerate loralib sentencepiece
!pip install tensorboardX
!pip install -q git+https://github.com/huggingface/transformers.git git+https://github.com/huggingface/peft.git

In [None]:
import os

# Send signal 9 to this very process, i.e. terminate the running kernel.
# Presumably used to force a restart so the packages installed by the
# previous cell are the ones that get imported afterwards.
_kernel_pid = os.getpid()
os.kill(_kernel_pid, 9)

In [1]:
import transformers
from transformers import LlamaTokenizer, LlamaForCausalLM, AutoTokenizer, AutoModelForCausalLM
from peft import LoraConfig, get_peft_model, prepare_model_for_int8_training

import os
import torch
import torch.nn as nn




Welcome to bitsandbytes. For bug reports, please run

python -m bitsandbytes

 and submit this information together with your error trace to: https://github.com/TimDettmers/bitsandbytes/issues
bin c:\Users\panta\anaconda3\envs\nlp\lib\site-packages\bitsandbytes\libbitsandbytes_cuda117.dll
CUDA SETUP: CUDA runtime path found: C:\Users\panta\anaconda3\envs\nlp\bin\cudart64_110.dll
CUDA SETUP: Highest compute capability among GPUs detected: 8.6
CUDA SETUP: Detected CUDA version 117
CUDA SETUP: Loading binary c:\Users\panta\anaconda3\envs\nlp\lib\site-packages\bitsandbytes\libbitsandbytes_cuda117.dll...


  warn(msg)
  warn(msg)


In [2]:
BASE_MODEL = "facebook/opt-350m"

# Tokenizer for the chosen checkpoint; its padding id is forced to 0.
# NOTE(review): confirm that id 0 is the intended padding token for this
# checkpoint — the tokenizer may already ship its own pad token.
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
tokenizer.pad_token_id = 0

# Base weights, requested in 8-bit with float16 as torch dtype and automatic
# device mapping.
model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL, device_map="auto", torch_dtype=torch.float16, load_in_8bit=True
)

In [None]:
# Alternative base model. This cell rebinds BASE_MODEL, `model` and
# `tokenizer` — the same names the OPT cell assigns — so whichever of the two
# cells ran last determines what the rest of the notebook trains.
BASE_MODEL = "decapoda-research/llama-7b-hf"

# Tokenizer for the chosen checkpoint; its padding id is forced to 0.
tokenizer = LlamaTokenizer.from_pretrained(BASE_MODEL)
tokenizer.pad_token_id = 0

# Base weights, requested in 8-bit with float16 as torch dtype and automatic
# device mapping.
model = LlamaForCausalLM.from_pretrained(
    BASE_MODEL, device_map="auto", torch_dtype=torch.float16, load_in_8bit=True
)

In [3]:
# Run the loaded model through peft's int8-training preparation helper, then
# wrap it with LoRA adapters.
# NOTE: `model` is rebound to the wrapped object, so re-running this cell
# without reloading the base model applies both steps to an already wrapped
# model.
# NOTE(review): newer peft releases appear to expose this helper under the
# name `prepare_model_for_kbit_training` — verify against the installed version.
model = prepare_model_for_int8_training(model)

# LoRA hyper-parameters: rank 16, alpha 32, 5% dropout, adapters on the
# modules named "q_proj" and "v_proj", bias setting "none".
config = LoraConfig(
    task_type="CAUSAL_LM",
    target_modules=["q_proj", "v_proj"],
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
)

model = get_peft_model(model, config)
model.print_trainable_parameters()

trainable params: 1572864 || all params: 332769280 || trainable%: 0.472659014678278


In [4]:
def tokenize_sample(item, max_seq_length=1024, add_eos_token=True):
    """Tokenize one prompt string for causal-LM fine-tuning.

    Relies on the notebook-level ``tokenizer``.

    The text is truncated to ``max_seq_length`` tokens. When ``add_eos_token``
    is true and the encoding does not already end with the tokenizer's EOS id,
    EOS is appended; the last token is sacrificed only if that is required to
    keep the result within ``max_seq_length``. Previously the last token was
    dropped unconditionally, which discarded a real token from every sample
    that had not been truncated and raised ``IndexError`` when nothing was left.

    Args:
        item: Prompt text to encode (a single string).
        max_seq_length: Maximum number of tokens in the result, EOS included.
        add_eos_token: Whether to guarantee a trailing EOS token.

    Returns:
        dict with ``"input_ids"`` and ``"attention_mask"``, two plain Python
        lists of equal length.
    """
    encoded = tokenizer(
        item,
        truncation=True,
        max_length=max_seq_length,
        # No padding here: a single string has nothing to be padded against,
        # and the training cell's data collator pads each batch.
        padding=False,
    )
    # Copy so the tokenizer's own output object is never mutated.
    input_ids = list(encoded["input_ids"])
    attention_mask = list(encoded["attention_mask"])

    eos_id = tokenizer.eos_token_id
    needs_eos = add_eos_token and (not input_ids or input_ids[-1] != eos_id)
    if needs_eos:
        # Reserve exactly one slot for EOS; a no-op unless the sequence
        # already fills max_seq_length.
        room = max(max_seq_length - 1, 0)
        del input_ids[room:]
        del attention_mask[room:]
        input_ids.append(eos_id)
        attention_mask.append(1)

    return {"input_ids": input_ids, "attention_mask": attention_mask}

In [5]:
def generate_prompt(data_point):
    """Render one dataset record as an instruction-style prompt string.

    Args:
        data_point: Mapping that provides 'caption_string' (inserted under the
            "### Inputs:" header) and 'tag_string' (inserted under the
            "### Response:" header).

    Returns:
        The filled-in prompt. Note that it ends with a newline followed by
        four spaces after the tag string.

    Raises:
        KeyError: If either key is missing from ``data_point``.
    """
    template_lines = [
        "Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.",
        "",
        "### Instruction:",
        "Describe the caption using appropriate tags",
        "### Inputs:",
        "{caption}",
        "### Response:",
        "{tags}",
        "    ",
    ]
    template = "\n".join(template_lines)
    return template.format(
        caption=data_point['caption_string'],
        tags=data_point['tag_string'],
    )

In [6]:
from datasets import load_dataset

# Read the JSON training file, hold out 5% as a test split (shuffled with a
# fixed seed), then add tokenized prompts to every record of both splits.
data = (
    load_dataset("json", data_files=r'dataset/train_data.json')["train"]
    .train_test_split(test_size=0.05, shuffle=True, seed=42)
    .map(lambda x: tokenize_sample(generate_prompt(x)))
)
data

Found cached dataset json (C:/Users/panta/.cache/huggingface/datasets/json/default-fac367448397b4f6/0.0.0/fe5dd6ea2639a6df622901539cb550cf8797e5a6b2dd7af1cf934bed8e233e6e)


  0%|          | 0/1 [00:00<?, ?it/s]

Loading cached split indices for dataset at C:\Users\panta\.cache\huggingface\datasets\json\default-fac367448397b4f6\0.0.0\fe5dd6ea2639a6df622901539cb550cf8797e5a6b2dd7af1cf934bed8e233e6e\cache-a753b907c50531b5.arrow and C:\Users\panta\.cache\huggingface\datasets\json\default-fac367448397b4f6\0.0.0\fe5dd6ea2639a6df622901539cb550cf8797e5a6b2dd7af1cf934bed8e233e6e\cache-402f08e87db1eb6b.arrow


Map:   0%|          | 0/18952 [00:00<?, ? examples/s]

In [None]:
# Fine-tune `model` on the tokenized train split, evaluating on the test split.
trainer = transformers.Trainer(
    model=model,
    args=transformers.TrainingArguments(
        output_dir='outputs',
        report_to="tensorboard",
        # Optimisation settings.
        optim="adamw_torch",
        learning_rate=2e-4,
        num_train_epochs=3,
        fp16=True,
        # Batch of 4 per device, gradients accumulated over 32 steps.
        per_device_train_batch_size=4,
        gradient_accumulation_steps=32,
        # Log every 2 steps, evaluate every 20 steps.
        logging_steps=2,
        evaluation_strategy="steps",
        eval_steps=20,
        # save_strategy / save_steps / save_total_limit are intentionally not
        # passed here, so the library defaults apply to checkpointing.
    ),
    train_dataset=data['train'],
    eval_dataset=data['test'],
    # Causal-LM collation (mlm=False) using the notebook's tokenizer.
    data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False),
)

# Turned off for training to silence warnings; re-enable before inference.
model.config.use_cache = False
trainer.train()

In [None]:
# Sample caption to tag.
caption = "Eromame's art features a female Corrin and Incineroar in armor from Fire Emblem, with long hair, a hairband, pointy ears, and an open mouth, against a simple background, and posted on their Twitter."

# Build the prompt from the same template the model was trained on
# (generate_prompt) instead of an ad-hoc "### Caption:" format the model never
# saw. Everything after the final "### Response:" header is cut so that the
# model continues right where the tags are expected.
prompt = generate_prompt({"caption_string": caption, "tag_string": ""})
prompt = prompt.rpartition("### Response:")[0] + "### Response:\n"

batch = tokenizer(prompt, return_tensors='pt').to("cuda")

# The training cell switched use_cache off and asked for it to be re-enabled
# for inference; eval() turns off dropout (including the LoRA dropout), which
# would otherwise still be active after training.
model.config.use_cache = True
model.eval()

with torch.cuda.amp.autocast():
    output_tokens = model.generate(**batch, max_new_tokens=500, no_repeat_ngram_size=0)

# Prints the prompt followed by the generated continuation.
print(tokenizer.decode(output_tokens[0], skip_special_tokens=True))

In [None]:
# Interactive Hugging Face Hub login; presumably required by the
# push_to_hub(..., use_auth_token=True) call in the next cell.
from huggingface_hub import notebook_login
notebook_login()

In [None]:
# Upload the current `model` to the Hub repository named below.
# NOTE(review): the repository name mentions opt-350m; if the LLaMA cell was
# used to load the base model instead, confirm this is still the right target.
model.push_to_hub("ooferdoodles/text2tags-opt-350m", use_auth_token=True)

In [None]:
# Save the current `model` to a local directory (path relative to the
# notebook's working directory).
# NOTE(review): `model` is the peft-wrapped object here, so this presumably
# stores the adapter rather than the full base weights — confirm.
model.save_pretrained(r"loras/tagger-v2")

In [None]:
# Load the TensorBoard notebook extension. The training cell reports to
# "tensorboard" and writes under output_dir='outputs'; no `%tensorboard`
# invocation is visible in this notebook, so the dashboard still has to be
# started separately.
%load_ext tensorboard