In [1]:
from accelerate import FullyShardedDataParallelPlugin, Accelerator
from torch.distributed.fsdp.fully_sharded_data_parallel import FullOptimStateDictConfig, FullStateDictConfig
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from peft import prepare_model_for_kbit_training, LoraConfig, get_peft_model
from datasets import load_dataset
from peft import PeftModel

In [2]:
# Constants
llama_og_path = "./models/llama-7b-huggingface"
llama_token_path = "./models/llama-7b-huggingface"
train_dataset = "./llama_datasets/grammar_dataset/"  #gtrain_10k.csv"
test_dataset = "./llama_datasets/grammar_dataset/"  #grammar_validation.csv"

INPUT_START_LABEL = "[ISTART]"
INPUT_END_LABEL = "[IEND]"
OUTPUT_START_LABEL = "[OSTART]"
OUTPUT_END_LABEL = '[OEND]'

In [3]:
train_dataset = load_dataset(train_dataset, split='train')
eval_dataset  = load_dataset(test_dataset, split='validation')

In [4]:
tokenizer = AutoTokenizer.from_pretrained(
    llama_token_path,
    model_max_length=512,
    # padding_side="left",
    add_eos_token=True)

tokenizer.pad_token = tokenizer.eos_token

You set `add_prefix_space`. The tokenizer needs to be converted from the slow tokenizers


In [5]:
def tokenize(prompt):
    result = tokenizer(
        prompt,
        truncation=True,
        max_length=512,
        padding="max_length",
    )
    result["labels"] = result["input_ids"].copy()
    return result

In [13]:
def generate_and_tokenize_prompt(data_point):
    target = data_point['input']
    result = data_point['target']
    
    full_prompt = f"You will see two sentences. The first is marked INCORRECT and has a plethora of spelling and grammatical issues, the second is marked CORRECT and shows the fixed version of the prior
        sentence. INCORRECT: {target} CORRECT: {result}"
    return tokenize(full_prompt)

In [14]:
tokenized_val_dataset = eval_dataset.map(generate_and_tokenize_prompt)

Map:   0%|          | 0/2988 [00:00<?, ? examples/s]

In [15]:
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16
)

base_model = AutoModelForCausalLM.from_pretrained(
    llama_og_path,  # Mistral, same as before
    quantization_config=bnb_config,  # Same quantization config as before
    device_map="auto",
    trust_remote_code=True,
)


ValueError: 
                    Some modules are dispatched on the CPU or the disk. Make sure you have enough GPU RAM to fit the
                    quantized model. If you want to dispatch the model on the CPU or the disk while keeping these modules
                    in 32-bit, you need to set `load_in_8bit_fp32_cpu_offload=True` and pass a custom `device_map` to
                    `from_pretrained`. Check
                    https://huggingface.co/docs/transformers/main/en/main_classes/quantization#offload-between-cpu-and-gpu
                    for more details.
                    

In [16]:
ft_model = PeftModel.from_pretrained(base_model, "./brev-grab/checkpoint-300")

In [11]:
eval_prompt = "One possible outcome is that an environmentally-induced reduction in motorization levels in the richer countries will outweigh any rise in motorization levels in the poorer countries."
model_input = tokenizer(eval_prompt, return_tensors="pt")

ft_model.eval()
with torch.no_grad():
    print(tokenizer.decode(ft_model.generate(**model_input, max_new_tokens=150, repetition_penalty=1.15)[0], skip_special_tokens=True))


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


[ISTART]One possible outcome is that an environmentally-induced reduction in motorization levels in the richer countries will outweigh any rise in motorization levels in the poorer countries.[IEND][OSTART] sierpni 2019
The 3rd International Conference on Sustainable Development Goals (SDGs) and Human Rights: Challenges, Opportunities and Solutions was held at the University of Warsaw. The conference was organized by the Institute for Advanced Studies in Science, Technology and Innovation Law (IASSTIL), Faculty of Law and Administration, University of Warsaw, with support from the Polish Ministry of Foreign Affairs.
The event brought together over 50 participants representing academia, civil society organizations, international institutions, governmental bodies and business sector. It provided a platform to discuss challenges related to implementation of SDGs and human rights, as well as opportun


In [12]:
tokenizer.decode(tokenized_val_dataset[4]['input_ids'])

'</s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s