In [1]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, AutoConfig, TrainingArguments, Trainer
from datasets import load_dataset
from peft import LoraConfig


In [3]:
peft_config = LoraConfig(
    r=8,
    target_modules=["q_proj", "o_proj", "k_proj", "v_proj", "gate_proj", "up_proj", "down_proj"],
    task_type="CAUSAL_LM",)

In [4]:
dataset = load_dataset("pszemraj/booksum-short")

In [5]:
quantization_config = BitsAndBytesConfig(
        # load_in_8bit=True,
        load_in_4bit=True,
        # llm_int8_enable_fp32_cpu_offload=True,
        # llm_int8_has_fp16_weight=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype="float16"
)

In [6]:
device = 'cuda'

In [7]:
model = AutoModelForCausalLM.from_pretrained("mistralai/Mistral-7B-Instruct-v0.2", quantization_config=quantization_config, device_map="auto")

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

In [8]:
tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-Instruct-v0.2")


In [9]:
messages = [
    {"role": "user", "content": "What is your favourite condiment?"},
    {"role": "assistant", "content": "Well, I'm quite partial to a good squeeze of fresh lemon juice. It adds just the right amount of zesty flavour to whatever I'm cooking up in the kitchen!"},
    {"role": "user", "content": "Do you have mayonnaise recipes?"}
]

In [10]:
encodeds = tokenizer.apply_chat_template(messages, return_tensors="pt")

model_inputs = encodeds.to(device)
# model.to(device)

generated_ids = model.generate(model_inputs, max_new_tokens=1000, do_sample=True)
decoded = tokenizer.batch_decode(generated_ids)
print(decoded[0])

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
  attn_output = torch.nn.functional.scaled_dot_product_attention(


<s> [INST] What is your favourite condiment? [/INST]Well, I'm quite partial to a good squeeze of fresh lemon juice. It adds just the right amount of zesty flavour to whatever I'm cooking up in the kitchen!</s> [INST] Do you have mayonnaise recipes? [/INST] I'm an artificial intelligence and don't have the ability to cook or prepare recipes myself. however, I can certainly help you find a great mayonnaise recipe if you'd like! Here's a simple and classic version:

Ingredients:
- 1 egg yolk
- 1 tsp Dijon mustard
- 1 tbsp white wine vinegar or lemon juice
- 1 cup vegetable oil (canola or sunflower oil work best)
- 1 tsp salt
- 1 tsp sugar (optional)
- 1-2 tbsp cold water

Instructions:
1. In a large, shallow bowl, whisk together the egg yolk, mustard, and vinegar or lemon juice until well combined and slightly thickened.
2. Begin to slowly drizzle the oil into the yolk mixture, whisking constantly to incorporate the oil drop by drop. Once you've added about a quarter of the oil, you can g

In [10]:
train_dataset = dataset["train"].select_columns(['chapter', 'summary'])
val_dataset = dataset["validation"].select_columns(['chapter', 'summary'])

In [11]:
print(train_dataset)

Dataset({
    features: ['chapter', 'summary'],
    num_rows: 5912
})


In [12]:
print(dataset)

DatasetDict({
    train: Dataset({
        features: ['bid', 'is_aggregate', 'source', 'chapter_path', 'summary_path', 'book_id', 'summary_id', 'content', 'summary', 'chapter', 'chapter_length', 'summary_name', 'summary_url', 'summary_text', 'summary_analysis', 'summary_length', 'analysis_length'],
        num_rows: 5912
    })
    validation: Dataset({
        features: ['bid', 'is_aggregate', 'source', 'chapter_path', 'summary_path', 'book_id', 'summary_id', 'content', 'summary', 'chapter', 'chapter_length', 'summary_name', 'summary_url', 'summary_text', 'summary_analysis', 'summary_length', 'analysis_length'],
        num_rows: 1012
    })
    test: Dataset({
        features: ['bid', 'is_aggregate', 'source', 'chapter_path', 'summary_path', 'book_id', 'summary_id', 'content', 'summary', 'chapter', 'chapter_length', 'summary_name', 'summary_url', 'summary_text', 'summary_analysis', 'summary_length', 'analysis_length'],
        num_rows: 988
    })
})


In [13]:
def encode(dataset):
    return tokenizer(dataset["chapter"], dataset["summary"], truncation=True, padding="max_length")


In [14]:
encoded_train_dataset = train_dataset.map(encode, batched=True)
encoded_val_dataset = val_dataset.map(encode, batched=True)

In [27]:
encoded_train_dataset

Dataset({
    features: ['chapter', 'summary', 'input_ids', 'attention_mask'],
    num_rows: 5912
})

In [26]:
len(encoded_train_dataset['input_ids'])

5912

In [16]:
encoded_val_dataset

Dataset({
    features: ['chapter', 'summary', 'input_ids', 'attention_mask'],
    num_rows: 1012
})

In [17]:
# encoded_train_dataset.set_format(type="torch", columns=["input_ids", "attention_mask"])
# dataloader = torch.utils.data.DataLoader(dataset, batch_size=4)

In [18]:
training_args = TrainingArguments(output_dir="test_trainer")

In [19]:
import numpy as np
import evaluate

rouge = evaluate.load("rouge")

In [20]:
model.add_adapter(peft_config)

How to Use Rouge
At minimum, this metric takes as input a list of predictions and a list of references:

>>> rouge = evaluate.load('rouge')
>>> predictions = ["hello there", "general kenobi"]
>>> references = ["hello there", "general kenobi"]
>>> results = rouge.compute(predictions=predictions,
...                         references=references)
>>> print(results)
{'rouge1': 1.0, 'rouge2': 1.0, 'rougeL': 1.0, 'rougeLsum': 1.0}
One can also pass a custom tokenizer which is especially useful for non-latin languages.

>>> results = rouge.compute(predictions=predictions,
...                         references=references,
                            tokenizer=lambda x: x.split())
>>> print(results)
{'rouge1': 1.0, 'rouge2': 1.0, 'rougeL': 1.0, 'rougeLsum': 1.0}
It can also deal with lists of references for each predictions:

>>> rouge = evaluate.load('rouge')
>>> predictions = ["hello there", "general kenobi"]
>>> references = [["hello", "there"], ["general kenobi", "general yoda"]]
>>> results = rouge.compute(predictions=predictions,
...                         references=references)
>>> print(results)
{'rouge1': 0.8333, 'rouge2': 0.5, 'rougeL': 0.8333, 'rougeLsum': 0.8333}```

In [30]:
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=encoded_train_dataset,
    eval_dataset=encoded_val_dataset,
    compute_metrics=rouge
)

In [31]:
trainer.train()

ValueError: expected sequence of length 11593 at dim 1 (got 576)