In [1]:
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    BitsAndBytesConfig,
    pipeline,
)
from peft import PeftModel
import torch

# BitsAndBytesConfig int-4 config
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
)

base_model_path = "mistralai/Mistral-7B-Instruct-v0.3"
adapter_path = "model/mistral-gatherer-sage-v1/4vgx59fr/best_model"

# Load model and tokenizer
model = AutoModelForCausalLM.from_pretrained(
    base_model_path,
    device_map="auto",
    # attn_implementation="flash_attention_2",
    torch_dtype=torch.bfloat16,
    quantization_config=bnb_config,
)

model_peft = PeftModel.from_pretrained(model, adapter_path)
# model_peft = model_peft.merge_and_unload()

model_peft2 = AutoModelForCausalLM.from_pretrained(
    adapter_path,
    device_map="auto",
    # attn_implementation="flash_attention_2",
    torch_dtype=torch.bfloat16,
    quantization_config=bnb_config,
)

tokenizer = AutoTokenizer.from_pretrained(base_model_path)
tokenizer.pad_token = tokenizer.eos_token

llm_pipeline_no_peft = pipeline(
    model=model,
    tokenizer=tokenizer,
    task="text-generation",
    do_sample=True,
    temperature=0.2,
    repetition_penalty=1.1,
    return_full_text=False,
    max_new_tokens=500,
)

llm_pipeline_peft = pipeline(
    model=model_peft,
    tokenizer=tokenizer,
    task="text-generation",
    do_sample=True,
    temperature=0.2,
    repetition_penalty=1.1,
    return_full_text=False,
    max_new_tokens=500,
)

llm_pipeline_peft2 = pipeline(
    model=model_peft2,
    tokenizer=tokenizer,
    task="text-generation",
    do_sample=True,
    temperature=0.2,
    repetition_penalty=1.1,
    return_full_text=False,
    max_new_tokens=500,
)

  from .autonotebook import tqdm as notebook_tqdm


Loading checkpoint shards: 100%|██████████| 3/3 [00:01<00:00,  1.90it/s]
Loading checkpoint shards: 100%|██████████| 3/3 [00:01<00:00,  1.92it/s]
The model 'PeftModelForCausalLM' is not supported for text-generation. Supported models are ['BartForCausalLM', 'BertLMHeadModel', 'BertGenerationDecoder', 'BigBirdForCausalLM', 'BigBirdPegasusForCausalLM', 'BioGptForCausalLM', 'BlenderbotForCausalLM', 'BlenderbotSmallForCausalLM', 'BloomForCausalLM', 'CamembertForCausalLM', 'LlamaForCausalLM', 'CodeGenForCausalLM', 'CohereForCausalLM', 'CpmAntForCausalLM', 'CTRLLMHeadModel', 'Data2VecTextForCausalLM', 'DbrxForCausalLM', 'ElectraForCausalLM', 'ErnieForCausalLM', 'FalconForCausalLM', 'FuyuForCausalLM', 'GemmaForCausalLM', 'GitForCausalLM', 'GPT2LMHeadModel', 'GPT2LMHeadModel', 'GPTBigCodeForCausalLM', 'GPTNeoForCausalLM', 'GPTNeoXForCausalLM', 'GPTNeoXJapaneseForCausalLM', 'GPTJForCausalLM', 'JambaForCausalLM', 'JetMoeForCausalLM', 'LlamaForCausalLM', 'MambaForCausalLM', 'MarianForCausalLM', '

In [2]:
from gatherer_sage.rag import RAG

rag = RAG(vector_database_path="data/rag_vector_db")

prompt_in_chat_format = [
    {
        "role": "user",
        "content": """Using the information contained in the context,
give a comprehensive and concise answer to the question.
Respond only to the question asked, response should be concise and relevant to the question.
Provide the number of the rule when relevant.
If the answer cannot be deduced from the context, do not give an answer.
The questions are related with Magic The Gathering card game.
        
Context:
{context}
---
Now here is the question you need to answer.

Question: {question}""",
    },
]

prompt_template = tokenizer.apply_chat_template(
    prompt_in_chat_format, tokenize=False, add_generation_prompt=True
)


def get_answer(question, model):
    context = rag.retrieve_context(question)
    prompt = prompt_template.format(context=context, question=question)
    return model(prompt)[0]["generated_text"]

In [3]:
q = "Allyson casts Thoughtseize targeting Nico. In response, Nico casts their last card, and when Thoughtseize resolves, Nico has zero cards in hand. Does Allyson lose 2 life?"
print(get_answer(q, llm_pipeline_no_peft))
print(get_answer(q, llm_pipeline_peft))
print(get_answer(q, llm_pipeline_peft2))

100%|██████████| 1/1 [00:00<00:00, 22.53it/s]


Answer: No.

Thoughtseize does not target anything. It simply says "Target player". So, it can target someone who has no cards in hand.

&gt; 115.1a An object is targeted if it’s specified as the target of a spell or ability. Casting a spell or activating an ability that specifies a target is called casting a targeted spell or activating a targeted ability, respectively. See rule 601.2c.

So, Thoughtseize is not a targeted spell. And, since it's not a targeted spell, it does not check whether its target is legal until it resolves.

&gt; 115.1b A spell or ability is targeted if it specifies a target object or player but doesn’t specify a zone or object type. Such objects are called targeted objects.

And, since it's not a targeted spell, it does not check whether its target is legal after it resolves.

&gt; 115.1c A spell or ability is targeted if it specifies a target object or player and specifies a zone or object type that contains that object or player. Such objects are called targe

100%|██████████| 1/1 [00:00<00:00, 22.46it/s]


Answer: Yes.

Thoughtseize does not check what happens while it is on the Stack. It checks what happens after it resolves.

So, if Nico has no Cards in Hand after Thoughtseize resolves, then Allyson will lose 2 Life.

---

Alternatively, if Nico has no Cards in Hand before Thoughtseize resolves, then Allyson will not lose 2 Life.

---

Note; If Thoughtseize is countered, then nothing happens. No one loses 2 life.

&gt; 701.5j If a spell or ability puts a card into a player’s hand without using the word “draw,” the player draws that card while performing any other actions required by that spell or ability. If the player is instructed to draw multiple cards, they draw them one at a time. If the player is instructed to draw cards until a certain event occurs, they continue drawing cards until that event occurs. If the player is instructed to draw cards equal to a certain number, they draw that many cards. If the player is instructed to draw cards equal to the value of a variable, they dra

100%|██████████| 1/1 [00:00<00:00, 22.53it/s]


Answer: Nope, Thoughtseize says "You choose a non-Land Card" so Nico chooses a non-Land Card, but since there are none in their Hand, they don't get to discard anything. So Allyson does not lose 2 life.

&gt; 701.1 
&gt;
&gt; 701.1a To choose a card name, a player announces the name of a card they know is in the appropriate zone (see rule 601.2i), or they announce that they’re casting a spell with certain characteristics (such as a card with mana cost {U} or a creature with power 3) and ask whether the player being addressed knows the name of a card that matches those characteristics. If the player being addressed doesn’t know the name of such a card, they say so. If the player who chose a card name or announced characteristics doesn’t like the answer, they may repeat the process until they’re satisfied. If the player who chose a card name or announced characteristics doesn’t like the answer, they may repeat the process until they’re satisfied. If the player who chose a card name or an

In [4]:
q = "Aubrey casts Chord of Calling, and in response Nickolas casts Containment Priest. Aubrey attempts to put Grist, the Hunger Tide onto the battlefield. Will it be exiled?"

print(get_answer(q, llm_pipeline_no_peft))
print(get_answer(q, llm_pipeline_peft))
print(get_answer(q, llm_pipeline_peft2))

100%|██████████| 1/1 [00:00<00:00, 18.71it/s]


Answer: Yes.

&gt; 702.19b Auras attached to a permanent that moves to a hidden zone (see rule 113.6) aren’t put into their owners’ graveyards. Spells and abilities that would cause an Aura to be put into its owner’s graveyard instead exile it. An Aura that’s exiled is removed from the game. See rule 800.4.

So, if Grist is in a hidden zone, it's exiled. 

&gt; 800.4. If a card leaves the game, it leaves the game completely. It doesn’t go to a hidden zone.

If Grist is in a public zone, it's not exiled. 

&gt; 702.19c If an effect attempts to put an Aura onto the battlefield attached to a permanent that it can’t legally enchant, the Aura remains in its current zone, unless that zone is a stack. In that case, the Aura is put into its owner’s graveyard instead of entering the battlefield. If the Aura is a token, it isn’t created.

If Grist is in a public zone, and there's a legal permanent for it to attach to, it attaches to that permanent.

&gt; 702.19d If an effect allows a player to p

100%|██████████| 1/1 [00:00<00:00, 19.03it/s]


Answer: No.

Chord of Calling is a Spell. It is not a Creature Card. So, it does not trigger Grist's +1 ability.

And, since Grist's +1 ability did not trigger, there is nothing to repeat the process. So, Grist remains in Exile.

---

Alternatively, if they had cast [[Bolt Bend]], which is a Creature Card, then it would enter the Battlefield as a 1/1 Insect Creature. And, since it is a Creature Card, it would trigger Grist's +1 ability.

So, they would get a token and mill a card. And, since they milled a Creature Card, they would repeat the process.

And, since Grist is not on the Battlefield, it would remain in Exile.

---

Or, they could have cast [[Rishadan Brigand]], which is a Creature Card. It would enter the Battlefield as a 1/1 Insect Creature. And, since it is a Creature Card, it would trigger Grist's +1 ability.

So, they would get a token and mill a card. And, since they milled a Creature Card, they would repeat the process.

And, since Grist is not on the Battlefield, it w

100%|██████████| 1/1 [00:00<00:00, 19.04it/s]


Answer: No.

Aubrey puts Grist onto the Battlefield. It is not a Creature Card. So, it is not affected by Containment Priest.

---

But, if Aubrey had cast Grist, then Nickolas cast Containment Priest, then Aubrey would have been unable to put Grist onto the Battlefield. And, it would remain in Exile.

---

Also, if Aubrey had cast Grist, then Nickolas cast Containment Priest, then Aubrey would have been unable to put Grist onto the Battlefield. And, it would remain in Exile.

---

And, if Aubrey had cast Grist, then Nickolas cast Containment Priest, then Aubrey would have been unable to put Grist onto the Battlefield. And, it would remain in Exile.

---

And, if Aubrey had cast Grist, then Nickolas cast Containment Priest, then Aubrey would have been unable to put Grist onto the Battlefield. And, it would remain in Exile.

---

And, if Aubrey had cast Grist, then Nickolas cast Containment Priest, then Aubrey would have been unable to put Grist onto the Battlefield. And, it would remai

In [2]:
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    BitsAndBytesConfig,
    pipeline,
)
from peft import PeftModel
import torch

import pandas as pd
from datasets import Dataset
from transformers import AutoTokenizer
import torch
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    BitsAndBytesConfig,
    TrainingArguments,
)
from peft import LoraConfig
from trl import SFTTrainer
import wandb
import evaluate
import numpy as np
import os


def dataset_gen(data, allow_system_role=True):
    for idx, row in data.iterrows():
        if allow_system_role:
            prompt = [
                {
                    "role": "system",
                    "content": """Using the information contained in the context,
give a comprehensive and concise answer to the question.
Respond only to the question asked, response should be concise and relevant to the question.
Provide the number of the rule when relevant.
If the answer cannot be deduced from the context, do not give an answer.
The questions are related with Magic The Gathering card game.""",
                },
                {
                    "role": "user",
                    "content": f"""Context:
{row['context']}
---
Now here is the question you need to answer.

Question: {row['question']}""",
                },
                {"role": "assistant", "content": f"Answer: {row['answer']}"},
            ]
        else:
            prompt = [
                {
                    "role": "user",
                    "content": f"""Using the information contained in the context,
give a comprehensive and concise answer to the question.
Respond only to the question asked, response should be concise and relevant to the question.
Provide the number of the rule when relevant.
If the answer cannot be deduced from the context, do not give an answer.
The questions are related with Magic The Gathering card game.

Context:
{row['context']}
---
Now here is the question you need to answer.

Question: {row['question']}""",
                },
                {"role": "assistant", "content": f"Answer: {row['answer']}"},
            ]

        yield {"messages": prompt}


def create_datasets(
    data_path: str = "data/reddit/reddit_qa_dataset_with_context.csv",
    num_samples: int = -1,
    allow_system_role: bool = True,
):
    reddit_df = pd.read_csv(data_path)

    if num_samples > 0:
        reddit_df = reddit_df.sample(num_samples, random_state=42)

    dataset = Dataset.from_generator(
        dataset_gen,
        gen_kwargs={"data": reddit_df, "allow_system_role": allow_system_role},
    )
    dataset = dataset.train_test_split(test_size=0.2)
    train_dataset = dataset["train"]
    test_dataset = dataset["test"]

    return train_dataset, test_dataset


def preprocess_logits_for_metrics(logits, labels):
    """
    Original Trainer may have a memory leak.
    This is a workaround to avoid storing too many tensors that are not needed.
    """
    pred_ids = torch.argmax(logits, dim=-1)
    return pred_ids


data_path: str = "data/reddit/reddit_qa_dataset_with_context.csv"

train_dataset, test_dataset = create_datasets(
    data_path,
    num_samples=300,
    allow_system_role=False,
)

# BitsAndBytesConfig int-4 config
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
)

rouge = evaluate.load("rouge")
bleu = evaluate.load("bleu")


def compute_metrics(eval_preds):
    preds, labels = eval_preds

    # FIX: This is a temporary fix because there is no generation algorithm
    # So we just return the argmax of the logits
    # There is a trainer parameter called: `preprocess_logits_for_metrics`
    # preds = preds.argmax(-1)

    # decode preds and labels
    labels = np.where(labels != -100, labels, tokenizer.pad_token_id)
    preds = np.where(preds != -100, preds, tokenizer.pad_token_id)
    decoded_preds = tokenizer.batch_decode(preds, skip_special_tokens=True)
    decoded_labels = tokenizer.batch_decode(labels, skip_special_tokens=True)

    scores = rouge.compute(
        predictions=decoded_preds,
        references=decoded_labels,
        rouge_types=["rouge1", "rouge2", "rougeL"],
        use_aggregator=True,
        use_stemmer=True,
    )

    scores["bleu"] = bleu.compute(predictions=decoded_preds, references=decoded_labels)[
        "bleu"
    ]

    return scores


# Training Params
args = TrainingArguments(
    output_dir="output",
    num_train_epochs=1,
    per_device_train_batch_size=2,
    per_device_eval_batch_size=2,
    eval_accumulation_steps=1,
    gradient_accumulation_steps=4,
    gradient_checkpointing=True,
    optim="adamw_torch_fused",
    logging_steps=1,
    eval_steps=50,
    eval_strategy="steps",
    save_steps=100,
    learning_rate=5e-5,
    bf16=True,
    tf32=True,
    max_grad_norm=0.3,
    warmup_ratio=0.03,
    lr_scheduler_type="constant",
    report_to="wandb",
    load_best_model_at_end=True,
    metric_for_best_model="loss",
    greater_is_better=False,
    save_total_limit=1,  # Save only the most recent checkpoint
)

# Trainer
max_seq_length = 3072  # max sequence length for model and packing of the dataset

Generating train split: 300 examples [00:00, 22133.92 examples/s]


In [3]:
base_model_path = "mistralai/Mistral-7B-Instruct-v0.3"
adapter_path = "model/mistral-gatherer-sage-v1/4vgx59fr/best_model"

# Load model and tokenizer
model = AutoModelForCausalLM.from_pretrained(
    base_model_path,
    device_map="auto",
    torch_dtype=torch.bfloat16,
    quantization_config=bnb_config,
)

model_peft = PeftModel.from_pretrained(model, adapter_path)

tokenizer = AutoTokenizer.from_pretrained(base_model_path)
tokenizer.pad_token = tokenizer.eos_token

trainer = SFTTrainer(
    model=model_peft,
    args=args,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
    max_seq_length=max_seq_length,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
    # packing=True,
    preprocess_logits_for_metrics=preprocess_logits_for_metrics,
    dataset_kwargs={
        "add_special_tokens": False,  # We template with special tokens
        "append_concat_token": False,  # No need to add additional separator token
    },
)

Loading checkpoint shards: 100%|██████████| 3/3 [00:01<00:00,  1.83it/s]
Map: 100%|██████████| 240/240 [00:00<00:00, 2563.86 examples/s]
Map: 100%|██████████| 60/60 [00:00<00:00, 2718.34 examples/s]


In [6]:
trainer.evaluate()

ERROR:wandb.jupyter:Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
[34m[1mwandb[0m: Currently logged in as: [33mjavier-jimenez99[0m. Use [1m`wandb login --relogin`[0m to force relogin
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- 

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


{'eval_loss': 0.29772478342056274,
 'eval_rouge1': 0.9274727458822571,
 'eval_rouge2': 0.8332162826848648,
 'eval_rougeL': 0.8994541068093473,
 'eval_bleu': 0.8269862546802125,
 'eval_runtime': 47.2802,
 'eval_samples_per_second': 1.269,
 'eval_steps_per_second': 0.635}

In [9]:
peft_config = LoraConfig(
    lora_alpha=256 * 0.5,
    lora_dropout=0.05,
    r=256,
    bias="none",
    target_modules="all-linear",
    task_type="CAUSAL_LM",
)

trainer = SFTTrainer(
    model=model,
    args=args,
    train_dataset=train_dataset,
    peft_config=peft_config,
    eval_dataset=test_dataset,
    max_seq_length=max_seq_length,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
    # packing=True,
    preprocess_logits_for_metrics=preprocess_logits_for_metrics,
    dataset_kwargs={
        "add_special_tokens": False,  # We template with special tokens
        "append_concat_token": False,  # No need to add additional separator token
    },
)
trainer.evaluate()

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


ValueError: Target module Dropout(p=0.05, inplace=False) is not supported. Currently, only the following modules are supported: `torch.nn.Linear`, `torch.nn.Embedding`, `torch.nn.Conv2d`, `transformers.pytorch_utils.Conv1D`.