In [2]:
# Reload the autoreload extension and use mode 2 so that edits to imported
# modules are picked up live, without restarting the kernel.
%reload_ext autoreload
%autoreload 2

In [1]:

from mtg_ai.cards import MTGDatabase
import numpy as np
import pandas as pd

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.


# Set up colored logging

In [2]:
import logging

import colorlog

# Route log records through a single colorized stream handler so that the
# severity of each message is visible at a glance in the notebook output.
handler = colorlog.StreamHandler()
fmt = "%(log_color)s%(levelname)s:%(name)s:%(message)s"
formatter = colorlog.ColoredFormatter(
    fmt,
    log_colors={
        "DEBUG": "purple",
        "INFO": "green",
        "WARNING": "yellow",
        "ERROR": "red",
        "CRITICAL": "red,bg_white",
    },
)
handler.setFormatter(formatter)

# `basicConfig` silently does nothing when the root logger already has
# handlers (for example after this cell has been run once, or when an imported
# library configured logging first). `force=True` removes any existing root
# handlers so the colored handler is always the one in effect and re-running
# the cell is idempotent.
logging.basicConfig(level=logging.DEBUG, handlers=[handler], force=True)

# Project logger. Propagation is the default and is stated explicitly: records
# are handed up to the root logger, which owns the handler configured above.
logger = logging.getLogger("mtg-ai")
logger.propagate = True

# Dataset Builder

In [3]:
from mtg_ai.cards import MTGDatabase
from mtg_ai.cards.training_data_builder import (
    build_question_answer_datasets,
    load_mtg_dataset,
)

# Hand an `MTGDatabase` instance to the question/answer dataset builder.
# `recreate=True` presumably forces the dataset to be regenerated instead of
# reusing an existing file -- confirm against the builder's implementation.
data = MTGDatabase()
build_question_answer_datasets(data, recreate=True)


100%|██████████| 7/7 [00:02<00:00,  3.13it/s]


Finished creating question answer dataset in 3.333371639251709 seconds | file size: 20.98 MB


# Training

In [4]:
from mtg_ai.ai import MTGCardAITrainer, ModelAndTokenizer

# Model entry used for this fine-tuning run.
model = ModelAndTokenizer.UNSLOTH_LLAMA_3_2_3B_INSTRUCT_Q8

# Earlier experiment kept for reference (the loader class is not imported in
# this cell, so these lines cannot simply be uncommented):
# dataset = "data/question_answer_dataset.csv"
# loader = MTGCardAITrainingDatasetLoader(dataset, model.tokenizer)

# No GGUF file is passed to the trainer; `model.gguf_file` is the alternative.
gguf_file = None  # model.gguf_file
ai_trainer = MTGCardAITrainer(
    model_name=model.value,
    gguf_file=gguf_file,
)

# Ask the trainer not to resume from a checkpoint.
# NOTE(review): the output recorded for this cell ends with
# "AttributeError: module 'datetime' has no attribute 'now'". This cell never
# references `datetime`, so the error presumably originates inside the project
# code reached from `train()` (e.g. `import datetime` followed by
# `datetime.now()`) -- confirm and fix there.
ai_trainer.train(resume_from_checkpoint=False)

==((====))==  Unsloth 2024.10.0: Fast Llama patching. Transformers = 4.44.2.
   \\   /|    GPU: NVIDIA GeForce RTX 3080. Max memory: 12.0 GB. Platform = Linux.
O^O/ \_/ \    Pytorch: 2.4.1. CUDA = 8.6. CUDA Toolkit = 12.1.
\        /    Bfloat16 = TRUE. FA [Xformers = 0.0.28.post1. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth


Unsloth 2024.10.0 patched 28 layers with 28 QKV layers, 28 O layers and 28 MLP layers.


AttributeError: module 'datetime' has no attribute 'now'

# Combine LoRA with model

In [None]:
from pathlib import Path

from mtg_ai.ai import ModelAndTokenizer, save_combined_model

# Model entry whose value, tokenizer and GGUF file are handed to the merge.
# NOTE(review): the training cell selects UNSLOTH_LLAMA_3_2_3B_INSTRUCT_Q8,
# whereas this cell selects a different entry -- confirm that the contents of
# ./results were produced against this base model before merging.
model_name = ModelAndTokenizer.BARTOWSKI_LLAMA_3_2_8B_INSTRUCT_Q4_K_L

# Absolute path of the results directory, resolved from the working directory.
model_dir = Path("./results").resolve()

save_combined_model(
    model_name=model_name.value,
    tokenizer_name=model_name.tokenizer,
    gguf_file=model_name.gguf_file,
    model_dir=model_dir,
)

# Inference

In [None]:
from typing import cast

import torch
from peft import PeftModelForCausalLM
from unsloth import FastLanguageModel
from unsloth.chat_templates import get_chat_template
# Kept after the unsloth imports, matching the order in which this cell
# originally imported the two packages.
from transformers import TextStreamer

from mtg_ai.ai import ModelAndTokenizer

model_name = ModelAndTokenizer.UNSLOTH_LLAMA_3_2_3B_INSTRUCT_Q8

# Load model and tokenizer from the local results directory (the model that was
# used for training), in 4-bit with bfloat16 as the requested dtype.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name="./results/",
    max_seq_length=2048,
    dtype=torch.bfloat16,
    load_in_4bit=True,
)
# `cast` only informs type checkers; the object itself is not converted.
model = cast(PeftModelForCausalLM, model)
model.to("cuda")
# Switch on Unsloth's faster inference mode.
FastLanguageModel.for_inference(model)
print(type(tokenizer))

system_prompt = """
    You are a knowledgeable assistant trained on Magic: The Gathering cards. Your goal is to provide accurate and concise information about Magic cards when asked. Always follow these guidelines:
 
    1. When asked about specific card attributes, such as toughness, power, mana cost, or abilities, provide clear and factual information.
    2. If the query involves a combination of attributes (like power and toughness), list both in a structured format.
    3. For combo-related queries, suggest relevant and well-known card interactions based on color, mana cost, and abilities.
    4. If you are unsure or the information isn't available, provide the most relevant possible answer based on your knowledge.
    5. Maintain a professional and helpful tone in all responses.
    6. For factual questions, such as "What is the toughness of Serra Angel?", respond directly with the correct value in a sentence.
    7. Always prioritize accuracy over creativity when answering card-specific queries.
"""

# Two-turn conversation: the system instructions followed by one user question.
messages = [
    {"role": "system", "content": system_prompt},
    {"role": "user", "content": "What is 5 + 5?"},
]

# Render the conversation with the tokenizer's chat template, tokenize it to a
# PyTorch tensor and move it to the GPU. The generation prompt must be appended
# for generation.
inputs = tokenizer.apply_chat_template(
    messages,
    tokenize=True,
    add_generation_prompt=True,
    return_tensors="pt",
).to("cuda")

# Stream the generated text as it is produced, without echoing the prompt.
text_streamer = TextStreamer(tokenizer, skip_prompt=True)
_ = model.generate(
    input_ids=inputs,
    streamer=text_streamer,
    max_new_tokens=128,
    use_cache=True,
    temperature=0.1,
    min_p=0.1,
)

In [None]:
from pathlib import Path

from mtg_ai.ai import ModelAndTokenizer, MTGCardAI

# Resolved location of the merged model; not referenced again in this cell.
model_path = Path("./results/merged_model").resolve()
m = ModelAndTokenizer.UNSLOTH_LLAMA_3_2_3B_INSTRUCT_Q8

# Construct the assistant from the selected model entry, passing ./results/ as
# the adapter.
ai = MTGCardAI(
    model_name=m.value,
    gguf_file=m.gguf_file,
    tokenizer_name=m.tokenizer,
    adapter="./results/",
)
# Variant without the adapter argument, kept for comparison runs:
# ai = MTGCardAI(model_name=m.value, gguf_file=m.gguf_file, tokenizer_name=m.tokenizer)
ai.load_models()

# Card questions used as a quick manual check of the responses.
test_questions = [
    "What is the converted mana cost of Acquisitions Expert?",
    "What is the converted mana cost of Sol Ring?",
    "What is the type of Sol Ring?",
]

# `logging` is not imported in this cell; it comes from the logging setup cell
# earlier in the notebook, which must have been run first.
logging.info("---------------------------")
for tq in test_questions:
    # Visual separator between consecutive questions in the log output.
    logging.info("##################################")
    response = ai.run(prompt=tq, max_new_tokens=200)
    logging.info(f"Response:\n{response}")

# Experimenting 