[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://github.com/Jaseci-Labs/mtllm/blob/main/support/finetune_llm/test.ipynb)

In [1]:
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

 # Change this to the model you want to use
checkpoint = "chandralegend/mtllm-levelgen-smollm-1.7b-chat"

device = "cuda" if torch.cuda.is_available() else "cpu"

tokenizer = AutoTokenizer.from_pretrained(checkpoint, load_in_4bit=True)
model = AutoModelForCausalLM.from_pretrained(checkpoint).to(device)

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
from datasets import Dataset, load_dataset


def formatted_train(input: str, response: str) -> str:
    """Format the input and response into the chat prompt format."""
    return f"{input}\n{response}\n"


def prepare_train_data(dataset: str) -> Dataset:
    """Prepare the training data for the MTLLM model."""
    _dataset = load_dataset(dataset)
    dataset_df = _dataset["train"].to_pandas()
    dataset_df["text"] = dataset_df[["input_prompt", "output_prompt"]].apply(
        lambda x: formatted_train(x["input_prompt"], x["output_prompt"]), axis=1
    )
    _dataset_ = Dataset.from_pandas(dataset_df)
    return _dataset_

test_data = prepare_train_data("chandralegend/mtllm-level-gen-synthetic")

In [10]:
idx = 11
sample_input = test_data[idx]["input"]
expected_output = test_data[idx]["output"]

print(f"Input: \n\n{sample_input}\n\n")
print(f"Expected Output: \n\n{expected_output}")


Input: 

[System Prompt]
This is an operation you must perform and return the output values. Neither, the methodology, extra sentences nor the code are not needed.
Input/Type formatting: Explanation of the Input (variable_name) (type) = value

[Information]


[Context]


[Inputs Information]
(last_levels) (list[Level]) = [Level(name="Aldur's Keep", difficulty=3, width=20, height=20, num_wall=23, num_enemies=6, time_countdown=90, n_retries_allowed=3), Level(name="Dragon's Roost", difficulty=3, width=20, height=20, num_wall=25, num_enemies=8, time_countdown=85, n_retries_allowed=3)]
(difficulty) (int) = 4
(level_width) (int) = 20
(level_height) (int) = 20

[Output Information]
(Level)

[Type Explanations]
(Level) (obj) eg:- Level(name=str, difficulty=int, width=int, height=int, num_wall=int, num_enemies=int, time_countdown=int, n_retries_allowed=int) -> Fantasy based Name (name) (str), (difficulty) (int), (width) (int), (height) (int), (num_wall) (int), (num_enemies) (int), (time_countdo

In [11]:
messages = [
    {"role": "user", "content": sample_input},
]
model_input_str = tokenizer.apply_chat_template(messages, tokenize=False)
print(model_input_str)

<|im_start|>user
[System Prompt]
This is an operation you must perform and return the output values. Neither, the methodology, extra sentences nor the code are not needed.
Input/Type formatting: Explanation of the Input (variable_name) (type) = value

[Information]


[Context]


[Inputs Information]
(last_levels) (list[Level]) = [Level(name="Aldur's Keep", difficulty=3, width=20, height=20, num_wall=23, num_enemies=6, time_countdown=90, n_retries_allowed=3), Level(name="Dragon's Roost", difficulty=3, width=20, height=20, num_wall=25, num_enemies=8, time_countdown=85, n_retries_allowed=3)]
(difficulty) (int) = 4
(level_width) (int) = 20
(level_height) (int) = 20

[Output Information]
(Level)

[Type Explanations]
(Level) (obj) eg:- Level(name=str, difficulty=int, width=int, height=int, num_wall=int, num_enemies=int, time_countdown=int, n_retries_allowed=int) -> Fantasy based Name (name) (str), (difficulty) (int), (width) (int), (height) (int), (num_wall) (int), (num_enemies) (int), (time

In [12]:
model_input = tokenizer.encode(model_input_str, return_tensors="pt").to(device)

In [13]:
model_input.shape

torch.Size([1, 418])

In [14]:
outputs = model.generate(model_input, max_new_tokens=100)
output = tokenizer.decode(outputs[0])

In [15]:
print(output[len(model_input_str):])

<|im_start|>assistant
[Output] Level(name='Dragon's Roost', difficulty=4, width=20, height=20, num_wall=25, num_enemies=8, time_countdown=85, n_retries_allowed=3)

[Type] Level(name='', difficulty=4, width=20, height=20, num_wall=25, num_enemies=


In [16]:
messages = [
    {"role": "assistant", "content": expected_output},
]
expected_output_str = tokenizer.apply_chat_template(messages, tokenize=False)
print(expected_output_str)

<|im_start|>assistant
[Output] Level(name="Griffon's Perch", difficulty=4, width=20, height=20, num_wall=27, num_enemies=10, time_countdown=80, n_retries_allowed=3)<|im_end|>

