In [1]:
import sys
import os
sys.path.append(os.path.abspath(".."))
sys.path.append(os.path.abspath("../.."))
from IPython.display import display
from inference_utils import extract_level_representation, fix_level_format, fix_level_format_extra
from create_img import convert_kidicarus_to_png, convert_loderunner_to_png, convert_mario_to_png, convert_rainbowisland_to_png
from unsloth import FastLanguageModel
import torch
from matplotlib import pyplot as plt
from matplotlib.backends.backend_pdf import PdfPages
import datetime
import io
from PIL import Image
from unsloth import FastModel
from unsloth.chat_templates import get_chat_template
import json
from inference.metrics import SampledLevelEvaluator

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.


  from .autonotebook import tqdm as notebook_tqdm


🦥 Unsloth Zoo will now patch everything to make training faster!


In [2]:
models = {
    "llama-3": [
        "/home/pressprexx/Code/AKCITGaming/Paper_LLM_PCG_Geral/VGLC_LLM_Finetunning/models/mario/Llama-3.1-8B-Instruct-unsloth-bnb-4bit-mario-teste1"
    ],
    "qwen-3": [
        "/home/pressprexx/Code/AKCITGaming/Paper_LLM_PCG_Geral/VGLC_LLM_Finetunning/models/mario/Qwen3-14B-Instruct-bnb-4bit-mario-horizontal-newline-teste1"
    ],
    "qwen-2.5": [
        "/home/pressprexx/Code/AKCITGaming/Paper_LLM_PCG_Geral/VGLC_LLM_Finetunning/models/mario/Qwen-2.5-14b-horizontal-newline-1epoch-mario-teste1"
    ],
    "gemma-3": [
        "/home/pressprexx/Code/AKCITGaming/Paper_LLM_PCG_Geral/VGLC_LLM_Finetunning/models/mario/gemma-3-12b-it-unsloth-bnb-4bit-mariogpt-teste1"
    ]
}

temperatures = [0.7, 1.0, 1.2, 1.5]

num_of_samples = 3

max_seq_length = 2048
dtype = None 
load_in_4bit = True
game_type = "mario"  # options: "mario", "loderunner", "kidicarus", "rainbowisland"

# Game-specific settings
game_settings = {
    "mario": {
        "empty_space": "-",
        "line_quantity": 14,
        "column_quantity": 50,
        "convert_function": convert_mario_to_png,
        "tiles_dir": '../../assets/mario',
        "add_ground": "X",
        "expected_output_size": 700
    },
    "loderunner": {
        "empty_space": ".",
        "line_quantity": 22,
        "column_quantity": 32,
        "convert_function": convert_loderunner_to_png,
        "tiles_dir": '../../assets/lode_runner',
        "add_ground": None,
        "expected_output_size": 704
    },
    "kidicarus": {
        "empty_space": "-",
        "line_quantity": 20,
        "column_quantity": 16,
        "convert_function": convert_kidicarus_to_png,
        "tiles_dir": '../../assets/kid_icarus',
        "add_ground": None,
        "expected_output_size": 320
    },
    "rainbowisland": {
        "empty_space": ".",
        "line_quantity": 35,
        "column_quantity": 32,
        "convert_function": convert_rainbowisland_to_png,
        "tiles_dir": '../../assets/rainbow_island',
        "add_ground": None,
        "expected_output_size": 1120
    }
}


prompt = "Create a level"
output_pdf = f"level_generation_results_{datetime.datetime.now().strftime('%Y%m%d_%H%M%S')}.pdf"

In [3]:
def load_model_by_type(model_path, model_type, max_seq_length=2048, dtype=None, load_in_4bit=True):
    """Load model based on model type"""
    if model_type in ["llama-3", "qwen-2.5"]:
        model, tokenizer = FastLanguageModel.from_pretrained(
            model_name=model_path,
            max_seq_length=max_seq_length,
            dtype=dtype,
            load_in_4bit=load_in_4bit,
        )
        FastLanguageModel.for_inference(model)
    elif model_type == "qwen-3":
        model, tokenizer = FastLanguageModel.from_pretrained(
            model_name=model_path,
            max_seq_length=max_seq_length,
            dtype=dtype,
            load_in_4bit=load_in_4bit,
        )
        FastLanguageModel.for_inference(model)
    elif model_type == "gemma-3":
        model, tokenizer = FastModel.from_pretrained(
            model_name=model_path,
            max_seq_length=max_seq_length,
            load_in_4bit=load_in_4bit,
            load_in_8bit=False,
            full_finetuning=False,
        )
        FastModel.for_inference(model)
        tokenizer = get_chat_template(
            tokenizer,
            chat_template="gemma-3",
        )
    else:
        raise ValueError(f"Unsupported model type: {model_type}")
    
    return model, tokenizer

def generate_with_model(model, tokenizer, prompt, model_type, temperature=0.7, top_p=0.8, top_k=20):
    """Generate text with model based on model type"""
    if model_type == "gemma-3":
        messages = [{
            "role": "user",
            "content": [{
                "type": "text",
                "text": prompt,
            }]
        }]
        text = tokenizer.apply_chat_template(
            messages,
            add_generation_prompt=True,
        )
        outputs = model.generate(
            **tokenizer([text], return_tensors="pt").to("cuda"),
            max_new_tokens=1024,
            temperature=temperature,
            top_p=top_p,
            top_k=top_k,
        )
    elif model_type == "qwen-3":
        messages = [{"role": "user", "content": prompt}]
        text = tokenizer.apply_chat_template(
            messages,
            tokenize=False,
            add_generation_prompt=True,
            enable_thinking=False,
        )
        outputs = model.generate(
            **tokenizer(text, return_tensors="pt").to("cuda"),
            max_new_tokens=512,
            temperature=temperature,
            top_p=top_p,
            top_k=top_k,
        )
    else:  # llama-3 or qwen-2.5
        messages = [{"role": "user", "content": prompt}]
        inputs = tokenizer.apply_chat_template(
            messages,
            tokenize=True,
            add_generation_prompt=True,
            return_tensors="pt",
        ).to("cuda")
        outputs = model.generate(
            input_ids=inputs,
            max_new_tokens=4096,
            use_cache=True,
            temperature=temperature,
            min_p=0.1,
        )
    
    return tokenizer.batch_decode(outputs)

In [4]:
json_output = f"level_generation_results_{datetime.datetime.now().strftime('%Y%m%d_%H%M%S')}.json"
results_data = []
evaluator = SampledLevelEvaluator()

with PdfPages(output_pdf) as pdf:
    for model_type, model_paths in models.items():
        print(f"Processing model type: {model_type}")
        
        for model_path in model_paths:
            print(f"Processing model: {model_path}")
            
            try:
                model, tokenizer = load_model_by_type(
                    model_path=model_path,
                    model_type=model_type,
                    max_seq_length=max_seq_length,
                    dtype=dtype,
                    load_in_4bit=load_in_4bit
                )
                
                for temp in temperatures:
                    print(f"Running with temperature: {temp}")
                    
                    for sample_idx in range(num_of_samples):
                        print(f"Generating sample {sample_idx+1}/{num_of_samples}")
                        
                        generation_params = {
                            'temperature': temp,
                            'top_p': 0.8,
                            'top_k': 20
                        }
                        
                        if model_type == "gemma-3":
                            generation_params['top_p'] = 0.95
                            generation_params['top_k'] = 64
                        
                        response = generate_with_model(
                            model=model,
                            tokenizer=tokenizer,
                            prompt=prompt,
                            model_type=model_type,
                            **generation_params
                        )
                        
                        level = extract_level_representation(
                            response[0], 
                            model_type=model_type, 
                            orientation="horizontal", 
                            separator="\n"
                        )
                        
                        fixed_level = fix_level_format_extra(
                            level, 
                            empty_space=game_settings[game_type]["empty_space"], 
                            line_quantity=game_settings[game_type]["line_quantity"], 
                            column_quantity=game_settings[game_type]["column_quantity"], 
                            enforce_shape="both", 
                            add_ground=game_settings[game_type]["add_ground"]
                        )
                        
                        expected_output_size = game_settings[game_type]["expected_output_size"]
                        level_without_separators = level.replace("\n", "").replace("|", "")
                        diff_percentage = SampledLevelEvaluator.calculate_generation_diff(
                            expected_output_size, 
                            level_without_separators
                        )
                        
                        result_data = {
                            "model_type": model_type,
                            "model_path": os.path.basename(model_path),
                            "temperature": temp,
                            "sample_index": sample_idx + 1,
                            "level": fixed_level,
                            "metrics": {
                                "expected_size": expected_output_size,
                                "actual_size": len(level_without_separators),
                                "size_diff_percentage": diff_percentage
                            }
                        }
                        results_data.append(result_data)
                        
                        convert_function = game_settings[game_type]["convert_function"]
                        tiles_dir = game_settings[game_type]["tiles_dir"]
                        if tiles_dir:
                            img, _, _ = convert_function(fixed_level, tiles_dir=tiles_dir)
                        else:
                            img, _, _ = convert_function(fixed_level)
                        
                        plt.figure(figsize=(12, 10))
                        
                        metadata = (
                            f"Model Type: {model_type}\n"
                            f"Model: {os.path.basename(model_path)}\n"
                            f"Temperature: {temp}\n"
                            f"Sample: {sample_idx+1}/{num_of_samples}\n"
                            f"Size Diff: {diff_percentage:.2f}%\n"
                            f"Level:\n{fixed_level}"
                        )
                        
                        plt.subplot(2, 1, 1)
                        plt.text(0.05, 0.95, metadata, fontsize=8, va='top', 
                                 family='monospace', transform=plt.gca().transAxes)
                        plt.axis('off')
                        plt.subplot(2, 1, 2)
                        plt.imshow(img)
                        plt.axis('off')
                        plt.title(f"Generated Level")
                        plt.tight_layout()

                        pdf.savefig()
                        plt.close()
                        
                        print(f"Sample {sample_idx+1} completed")
                    
                    print(f"Completed temperature {temp}")
                
                print(f"Completed model {model_path}")
                
                del model
                del tokenizer
                torch.cuda.empty_cache()
                
            except Exception as e:
                if model:
                    del model
                if tokenizer:
                    del tokenizer
                torch.cuda.empty_cache()

                plt.figure(figsize=(8.5, 11))
                error_info = (
                    f"Error processing model: {model_path}\n"
                    f"Model type: {model_type}\n\n"
                    f"Error: {str(e)}"
                )
                plt.text(0.5, 0.5, error_info, fontsize=12, ha='center', va='center', color='red')
                plt.axis('off')
                pdf.savefig()
                plt.close()
                print(f"Error with model {model_path}: {str(e)}")
    
    plt.figure(figsize=(8.5, 11))
    
    total_models = sum(len(paths) for paths in models.values())
    
    info = (
        f"Level Generation Results\n\n"
        f"Date: {datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n"
        f"Model Types: {', '.join(models.keys())}\n"
        f"Total Models: {total_models}\n"
        f"Temperatures: {temperatures}\n"
        f"Samples per combination: {num_of_samples}\n"
        f"Game type: {game_type}\n"
        f"Total samples: {total_models * len(temperatures) * num_of_samples}"
    )
    plt.text(0.5, 0.5, info, fontsize=12, ha='center', va='center')
    plt.axis('off')
    pdf.savefig()
    plt.close()

# Save the JSON data
with open(json_output, 'w') as f:
    json.dump(results_data, f, indent=2)

print(f"PDF saved to {output_pdf}")
print(f"JSON saved to {json_output}")

Processing model type: llama-3
Processing model: /home/pressprexx/Code/AKCITGaming/Paper_LLM_PCG_Geral/VGLC_LLM_Finetunning/models/mario/Llama-3.1-8B-Instruct-unsloth-bnb-4bit-mario-teste1
==((====))==  Unsloth 2025.4.7: Fast Llama patching. Transformers: 4.51.3.
   \\   /|    NVIDIA GeForce RTX 3090. Num GPUs = 1. Max memory: 23.677 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.6.0+cu124. CUDA: 8.6. CUDA Toolkit: 12.4. Triton: 3.2.0
\        /    Bfloat16 = TRUE. FA [Xformers = 0.0.29.post3. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


Unsloth 2025.4.7 patched 32 layers with 32 QKV layers, 32 O layers and 32 MLP layers.
The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.


Running with temperature: 0.7
Generating sample 1/3
Sample 1 completed
Generating sample 2/3
Sample 2 completed
Generating sample 3/3
Sample 3 completed
Completed temperature 0.7
Running with temperature: 1.0
Generating sample 1/3
Sample 1 completed
Generating sample 2/3
Sample 2 completed
Generating sample 3/3
Sample 3 completed
Completed temperature 1.0
Running with temperature: 1.2
Generating sample 1/3
Sample 1 completed
Generating sample 2/3
Sample 2 completed
Generating sample 3/3
Sample 3 completed
Completed temperature 1.2
Running with temperature: 1.5
Generating sample 1/3
Sample 1 completed
Generating sample 2/3
Sample 2 completed
Generating sample 3/3
Sample 3 completed
Completed temperature 1.5
Completed model /home/pressprexx/Code/AKCITGaming/Paper_LLM_PCG_Geral/VGLC_LLM_Finetunning/models/mario/Llama-3.1-8B-Instruct-unsloth-bnb-4bit-mario-teste1
Processing model type: qwen-3
Processing model: /home/pressprexx/Code/AKCITGaming/Paper_LLM_PCG_Geral/VGLC_LLM_Finetunning/model

Loading checkpoint shards: 100%|██████████| 3/3 [00:01<00:00,  2.26it/s]
Unsloth 2025.4.7 patched 40 layers with 40 QKV layers, 40 O layers and 40 MLP layers.


Running with temperature: 0.7
Generating sample 1/3
Sample 1 completed
Generating sample 2/3
Sample 2 completed
Generating sample 3/3
Sample 3 completed
Completed temperature 0.7
Running with temperature: 1.0
Generating sample 1/3
Sample 1 completed
Generating sample 2/3
Sample 2 completed
Generating sample 3/3
Sample 3 completed
Completed temperature 1.0
Running with temperature: 1.2
Generating sample 1/3
Sample 1 completed
Generating sample 2/3
Sample 2 completed
Generating sample 3/3
Sample 3 completed
Completed temperature 1.2
Running with temperature: 1.5
Generating sample 1/3
Sample 1 completed
Generating sample 2/3
Sample 2 completed
Generating sample 3/3
Sample 3 completed
Completed temperature 1.5
Completed model /home/pressprexx/Code/AKCITGaming/Paper_LLM_PCG_Geral/VGLC_LLM_Finetunning/models/mario/Qwen3-14B-Instruct-bnb-4bit-mario-horizontal-newline-teste1
Processing model type: qwen-2.5
Processing model: /home/pressprexx/Code/AKCITGaming/Paper_LLM_PCG_Geral/VGLC_LLM_Finetun

Loading checkpoint shards: 100%|██████████| 2/2 [00:01<00:00,  1.48it/s]
Unsloth 2025.4.7 patched 48 layers with 48 QKV layers, 48 O layers and 48 MLP layers.


Running with temperature: 0.7
Generating sample 1/3
Sample 1 completed
Generating sample 2/3
REPLACING <PIL.PngImagePlugin.PngImageFile image mode=RGBA size=16x16 at 0x70340540B790> (24, 12)
REPLACING <PIL.PngImagePlugin.PngImageFile image mode=RGBA size=16x16 at 0x70340540B790> (25, 12)
Sample 2 completed
Generating sample 3/3
Sample 3 completed
Completed temperature 0.7
Running with temperature: 1.0
Generating sample 1/3
Sample 1 completed
Generating sample 2/3
Sample 2 completed
Generating sample 3/3
Sample 3 completed
Completed temperature 1.0
Running with temperature: 1.2
Generating sample 1/3
Sample 1 completed
Generating sample 2/3
Sample 2 completed
Generating sample 3/3
Sample 3 completed
Completed temperature 1.2
Running with temperature: 1.5
Generating sample 1/3
Sample 1 completed
Generating sample 2/3
Sample 2 completed
Generating sample 3/3
Sample 3 completed
Completed temperature 1.5
Completed model /home/pressprexx/Code/AKCITGaming/Paper_LLM_PCG_Geral/VGLC_LLM_Finetunni

Loading checkpoint shards: 100%|██████████| 3/3 [00:01<00:00,  1.76it/s]
Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.52, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.


Running with temperature: 0.7
Generating sample 1/3
Sample 1 completed
Generating sample 2/3
Sample 2 completed
Generating sample 3/3
Sample 3 completed
Completed temperature 0.7
Running with temperature: 1.0
Generating sample 1/3
Sample 1 completed
Generating sample 2/3
Sample 2 completed
Generating sample 3/3
Sample 3 completed
Completed temperature 1.0
Running with temperature: 1.2
Generating sample 1/3
Sample 1 completed
Generating sample 2/3
Sample 2 completed
Generating sample 3/3
Sample 3 completed
Completed temperature 1.2
Running with temperature: 1.5
Generating sample 1/3
Sample 1 completed
Generating sample 2/3
Sample 2 completed
Generating sample 3/3
Sample 3 completed
Completed temperature 1.5
Completed model /home/pressprexx/Code/AKCITGaming/Paper_LLM_PCG_Geral/VGLC_LLM_Finetunning/models/mario/gemma-3-12b-it-unsloth-bnb-4bit-mariogpt-teste1
PDF saved to level_generation_results_20250521_100847.pdf
JSON saved to level_generation_results_20250521_100847.json
