In [None]:
from langchain_core.messages import SystemMessage, HumanMessage
from langchain_google_genai import ChatGoogleGenerativeAI

from os.path import join, dirname, realpath
from time import sleep
from math import floor, ceil

from wand.image import Image
from wand.exceptions import WandException


import pandas as pd
import dotenv
import os
import base64
import json

# Base directory used to build every other path in this notebook.
# NOTE: "__file__" is a string literal here, not the __file__ variable, so
# dirname() returns "" and the expression resolves to the absolute path of the
# current working directory (presumably a workaround for __file__ being
# unavailable in a notebook kernel -- confirm).
MAIN_PATH = realpath(join(dirname("__file__"), '.'))

# Load environment variables from the .env file located one directory above
# MAIN_PATH. The trailing semicolon keeps the call's return value out of the
# cell output.
dotenv.load_dotenv(join(MAIN_PATH, "..", ".env"));

## Load LLM Model

In [57]:
# Fail fast with an actionable message when the API key is missing or empty.
# The previous fallback assigned os.environ.get("GOOGLE_API_KEY") back into
# os.environ; when the key is absent that value is None, and os.environ only
# accepts strings, so the cell died with an unrelated-looking TypeError.
if not os.environ.get("GOOGLE_API_KEY"):
    raise RuntimeError(
        "GOOGLE_API_KEY is not set. Add it to the .env file loaded by this "
        "notebook or export it in the environment before running this cell."
    )

# Alternative model kept for reference (disabled):
# model = ChatGoogleGenerativeAI(
#     model="gemini-2.5-flash-preview-05-20"
# )

# Chat model shared by every cell below.
model = ChatGoogleGenerativeAI(
    model="gemma-3-27b-it"
)

In [58]:
# Smoke test: send a single plain-text human message to the model and print
# the reply, to confirm that the model and credentials work before running the
# tile analysis. The system message is left disabled.
messages = [
    # SystemMessage("You are a expert mathematician."),
    HumanMessage("How mutch is 8!?")
]

# `result` is the model's response object; only its text content is printed.
result = model.invoke(messages)
print(result.content)

8! (read as "8 factorial") is calculated as:

8! = 8 * 7 * 6 * 5 * 4 * 3 * 2 * 1 = 40,320

Therefore, 8! = **40,320**


In [59]:
# with open(join(MAIN_PATH, "img2.png"), "rb") as image_file:
#     imageb64 = base64.b64encode(image_file.read()).decode()

# print(imageb64)

# Prompts

In [60]:
# Role description intended for a system message. In the visible cells it is
# only referenced from commented-out code, so it is currently not sent.
ROLE_IMG_ANALYZER = """
You are an expert AI assistant specialized in analyzing and classifying pixel art game assets.
"""

# Instruction text sent together with each tile image.
# Fixes relative to the previous wording:
#   * the example phrases in step 2 had broken quoting (a phrase split in two,
#     an unbalanced quote), which made the examples themselves ambiguous;
#   * a stray "*" had leaked into the example input description;
#   * the "Expected Output" sample used single quotes, which is not valid JSON
#     even though the prompt asks for a JSON object. It now uses double quotes
#     so the sample can be parsed with json.loads.
ANALYZE_IMG = """
Your task is to examine 16x16 pixel art tiles from a fantasy-themed tileset. Due
to the extremely low resolution (16x16), a single tile can be ambiguous and
represent multiple different objects, characters, or icons. Your goal is to
capture this ambiguity by providing a range of plausible interpretations.

1.  **Generate Multiple Interpretations:** Provide a list of 3 to 5 different
interpretations for the tile.

2.  **Be Concise and Direct:** Each interpretation must be a short, descriptive
phrase in English (e.g., "a wizard holding a staff", "a healing potion", "a
cave entrance", "a spider monster").

3.  **Focus on Variety:** The interpretations should be distinct. Think about
what the tile could represent literally, abstractly, or as a UI icon.

4.  **Output Format:** Provide the output as a single JSON object with one key,
`"interpretations"`, which contains an array of the string descriptions. Do not
add any other text or explanation outside of the JSON object.

---

**### EXAMPLE ###**

**Input:** [Image of a simple, vertically oriented 16x16 pixel art sword with a
brown hilt and grey blade]

**Expected Output:**
{
"interpretations": [
    "a simple shortsword",
    "a steel dagger",
    "a cross-shaped grave marker",
    "a key for a large lock",
    "a sword UI icon"
]
}

---

**Now, analyze the following tile:**
"""

# Utility Functions

In [61]:
def describe_base64_png(base64_png) -> str:
    """Ask the module-level ``model`` to interpret one Base64-encoded PNG.

    The request consists of a single human message with two content parts:
    the ``ANALYZE_IMG`` instructions as text, followed by the image embedded
    as a ``data:image/png;base64,...`` URL. No system message is sent.

    :param base64_png: Base64 text of the PNG, without the data-URL prefix.
    :return: The ``content`` attribute of the model's response.
    """
    instructions_part = {"type": "text", "text": ANALYZE_IMG}
    image_part = {
        "type": "image_url",
        "image_url": f"data:image/png;base64,{base64_png}",
    }

    # One human message carrying both parts is the entire prompt.
    request = [HumanMessage(content=[instructions_part, image_part])]
    return model.invoke(request).content

In [62]:
def extract_tiles_as_base64(tileset_path: str, tile_width: int, tile_height: int) -> list[str]:
    """
    Load a tileset image, cut it into individual tiles and return each tile
    as a Base64-encoded PNG string.

    Tiles are emitted in row-major order (left to right, then top to bottom).
    The grid size is computed with floor division, so any partial tile left
    over at the right or bottom edge of the image is ignored.

    :param tileset_path: Path to the tileset image file (e.g. 'my_tileset.png').
    :param tile_width: Width of a single tile in pixels (e.g. 16). Must be > 0.
    :param tile_height: Height of a single tile in pixels (e.g. 16). Must be > 0.
    :return: A list of Base64 strings, one per tile. An empty list is returned
        when Wand/ImageMagick fails to process the image.
    :raises FileNotFoundError: If ``tileset_path`` does not exist.
    :raises ValueError: If ``tile_width`` or ``tile_height`` is not positive.
    """
    if not os.path.exists(tileset_path):
        raise FileNotFoundError(f"O arquivo de tileset não foi encontrado em: {tileset_path}")

    # A zero size would raise ZeroDivisionError below and a negative size
    # would silently produce an empty grid; reject both explicitly.
    if tile_width <= 0 or tile_height <= 0:
        raise ValueError(
            f"tile_width and tile_height must be positive, got {tile_width}x{tile_height}"
        )

    base64_tiles = []

    try:
        # The context manager releases the image resources when done.
        with Image(filename=tileset_path) as img:
            total_width = img.width
            total_height = img.height

            # Number of whole tiles that fit horizontally and vertically.
            cols = total_width // tile_width
            rows = total_height // tile_height

            print(f"Tileset carregado: {total_width}x{total_height}px.")
            print(f"Detectado grid de {cols} colunas x {rows} linhas de tiles de {tile_width}x{tile_height}px.")
            print("-" * 30)

            # Walk the grid row by row.
            for row in range(rows):
                for col in range(cols):
                    # Top-left corner of the current tile.
                    left = col * tile_width
                    top = row * tile_height

                    # Slicing is [x-range, y-range] and yields a separate
                    # cropped image, which is also released by its own 'with'.
                    with img[left:left + tile_width, top:top + tile_height] as tile_image:
                        # Serialize the tile as PNG, then encode the bytes as
                        # Base64 text.
                        tile_blob = tile_image.make_blob('png')
                        base64_tiles.append(base64.b64encode(tile_blob).decode('utf-8'))

    except WandException as e:
        # Best effort: report the problem and hand back an empty list so the
        # caller can simply test the result for truthiness.
        print(f"Ocorreu um erro com a biblioteca Wand/ImageMagick: {e}")
        return []

    return base64_tiles

# Analyze Tiles

In [63]:
# Slice tileset.png (resolved relative to MAIN_PATH) into 16x16 tiles.
# `all_tiles_b64` holds one Base64 string per tile and is consumed by the
# analysis loop further down.
all_tiles_b64 = extract_tiles_as_base64(
    tileset_path=join(MAIN_PATH, "./tileset.png"),
    tile_width=16,
    tile_height=16
)

# An empty list is falsy, so nothing is printed when no tiles were extracted.
if all_tiles_b64:
    print(f"\nExtração concluída! Total de tiles encontrados: {len(all_tiles_b64)}")
    
    # Show the beginning of the first tile's Base64 string as an example
    print("\nExemplo da string Base64 do primeiro tile (primeiros 80 caracteres):")
    print(all_tiles_b64[0][:80] + "...")

Tileset carregado: 784x352px.
Detectado grid de 49 colunas x 22 linhas de tiles de 16x16px.
------------------------------

Extração concluída! Total de tiles encontrados: 1078

Exemplo da string Base64 do primeiro tile (primeiros 80 caracteres):
iVBORw0KGgoAAAANSUhEUgAAABAAAAAQAQMAAAAlPW0iAAAAIGNIUk0AAHomAACAhAAA+gAAAIDoAAB1...


In [64]:
def index_to_coords(index, cols=49) -> list[int]:
    """Convert a flat, row-major tile index into ``[row, col]`` coordinates.

    :param index: Position of the tile in the row-major tile list.
    :param cols: Number of tile columns in the grid. Defaults to 49, the
        column count previously hard-coded here (the grid size reported for
        the tileset used in this notebook).
    :return: A two-element list ``[row, col]``.
    """
    row, col = divmod(index, cols)
    return [row, col]

def coords_to_index(coords, cols=49) -> int:
    """Convert ``[row, col]`` coordinates into a flat, row-major tile index.

    :param coords: Sequence whose first item is the row and second the column.
    :param cols: Number of tile columns in the grid. Defaults to 49, the
        column count previously hard-coded here (the grid size reported for
        the tileset used in this notebook).
    :return: The row-major index ``row * cols + col``.
    """
    row, col = coords[0], coords[1]
    return (row * cols) + col

In [68]:
# Describe every extracted tile with the model and persist the results.
#
# Changes relative to the previous version of this cell:
#   * the loop variable is no longer called `base64`; that name rebound the
#     imported `base64` module to a string, which broke any later re-run of
#     the tile extraction cell in the same kernel;
#   * failures are no longer hidden behind a bare `except:`; the cause is
#     printed, and a manual interrupt gets its own banner;
#   * the banner f-string no longer nests double quotes (a SyntaxError before
#     Python 3.12);
#   * the progress bar is always exactly PROGRESS_BAR_WIDTH characters wide
#     (flooring both halves independently made it one character short).
REQUEST_DELAY_SECONDS = 3   # pause after each model call to throttle requests
PROGRESS_BAR_WIDTH = 80

data = []
count = 0
total = len(all_tiles_b64)

try:
    for i, tile_b64 in enumerate(all_tiles_b64):
        description = describe_base64_png(tile_b64)
        sleep(REQUEST_DELAY_SECONDS)

        # The reply may be wrapped in a Markdown ```json fence; drop the
        # surrounding whitespace first so a trailing newline after the
        # closing fence does not defeat the suffix check.
        description = description.strip()
        description = description.removeprefix("```json\n").removesuffix("\n```")

        # index_to_coords returns [row, col]; x is the column, y is the row.
        y, x = index_to_coords(i)
        data.append({
            "base64": "data:image/png;base64," + tile_b64,
            "description": description,
            "x": x,
            "y": y
        })
        count += 1

        filled = floor(PROGRESS_BAR_WIDTH * count / total)
        print(f"[{'=' * filled}{'-' * (PROGRESS_BAR_WIDTH - filled)}] ({count}/{total})")
except KeyboardInterrupt:
    # Manual stop: keep whatever was collected so far (saved in `finally`).
    print(f"[{'INTERRUPTED'.center(PROGRESS_BAR_WIDTH, '-')}]")
    print(f"Stopped by user after {count}/{total} tiles.")
except Exception as exc:
    # Best effort: report the failure and still save the partial results.
    print(f"[{'ERROR'.center(PROGRESS_BAR_WIDTH, '-')}]")
    print(f"Stopped after {count}/{total} tiles: {exc!r}")
finally:
    # Always write what was gathered, even after an error or interrupt.
    dt = pd.DataFrame(data)

    dt.to_csv('tiles_description.csv', index=False, sep=';')
    dt.to_json('tiles_description.json', orient='records')

[-------------------------------------------------------------------------------] (1/1078)
[-------------------------------------------------------------------------------] (2/1078)
[-------------------------------------------------------------------------------] (3/1078)
[-------------------------------------------------------------------------------] (4/1078)
[-------------------------------------------------------------------------------] (5/1078)
[-------------------------------------------------------------------------------] (6/1078)
[-------------------------------------------------------------------------------] (7/1078)
[-------------------------------------------------------------------------------] (8/1078)
[-------------------------------------------------------------------------------] (9/1078)
[-------------------------------------------------------------------------------] (10/1078)
[-------------------------------------------------------------------------------] (11/107