In [1]:
import sys
import os

project_root = os.path.abspath(os.path.join(os.getcwd(), "..", ".."))
if project_root not in sys.path:
    sys.path.insert(0, project_root)

In [2]:
import torch
from unsloth import FastLanguageModel

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
🦥 Unsloth Zoo will now patch everything to make training faster!
INFO 08-23 16:32:31 [importing.py:53] Triton module has been replaced with a placeholder.
INFO 08-23 16:32:31 [__init__.py:239] Automatically detected platform cuda.


In [None]:
max_seq_length = 2048  # Can increase for longer reasoning traces
lora_rank = 32         # Larger rank = smarter, but slower

# Load model + tokenizer with vLLM acceleration
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "unsloth/Qwen3-0.6B",
    max_seq_length = max_seq_length,
    load_in_4bit = False,       # False for LoRA 16bit
    fast_inference = True,      # Enable vLLM fast inference
    max_lora_rank = lora_rank,
    gpu_memory_utilization = 0.7, # Reduce if out of memory
)

Unsloth: Patching vLLM v1 graph capture
Unsloth: Patching vLLM v0 graph capture
==((====))==  Unsloth 2025.5.7: Fast Qwen3 patching. Transformers: 4.51.3. vLLM: 0.8.5.post1.
   \\   /|    Quadro T1000 with Max-Q Design. Num GPUs = 1. Max memory: 4.0 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.6.0+cu124. CUDA: 7.5. CUDA Toolkit: 12.4. Triton: 3.2.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.29.post2. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!
Unsloth: vLLM loading unsloth/Qwen3-0.6B with actual GPU utilization = 55.83%
Unsloth: Your GPU has CUDA compute capability 7.5 with VRAM = 4.0 GB.
Unsloth: Using conservativeness = 1.0. Chunked prefill tokens = 2048. Num Sequences = 128.
Unsloth: vLLM's KV Cache can use up to 1.25 GB. Also swap space = 0 GB.
INFO 08-23 16:32:35 [config.py:717] This model supports multiple tasks: {'classify', 'score', 'embed', 'generate', 

Loading safetensors checkpoint shards:   0% Completed | 0/1 [00:00<?, ?it/s]


INFO 08-23 16:32:38 [loader.py:458] Loading weights took 0.54 seconds
INFO 08-23 16:32:38 [punica_selector.py:18] Using PunicaWrapperGPU.
INFO 08-23 16:32:38 [model_runner.py:1140] Model loading took 1.1649 GiB and 1.118414 seconds
INFO 08-23 16:32:46 [worker.py:287] Memory profiling takes 8.28 seconds
INFO 08-23 16:32:46 [worker.py:287] the current vLLM instance can use total_gpu_memory (4.00GiB) x gpu_memory_utilization (0.56) = 2.23GiB
INFO 08-23 16:32:46 [worker.py:287] model weights take 1.16GiB; non_torch_memory takes 0.02GiB; PyTorch activation peak memory takes 0.70GiB; the rest of the memory reserved for KV Cache is 0.35GiB.
INFO 08-23 16:32:47 [executor_base.py:112] # cuda blocks: 203, # CPU blocks: 0
INFO 08-23 16:32:47 [executor_base.py:117] Maximum concurrency for 2048 tokens per request: 1.59x
INFO 08-23 16:32:47 [vllm_utils.py:671] Unsloth: Running patched vLLM v0 `capture_model`.
INFO 08-23 16:32:47 [model_runner.py:1450] Capturing cudagraphs for decoding. This may lead

Capturing CUDA graph shapes:   0%|          | 0/19 [00:00<?, ?it/s]

INFO 08-23 16:32:59 [model_runner.py:1592] Graph capturing finished in 12 secs, took 0.24 GiB
INFO 08-23 16:32:59 [vllm_utils.py:678] Unsloth: Patched vLLM v0 graph capture finished in 12 secs.
INFO 08-23 16:32:59 [llm_engine.py:437] init engine (profile, create kv cache, warmup model) took 21.54 seconds
Unsloth: Just some info: will skip parsing ['post_feedforward_layernorm', 'pre_feedforward_layernorm']
Unsloth: Just some info: will skip parsing ['post_feedforward_layernorm', 'pre_feedforward_layernorm']


In [5]:
from src.globals import TRAINING_ROWS, TRAINING_COLS

In [17]:
REASONING_START = "<think>"
REASONING_END   = "</think>"
SOLUTION_START  = "<SOLUTION>"
SOLUTION_END    = "</SOLUTION>"

SYSTEM_PROMPT = f"""You are a Minesweeper assistant.
The game board is always {TRAINING_ROWS}x{TRAINING_COLS} in size.
You will be given ONLY the current board state as input from the user.

Your task: Suggest exactly ONE valid next move.

Move format rules (must follow exactly one of these two):
1. "row col"       → to reveal a cell
2. "row col f"     → to flag a cell as a mine

Board representation:
- '*' means the tile has not been revealed yet.
- Numbers 0–8 show how many mines are adjacent to that square.
- The board will be displayed as a grid of symbols only.

Here is an example board representation:

* * * * * * * *
* * * * * * * *
* * * * * * * *
* * * * * * * *
* 2 1 1 1 * * *
F 1 0 0 1 * * *
1 1 0 0 1 3 * *
0 0 0 0 0 2 * *

Here, for example, the move "6 6 f" would flag the cell at row 6, column 6 as a mine.

Constraints:
- Row values are integers in [1, {TRAINING_ROWS}].
- Column values are integers in [1, {TRAINING_COLS}].
- Suggest only one valid move next with the format "row col" or "row col f".
- Do NOT repeat or copy the board in your output.
- Do NOT output anything except the next move.

Here is the user board:
"""


In [22]:
messages = [
    {"role": "system", "content": SYSTEM_PROMPT},
    {"role": "user", "content": """
0 0 1 * * * * *
0 0 1 * * * * *
0 0 2 * * * * *
0 0 1 F 2 * * *
0 0 2 3 4 F 2 1
0 0 1 F F 2 1 0
0 0 1 2 2 1 0 0
0 0 0 0 0 0 0 0
"""}
]

# Apply chat template, enabling thinking mode
text = tokenizer.apply_chat_template(
    messages,
    tokenize=False,
    add_generation_prompt=True,
    enable_thinking=True   # key part for <think> sections
)

# Tokenize input
inputs = tokenizer([text], return_tensors="pt").to(model.device)

# Generate
generated_ids = model.generate(
    **inputs,
    max_new_tokens=1024,
)

# Extract only the new tokens
output_ids = generated_ids[0][len(inputs.input_ids[0]):].tolist()
decoded_output = tokenizer.decode(output_ids, skip_special_tokens=True).strip("\n")


# Extract solution block
import re
match = re.search(r"<SOLUTION>(.*?)</SOLUTION>", decoded_output, re.DOTALL)
if match:
    move = match.group(1).strip()
else:
    move = decoded_output  # fallback if tags missing

print("final move:", move)



final move: <think>
Okay, let's see. The user provided a Minesweeper board and I need to suggest exactly one valid next move. The board is 8x8, so I need to check each cell to see if there's a valid move.

First, I'll look at the board. The user's input is a string of 8 rows, each with 8 elements. Let me parse it. The first row is "0 0 1 * * * * *". So row 0 (assuming rows start at 0) has cells 0, 0, 1, *, *, *, *, *.

Now, the rules for moves: either "row col" (reveal) or "row col f" (flag). The task is to find exactly one valid move. Let's check each cell for possible moves.

Looking at the first row, the first three cells are 0, 0, 1. The next cells are * (unrevealed). So, if I check the first row, the cells 0, 0, 1 are already revealed. The *s are unrevealed. So, maybe the next move is to reveal the *s. But which one?

Wait, the user's input shows that the first row is "0 0 1 * * * * *". So, the first three cells are revealed. The rest are *s. So, the next move could be to reveal a