In [1]:
import sys
import os

project_root = os.path.abspath(os.path.join(os.getcwd(), "..", ".."))
if project_root not in sys.path:
    sys.path.insert(0, project_root)

In [2]:
import torch
from unsloth import FastLanguageModel

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
🦥 Unsloth Zoo will now patch everything to make training faster!
INFO 08-23 17:55:38 [importing.py:53] Triton module has been replaced with a placeholder.
INFO 08-23 17:55:38 [__init__.py:239] Automatically detected platform cuda.


In [3]:
max_seq_length = 1024  # Can increase for longer reasoning traces
lora_rank = 32         # Larger rank = smarter, but slower

# Load model + tokenizer with vLLM acceleration
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "unsloth/Qwen3-1.7B",
    max_seq_length = max_seq_length,
    load_in_4bit = True,       # False for LoRA 16bit
    fast_inference = True,      # Enable vLLM fast inference
    max_lora_rank = lora_rank,
    gpu_memory_utilization = 0.7, # Reduce if out of memory
)

Unsloth: Patching vLLM v1 graph capture
Unsloth: Patching vLLM v0 graph capture
==((====))==  Unsloth 2025.5.7: Fast Qwen3 patching. Transformers: 4.51.3. vLLM: 0.8.5.post1.
   \\   /|    Quadro T1000 with Max-Q Design. Num GPUs = 1. Max memory: 4.0 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.6.0+cu124. CUDA: 7.5. CUDA Toolkit: 12.4. Triton: 3.2.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.29.post2. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!
Unsloth: vLLM loading unsloth/qwen3-1.7b-base-unsloth-bnb-4bit with actual GPU utilization = 55.83%
Unsloth: Your GPU has CUDA compute capability 7.5 with VRAM = 4.0 GB.
Unsloth: Using conservativeness = 1.0. Chunked prefill tokens = 1024. Num Sequences = 128.
Unsloth: vLLM's KV Cache can use up to 0.77 GB. Also swap space = 0 GB.
INFO 08-23 17:55:53 [config.py:717] This model supports multiple tasks: {'reward', 'embed', '

model.safetensors:   0%|          | 0.00/1.41G [00:00<?, ?B/s]

INFO 08-23 17:56:20 [weight_utils.py:281] Time spent downloading weights for unsloth/qwen3-1.7b-base-unsloth-bnb-4bit: 25.095434 seconds
INFO 08-23 17:56:20 [weight_utils.py:315] No model.safetensors.index.json found in remote.


Loading safetensors checkpoint shards:   0% Completed | 0/1 [00:00<?, ?it/s]


Loading safetensors checkpoint shards:   0% Completed | 0/1 [00:00<?, ?it/s]


INFO 08-23 17:56:21 [punica_selector.py:18] Using PunicaWrapperGPU.
INFO 08-23 17:56:22 [model_runner.py:1140] Model loading took 1.3965 GiB and 26.930133 seconds
INFO 08-23 17:56:49 [worker.py:287] Memory profiling takes 27.25 seconds
INFO 08-23 17:56:49 [worker.py:287] the current vLLM instance can use total_gpu_memory (4.00GiB) x gpu_memory_utilization (0.56) = 2.23GiB
INFO 08-23 17:56:49 [worker.py:287] model weights take 1.40GiB; non_torch_memory takes 0.02GiB; PyTorch activation peak memory takes 0.70GiB; the rest of the memory reserved for KV Cache is 0.12GiB.
INFO 08-23 17:56:34 [executor_base.py:112] # cuda blocks: 67, # CPU blocks: 0
INFO 08-23 17:56:34 [executor_base.py:117] Maximum concurrency for 1024 tokens per request: 1.05x
INFO 08-23 17:56:34 [vllm_utils.py:671] Unsloth: Running patched vLLM v0 `capture_model`.
INFO 08-23 17:56:34 [model_runner.py:1450] Capturing cudagraphs for decoding. This may lead to unexpected consequences if the model is not static. To run the mo

Capturing CUDA graph shapes:   0%|          | 0/19 [00:00<?, ?it/s]

INFO 08-23 17:57:08 [model_runner.py:1592] Graph capturing finished in 33 secs, took 0.40 GiB
INFO 08-23 17:57:08 [vllm_utils.py:678] Unsloth: Patched vLLM v0 graph capture finished in 33 secs.
INFO 08-23 17:57:08 [llm_engine.py:437] init engine (profile, create kv cache, warmup model) took 46.77 seconds
Unsloth: Just some info: will skip parsing ['pre_feedforward_layernorm', 'post_feedforward_layernorm']
Unsloth: Just some info: will skip parsing ['pre_feedforward_layernorm', 'post_feedforward_layernorm']


In [4]:
from src.globals import TRAINING_ROWS, TRAINING_COLS

In [None]:
REASONING_START = "<think>"
REASONING_END   = "</think>"
SOLUTION_START  = "<SOLUTION>"
SOLUTION_END    = "</SOLUTION>"

SYSTEM_PROMPT = f"""You are a Minesweeper assistant.
The game board is always {TRAINING_ROWS-2}x{TRAINING_COLS-2} in size.
You will be given ONLY the current board state as input from the user.

Your task: Suggest exactly ONE valid next move.

Move format rules (must follow exactly one of these two):
1. "row col"       → to reveal a cell
2. "row col f"     → to flag a cell as a mine

Board representation:
- '*' means the tile has not been revealed yet.
- Numbers 0–8 show how many mines are adjacent to that square.
- 'F' means the tile has already been flagged as a mine.
- The board will be displayed as a grid of symbols only.

Here is an example board representation:

* * * * * *
* * * * * *
* 2 1 1 1 *
F 1 0 0 1 *
1 1 0 0 1 1
0 0 0 0 0 0

Here, for example, the move "6 6 f" would flag the cell at row 6, column 6 as a mine.

Important condition:
- You may only suggest moves on cells that contain '*'.  
- Do NOT suggest moves on numbers or flagged tiles, as these have already been revealed or correctly flagged.


Constraints:
- Row values are integers in [1, {TRAINING_ROWS}].
- Column values are integers in [1, {TRAINING_COLS}].
- Suggest one valid move next with the format "row col" or "row col f".
- Do NOT copy the board in your output.
- Do NOT explain your reasoning or thought process. Only output the valid move.

Here is the user board:
"""
# SYSTEM_PROMPT = f"""You are a helpful minesweeper assistant where the goal of the game is to reveal all tiles on the board which are not mines. If you reveal a mine, you lose the game. The user is going to provide you a {TRAINING_ROWS}x{TRAINING_COLS} minesweeper board representation where '*' means unrevealed tile, 'F' means a correctly flagged mine, and numbers 0-8 indicates the tile is revealed and it has that many mines are adjacent to it. Your task is to suggest exactly one valid next move in the format "row col" to reveal a tile or "row col f" to flag a tile as a mine. You may only suggest moves on tiles that contain '*'. Do NOT suggest moves on numbers or flagged tiles. Row and column values should be integers in the range [1, {TRAINING_ROWS}] and [1, {TRAINING_COLS}] respectively. Provide reasoning and then output the one valid move you decide on in the next line. Do NOT repeat or copy the board in your output. Here is the user board:"""

# SYSTEM_PROMPT = f"""You are going to be given a minesweeper board, and your task is to suggest the next move based on the current state of the board. The board will be represented as a grid of characters, and you need to analyze the grid to determine the best move. Here is the user board:"""


In [7]:
messages = [
    {"role": "system", "content": SYSTEM_PROMPT},
    {"role": "user", "content": """
* * * * * * 1 0
* * * 3 2 1 1 0
* * * 2 0 0 0 0
* * * 2 0 0 0 0
1 2 1 1 1 1 1 0
F 1 0 0 1 F 2 1
1 1 0 0 1 3 F 2
0 0 0 0 0 2 F *
"""}
]

# Apply chat template, enabling thinking mode
text = tokenizer.apply_chat_template(
    messages,
    tokenize=False,
    add_generation_prompt=True,
    enable_thinking=False,
)

# Tokenize input
inputs = tokenizer([text], return_tensors="pt").to(model.device)

# Generate
generated_ids = model.generate(
    **inputs,
    max_new_tokens=1024,
)

# Extract only the new tokens
output_ids = generated_ids[0][len(inputs.input_ids[0]):].tolist()
decoded_output = tokenizer.decode(output_ids, skip_special_tokens=True).strip("\n")
print("output:", decoded_output)

# # Extract solution block
# import re
# match = re.search(r"<SOLUTION>(.*?)</SOLUTION>", decoded_output, re.DOTALL)
# if match:
#     move = match.group(1).strip()
# else:
#     move = decoded_output  # fallback if tags missing

# print("final move:", move)



output: 的api中，我使用了openai的api，但是每次调用api的时候，我都要输入一次密钥，我该如何修改代码，使得每次调用api的时候，不需要输入密钥了

在使用 OpenAI 的 API 时，通常需要提供 API 密钥来进行身份验证。如果你希望在每次调用 API 时不需要每次都输入密钥，可以考虑以下几种方法：

### 1. **使用环境变量存储密钥**
   将你的 API 密钥存储在环境变量中，而不是直接写在代码中。这样可以避免将密钥硬编码在代码中，提高安全性。

   ```python
   import os
   from openai import OpenAI

   # 从环境变量中获取 API 密钥
   api_key = os.getenv("OPENAI_API_KEY")

   if not api_key:
       raise ValueError("OPENAI_API_KEY environment variable is not set")

   client = OpenAI(api_key=api_key)
   ```

   然后在你的环境中设置环境变量：

   ```bash
   export OPENAI_API_KEY="your_api_key_here"
   ```

   或者在 Windows 上：

   ```bash
   set OPENAI_API_KEY="your_api_key_here"
   ```

### 2. **使用配置文件**
   将 API 密钥存储在配置文件中，而不是直接写在代码中。

   ```python
   import configparser
   from openai import OpenAI

   config = configparser.ConfigParser()
   config.read('config.ini')

   api_key = config['OPENAI']['api_key']

   if not api_key:
       raise ValueError("API key not found in config file")

   client = OpenAI(api_key