In [None]:
import os
import json
from openai import OpenAI

# --- Configuration ---

# 1. Set up your DeepSeek API client
# (Make sure DEEPSEEK_API_KEY is set in your environment variables)
deepseek_api_key = os.getenv("DEEPSEEK_API_KEY")
if not deepseek_api_key:
    # Fail fast with a non-zero exit status. Passing api_key=None would let
    # the OpenAI SDK fall back to OPENAI_API_KEY, which could send the wrong
    # credential to the DeepSeek endpoint.
    raise SystemExit(
        "Error: Failed to initialize DeepSeek client. DEEPSEEK_API_KEY is not set."
    )

try:
    # DeepSeek exposes an OpenAI-compatible API, so the OpenAI client is
    # reused and pointed at DeepSeek's base URL.
    client = OpenAI(
        api_key=deepseek_api_key,
        base_url="https://api.deepseek.com",
    )
except Exception as e:
    print("Error: Failed to initialize DeepSeek client. Is DEEPSEEK_API_KEY set?")
    print(f"Details: {e}")
    # Signal failure to the caller/shell (bare exit() reports success).
    raise SystemExit(1)

# 2. Define the "Problem" (the prompt you would send to Qwen)
# This is the context DeepSeek will use: the text is interpolated verbatim
# into `user_prompt_for_deepseek`.
# The scenario is hard-coded to a single first-turn state: taxi at (1, 0),
# passenger waiting at Yellow (4, 0), destination Blue at (4, 3), with no
# previous result and no wall notes.
# NOTE(review): the text contains a typo ("a the driver") and a full-width
# comma. Presumably it mirrors the exact prompt used with Qwen, so it is left
# untouched here -- confirm against the real prompt before correcting it.
original_qwen_user_prompt = """You are a the driver in Taxi-v3 game，the taxi is at your position. 
The positions in the Taxi-v3 game are understood as (row, column) coordinates on a 5 times 5 grid.
**Rules & Rewards:**
1.  **Objective:** 1. **Go to passenger**. 2. **Pickup (4)**. 3. **Go to destination**. 4. **Dropoff (5)**. Do this ASAP.
2. You won't move if your action hits a wall, so take note of walls after your trial.
3. You can only pickup or dropoff passenger when you and passenger are at the same position. e.g. If the taxi is at (4,3), and the passenger is at (4,0), your action should be move because you have to arrive there first
**State of this turn:** The taxi is currently at position (1, 0). The passenger is **waiting at Yellow** located at (4, 0).The final destination is **Blue** at (4, 3).
**Actions you can take:** 0:One grid down South, 1: One grid up North, 2: One grid to East, 3: One grid to West, 4:Pickup at taxi's position, 5:Dropoff at taxi's position
**Last Result:** - The game just started.
**Wall Notes:** - No walls noted yet.

**Your Task:**
1.  Choose the best action for this turn.
2.  Set `add_wall_note: true` ONLY IF your `Last Result` was a **move (0-3)** AND it **hit a wall**.

Think briefly with less than 50 words. After </think>, respond with **ONLY** a valid JSON:
{"action_to_take": "NUMBER", "add_wall_note": BOOLEAN}
"""

# 3. The "Answer": the chosen (winning) response for the problem above.
# DeepSeek is asked to produce reasoning that justifies exactly this answer.
# Keys are given in the order they are serialized into the final sample.
correct_answer_json = dict(
    action_to_take="0",  # action 0 = move one grid South
    add_wall_note=False,
)

# --- Prompt Engineering for DeepSeek ---

# System prompt that frames DeepSeek as a generator of <think> reasoning
# blocks. It is assembled from one sentence per entry, joined by single spaces.
system_prompt_for_deepseek = " ".join(
    [
        "You are a training data generator.",
        "Your job is to create a concise reasoning process inside a <think> block, "
        "based on a problem and a given correct answer.",
        "Your output must **ONLY** be the <think>...</think> block.",
    ]
)

# User prompt for DeepSeek: the original problem, the known-correct answer,
# and the instruction to justify that answer inside a <think> block.
# The answer is serialized from `correct_answer_json` so that the prompt always
# matches the JSON later appended to the sample (and really is a JSON object).
# DeepSeek is asked for the <think> block only: the JSON answer is appended by
# this script, so asking the model for it as well would duplicate it.
user_prompt_for_deepseek = f"""
Here is the problem context:
---
{original_qwen_user_prompt}
---

Here is the KNOWN CORRECT ANSWER (as a JSON object):
---
{json.dumps(correct_answer_json)}
---

Your task:
Write a brief, logical reasoning process (less than 50 words) that explains how to arrive 
at this *exact* answer. The reasoning must fit inside a `<think>...</think>` block.

- The taxi is at (1, 0) and the passenger is at (4, 0).
- The best move is to go South (action 0) towards the passenger.
- "Last Result" is "The game just started," so no wall was hit.
- Therefore, `add_wall_note` must be `false`.

Generate **ONLY** the `<think>...</think>` block. Do NOT output the JSON answer; it is appended separately.
"""

# --- API Call ---

def extract_think_block(raw_text):
    """Return only the ``<think>...</think>`` span found in *raw_text*.

    The model sometimes adds text around the reasoning (for example its own
    JSON answer after ``</think>``) or omits a tag (for example when the
    output is cut off by the token limit). This helper normalizes all of
    those cases to a single, well-formed think block.

    Args:
        raw_text: The raw model output; ``None`` is treated as an empty string.

    Returns:
        The original ``<think>...</think>`` span when both tags are present
        (anything before or after it is dropped). Otherwise the available
        reasoning text wrapped as ``"<think>\\n...\\n</think>"``.
    """
    open_tag, close_tag = "<think>", "</think>"
    text = (raw_text or "").strip()

    start = text.find(open_tag)
    if start != -1:
        end = text.find(close_tag, start + len(open_tag))
    else:
        end = text.find(close_tag)

    if start != -1 and end != -1:
        # Well-formed: keep the model's block verbatim, drop surrounding text.
        return text[start:end + len(close_tag)]

    # Malformed: salvage whatever reasoning text is present and re-wrap it.
    if start != -1:
        reasoning = text[start + len(open_tag):]
    elif end != -1:
        reasoning = text[:end]
    else:
        reasoning = text
    return f"{open_tag}\n{reasoning.strip()}\n{close_tag}"


print("Sending request to DeepSeek to generate <think> block...")

try:
    response = client.chat.completions.create(
        model="deepseek-chat",
        messages=[
            {"role": "system", "content": system_prompt_for_deepseek},
            {"role": "user", "content": user_prompt_for_deepseek},
        ],
        stream=False,
        temperature=0.5,  # A little creativity, but not too much
        max_tokens=150,
    )
except Exception as e:
    # Top-level boundary of the script: report the failure instead of letting
    # a traceback escape. Only the API call itself is guarded here.
    print("\n" + "!"*60)
    print(f"Error during DeepSeek API call: {e}")
    print("!"*60)
else:
    # `content` can be None (e.g. an empty completion), so guard before strip().
    raw_output = (response.choices[0].message.content or "").strip()

    if not raw_output:
        print("\n" + "!"*60)
        print("Error: DeepSeek returned an empty response; no training sample generated.")
        print("!"*60)
    else:
        # --- Final Assembly ---

        # Clean up: keep *just* the think block. Anything after </think>
        # (such as a JSON answer produced by the model) is dropped, because
        # the known-correct JSON is appended below and must appear only once.
        generated_think_block = extract_think_block(raw_output)
        if generated_think_block != raw_output:
            print(
                "Warning: DeepSeek output was not a bare <think> block; "
                f"extra text was dropped or tags were repaired. Received:\n{raw_output}"
            )

        # Combine the generated thought process with the correct JSON answer
        final_qwen_formatted_output = (
            f"{generated_think_block}\n\n"
            f"{json.dumps(correct_answer_json, indent=4)}"
        )

        print("\n" + "="*60)
        print("✅ SUCCESS: Generated Qwen-formatted training sample")
        print("="*60 + "\n")
        print(final_qwen_formatted_output)

Sending request to DeepSeek to generate <think> block...

✅ SUCCESS: Generated Qwen-formatted training sample

<think>The taxi is at (1,0) and needs to reach the passenger at (4,0). Moving South (action 0) directly towards the passenger is the best move. Since the game just started and no move was attempted, no wall was hit.</think>
{"action_to_take": 0, "add_wall_note": false}

{
    "action_to_take": "0",
    "add_wall_note": false
}
