In [None]:
!pip install -q groq "transformers==4.51.3" "datasets==3.5.0" "huggingface-hub==0.30.2" "peft==0.15.2"

In [None]:
import os
from groq import Groq

In [None]:
# Prefer Colab's secret store when it is available; outside Colab fall back to
# the GROQ_API_KEY environment variable so the notebook also runs locally.
try:
    from google.colab import userdata
except ImportError:
    groq_api_key = os.environ.get("GROQ_API_KEY")
else:
    groq_api_key = userdata.get('GROQ_API_KEY')

In [None]:
# Single Groq API client shared by every chat-completion request in this notebook.
client = Groq(api_key=groq_api_key)

In [None]:
# System message placed first in `messages`. It states the game rules, the
# feedback symbols (the same three symbols LetterFeedback uses as its values)
# and the required answer format: reasoning inside <think> tags, followed by
# the guess inside <guess> tags, which is what extract_guess() parses.
SYSTEM_PROMPT = """
You are playing Wordle, a word-guessing game.

### Game Rules:
- You have **6 tries** to guess a secret **5-letter** word.
- Each guess must be a valid **5-letter English word**.
- After each guess, you will receive feedback indicating how close
your guess was.

### Feedback Format:
Each letter in your guess will receive one of three symbols:
1. ✓ : The letter is in the word and in the CORRECT position.
2. - : The letter is in the word but in the WRONG position.
3. x : The letter is NOT in the word.

### Example:
Secret Word: BRISK

Guess 1: STORM → Feedback: S(-) T(x) O(x) R(-) M(x)
Guess 2: BRAVE → Feedback: B(✓) R(✓) A(x) V(x) E(x)
Guess 3: BRISK → Feedback: B(✓) R(✓) I(✓) S(✓) K(✓)

### Response Format:
Think through the problem and feedback step by step. Make sure to
first add your step by step thought process within <think> </think>
tags. Then, return your guessed word in the following format:
<guess> guessed-word </guess>.
"""

In [None]:
from dataclasses import dataclass
from enum import Enum
from typing import List


class LetterFeedback(Enum):
    """Verdict for one guessed letter; each value is the symbol shown in feedback."""

    CORRECT = "✓"        # right letter in the right position
    WRONG_POS = "-"      # letter is in the secret, but at another position
    WRONG_LETTER = "x"   # letter is not in the secret (or no occurrence is left)


@dataclass
class GuessWithFeedback:
    """A guessed word paired with one LetterFeedback per letter."""

    guess: str
    feedback: List[LetterFeedback]

    def __repr__(self) -> str:
        """Render as ``WORD → Feedback: W(x) O(-) R(✓) ...``."""
        annotated = [
            f"{char}({verdict.value})"
            for char, verdict in zip(self.guess, self.feedback)
        ]
        rendered = " ".join(annotated)
        return f"{self.guess} → Feedback: {rendered}"

    @classmethod
    def from_secret(cls, guess: str, secret: str) -> "GuessWithFeedback":
        """
        Score `guess` against `secret` using Wordle rules (case-insensitive).

        Exact positional matches are marked CORRECT first. Every other guessed
        letter is then marked WRONG_POS while the secret still has an unmatched
        copy of that letter (each copy can be claimed only once, left to
        right), and WRONG_LETTER otherwise. This is what makes repeated
        letters come out right.

        Raises:
            ValueError: if `guess` and `secret` differ in length.

        Returns:
            A GuessWithFeedback holding the upper-cased guess and its verdicts.
        """
        if len(guess) != len(secret):
            raise ValueError("guess and secret must be the same length")

        guess_up = guess.upper()
        secret_up = secret.upper()

        # Pass 1: settle exact matches and count the secret letters that are
        # still up for grabs (those not covered by an exact match).
        verdicts: List[LetterFeedback] = []
        leftovers = {}
        for pos, char in enumerate(guess_up):
            wanted = secret_up[pos]
            if char == wanted:
                verdicts.append(LetterFeedback.CORRECT)
            else:
                verdicts.append(LetterFeedback.WRONG_LETTER)
                leftovers[wanted] = leftovers.get(wanted, 0) + 1

        # Pass 2: a non-exact letter is "wrong position" only while an
        # unclaimed copy of it remains; claiming consumes that copy.
        for pos, char in enumerate(guess_up):
            if verdicts[pos] is LetterFeedback.CORRECT:
                continue
            if leftovers.get(char, 0) > 0:
                leftovers[char] -= 1
                verdicts[pos] = LetterFeedback.WRONG_POS

        return cls(guess_up, verdicts)

In [None]:
def render_user_prompt(past_guesses: List[GuessWithFeedback]) -> str:
    """Build the user message asking for the next guess.

    With no history the message is just the request. Otherwise the request is
    followed by one numbered line (starting at 1) per earlier guess, each
    rendered through that guess's string form.
    """
    request = "Make a new 5-letter word guess."
    if not past_guesses:
        return request

    history = "".join(
        f"\nGuess {number}: {entry}"
        for number, entry in enumerate(past_guesses, start=1)
    )
    return request + "\n\nHere is some previous feedback:" + history

In [None]:
def wordle_reward(guess: str, secret_word: str) -> int:
    """Binary reward: 1 if `guess` equals `secret_word` ignoring case, else 0."""
    solved = guess.upper() == secret_word.upper()
    return 1 if solved else 0

In [None]:
secret_word = "POUND"

# Score three earlier guesses against the secret to build the feedback history.
past_guesses = [
    GuessWithFeedback.from_secret(guess=word, secret=secret_word)
    for word in ("CRANE", "BLOND", "FOUND")
]
past_guesses

[CRANE → Feedback: C(x) R(x) A(x) N(✓) E(x),
 BLOND → Feedback: B(x) L(x) O(-) N(✓) D(✓),
 FOUND → Feedback: F(x) O(✓) U(✓) N(✓) D(✓)]

In [None]:
# Conversation sent to the model: the rules, the feedback gathered so far, and
# a pre-filled opening for the assistant's turn.
messages = [
    {"role": "system", "content": SYSTEM_PROMPT},
    {"role": "user", "content": render_user_prompt(past_guesses)},
    {"role": "assistant", "content": "Let me solve this step by step.\n<think>"},
]

In [None]:
def generate_stream(prompt: list, adapter_id: str = "") -> str:
    """Stream a chat completion, echoing it to stdout as it arrives.

    Args:
        prompt: The chat messages (a list of ``{"role": ..., "content": ...}``
            dicts); passed unchanged as ``messages`` to the API.
        adapter_id: Model identifier, passed as ``model`` to the API.

    Returns:
        The concatenation of every streamed content fragment.
    """
    stream = client.chat.completions.create(
        model=adapter_id,
        messages=prompt,
        max_tokens=2048,
        stream=True,
    )

    fragments = []
    for chunk in stream:
        # OpenAI-compatible streams may contain chunks whose `choices` list is
        # empty (e.g. a trailing usage-only chunk); indexing [0] would raise.
        if not chunk.choices:
            continue
        fragment = chunk.choices[0].delta.content
        if fragment is None:
            # Role-only / finish chunks carry no text.
            continue
        print(fragment, end="", flush=True)
        fragments.append(fragment)
    print()

    return "".join(fragments)

In [None]:
import re

def extract_guess(response: str) -> str:
    """
    Extract the final guessed word from the model's response text.

    The model is told to reason first and give its answer last, so when the
    response contains several <guess> tags (e.g. candidates mentioned while
    reasoning) the LAST one is taken as the answer.

    Example:
        response = "<think> ... reasoning ... </think>\n<guess> ROUND </guess>"
        -> returns "ROUND"

    Returns:
        The guessed word, upper-cased.

    Raises:
        ValueError: if the response contains no <guess> tag wrapping a purely
            alphabetic word.
    """
    candidates = re.findall(r"<guess>\s*([A-Za-z]+)\s*</guess>", response)
    if not candidates:
        raise ValueError("No <guess> tag found in response.")
    return candidates[-1].upper()

In [None]:
# One streamed completion for the conversation built above.
response = generate_stream(messages, "Qwen/Qwen3-32B")

<think>
Okay, let's see. I need to figure out the secret word based on the previous guesses and feedback. Let's go through each step carefully.

Starting with Guess 1: CRANE → Feedback was C(x), R(x), A(x), N(✓), E(x). That means the letter N is in the correct position. The other letters (C, R, A, E) are not in the word at all. So the secret word has N in the 4th position.

Guess 2: BLOND → Feedback B(x), L(x), O(-), N(✓), D(✓). Here, N and D are correct in position. O is in the word but in the wrong position. B and L are not in the word. So the secret word has D in the 5th position, and O is present but not in position 3. Also, from Guess 1, we know O wasn't part of the feedback before, so maybe O wasn't in the word. Wait, no. Wait, the first guess was CRANE, which didn't have O. So O is a new letter here. So O is in the word but not in position 3.

Guess 3: FOUND → Feedback F(x), O(✓), U(✓), N(✓), D(✓). Oh, here O is in the correct position. Wait, the feedback for O here is ✓, meanin

In [None]:
# Parse the model's answer and score it against the secret.
guess = extract_guess(response)
reward = wordle_reward(guess=guess, secret_word=secret_word)

print(f"Guessed Word: {guess} -> Reward: {reward}")

Guessed Word: POUND -> Reward: 1


## Using rewards to calculate advantages

In [None]:
import numpy as np

def compute_advantages(rewards: list, eps: float = 1e-8):
    """Standardize rewards into advantages: ``(r - mean) / std``.

    Args:
        rewards: Numeric rewards for one group of samples.
        eps: Standard deviations at or below this are treated as zero. An
            exact ``== 0`` test is not enough: identical float rewards such
            as ``[0.1, 0.1, 0.1]`` can yield a round-off std around 1e-17,
            which would turn pure noise into advantages of magnitude 1.

    Returns:
        A list of floats, one per reward, with zero mean and unit standard
        deviation. All zeros when the rewards are (numerically) identical,
        and an empty list for empty input.
    """
    reward_arr = np.array(rewards, dtype=float)
    if reward_arr.size == 0:
        # Nothing to normalize; also avoids NumPy's empty-slice warnings.
        return []

    mean_reward = np.mean(reward_arr)
    std_reward = np.std(reward_arr)

    # No variance means no sample is better than another (typically all
    # rewards are 0), so every advantage is 0.
    # Note: the GRPO implementation adds 1e-4 to the std instead of branching.
    if std_reward <= eps:
        return [0.0] * len(reward_arr)

    # Center on the mean and scale by the std.
    advantages = (reward_arr - mean_reward) / std_reward
    return advantages.tolist()

In [None]:
def print_guesses_table(guesses, rewards):
    """
    Print one row per guess (word, reward, normalized advantage), followed by
    the mean and standard deviation of the rewards.

    Prints a short notice instead when either sequence is empty.
    """
    if not (guesses and rewards):
        print("No guesses or rewards to display.")
        return

    # Advantages come from the rewards alone; rows are matched up by position.
    advantages = compute_advantages(rewards)
    divider = "-" * 33

    print("\n=== Wordle Guess Summary ===")
    print(f"{'Guess':<10} | {'Reward':<7} | {'Advantage':<10}")
    print(divider)

    for row in zip(guesses, rewards, advantages):
        word, score, advantage = row
        print(f"{word:<10} | {score:<7.2f} | {advantage:<10.2f}")

    print(divider)
    mean_reward, std_reward = np.mean(rewards), np.std(rewards)
    print(f"Mean reward: {mean_reward:.2f} | Std: {std_reward:.2f}\n")

In [None]:
def generate(messages, num_guesses=6, adapter_id="Qwen/Qwen3-32B", secret=None):
    """
    Runs the Wordle guessing loop for up to `num_guesses` model turns.

    Each turn streams a response, extracts the guess, scores it, and (unless
    the puzzle was solved) extends the conversation with the response and the
    feedback for that guess before the next turn.

    Args:
        messages (list): Conversation history (system + user context). It is
            NOT modified: the loop works on a copy, so calling this twice with
            the same list starts from the same history both times.
        num_guesses (int): Maximum number of model turns.
        adapter_id (str): Model/adapter identifier passed to generate_stream.
        secret (str | None): Word to guess. Defaults to the notebook-level
            `secret_word` when omitted.

    Returns:
        tuple[list[str], list[str], list[int]]:
            - raw model responses, one per turn (including turns without a
              parsable guess),
            - extracted guesses, one per turn that produced a guess,
            - rewards, aligned index-by-index with the extracted guesses.
    """
    target = secret_word if secret is None else secret
    # Copy so the caller's list is untouched and re-running the cell is idempotent.
    conversation = list(messages)

    responses = []
    response_guess_word = []
    guess_word_reward = []

    for i in range(num_guesses):
        print(f"\n=== Generating Guess {i+1}/{num_guesses} ===")
        response_text = generate_stream(conversation, adapter_id)
        responses.append(response_text)

        # Extract the guessed word from model response
        try:
            guess_word = extract_guess(response_text)
        except ValueError:
            print("⚠️ No <guess> tag found in model output, skipping this turn.")
            continue

        # Compute reward; record guess and reward together so the two lists
        # always stay aligned.
        reward = wordle_reward(guess_word, target)
        print(f"Guessed Word: {guess_word} -> Reward: {reward}")
        response_guess_word.append(guess_word)
        guess_word_reward.append(reward)

        # If guessed correctly, stop early
        if reward == 1:
            print("🎉 Correct guess! Wordle solved.")
            break

        # Feedback can only be computed for a guess of the secret's length;
        # the model is free to answer with any word, so do not let a
        # malformed guess abort the whole loop.
        try:
            feedback_obj = GuessWithFeedback.from_secret(guess_word, target)
        except ValueError as exc:
            print(f"⚠️ Cannot give feedback for '{guess_word}' ({exc}), skipping this turn.")
            continue

        # Append feedback to conversation for next round
        conversation.append({
            "role": "assistant",
            "content": response_text
        })
        conversation.append({
            "role": "user",
            "content": render_user_prompt([feedback_obj])
        })

    return responses, response_guess_word, guess_word_reward

In [None]:
# Play up to 8 model turns against the secret, starting from the prepared conversation.
print(f"Secret: {secret_word}")
response, guess_words, rewards = generate(messages=messages, num_guesses=8)

Secret: POUND

=== Generating Guess 1/8 ===
<think>
Okay, let's tackle this Wordle puzzle. Let me go through the feedback from each guess and see what clues I can gather.

First, the secret word is 5 letters. Let's start with the first guess: CRANE. The feedback was C(x), R(x), A(x), N(✓), E(x). So the fourth letter is N and correct. The rest (C, R, A, E) are not in the word at all. 

Next guess was BLOND: B(x), L(x), O(-), N(✓), D(✓). So O is in the word but not in the third position here. The N and D are correct in their positions. Wait, in BLOND, the letters are B L O N D. So the feedback says O is present but in the wrong position. Since the secret word's fourth letter is N (from CRANE), and BLOND's N is in position 4 as well, which was already correct. So O must be in the word but not in position 3. 

Third guess was FOUND: F(x), O(✓), U(✓), N(✓), D(✓). So F is out. Now O, U, N, D are correct in their positions. Let's check where they are in FOUND. O is second, U third, N fourth, 

In [None]:
# Summarize the run: one row per extracted guess with its reward and advantage.
print_guesses_table(guesses=guess_words, rewards=rewards)


=== Wordle Guess Summary ===
Guess      | Reward  | Advantage 
---------------------------------
HOUND      | 0.00    | -0.58     
SOUND      | 0.00    | -0.58     
MOUND      | 0.00    | -0.58     
POUND      | 1.00    | 1.73      
---------------------------------
Mean reward: 0.25 | Std: 0.43

