In [19]:
import numpy as np
import json
import random

from yahtzee import Game

In [84]:
test_env = YahtzeeEnv()
test_agent = RandomAgent()

In [15]:
test_env.get_state()

{'dice': [{'value': 1, 'held': False},
  {'value': 1, 'held': False},
  {'value': 1, 'held': False},
  {'value': 1, 'held': False},
  {'value': 1, 'held': False},
  {'value': 1, 'held': False}],
 'rolls_left': 3,
 'saved_rolls': 0,
 'scorecard': {'scores': {'ones': None,
   'twos': None,
   'threes': None,
   'fours': None,
   'fives': None,
   'sixes': None,
   'pair': None,
   'two_pair': None,
   'three_of_a_kind': None,
   'four_of_a_kind': None,
   'full_house': None,
   'small_straight': None,
   'large_straight': None,
   'yahtzee': None,
   'chance': None},
  'total': 0,
  'upper_section_total': 0,
  'upper_bonus': False,
  'upper_bonus_threshold': 73},
 'current_turn_scored': False,
 'game_started': False}

In [85]:
test_env.step({'type':'roll'})

({'dice': [{'value': 1, 'held': False},
   {'value': 2, 'held': False},
   {'value': 1, 'held': False},
   {'value': 6, 'held': False},
   {'value': 1, 'held': False},
   {'value': 1, 'held': False}],
  'rolls_left': 2,
  'saved_rolls': 0,
  'scorecard': {'scores': {'ones': None,
    'twos': None,
    'threes': None,
    'fours': None,
    'fives': None,
    'sixes': None,
    'pair': None,
    'two_pair': None,
    'three_of_a_kind': None,
    'four_of_a_kind': None,
    'full_house': None,
    'small_straight': None,
    'large_straight': None,
    'yahtzee': None,
    'chance': None},
   'total': 0,
   'upper_section_total': 0,
   'upper_bonus': False,
   'upper_bonus_threshold': 73},
  'current_turn_scored': False,
  'game_started': True},
 0,
 False,
 {})

In [96]:
# game_env.py

class YahtzeeEnv:
    def __init__(self):
        self.game = Game()
    
    def reset(self):
        """Starts a new game and returns the initial state."""
        self.game = Game()
        return self.get_state()
    
    def step(self, action: dict):
        """
        Executes an action and returns a tuple of (state, reward, done, info).
    
        The action should be a dictionary with a key "type" and any required
        additional parameters. Supported actions:
          - {"type": "roll"}
          - {"type": "toggle_hold", "die_index": index}
          - {"type": "score", "category": category_name}
        """
        info = {}
        reward = 0
    
        if action.get("type") == "roll":
            success = self.game.roll_dice()
            if not success:
                info["error"] = "No rolls left"
        elif action.get("type") == "toggle_hold":
            die_index = action.get("die_index")
            if die_index is not None:
                success = self.game.toggle_hold(die_index)
                if not success:
                    info["error"] = "Invalid toggle hold action"
            else:
                info["error"] = "die_index not provided"
        elif action.get("type") == "score":
            category = action.get("category")
            if category is not None:
                # Calculate reward as the difference in total score after scoring
                prev_total = self.game.scorecard.get_total()
                result = self.game.score(category)
                if result == "success":
                    new_total = self.game.scorecard.get_total()
                    reward = new_total - prev_total
                else:
                    info["error"] = result
            else:
                info["error"] = "category not provided"
        else:
            info["error"] = "Invalid action type"
    
        # Check if the game is over by verifying if all score categories are used.
        done = self.game.scorecard.is_game_over()
        state = self.get_state()
        return state, reward, done, info
    
    def get_state(self):
        """Returns the current game state as a dictionary."""
        return self.game.to_dict()
    
    def render(self):
        """Prints a JSON-formatted representation of the current game state."""
        print(json.dumps(self.get_state(), indent=2))


In [131]:
# agent.py

import random

class YahtzeeAgent:
    def act(self, state):
        """
        Given the current game state, choose an action.
        
        The state is expected to be a dictionary containing keys such as:
          - 'rolls_left'
          - 'game_started'
          - 'current_turn_scored'
          - 'dice'
          - 'scorecard' (which contains 'scores')
        
        Returns an action dictionary. For example:
          {"type": "roll"}
          {"type": "toggle_hold", "die_index": 2}
          {"type": "score", "category": "ones"}
        """
        raise NotImplementedError("This method should be overridden by subclasses.")


class RandomAgent(YahtzeeAgent):
    def __init__(self):
        # Track dice indices that have already been toggled in the current turn
        self.toggled_indices = set()
        # Store the number of rolls left from the previous state to detect turn changes
        self.last_rolls_left = None

    def act(self, state):
        """
        A random policy agent that chooses a random valid action based on the state.
        This version avoids offering toggle hold actions for dice that have already
        been toggled in the current turn.
        """
        valid_actions = []
        current_rolls_left = state.get("rolls_left", 0)

        # Detect new turn: if rolls_left increases compared to the previous state, reset toggled_indices.
        if self.last_rolls_left is not None and current_rolls_left > self.last_rolls_left:
            self.toggled_indices = set()
        self.last_rolls_left = current_rolls_left

        # If there are rolls left, consider a roll action.
        if current_rolls_left > 0:
            valid_actions.append({"type": "roll"})

        # If the game has started and the current turn hasn't been scored,
        # allow toggle hold actions only for dice that haven't been toggled already.
        if state.get("game_started", False) and not state.get("current_turn_scored", True):
            dice = state.get("dice", [])
            for i, die in enumerate(dice):
                if i not in self.toggled_indices:
                    valid_actions.append({"type": "toggle_hold", "die_index": i})

        # Consider scoring actions for categories that haven't been used.
        if state.get("game_started", False):
            scorecard = state.get("scorecard", {})
            scores = scorecard.get("scores", {})
            for category, score in scores.items():
                if score is None:
                    valid_actions.append({"type": "score", "category": category})

        # Choose a random action from the valid ones.
        if valid_actions:
            chosen_action = random.choice(valid_actions)
            # If the chosen action is toggling a die, mark that die as toggled in this turn.
            if chosen_action.get("type") == "toggle_hold":
                self.toggled_indices.add(chosen_action["die_index"])
            return chosen_action
        else:
            # Fallback action, if no valid actions are found.
            return {"type": "roll"}


In [94]:
test_agent.act(test_env.get_state()) 

{'type': 'score', 'category': 'twos'}

In [95]:
test_agent.toggled_indices

{0, 1}

In [113]:
def pretty_print_state(state):
    """
    Prints a human-friendly summary of the current game state.
    """
    print("=== Yahtzee Game State ===")
    # Dice
    dice = state.get("dice", [])
    dice_str = ", ".join([f"{die['value']}{' (held)' if die['held'] else ''}" for die in dice])
    print("Dice:", dice_str)
    
    # Rolls and saved rolls
    print("Rolls left:", state.get("rolls_left", 0))
    print("Saved rolls:", state.get("saved_rolls", 0))
    
    # Scorecard details
    scorecard = state.get("scorecard", {})
    scores = scorecard.get("scores", {})
    print("Scorecard:")
    for category, score in scores.items():
        status = score if score is not None else "Not scored"
        print(f"  {category}: {status}")
    print("Total Score:", scorecard.get("total", 0))
    print("===========================")

In [178]:
# simulation.py

# simulation.py

from datetime import datetime
# from game_env import YahtzeeEnv
# from agent import RandomAgent

class Simulation:
    def __init__(self, env=None, agent=None):
        """
        Initialize the simulation with a given environment and agent.
        If none are provided, defaults to YahtzeeEnv and RandomAgent.
        """
        self.env = env or YahtzeeEnv()
        self.agent = agent or RandomAgent()
        self.state_log = []  # To store game states at each step.
        self.event_log = []  # In-memory event log.
        self.game_id = 0     # Using a constant game ID for simplicity.

    def log_event(self, event_type, data):
        """Records an event in the in-memory event log."""
        event = {
            "timestamp": datetime.utcnow().isoformat(),
            "game_id": self.game_id,
            "event_type": event_type,
            "data": data
        }
        self.event_log.append(event)

    def run_episode(self, render=False):
        """
        Runs a single episode until the game is over.

        Parameters:
            render (bool): If True, prints a post-game summary at the end.
        
        Returns:
            total_reward (int): The cumulative reward achieved in the episode.
            trajectory (list): A list of tuples representing (state, action, reward, next_state, done, info).
        """
        state = self.env.reset()
        self.state_log = [state]
        self.event_log = []  # Clear the event log for the new game.

        # Log the start of a new game.
        self.log_event("new_game", {"game_state": state})

        done = False
        total_reward = 0
        trajectory = []

        while not done:
            action = self.agent.act(state)

            if action.get("type") == "score" and not state.get("game_started", False):  
                raise RuntimeError("Score action attempted before any roll. This is not allowed.")
                

            next_state, reward, done, info = self.env.step(action)
            
            if action["type"] == "roll":
                dice = next_state.get("dice", [])
                dice_values = [die["value"] for die in dice]
                self.log_event("roll", {"dice": dice, "dice_values": dice_values})
                
            elif action["type"] == "score":
                self.log_event("score", {"category": action.get("category"), "game_state": next_state})


            

            if "error" in info:
                print("GAME SUMMARY:")
                self.render_summary()
                raise RuntimeError(f"Simulation error: {info['error']}")
                
                

            
            trajectory.append((state, action, reward, next_state, done, info))
            total_reward += reward
            state = next_state
            self.state_log.append(state)

        # Log game over event.
        final_score = state.get("scorecard", {}).get("total", 0)
        self.log_event("game_over", {"final_score": final_score, "game_state": state})

        if render:
            self.render_summary()

        return total_reward, trajectory

    def get_state_log(self):
        """Returns the list of game states recorded at each step."""
        return self.state_log

    def render_summary(self):
        """
        Prints a post-game summary similar to your pretty_print_game_log, but based on
        the in-memory event log.
        """
        # Group events into turns. Each turn is a series of roll events followed by a score event.
        turns = []
        current_turn = []

        for event in self.event_log:
            if event["event_type"] == "roll":
                # Record roll event: display the dice values.
                current_turn.append({
                    "type": "roll",
                    "dice": [d["value"] for d in event["data"].get("dice", [])]
                })
            elif event["event_type"] == "score":
                current_turn.append({
                    "type": "score",
                    "category": event["data"].get("category"),
                    "scorecard": event["data"].get("game_state", {}).get("scorecard", {})
                })
                turns.append(current_turn)
                current_turn = []

        # Retrieve start and end event info.
        new_game_event = next((e for e in self.event_log if e["event_type"] == "new_game"), None)
        game_over_event = next((e for e in self.event_log if e["event_type"] == "game_over"), None)
        start_time = new_game_event["timestamp"] if new_game_event else "unknown"
        end_time = game_over_event["timestamp"] if game_over_event else "unknown"
        final_score = game_over_event["data"].get("final_score") if game_over_event else "?"

        print(f"\n🎲 Game ID: {self.game_id}")
        print(f"🕒 Started at: {start_time}\n")
        for i, turn in enumerate(turns, 1):
            print(f"Turn {i}")
            for action in turn:
                if action["type"] == "roll":
                    print(f"  - Rolled: {action['dice']}")
                elif action["type"] == "score":
                    cat = action["category"]
                    score = action["scorecard"].get("scores", {}).get(cat, "N/A")
                    print(f"  - Scored: {cat} → {score} pts")
            print()
        print(f"🏁 Game Over at {end_time}")
        print(f"🔢 Final Score: {final_score}")
        print("=" * 40)



In [164]:
sim = Simulation()

In [247]:
sim = Simulation()
total_reward, trajectory = sim.run_episode(render=True)


GAME SUMMARY:

🎲 Game ID: 0
🕒 Started at: 2025-04-03T22:11:09.900367

Turn 1
  - Rolled: [3, 4, 2, 3, 1, 4]
  - Scored: threes → 6 pts

Turn 2
  - Rolled: [1, 3, 3, 1, 6, 1]
  - Scored: small_straight → 0 pts

Turn 3
  - Scored: pair → None pts

🏁 Game Over at unknown
🔢 Final Score: ?


  "timestamp": datetime.utcnow().isoformat(),


RuntimeError: Simulation error: turn_scored

In [251]:
sim.state_log

[{'dice': [{'value': 1, 'held': False},
   {'value': 1, 'held': False},
   {'value': 1, 'held': False},
   {'value': 1, 'held': False},
   {'value': 1, 'held': False},
   {'value': 1, 'held': False}],
  'rolls_left': 3,
  'saved_rolls': 0,
  'scorecard': {'scores': {'ones': None,
    'twos': None,
    'threes': 6,
    'fours': None,
    'fives': None,
    'sixes': None,
    'pair': None,
    'two_pair': None,
    'three_of_a_kind': None,
    'four_of_a_kind': None,
    'full_house': None,
    'small_straight': 0,
    'large_straight': None,
    'yahtzee': None,
    'chance': None},
   'total': 0,
   'upper_section_total': 0,
   'upper_bonus': False,
   'upper_bonus_threshold': 73},
  'current_turn_scored': False,
  'game_started': False},
 {'dice': [{'value': 3, 'held': False},
   {'value': 4, 'held': False},
   {'value': 2, 'held': False},
   {'value': 3, 'held': False},
   {'value': 1, 'held': False},
   {'value': 4, 'held': False}],
  'rolls_left': 2,
  'saved_rolls': 0,
  'scorecar

In [248]:
len(trajectory)

123

In [250]:
trajectory

[({'dice': [{'value': 1, 'held': False},
    {'value': 1, 'held': False},
    {'value': 1, 'held': False},
    {'value': 1, 'held': False},
    {'value': 1, 'held': False},
    {'value': 1, 'held': False}],
   'rolls_left': 3,
   'saved_rolls': 0,
   'scorecard': {'scores': {'ones': 1,
     'twos': 0,
     'threes': 0,
     'fours': 0,
     'fives': 0,
     'sixes': 12,
     'pair': 4,
     'two_pair': 10,
     'three_of_a_kind': 0,
     'four_of_a_kind': 0,
     'full_house': 0,
     'small_straight': 0,
     'large_straight': 0,
     'yahtzee': 0,
     'chance': 16},
    'total': 0,
    'upper_section_total': 0,
    'upper_bonus': False,
    'upper_bonus_threshold': 73},
   'current_turn_scored': False,
   'game_started': False},
  {'type': 'roll'},
  0,
  {'dice': [{'value': 6, 'held': False},
    {'value': 5, 'held': False},
    {'value': 5, 'held': False},
    {'value': 6, 'held': False},
    {'value': 4, 'held': False},
    {'value': 4, 'held': False}],
   'rolls_left': 2,
   'sa

In [249]:

for j in range(len(trajectory)):
    
    for i in range(len(trajectory[j])):
        print(trajectory[j][i])
        print("-----")
    print("___________________________________")
    print(" ")

{'dice': [{'value': 1, 'held': False}, {'value': 1, 'held': False}, {'value': 1, 'held': False}, {'value': 1, 'held': False}, {'value': 1, 'held': False}, {'value': 1, 'held': False}], 'rolls_left': 3, 'saved_rolls': 0, 'scorecard': {'scores': {'ones': 1, 'twos': 0, 'threes': 0, 'fours': 0, 'fives': 0, 'sixes': 12, 'pair': 4, 'two_pair': 10, 'three_of_a_kind': 0, 'four_of_a_kind': 0, 'full_house': 0, 'small_straight': 0, 'large_straight': 0, 'yahtzee': 0, 'chance': 16}, 'total': 0, 'upper_section_total': 0, 'upper_bonus': False, 'upper_bonus_threshold': 73}, 'current_turn_scored': False, 'game_started': False}
-----
{'type': 'roll'}
-----
0
-----
{'dice': [{'value': 6, 'held': False}, {'value': 5, 'held': False}, {'value': 5, 'held': False}, {'value': 6, 'held': False}, {'value': 4, 'held': False}, {'value': 4, 'held': False}], 'rolls_left': 2, 'saved_rolls': 0, 'scorecard': {'scores': {'ones': 1, 'twos': 0, 'threes': 0, 'fours': 0, 'fives': 0, 'sixes': 12, 'pair': 4, 'two_pair': 10, '

In [None]:
state, action, reward, next_state, done, info