In [1]:
import numpy as np
import json
import random
import itertools

from yahtzee import Game
from yahtzee import Scorecard



In [6]:
# Scratch objects for interactive checks in the cells below.
# NOTE(review): YahtzeeEnv and RandomAgent are defined in later cells of this
# notebook, so on a fresh kernel those cells have to run before this one.
test_env = YahtzeeEnv()
test_agent = RandomAgent()

In [7]:
# Display the environment's current state dictionary (dice, rolls, scorecard).
test_env.get_state()

{'dice': [{'value': 1, 'held': False},
  {'value': 1, 'held': False},
  {'value': 1, 'held': False},
  {'value': 1, 'held': False},
  {'value': 1, 'held': False},
  {'value': 1, 'held': False}],
 'rolls_left': 3,
 'saved_rolls': 0,
 'scorecard': {'scores': {'ones': None,
   'twos': None,
   'threes': None,
   'fours': None,
   'fives': None,
   'sixes': None,
   'pair': None,
   'two_pair': None,
   'three_of_a_kind': None,
   'four_of_a_kind': None,
   'full_house': None,
   'small_straight': None,
   'large_straight': None,
   'yahtzee': None,
   'chance': None},
  'total': 0,
  'upper_section_total': 0,
  'upper_bonus': False,
  'upper_bonus_threshold': 73},
 'current_turn_scored': False,
 'game_started': False}

In [8]:
# Apply a single "roll" action; step() returns (state, reward, done, info).
test_env.step({'type':'roll'})

({'dice': [{'value': 1, 'held': False},
   {'value': 1, 'held': False},
   {'value': 6, 'held': False},
   {'value': 6, 'held': False},
   {'value': 4, 'held': False},
   {'value': 3, 'held': False}],
  'rolls_left': 2,
  'saved_rolls': 0,
  'scorecard': {'scores': {'ones': None,
    'twos': None,
    'threes': None,
    'fours': None,
    'fives': None,
    'sixes': None,
    'pair': None,
    'two_pair': None,
    'three_of_a_kind': None,
    'four_of_a_kind': None,
    'full_house': None,
    'small_straight': None,
    'large_straight': None,
    'yahtzee': None,
    'chance': None},
   'total': 0,
   'upper_section_total': 0,
   'upper_bonus': False,
   'upper_bonus_threshold': 73},
  'current_turn_scored': False,
  'game_started': True},
 0,
 False,
 {})

In [9]:
# game_env.py

class YahtzeeEnv:
    """
    Thin environment wrapper around a ``Game`` instance.

    Exposes a reset/step/get_state/render interface so agents can interact
    with the game through plain action dictionaries.
    """

    def __init__(self):
        self.game = Game()

    def reset(self):
        """Starts a new game and returns the initial state."""
        self.game = Game()
        return self.get_state()

    def step(self, action: dict):
        """
        Executes an action and returns a tuple of (state, reward, done, info).

        The action should be a dictionary with a key "type" and any required
        additional parameters. Supported actions:
          - {"type": "roll"}
          - {"type": "toggle_hold", "die_index": index}
          - {"type": "hold_roll", "held_dice": held_dice_indices}
          - {"type": "score", "category": category_name}

        ``hold_roll`` makes exactly the dice listed in ``held_dice`` held
        (toggling any die whose current hold flag differs) and then rolls.
        An empty list is valid and means "hold nothing".

        The reward is non-zero only for a successful "score" action, where it
        is the increase of the scorecard total. Problems are reported through
        ``info["error"]`` rather than by raising.
        """
        info = {}
        reward = 0
        action_type = action.get("type")

        if action_type == "roll":
            if not self.game.roll_dice():
                info["error"] = "No rolls left"

        elif action_type == "hold_roll":
            held_dice_indices = action.get("held_dice")

            # Only a missing key is an error; an empty list legitimately
            # requests a reroll of every die.
            if held_dice_indices is not None:
                success = True
                dice = self.game.to_dict()["dice"]

                for die_index, die in enumerate(dice):
                    should_hold = die_index in held_dice_indices
                    # Toggle only the dice whose hold flag disagrees with the
                    # request. After the first failed toggle no further
                    # toggles are attempted (short-circuit on ``success``).
                    if bool(die["held"]) != should_hold:
                        success = success and self.game.toggle_hold(die_index)

                if not success:
                    info["error"] = "Invalid toggle hold action in hold_roll"

                # The roll is attempted even after a failed toggle; a roll
                # failure replaces the toggle error message.
                if not self.game.roll_dice():
                    info["error"] = "not able to roll in hold_roll, possibly no rolls left"

            else:
                info["error"] = "held_dice not provided in hold_roll action"

        elif action_type == "toggle_hold":
            die_index = action.get("die_index")
            if die_index is not None:
                if not self.game.toggle_hold(die_index):
                    info["error"] = "Invalid toggle hold action"
            else:
                info["error"] = "die_index not provided"

        elif action_type == "score":
            category = action.get("category")
            if category is not None:
                # Calculate reward as the difference in total score after scoring
                prev_total = self.game.scorecard.get_total()
                result = self.game.score(category)
                if result == "success":
                    reward = self.game.scorecard.get_total() - prev_total
                else:
                    # Any other return value is passed through as the error.
                    info["error"] = result
            else:
                info["error"] = "category not provided"

        else:
            info["error"] = "Invalid action type"

        # Check if the game is over by verifying if all score categories are used.
        done = self.game.scorecard.is_game_over()
        state = self.get_state()
        return state, reward, done, info

    def get_state(self):
        """Returns the current game state as a dictionary."""
        return self.game.to_dict()

    def render(self):
        """Prints a JSON-formatted representation of the current game state."""
        print(json.dumps(self.get_state(), indent=2))


In [31]:
# Scratch check: draw a single Bernoulli(1/2) sample (1 trial, p = 0.5).
np.random.binomial(1,1/2)

0

In [10]:
# agent.py

import random

class YahtzeeAgent:
    """Base class for Yahtzee policies; concrete agents override ``act``."""

    def act(self, state):
        """
        Pick the next action for the supplied game state.

        ``state`` is expected to be a dictionary with keys such as:
          - 'rolls_left'
          - 'game_started'
          - 'current_turn_scored'
          - 'dice'
          - 'scorecard' (which contains 'scores')

        Implementations return an action dictionary, for example:
          {"type": "roll"}
          {"type": "toggle_hold", "die_index": 2}
          {"type": "score", "category": "ones"}

        Raises:
            NotImplementedError: always, in this base class.
        """
        message = "This method should be overridden by subclasses."
        raise NotImplementedError(message)

class RandomAgent(YahtzeeAgent):
    """
    Random-policy agent: picks uniformly among the actions it considers valid.

    It never toggles the same die twice within one turn, which keeps it from
    flipping a hold back and forth.
    """

    def __init__(self):
        # Track dice indices that have already been toggled in the current turn
        self.toggled_indices = set()
        # Store the number of rolls left from the previous state to detect turn changes
        self.last_rolls_left = None

    def act(self, state):
        """
        Choose a random valid action for ``state``.

        Rules applied, in order:
          1. Before the game has started, the only action is a roll.
          2. At the start of a new turn (``current_turn_scored`` is truthy, or
             ``rolls_left`` increased since the previous call) the per-turn
             toggle memory is cleared and a roll is returned.
          3. Otherwise a uniform choice is made among: rolling (if rolls are
             left), toggling a die not yet toggled this turn, and scoring any
             open category (only once a roll was made this turn).

        Returns:
            dict: an action dictionary understood by ``YahtzeeEnv.step``.
        """
        # If the game has not started yet, we must roll.
        if not state.get('game_started'):
            return {'type': 'roll'}

        current_rolls_left = state.get('rolls_left', 0)

        # New turn: the previous turn was scored, or rolls_left went up.
        new_turn = bool(state.get('current_turn_scored')) or (
            self.last_rolls_left is not None
            and current_rolls_left > self.last_rolls_left
        )
        self.last_rolls_left = current_rolls_left

        if new_turn:
            # Forget last turn's toggles here; this is the only place a new
            # turn is detected, so the reset has to happen before returning.
            self.toggled_indices = set()
            return {'type': 'roll'}

        valid_actions = []

        # If there are rolls left, consider a roll action.
        if current_rolls_left > 0:
            valid_actions.append({"type": "roll"})

        # Allow toggle hold actions only for dice that haven't been toggled
        # already in this turn.
        if not state.get("current_turn_scored", True):
            for i, _die in enumerate(state.get("dice", [])):
                if i not in self.toggled_indices:
                    valid_actions.append({"type": "toggle_hold", "die_index": i})

        # Only allow scoring if a roll has been made in the current turn
        # (i.e., rolls_left is less than 3).
        if state.get("rolls_left", 3) < 3:
            scores = state.get("scorecard", {}).get("scores", {})
            for category, score in scores.items():
                if score is None:
                    valid_actions.append({"type": "score", "category": category})

        if not valid_actions:
            # Fallback action, if no valid actions are found.
            return {"type": "roll"}

        chosen_action = random.choice(valid_actions)
        # Remember a toggled die so it is not offered again this turn.
        if chosen_action.get("type") == "toggle_hold":
            self.toggled_indices.add(chosen_action["die_index"])
        return chosen_action


In [11]:

# Lazy iterator over every ordered outcome of six dice with faces 1-6.
# NOTE(review): itertools.product yields a one-shot iterator, and
# average_scores() below builds its own local list under the same name, so
# that function does not read this global.
all_combinations = itertools.product(range(1, 7), repeat=6)

In [12]:
def calculate_score(category, dice):
    """
    Score a set of dice in one scorecard category.

    Args:
        category: category name, e.g. "ones", "pair", "full_house", "chance".
        dice: sequence of face values (list or tuple) supporting ``.count``.

    Returns:
        The points the dice earn in ``category``; 0 when the dice do not
        qualify or the category name is not recognised.
    """
    # Number of dice showing each face 1..6.
    face_counts = {face: dice.count(face) for face in range(1, 7)}

    # Upper section: sum of the dice showing the category's face.
    upper_faces = {"ones": 1, "twos": 2, "threes": 3, "fours": 4, "fives": 5, "sixes": 6}
    if category in upper_faces:
        wanted = upper_faces[category]
        return sum(pip for pip in dice if pip == wanted)

    # N-of-a-kind: the highest face appearing at least N times, counted N times.
    group_sizes = {"pair": 2, "three_of_a_kind": 3, "four_of_a_kind": 4}
    if category in group_sizes:
        size = group_sizes[category]
        for face in range(6, 0, -1):
            if face_counts[face] >= size:
                return size * face
        return 0

    if category == "two_pair":
        # Needs two *different* faces with a pair each, so four of a kind
        # alone scores 0.
        paired = sorted(
            (face for face, n in face_counts.items() if n >= 2), reverse=True
        )
        if len(paired) < 2:
            return 0
        return 2 * paired[0] + 2 * paired[1]

    if category == "full_house":
        tallies = face_counts.values()

        def ranked(min_count):
            # (face, count) pairs with at least min_count dice, highest face first.
            return sorted(
                ((face, n) for face, n in face_counts.items() if n >= min_count),
                reverse=True,
            )

        if 2 in tallies and 3 in tallies:
            # Triple + pair: the triple's face counts three times.
            groups = ranked(2)
            high_face, high_n = groups[0]
            low_face = groups[1][0]
            if high_n >= 3:
                return 3 * high_face + 2 * low_face
            return 2 * high_face + 3 * low_face

        if 3 in tallies:
            # Two triples: the higher face is used as the triple.
            triples = ranked(3)
            if len(triples) <= 1:
                return 0
            return 3 * triples[0][0] + 2 * triples[1][0]

        if 4 in tallies and 2 in tallies:
            # Four of a kind + pair: three of the four act as the triple.
            groups = ranked(2)
            high_face, high_n = groups[0]
            low_face = groups[1][0]
            if high_n == 2:
                return 2 * high_face + 3 * low_face
            if high_n == 4:
                return 3 * high_face + 2 * low_face

        return 0

    if category == "small_straight":
        # 1-2-3-4-5: the five lowest distinct faces must run from 1 to 5.
        distinct = sorted(set(dice))
        if len(distinct) >= 5 and distinct[0] == 1 and distinct[4] == 5:
            return 15
        return 0

    if category == "large_straight":
        # 2-3-4-5-6 must all be present.
        present = set(dice)
        return 20 if all(face in present for face in range(2, 7)) else 0

    if category == "yahtzee":
        # All six dice show the same face.
        return 100 if max(face_counts.values()) == 6 else 0

    if category == "chance":
        return sum(dice)

    return 0


In [13]:

def average_scores():
    """
    Compute, per category, the mean score over all six-dice outcomes that
    score more than zero in that category.

    Every ordered outcome of six dice (faces 1-6) is enumerated; outcomes
    scoring 0 in a category are excluded from that category's mean.

    Returns:
        dict mapping each category name of a fresh ``Scorecard`` to its
        average positive score.

    Raises:
        ZeroDivisionError: if some category never scores above zero.
    """
    outcomes = list(itertools.product(range(1, 7), repeat=6))

    averages = {}
    for cat in Scorecard().scores.keys():
        # Keep only the outcomes that actually score in this category.
        positive_scores = [
            points
            for points in (calculate_score(cat, outcome) for outcome in outcomes)
            if points > 0
        ]
        averages[cat] = sum(positive_scores) / len(positive_scores)
    return averages
        

    
    

In [14]:
# Precompute the per-category average positive score once; GreedyAgent reads
# this module-level dictionary when it is constructed.
average_scores_dict = average_scores()

In [15]:
# Display the precomputed averages for a quick sanity check.
average_scores_dict

{'ones': 1.503528729335181,
 'twos': 3.007057458670362,
 'threes': 4.510586188005543,
 'fours': 6.014114917340724,
 'fives': 7.5176436466759045,
 'sixes': 9.021172376011085,
 'pair': 8.363854057819575,
 'two_pair': 14.242774566473988,
 'three_of_a_kind': 10.561274509803921,
 'four_of_a_kind': 14.0,
 'full_house': 17.544025157232703,
 'small_straight': 15.0,
 'large_straight': 20.0,
 'yahtzee': 100.0,
 'chance': 21.0}

In [16]:
# Sum of the per-category average positive scores.
sum(average_scores_dict.values())

252.28603160736898

In [17]:
class GreedyAgent(YahtzeeAgent):
    """
    Threshold-greedy policy.

    After each roll it scores immediately if some open category is worth at
    least ``threshold`` times that category's average positive score;
    otherwise it rolls again while rolls remain, and finally takes the
    highest-scoring open category.
    """

    def __init__(self, threshold=0.9):
        super().__init__()
        # Fraction of a category's average positive score that counts as
        # "good enough" to score right away.
        self.threshold = threshold
        # Precomputed averages, taken from the module-level dictionary.
        self.avg_positive_scores = average_scores_dict
        # rolls_left recorded at the last detected new turn (None until then).
        self.last_rolls_left = None

    def compute_score(self, category, dice):
        """Score ``dice`` (a list of {'value': ...} dicts) in ``category``."""
        return calculate_score(category, [die['value'] for die in dice])

    def act(self, state):
        """
        Return the next action dictionary for ``state``.

        Rolls when the game has not started or a new turn is detected;
        otherwise applies the threshold rule described on the class.
        """
        # Nothing to evaluate before the first roll of the game.
        if not state.get('game_started'):
            return {'type': 'roll'}

        # A new turn is signalled by rolls_left going up relative to the
        # stored value, or by the current turn already being scored.
        rolls_now = state.get('rolls_left')
        rolls_went_up = (
            self.last_rolls_left is not None and rolls_now > self.last_rolls_left
        )
        if rolls_went_up or state.get('current_turn_scored'):
            self.last_rolls_left = rolls_now
            return {'type': 'roll'}

        dice = state.get("dice", [])
        # Saved rolls count towards what is still available this turn.
        rolls_available = state.get("rolls_left", 0) + state.get('saved_rolls', 0)

        score_slots = state.get("scorecard", {}).get("scores", {})
        open_categories = [name for name, value in score_slots.items() if value is None]

        # Track two candidates while scanning the open categories:
        #  - the best one that clears the threshold, and
        #  - the best one overall (used when out of rolls).
        confident_cat, confident_points = None, -1
        fallback_cat, fallback_points = None, -1

        for cat in open_categories:
            points = self.compute_score(cat, dice)
            typical = self.avg_positive_scores.get(cat, 0)

            clears_bar = typical > 0 and points >= self.threshold * typical
            if clears_bar and points > confident_points:
                confident_cat, confident_points = cat, points

            if points > fallback_points:
                fallback_cat, fallback_points = cat, points

        if confident_cat:
            return {'type': 'score', 'category': confident_cat}

        # No category reaches the threshold: roll again if possible.
        if rolls_available > 0:
            return {"type": "roll"}

        # Out of rolls: score the best available category.
        return {"type": "score", "category": fallback_cat}


In [18]:
# Ask the scratch agent for an action on the environment's current state.
test_agent.act(test_env.get_state()) 

{'type': 'score', 'category': 'twos'}

In [19]:
# Inspect which dice the scratch agent has toggled so far.
test_agent.toggled_indices

set()

In [20]:
def pretty_print_state(state):
    """
    Print a readable, line-by-line summary of a game state dictionary.

    Shows the dice (held dice are marked), the rolls left and saved rolls,
    every scorecard category with its score or "Not scored", and the total.
    Missing keys fall back to empty collections or 0.
    """
    print("=== Yahtzee Game State ===")

    # Dice, with a marker on the ones currently held.
    rendered_dice = []
    for die in state.get("dice", []):
        marker = " (held)" if die["held"] else ""
        rendered_dice.append(f"{die['value']}{marker}")
    print("Dice:", ", ".join(rendered_dice))

    # Roll counters.
    print("Rolls left:", state.get("rolls_left", 0))
    print("Saved rolls:", state.get("saved_rolls", 0))

    # One line per scorecard category.
    card = state.get("scorecard", {})
    print("Scorecard:")
    for name, points in card.get("scores", {}).items():
        shown = "Not scored" if points is None else points
        print(f"  {name}: {shown}")
    print("Total Score:", card.get("total", 0))
    print("===========================")

In [21]:
# simulation.py

# simulation.py

from datetime import datetime
# from game_env import YahtzeeEnv
# from agent import RandomAgent

class Simulation:
    """
    Runs an agent against an environment for one game and records the result.

    Two logs are kept in memory and reset at the start of every episode:
    ``state_log`` (every state seen) and ``event_log`` (timestamped
    new_game / roll / hold_roll / score / game_over events).
    """

    def __init__(self, env=None, agent=None):
        """
        Initialize the simulation with a given environment and agent.
        If none are provided, defaults to YahtzeeEnv and RandomAgent.
        """
        self.env = env or YahtzeeEnv()
        self.agent = agent or RandomAgent()
        self.state_log = []  # To store game states at each step.
        self.event_log = []  # In-memory event log.
        self.game_id = 0     # Using a constant game ID for simplicity.

    def log_event(self, event_type, data):
        """Records an event in the in-memory event log."""
        # Local import: the surrounding cell only imports ``datetime``.
        from datetime import timezone

        # datetime.utcnow() is deprecated; take an aware UTC time and drop the
        # tzinfo so the ISO string keeps its previous naive format.
        timestamp = datetime.now(timezone.utc).replace(tzinfo=None).isoformat()
        event = {
            "timestamp": timestamp,
            "game_id": self.game_id,
            "event_type": event_type,
            "data": data
        }
        self.event_log.append(event)

    def run_episode(self, render=False):
        """
        Runs a single episode until the game is over.

        Parameters:
            render (bool): If True, prints a post-game summary at the end.

        Returns:
            total_reward (int): The cumulative reward achieved in the episode.
            trajectory (list): A list of tuples representing (state, action, reward, next_state, done, info).

        Raises:
            RuntimeError: if the agent tries to score before any roll, or if
                the environment reports an error for an action (the game
                summary so far is printed first).
        """
        state = self.env.reset()
        self.state_log = [state]
        self.event_log = []  # Clear the event log for the new game.

        # Log the start of a new game.
        self.log_event("new_game", {"game_state": state})

        done = False
        total_reward = 0
        trajectory = []

        while not done:
            action = self.agent.act(state)

            if action.get("type") == "score" and not state.get("game_started", False):
                raise RuntimeError("Score action attempted before any roll. This is not allowed.")

            next_state, reward, done, info = self.env.step(action)

            # Events are logged before the error check so that a failing
            # action still shows up in the summary printed below.
            if action["type"] == "roll":
                dice = next_state.get("dice", [])
                dice_values = [die["value"] for die in dice]
                self.log_event("roll", {"dice": dice, "dice_values": dice_values})

            elif action["type"] == "hold_roll":
                dice = next_state.get("dice", [])
                dice_values = [die["value"] for die in dice]
                held_dice_indices = action.get("held_dice")
                self.log_event(
                    "hold_roll",
                    {"dice": dice, "dice_values": dice_values, "held_dice": held_dice_indices},
                )

            elif action["type"] == "score":
                self.log_event("score", {"category": action.get("category"), "game_state": next_state})

            if "error" in info:
                print("GAME SUMMARY:")
                self.render_summary()
                raise RuntimeError(f"Simulation error: {info['error']}")

            trajectory.append((state, action, reward, next_state, done, info))
            total_reward += reward
            state = next_state
            self.state_log.append(state)

        # Log game over event.
        final_score = state.get("scorecard", {}).get("total", 0)
        self.log_event("game_over", {"final_score": final_score, "game_state": state})

        if render:
            self.render_summary()

        return total_reward, trajectory

    def get_state_log(self):
        """Returns the list of game states recorded at each step."""
        return self.state_log

    def render_summary(self):
        """
        Prints a turn-by-turn summary based on the in-memory event log.

        A turn is a run of roll / hold_roll events closed by a score event.
        Rolls not yet followed by a score (e.g. when an error interrupted the
        game) are shown as a final, unfinished turn.
        """
        # Group events into turns.
        turns = []
        current_turn = []

        for event in self.event_log:
            if event["event_type"] == "roll":
                # Record roll event: display the dice values.
                current_turn.append({
                    "type": "roll",
                    "dice": [d["value"] for d in event["data"].get("dice", [])]
                })

            elif event["event_type"] == "hold_roll":
                # Record roll event with held dice: display dice values and which dice were held.
                current_turn.append({
                    "type": "hold_roll",
                    "dice": [d["value"] for d in event["data"].get("dice", [])],
                    "held_dice": event["data"].get("held_dice")
                })

            elif event["event_type"] == "score":
                current_turn.append({
                    "type": "score",
                    "category": event["data"].get("category"),
                    "scorecard": event["data"].get("game_state", {}).get("scorecard", {})
                })
                turns.append(current_turn)
                current_turn = []

        # Keep an in-progress turn so the rolls leading up to an error are
        # not silently dropped from the summary.
        if current_turn:
            turns.append(current_turn)

        # Retrieve start and end event info.
        new_game_event = next((e for e in self.event_log if e["event_type"] == "new_game"), None)
        game_over_event = next((e for e in self.event_log if e["event_type"] == "game_over"), None)
        start_time = new_game_event["timestamp"] if new_game_event else "unknown"
        end_time = game_over_event["timestamp"] if game_over_event else "unknown"
        final_score = game_over_event["data"].get("final_score") if game_over_event else "?"

        print(f"\n🎲 Game ID: {self.game_id}")
        print(f"🕒 Started at: {start_time}\n")
        for i, turn in enumerate(turns, 1):
            print(f"Turn {i}")
            for action in turn:
                if action["type"] == "roll":
                    print(f"  - Rolled: {action['dice']}")
                elif action["type"] == "hold_roll":
                    # print_roll is a module-level helper defined in a later cell.
                    print_roll(action['dice'], held_indices = action['held_dice'])

                elif action["type"] == "score":
                    cat = action["category"]
                    score = action["scorecard"].get("scores", {}).get(cat, "N/A")
                    print(f"  - Scored: {cat} → {score} pts")
            print()
        print(f"🏁 Game Over at {end_time}")
        print(f"🔢 Final Score: {final_score}")
        print("=" * 40)



In [22]:
def print_roll(dice, held_indices=None):
    """
    Print one roll as ``  - Rolled: [a, b*, c]``.

    Args:
        dice: iterable of die values, in position order.
        held_indices: positions of held dice; those values get a trailing
            ``*``. ``None`` means no die is marked.
    """
    held = [] if held_indices is None else held_indices
    rendered = [
        f"{face}*" if position in held else str(face)
        for position, face in enumerate(dice)
    ]
    print(f"  - Rolled: [{', '.join(rendered)}]")

In [None]:
# Scratch cell: overwrite one list element by index and display the list.
l = [3,5,7,7,8]
l[3] = 5

l

In [23]:
# Default simulation: with no arguments Simulation builds its own
# YahtzeeEnv and RandomAgent.
sim = Simulation()

In [49]:
# Play one full game with the selected agent and print the turn-by-turn summary.
# NOTE(review): OptimalAgent is defined in a later cell of this notebook; that
# cell must have been run before this one.
# agent = GreedyAgent()
# agent = RandomAgent()
agent = OptimalAgent()

sim = Simulation(agent = agent)
total_reward, trajectory = sim.run_episode(render=True)




🎲 Game ID: 0
🕒 Started at: 2025-09-19T05:22:28.004404

Turn 1
  - Rolled: [5, 1, 3, 4, 2, 6]
  - Scored: chance → 21 pts

Turn 2
  - Rolled: [5, 3, 5, 4, 3, 3]
  - Scored: full_house → 19 pts

Turn 3
  - Rolled: [6, 1, 2, 4, 2, 1]
  - Scored: twos → 4 pts

Turn 4
  - Rolled: [3, 3, 1, 4, 6, 3]
  - Scored: threes → 9 pts

Turn 5
  - Rolled: [6, 1, 6, 2, 6, 3]
  - Scored: sixes → 18 pts

Turn 6
  - Rolled: [2, 3, 3, 2, 6, 1]
  - Rolled: [6, 4, 3, 2, 6, 2]
  - Scored: two_pair → 16 pts

Turn 7
  - Rolled: [2, 3, 2, 1, 2, 6]
  - Rolled: [2, 6, 5, 6, 5, 1]
  - Scored: pair → 12 pts

Turn 8
  - Rolled: [3, 2, 3, 6, 4, 1]
  - Rolled: [2, 2, 4, 1, 6, 5]
  - Rolled: [5, 2, 4, 3, 1, 2]
  - Scored: small_straight → 15 pts

Turn 9
  - Rolled: [4, 5, 5, 6, 6, 1]
  - Scored: fives → 10 pts

Turn 10
  - Rolled: [6, 1, 6, 5, 3, 5]
  - Rolled: [4, 3, 6, 5, 5, 6]
  - Rolled: [6, 4, 5, 3, 3, 2]
  - Scored: large_straight → 20 pts

Turn 11
  - Rolled: [3, 4, 2, 4, 1, 2]
  - Scored: fours → 8 pts

Turn 12

  "timestamp": datetime.utcnow().isoformat(),


In [60]:
# Evaluate OptimalAgent over many independent games: report the mean and the
# best total reward, and keep the trajectory of the best game.
iterations = 10000

avg = 0            # running sum of rewards; turned into the mean after the loop
max_rew = 0
best_traj = None   # stays None unless some game scores above 0

for i in range(iterations):
    # Fresh agent and simulation per game so no state carries over.
    agent = OptimalAgent()
    sim = Simulation(agent=agent)
    total_reward, trajectory = sim.run_episode(render=False)

    avg += total_reward
    if max_rew < total_reward:
        max_rew, best_traj = total_reward, trajectory

avg /= iterations

print('average reward: ', avg)
print('maximum reward ', max_rew)

    

  "timestamp": datetime.utcnow().isoformat(),


average reward:  207.9774
maximum reward  324


In [63]:
# Display the trajectory of the most recently run episode: a list of
# (state, action, reward, next_state, done, info) tuples.
trajectory

[({'dice': [{'value': 1, 'held': False},
    {'value': 1, 'held': False},
    {'value': 1, 'held': False},
    {'value': 1, 'held': False},
    {'value': 1, 'held': False},
    {'value': 1, 'held': False}],
   'rolls_left': 3,
   'saved_rolls': 0,
   'scorecard': {'scores': {'ones': 2,
     'twos': 4,
     'threes': 6,
     'fours': 8,
     'fives': 10,
     'sixes': 12,
     'pair': 10,
     'two_pair': 16,
     'three_of_a_kind': 12,
     'four_of_a_kind': 16,
     'full_house': 16,
     'small_straight': 15,
     'large_straight': 20,
     'yahtzee': 100,
     'chance': 29},
    'total': 0,
    'upper_section_total': 0,
    'upper_bonus': False,
    'upper_bonus_threshold': 73},
   'current_turn_scored': False,
   'game_started': False},
  {'type': 'roll'},
  0,
  {'dice': [{'value': 6, 'held': False},
    {'value': 5, 'held': False},
    {'value': 4, 'held': False},
    {'value': 6, 'held': False},
    {'value': 2, 'held': False},
    {'value': 6, 'held': False}],
   'rolls_left': 

In [40]:
class OptimalAgent(YahtzeeAgent):
    """
    GreedyAgent with a dedicated end-game rule for the "yahtzee" category.

    All decisions are delegated to an internal ``GreedyAgent`` except when
    "yahtzee" is the only open category: then the agent keeps the dice showing
    the most frequent face and rerolls the rest until it has six of a kind or
    runs out of rolls.
    """

    # Number of equal dice that calculate_score requires for "yahtzee".
    _YAHTZEE_COUNT = 6

    def __init__(self):
        super().__init__()

        # rolls_left recorded at the last detected new turn (None until then).
        self.last_rolls_left = None

        # Policy used for everything except the yahtzee end game.
        self.greedy_helper = GreedyAgent()

    def compute_score(self, category, dice):
        """Score ``dice`` (a list of {'value': ...} dicts) in ``category``."""
        dice_list = [die['value'] for die in dice]
        return calculate_score(category, dice_list)

    def act(self, state):
        """
        Return the next action dictionary for ``state``.

        Always returns a dict with a "type" key ("roll", "hold_roll" or
        "score").
        """
        # If the game has not started yet, we must roll.
        if not state.get('game_started'):
            return {'type': 'roll'}

        # A new turn also requires a roll. It is detected when rolls_left
        # increased compared to the stored value or current_turn_scored is set.
        current_rolls_left = state.get('rolls_left')
        current_turn_scored = state.get('current_turn_scored')

        if (self.last_rolls_left is not None and current_rolls_left > self.last_rolls_left) or current_turn_scored:
            self.last_rolls_left = current_rolls_left
            return {'type': 'roll'}

        # Extract the current dice and remaining rolls from the state.
        dice = state.get("dice", [])
        rolls_left = state.get("rolls_left", 0)
        true_rolls_left = rolls_left + state.get('saved_rolls', 0)

        # Determine which categories haven't been scored yet.
        scored = state.get("scorecard", {}).get("scores", {})
        unscored = [cat for cat, score in scored.items() if score is None]

        # The special handling only applies to the very last open category.
        if len(unscored) != 1:
            return self.greedy_helper.act(state)

        last_unscored_cat = unscored[0]

        # If no rolls are left, the last open category must be scored.
        if true_rolls_left == 0:
            return {'type': 'score', 'category': last_unscored_cat}

        if last_unscored_cat != "yahtzee":
            return self.greedy_helper.act(state)

        # Yahtzee end game: aim for the face that currently occurs most often
        # (on a tie, the largest such face).
        dice_list = [die['value'] for die in dice]
        target = max(range(1, 7), key=lambda face: (dice_list.count(face), face))

        if dice_list.count(target) == self._YAHTZEE_COUNT:
            return {'type': 'score', 'category': 'yahtzee'}

        held_dice = [i for i, die in enumerate(dice) if die['value'] == target]
        if not held_dice:
            # No die shows the target (e.g. the state carries no dice), so
            # there is nothing to keep: a plain roll is the only sensible move.
            return {"type": "roll"}

        return {"type": "hold_roll", "held_dice": held_dice}
                
                    

                    
                    
                    

            
                

                
            

        
        

        



In [None]:
class MCTSAgent(YahtzeeAgent):
    """Agent that plays greedily except when 'yahtzee' is the only open category.

    Every decision is delegated to a GreedyAgent, with one exception: when
    'yahtzee' is the last unscored category and rolls remain, the agent holds
    the dice showing the most frequent value and re-rolls the rest.

    NOTE(review): despite the class name, no tree search is performed here.
    """

    def __init__(self):
        super().__init__()

        # Precomputed average positive scores for each category.
        # self.avg_positive_scores = average_scores_dict

        # rolls_left recorded by act() the last time it detected a new turn.
        self.last_rolls_left = None

        # Fallback policy for every situation this agent does not special-case.
        self.greedy_helper = GreedyAgent()

    def compute_score(self, category, dice):
        """Return calculate_score(category, values) for the face values of `dice`.

        `dice` is a sequence of die dicts; only their 'value' entry is used.
        """
        dice_list = [die['value'] for die in dice]
        return calculate_score(category, dice_list)

    def find_unscored(self, state):
        """Return the categories whose score in state['scorecard']['scores'] is None.

        A missing scorecard or scores mapping yields an empty list.
        """
        scored = state.get("scorecard", {}).get("scores", {})
        # The list has to be returned: act() relies on its length and first item.
        return [cat for cat, score in scored.items() if score is None]

    def find_best_unscored_category(self, state, unscored, dice):
        """Return the category in `unscored` that scores highest with `dice`.

        Only strictly positive scores are considered, so None is returned when
        every category would score 0 (or `unscored` is empty). On ties the
        earliest category in `unscored` wins. `state` is accepted but not used.
        """
        cur_max = 0
        cur_best_cat = None
        for category in unscored:
            potential = self.compute_score(category, dice)
            if potential > cur_max:
                cur_max = potential
                cur_best_cat = category
        return cur_best_cat

    def act(self, state):
        """Choose the next action dict for the given game state.

        Returns {'type': 'roll'}, {'type': 'score', 'category': ...},
        {'type': 'hold_roll', 'held_dice': [...]}, or whatever the greedy
        helper returns for states that are delegated to it.
        """
        # if game has not started yet, must roll
        if not state.get('game_started'):
            return {'type': 'roll'}

        # A new turn must also start with a roll. A turn counts as new when
        # rolls_left is larger than the remembered value or when
        # current_turn_scored is set.
        current_rolls_left = state.get('rolls_left')
        current_turn_scored = state.get('current_turn_scored')

        rolls_increased = (
            self.last_rolls_left is not None
            and current_rolls_left > self.last_rolls_left
        )
        if rolls_increased or current_turn_scored:
            # NOTE(review): this is the only place last_rolls_left is refreshed,
            # so between new turns the comparison above uses an old value.
            # Confirm that this is intended.
            self.last_rolls_left = current_rolls_left
            return {'type': 'roll'}

        # Extract the current dice and the rolls still available
        # (regular rolls plus saved rolls).
        dice = state.get("dice", [])
        true_rolls_left = state.get("rolls_left", 0) + state.get('saved_rolls', 0)

        unscored = self.find_unscored(state)

        # Anything other than "exactly one category left" is played greedily.
        if len(unscored) != 1:
            return self.greedy_helper.act(state)

        last_unscored_cat = unscored[0]

        # if no rolls are left, must score last unscored category
        if true_rolls_left == 0:
            return {'type': 'score', 'category': last_unscored_cat}

        if last_unscored_cat != "yahtzee":
            return self.greedy_helper.act(state)

        return self._chase_yahtzee(dice)

    def _chase_yahtzee(self, dice):
        """Score 'yahtzee' if every die matches, else hold the most frequent value."""
        values = [die['value'] for die in dice]
        if not values:
            # Nothing to hold or score; rolling is the only meaningful action.
            return {'type': 'roll'}

        # Most frequent face; ties are broken in favour of the largest face.
        target = max(range(1, 7), key=lambda face: (values.count(face), face))

        if values.count(target) == len(values):
            return {'type': 'score', 'category': 'yahtzee'}

        held_dice = [index for index, value in enumerate(values) if value == target]
        return {"type": "hold_roll", "held_dice": held_dice}
                
                    

In [58]:
# Scratch check: sorting a list of names and wrapping it in a tuple gives an
# order-independent, hashable key.
tuple(sorted(["asdng", "aioptp", "alskt"]))

('aioptp', 'alskt', 'asdng')

In [None]:
# Cache of solved positions, read by fetch_best and written by fill_optimal.
# Layout:
#   opt_play[turns_left][sorted tuple of unscored categories][true_rolls_left][sorted tuple of dice values]
#       -> {'action': ..., 'expected_score': ...}
opt_play = {}

In [None]:
def fetch_best(dice, true_rolls_left, unscored):
    """Look up the cached best action for a position in ``opt_play``.

    The cache is keyed by the number of unscored categories, the sorted tuple
    of those categories, ``true_rolls_left`` and the sorted tuple of dice
    values, so neither the order of the dice nor of the categories matters.

    Args:
        dice: sequence of die dicts; only each die's "value" entry is read.
        true_rolls_left: number of rolls still available, used as a cache key.
        unscored: list of category names that have not been scored yet.

    Returns:
        The cached 'action' entry, or None when the position is not cached.
    """
    turns_left = len(unscored)
    unscored_key = tuple(sorted(unscored))
    dice_key = tuple(sorted(die["value"] for die in dice))

    # Walk the nested cache level by level; a miss at any level yields None.
    entry = (
        opt_play.get(turns_left, {})
        .get(unscored_key, {})
        .get(true_rolls_left, {})
        .get(dice_key)
    )
    if entry is None:
        return None
    return entry['action']

In [None]:
def fill_optimal(dice, true_rolls_left, unscored):
    """Store the best action for a position in ``opt_play`` if it is not cached yet.

    Only the terminal case is implemented so far: exactly one unscored
    category and no rolls left, where the only option is to score it.

    Args:
        dice: sequence of die dicts; only each die's "value" entry is read.
        true_rolls_left: number of rolls still available.
        unscored: list of category names that have not been scored yet.

    Returns:
        None. The result is written to
        ``opt_play[1][(cat,)][true_rolls_left][sorted dice values]`` as
        ``{'action': ..., 'expected_score': ...}``.

    Raises:
        NotImplementedError: for one category with rolls left, and for more
            than one unscored category.
    """
    turns_left = len(unscored)

    dice_values = [die["value"] for die in dice]
    tuple_dice_values = tuple(sorted(dice_values))

    # Position already solved: nothing to do.
    if fetch_best(dice, true_rolls_left, unscored):
        return
    elif turns_left == 1:
        cat = unscored[0]

        if true_rolls_left == 0:
            action = {'type': 'score', 'category': cat}
            # calculate_score is given plain face values, the same way
            # MCTSAgent.compute_score calls it.
            expected_score = calculate_score(cat, dice_values)

            # Create the intermediate cache levels on demand so the first
            # write into an empty opt_play does not raise KeyError.
            by_dice = (
                opt_play.setdefault(1, {})
                .setdefault((cat,), {})
                .setdefault(true_rolls_left, {})
            )
            by_dice[tuple_dice_values] = {'action': action, 'expected_score': expected_score}
            return

        elif true_rolls_left > 0:
            # TODO: not implemented yet. Intended algorithm (from the original sketch):
            #   * baseline: score `cat` now with the current dice;
            #   * for every choice of dice to hold, take the average of the
            #     expected scores over all outcomes of re-rolling the others;
            #   * keep the hold_roll action with the highest average if it beats
            #     the baseline, then store {'action', 'expected_score'} in
            #     opt_play[1][(cat,)][true_rolls_left][tuple_dice_values].
            raise NotImplementedError(
                "fill_optimal does not handle positions with rolls left yet"
            )

    elif turns_left > 1:
        raise NotImplementedError()

        
    

In [64]:
from collections import Counter

def get_best_possible_cat_score(dice, true_rolls_left, cat):
    """Pick the next action when playing for the ideal score in one category.

    If the current dice already score ``ideal_scores[cat]`` the category is
    scored. Otherwise a hold_roll action is built:

    * 'ones' .. 'sixes': hold every die showing the category's face;
    * 'small_straight' (faces 1-5) / 'large_straight' (faces 2-6): hold the
      first die found for each face of the straight.

    Args:
        dice: sequence of die dicts; only each die's "value" entry is read.
        true_rolls_left: accepted for symmetry with the other helpers; it is
            not used by the current logic.
        cat: category name; must be a key of ``ideal_scores``.

    Returns:
        An action dict, or None for categories that have no hold strategy yet
        (including 'three_of_a_kind' and 'four_of_a_kind').
    """
    # calculate_score is given plain face values, the same way
    # MCTSAgent.compute_score calls it.
    dice_values = [die["value"] for die in dice]

    if calculate_score(cat, dice_values) == ideal_scores[cat]:
        return {"type": "score", "category": cat}

    upper_faces = {'ones': 1, 'twos': 2, 'threes': 3, 'fours': 4, 'fives': 5, 'sixes': 6}
    if cat in upper_faces:
        face = upper_faces[cat]
        held_dice = [index for index, value in enumerate(dice_values) if value == face]
        return {'type': 'hold_roll', 'held_dice': held_dice}

    straight_faces = {'small_straight': range(1, 6), 'large_straight': range(2, 7)}
    if cat in straight_faces:
        held_dice = []
        for face in straight_faces[cat]:
            # Keep one die per needed face; duplicates get re-rolled.
            if face in dice_values:
                held_dice.append(dice_values.index(face))
        return {'type': 'hold_roll', 'held_dice': held_dice}

    # TODO: 'three_of_a_kind' / 'four_of_a_kind' (needing 3 / 4 equal dice) and
    # the remaining categories have no hold strategy yet.
    return None
                    
    
    

In [74]:
# Empty table; nothing in the visible cells reads or writes it yet.
# NOTE(review): judging by the name, presumably a memo for a three-of-a-kind DP — confirm.
dp_3_of_a_kind = {}

# 

In [None]:
def dp_3():
    """Placeholder for a dynamic-programming routine that is not written yet.

    NOTE(review): presumably meant to fill ``dp_3_of_a_kind`` — confirm.

    Raises:
        NotImplementedError: always, until the routine is implemented.
    """
    raise NotImplementedError("dp_3 has not been implemented yet")

In [82]:
def tuple_dice(dice):
    """Return the face values of `dice` as a tuple in ascending order.

    `dice` is an iterable of die dicts; only each die's "value" entry is read.
    The result is hashable and independent of the order of the dice.
    """
    return tuple(sorted(die["value"] for die in dice))

In [72]:
# Scratch check of Counter.most_common: with an argument of 1 it returns a
# one-element list holding the (item, count) pair of the most frequent item.
l = [3,4,5,6,7,2,5,4,2,6,7,7,7,7]

counter = Counter(l)

counter.most_common(1)

[(7, 5)]

In [73]:
# Per-category target score: get_best_possible_cat_score scores a category as
# soon as the current dice reach this value and otherwise keeps re-rolling.
# NOTE(review): the values look like the maximum attainable with six dice
# (e.g. 'sixes' = 6 * 6) — confirm against the scoring rules.
ideal_scores = {'ones': 6.0,
 'twos': 12.0,
 'threes': 18.0,
 'fours': 24.0,
 'fives': 30.0,
 'sixes': 36.0,
 'pair': 12.0,
 'two_pair': 22.0,
 'three_of_a_kind': 18.0,
 'four_of_a_kind': 24.0,
 'full_house': 28.0,
 'small_straight': 15.0,
 'large_straight': 20.0,
 'yahtzee': 100.0,
 'chance': 36.0}

In [None]:
# Display the state log stored on `sim` (`sim` is not defined in the cells shown here).
sim.state_log

In [None]:
# Number of entries in `trajectory` (`trajectory` is not defined in the cells shown here).
len(trajectory)

In [None]:

# Dump `trajectory` for inspection: every item of every entry is printed on its
# own line followed by a short separator, and each entry ends with a long rule
# and a blank-looking line.
# NOTE(review): assumes `trajectory` and its entries support len() and integer
# indexing from 0 — confirm where `trajectory` is built.
for j in range(len(trajectory)):
    
    for i in range(len(trajectory[j])):
        print(trajectory[j][i])
        print("-----")
    print("___________________________________")
    print(" ")

In [None]:
# Display the current values of these six names as a tuple.
# NOTE(review): none of them is assigned in the cells shown here — presumably
# left over from unpacking a transition; confirm or remove.
state, action, reward, next_state, done, info