# CE811 Assignment 2

# Can Stacked Genetic Algorithms Following Stacked Rules Stack Cards Following Stacking Rules?

In this notebook, I shall attempt to compare the performance of a variety of genetic algorithm
chromosome types for controlling the behaviour of an agent playing *Hanabi* with 3 copies of itself,
with the aim of seeing which variety of chromosome controller is the most effective.

In [3]:
# But first, a word from our sponsors.
# And by that, I mean 'here's a bunch of imports, have fun'

import matplotlib.pyplot as plt
from matplotlib.pyplot import Figure, Axes
import numpy as np
import math
from typing import TypedDict, Literal, Set, Union, List, TypeVar, Tuple, Callable, Iterable, Any, Dict, Iterator, Type, Optional
from enum import Enum, auto, IntFlag
import hanabi_learning_environment
from hanabi_learning_environment import rl_env
from hanabi_learning_environment.rl_env import Agent
import json
import scipy as sp
import abc
import functools


In [11]:
# And now time for another word from our other sponsors.
# By which I mean 'here are a bunch of type annotations for the sake
# of making it easier for me to use the hanabi-learning-environment'

# Yes, I know, I spelt 'Colour' as 'Color' here.
# This is mostly because the Hanabi Learning Environment
# contains the word 'color' quite a bit, and I figured that
# consistent spelling of that within the code would save
# a few headaches later on.

# Type aliases for the card and action vocabulary used throughout this file.
#
# These are deliberately plain assignments: annotating an alias as `: Type`
# makes static type checkers treat the name as an ordinary variable, which
# then cannot legally be used inside other annotations (as every TypedDict
# below does). The runtime values are unchanged.

# The five suit letters.
Color = Literal["B", "G", "R", "W", "Y"]
# A card colour as seen in a hand: a suit letter, or None when unknown.
CardColor = Optional[Color]
# Zero-based card ranks.
Rank = Literal[0, 1, 2, 3, 4]
# A card rank as seen in a hand: a real rank, or -1 when unknown.
CardRank = Literal[-1, Rank]

# Allowed values of an action dictionary's "action_type" entry.
ActionPD = Literal["PLAY", "DISCARD"]
ActionColor = Literal["REVEAL_COLOR"]
ActionRank = Literal["REVEAL_RANK"]

class BaseActionDict(TypedDict):
    """Common (field-less) base type for every action dictionary in this file."""
    pass

class ActionPDDict(BaseActionDict):
    """Action dictionary for a "PLAY" or "DISCARD" move."""
    # Which of the two moves this is.
    action_type: ActionPD
    # Index of the card the move applies to (presumably a position in the
    # acting player's hand -- TODO confirm against the environment).
    card_index: int

class BaseActionRevealDict(BaseActionDict):
    """Shared base for the two hint ("reveal") action dictionaries."""
    # Which player receives the hint (presumably counted relative to the
    # acting player -- TODO confirm against the environment).
    target_offset: int

class ActionColorDict(BaseActionRevealDict):
    """Action dictionary for a "REVEAL_COLOR" hint."""
    # Always "REVEAL_COLOR".
    action_type: ActionColor
    # The colour being hinted.
    color: Color

class ActionRankDict(BaseActionRevealDict):
    """Action dictionary for a "REVEAL_RANK" hint."""
    # Always "REVEAL_RANK".
    action_type: ActionRank
    # The rank being hinted.
    rank: Rank

# Any one of the three concrete action dictionaries.
ActionDict = Union[ActionPDDict, ActionColorDict, ActionRankDict]
# An action given either as a dictionary or as a plain int.
Action = Union[ActionDict, int]
# Every legal "action_type" string.
ActionType = Literal[ActionPD, ActionColor, ActionRank]

class HandCard(TypedDict):
    """A card whose colour and/or rank may still be unknown to its holder."""
    # Suit letter, or None when the colour is unknown.
    color: CardColor
    # Rank, with -1 allowed as the "unknown" marker.
    rank: CardRank

# A hand made of possibly-unknown cards.
OwnHand = List[HandCard]

class KnownCard(TypedDict):
    """A card whose colour and rank are both known."""
    color: Color
    rank: Rank

# A hand made entirely of fully known cards.
KnownHand = List[KnownCard]

# Either flavour of card dictionary.
Card = Union[HandCard, KnownCard]

# Type variable for helpers that are generic over the card flavour.
TCard = TypeVar("TCard", bound=Card)

class FireworksDict(TypedDict):
    """
    One int per colour. The agent code in this file treats each value as the
    next rank playable on that colour (a card is playable when its rank equals
    the value, and ranks below the value count as already played).
    """
    B: int
    G: int
    R: int
    W: int
    Y: int

class ObservationDict(TypedDict):
    """
    Type description of the observation dictionary handed to an agent's
    `act` method, written to make the hanabi-learning-environment easier
    to use from annotated code.
    """
    current_player: int
    # The agent only acts when this is 0; any other value means it is merely observing.
    current_player_offset: int
    deck_size: int
    discard_pile: List[KnownCard]
    fireworks: FireworksDict
    information_tokens: int
    legal_moves: List[ActionDict]
    life_tokens: int
    # Hint knowledge per player; the agent reads index 0 as its own hand.
    card_knowledge: List[OwnHand]
    # Hands per player; the agent skips index 0 (its own) when counting the cards it can see.
    observed_hands: List[Union[OwnHand, KnownHand]]
    num_players: int
    vectorized: List[Literal[0, 1]]
    
    

class OtherPlayerInfo(TypedDict):
    """
    Summary of the cards held by one other player.

    NOTE(review): `MyAgent.act` builds its own equivalent dictionaries with the
    keys "unknown ranks" / "unknown colors" (with spaces), which do not match
    the `unknown_ranks` / `unknown_colors` keys declared here -- confirm which
    spelling is intended.
    """
    # Cards of theirs that could be played right now.
    playable: List[KnownCard]
    # Cards of theirs that are classed as useless.
    useless: List[KnownCard]
    # Cards of theirs whose rank they have not been told.
    unknown_ranks: List[KnownCard]
    # Cards of theirs whose colour they have not been told.
    unknown_colors: List[KnownCard]

<class 'typing._LiteralGenericAlias'>
<class 'typing._LiteralGenericAlias'>


As mentioned earlier, the aim for this is to find out which
approach to playing Hanabi produces an optimal agent for
playing Hanabi.

And then, this optimal Hanabi agent will have to be tested based
on a set Hanabi agent evaluation environment.

The evaluation environment for the final agent involves
playing a game with 4 players, a hand size of 4,
5 suits of cards with values 0-4 (effectively 1-5 rank but
in an indexable range), and an overall deck size of 50.

Therefore, this agent shall be set up to be able to
comprehend and play this variety of Hanabi.


Here is an enum for the rules that the agent will follow,
along with a class that attempts to play hanabi,
following aforementioned rules,
given as a list of the rule enum items.




In [15]:
class RuleFilter(IntFlag):
    """
    Bit flags describing the conditions under which a rule is worth checking.

    Individual conditions are single bits; `OTHER_HAS_UNKNOWN_PLAYABLE` and
    `OTHER_HAS_UNKNOWN_USELESS` are combined aliases covering both the rank
    and the colour variant of their condition.

    The combined values are declared as ordinary class-level members rather
    than through stacked `@classmethod` / `@property` decorators: that stacking
    is only supported by some Python versions, and where it is not supported
    the attribute is no longer a flag value, so expressions such as
    `RuleFilter.TELL | RuleFilter.OTHER_HAS_UNKNOWN_PLAYABLE` fail.
    """
    allow_nothing = 0
    "A special value to indicate that nothing at all should be allowed."
    idc = auto()
    "Used to indicate 'I don't care'"
    TELL = auto()
    DISCARD = auto()
    MY_FULL_INFO = auto()
    OTHER_HAS_UNKNOWN_PLAYABLE_RANK = auto()
    OTHER_HAS_UNKNOWN_PLAYABLE_COLOR = auto()
    OTHER_HAS_UNKNOWN_USELESS_RANK = auto()
    OTHER_HAS_UNKNOWN_USELESS_COLOR = auto()

    # Combined aliases. They must come after every auto() member so that they
    # do not influence the automatically generated single-bit values.
    OTHER_HAS_UNKNOWN_PLAYABLE = OTHER_HAS_UNKNOWN_PLAYABLE_RANK | OTHER_HAS_UNKNOWN_PLAYABLE_COLOR
    "Either the rank or the colour of another player's playable card is unknown to them."
    OTHER_HAS_UNKNOWN_USELESS = OTHER_HAS_UNKNOWN_USELESS_RANK | OTHER_HAS_UNKNOWN_USELESS_COLOR
    "Either the rank or the colour of another player's useless card is unknown to them."




class RulesEnum(Enum):
    """
    The rules an agent can be told to follow.

    Each member is declared either as a bare arity or as an
    `(arity, filter)` tuple; `__new__` unpacks that declaration, stores the
    arity and filter on the member, and gives the member its position in
    declaration order (0, 1, 2, ...) as its enum value.
    """

    def __new__(
            cls,
            arity: int,
            filter: Optional[RuleFilter] = None
    ) -> "RulesEnum":
        """
        Attempts to create the individual members of this enum.
        :param arity: How many trainable arguments does this rule have?
        :param filter: Are there any special conditions that need
        to be met for this rule to be applied? If None or idc, it's always applicable.
        """
        # The value is the number of members created so far, so every member
        # gets a distinct value no matter what its declaration looks like.
        val: int = len(cls.__members__)
        r: "RulesEnum" = object.__new__(cls)
        r._value_ = val
        r._arity = arity
        # ensures that, no matter what, every rule satisfies the 'idc' filter.
        if filter is None:
            r._filter = RuleFilter.idc
        else:
            r._filter = filter | RuleFilter.idc
        return r

    # ---- rules that play a card ----
    PLAY_HIGHEST_KNOWN_PLAYABLE = (0, RuleFilter.MY_FULL_INFO)
    "Play the highest card which is known to be playable"
    PLAY_LOWEST_KNOWN_PLAYABLE = (0, RuleFilter.MY_FULL_INFO)
    "Play the lowest card which is known to be playable"
    PLAY_MOST_PLAYABLE_THRESH_9998 = 0
    "Play most playable card as long as it's 99.98% playable"
    PLAY_MOST_PLAYABLE_THRESH_90 = 0
    "Play most playable card as long as it's 90% playable"
    PLAY_MOST_PLAYABLE_THRESH_80 = 0
    "Play most playable card as long as it's 80% playable"
    PLAY_MOST_PLAYABLE_THRESH_A = 1
    "Plays most playable card, as long as its playability is above a TRAINABLE threshold"
    PLAY_MOST_PLAYABLE_THRESH_B = 1
    "Plays most playable card, as long as its playability is above a(nother) TRAINABLE threshold"
    PLAY_MOST_PLAYABLE_DECK_EMPTY = 0
    "Plays most playable card anyway when the deck is empty"

    PLAY_MOST_PLAYABLE_LAST_HINTED_CARD_75 = 0
    """Plays the oldest card, which we are least 75% sure is playable,
    that received a hint since our last turn."""
    PLAY_OLDEST_PREVIOUSLY_UNKNOWN_LAST_HINTED_CARD_75 = 0
    """Plays the oldest card, which are are at least 75% sure is playable,
    that we previously knew nothing about last turn."""

    PLAY_MOST_PLAYABLE_LAST_HINTED_CARD_T = 1
    """Plays the oldest card, based on a playability threshold of T,
    that received a hint since our last turn."""
    PLAY_OLDEST_PREVIOUSLY_UNKNOWN_LAST_HINTED_CARD_T = 1
    """Plays the oldest card, based on a playability threshold of T,
    that we previously knew nothing about last turn."""

    # ---- rules that give a hint ----
    TELL_ANY_FULL_INFO_PLAYABLE_CARD = (0, RuleFilter.TELL | RuleFilter.OTHER_HAS_UNKNOWN_PLAYABLE)
    """Attempts to tell the next possible player who has a playable card,
    and half of the information about that card, the rest of the information about that card."""
    TELL_NEXT_FULL_INFO_PLAYABLE_CARD = (0, RuleFilter.TELL | RuleFilter.OTHER_HAS_UNKNOWN_PLAYABLE)
    """Attempts to tell ONLY the next player the remaining unknown info about their card"""

    TELL_ANY_ABOUT_ONES = (0, RuleFilter.TELL)
    """Attempts to tell the next player who has ones that they aren't aware of about the ones they have"""
    TELL_ANY_ABOUT_KNOWN_ONES_COLOR = (0, RuleFilter.TELL)
    """Attempts to tell the next player who has ones that they know of,
    but don't know the colour, of what colour those ones are"""
    TELL_ANY_ABOUT_FIVES = (0, RuleFilter.TELL)
    """Attempts to tell the next player who has fives that they don't know are fives about them
    being fives"""
    TELL_ANY_ABOUT_KNOWN_FIVES_COLOR = (0, RuleFilter.TELL)
    """Tells the next player, who knows they have fives but not what colour those fives are,
    about what colour those fives are."""

    TELL_ANY_PLAYABLE_CARD_RANK = (0, RuleFilter.TELL)
    """Tells the next player who has a playable card,
    that they don't know the rank of, what the rank of that card is"""
    TELL_ANY_PLAYABLE_CARD_COLOR = (0, RuleFilter.TELL)
    """Tells the next player who has a playable card,
    that they don't know the colour of, what the colour of that card is"""
    TELL_ANY_PLAYABLE_CARD_RANDOM = (0, RuleFilter.TELL)
    """If player has a playable card they know nothing about,
    it picks randomly (50%) whether to tell them about the colour or rank of it.
    If one attribute about that card is known, it will just tell the other attribute of it."""
    TELL_ANY_PLAYABLE_CARD_THRESH_A = (1, RuleFilter.TELL)
    """Like 'Tell any playable card random', but has a TRAINABLE
    weight for the RNG thing to pick between telling rank or colour"""
    TELL_ANY_PLAYABLE_CARD_THRESH_B = (1, RuleFilter.TELL)
    """Ditto, but has a secondary TRAINABLE threshold"""

    TELL_HIGHEST_PLAYABLE_CARD_RANK = (0, RuleFilter.TELL | RuleFilter.OTHER_HAS_UNKNOWN_PLAYABLE)
    """Tell the player who has the playable card of the highest rank what the rank of that card is"""


    TELL_ANY_USELESS_CARD_RANDOM = (0, RuleFilter.TELL | RuleFilter.OTHER_HAS_UNKNOWN_USELESS)
    """Tells next player with a useless card, that they don't know stuff about,
    a random fact they don't already know about it (rank or colour)"""
    TELL_ANY_USELESS_CARD_COLOR = (0, RuleFilter.TELL | RuleFilter.OTHER_HAS_UNKNOWN_USELESS)
    "Tells next player with a useless card that they don't know the colour of about its colour"
    TELL_ANY_USELESS_CARD_RANK = (0, RuleFilter.TELL | RuleFilter.OTHER_HAS_UNKNOWN_USELESS)
    "Tells next player with a useless card that they don't know the rank of about it being useless"
    TELL_ANY_USELESS_CARD_BELOW_MIN_PLAYED_RANK = (0, RuleFilter.TELL | RuleFilter.OTHER_HAS_UNKNOWN_USELESS)
    """Tells next player with a useless card that has a rank below the current rank for
    every colour what the rank of that useless card is."""
    TELL_ANY_USELESS_CARD_HALF_INFO = (0, RuleFilter.TELL | RuleFilter.OTHER_HAS_UNKNOWN_USELESS)
    """Tells next player, who knows half of the info about their useless card,
    the other half of the info about their useless card"""

    TELL_NEXT_MOST_COMMON_RANK = (0, RuleFilter.TELL)
    """Attempts to tell the next player about their most common unknown rank
    (don't do anything if no most common unknown rank is present)"""
    TELL_NEXT_MOST_COMMON_COLOR = (0, RuleFilter.TELL)
    """Attempts to tell next player about their most common unknown colour
    (Don't do anything if there's no most common unknown colour)"""

    TELL_NEXT_OLDEST_CARD = (0, RuleFilter.TELL)
    """Attempts to tell next player about their oldest unknown card"""

    # ---- rules that discard a card ----
    DISCARD_KNOWN_USELESS_CARD = (0, RuleFilter.DISCARD | RuleFilter.MY_FULL_INFO)
    """Discard a card that we know is useless"""
    DISCARD_KNOWN_REDUNDANT_CARD = (0, RuleFilter.DISCARD | RuleFilter.MY_FULL_INFO)
    """Discard a card that isn't strictly needed in order to win"""

    DISCARD_MOST_DISPOSABLE_CARD_9998 = (0, RuleFilter.DISCARD)
    """Discard a card that we're 99.98% sure is useless or redundant"""
    DISCARD_MOST_DISPOSABLE_CARD_95 = (0, RuleFilter.DISCARD)
    "Discard a card that we're 95% sure is useless or redundant"
    DISCARD_MOST_DISPOSABLE_CARD_90 = (0, RuleFilter.DISCARD)
    "Discard a card that we're 90% sure is useless or redundant"
    DISCARD_MOST_DISPOSABLE_CARD_80 = (0, RuleFilter.DISCARD)
    "Discard a card that we're 80% sure is useless or redundant"
    DISCARD_MOST_DISPOSABLE_CARD_THRESH_A = (1, RuleFilter.DISCARD)
    """Uses a TRAINABLE threshold to dictate how useless/redundant
    a card needs to be to be considered discardable by this rule"""
    DISCARD_MOST_DISPOSABLE_CARD_THRESH_B = (1, RuleFilter.DISCARD)
    "ditto, but uses a different threshold"

    DISCARD_OLDEST_CARD = (0, RuleFilter.DISCARD)
    "Discards the oldest card (lowest index)"
    DISCARD_OLDEST_UNKNOWN_CARD = (0, RuleFilter.DISCARD)
    "Discards the oldest card (lowest index) we know nothing about."
    DISCARD_LAST_HINTED_CARD_75 = (0, RuleFilter.DISCARD)
    """Discard the least playable card, with a disposability of at least 75%,
    that we recieved a hint about last turn"""
    DISCARD_PREVIOUSLY_UNKNOWN_LAST_HINTED_CARD_75 = (0, RuleFilter.DISCARD)
    """Discard the least playable card, with a disposability of at least 75%,
    that we previously didn't know anything about last turn."""
    DISCARD_LAST_HINTED_CARD_T = (1, RuleFilter.DISCARD)
    """Discard the least playable card, with a disposability from a trainable threshold
    that we recieved a hint about last turn"""
    DISCARD_PREVIOUSLY_UNKNOWN_LAST_HINTED_CARD_T = (1, RuleFilter.DISCARD)
    """Discard the least playable card, with a disposability from a trainable threshold
    that we previously didn't know anything about last turn."""


    @property
    def arity(self) -> int:
        "How many arguments does this rule have?"
        return self._arity

    @property
    def filter(self) -> RuleFilter:
        "Find the filter conditions for this rule (always includes RuleFilter.idc)."
        return self._filter

    def apply_filter(self, f: RuleFilter) -> bool:
        """
        Sees if this rule passes the given filter. This is here so that time is not
        wasted on checking rules whose conditions are not currently met.

        A rule passes when its own filter shares AT LEAST ONE flag with `f`; it does
        not need to share all of them.

        NOTE(review): every rule carries `idc`, so any `f` containing `idc` matches
        every rule, while rules declared without a filter carry ONLY `idc` and thus
        match nothing unless `f` contains `idc` -- confirm this is intended.

        :param f: the flags describing what is currently allowed/applicable.
        :return: True if this rule's filter and `f` have any flag in common.
        """
        return bool(f & self._filter)

    @classmethod
    def filtered(
            cls,
            members: Optional[List["RulesEnum"]] = None,
            filter_by: Optional[RuleFilter] = None
    ) -> List["RulesEnum"]:
        """
        Attempts to filter a list of RulesEnum members, only returning a list of those members who
        satisfy the given condition
        :param members: the list of RulesEnum members to filter. If None,
        a list of all of the RulesEnum members is used instead.
        :param filter_by: the condition that must be satisfied for the filtering.
        If not given, or if RuleFilter.idc is given, no filtering will happen.
        :return: A new list (the input list is never modified) holding the members
        that pass `apply_filter(filter_by)`, in their original order.
        """
        if members is None:
            members = [*RulesEnum.__members__.values()]
        if (filter_by is None) or (filter_by == RuleFilter.idc):
            # Nothing to filter on: hand back a shallow copy.
            return members[:]
        return [m for m in members if m.apply_filter(filter_by)]



NameError: name 'TELL_ANY_USELESS_CARD' is not defined

In [12]:


class ChromosomeInfoDict(TypedDict):
    """
    A dictionary of information which will be given to each chromosome when
    they're being asked to provide info to the agent. This may or may not be
    used when a chromosome decides which list of rules to give to the agent.
    """
    # Presumably the number of information (hint) tokens currently available -- TODO confirm.
    info_tokens: int
    # Presumably the number of life tokens remaining -- TODO confirm.
    lives: int
    # The per-colour firework values.
    fireworks: FireworksDict
    # Presumably the number of cards left in the deck -- TODO confirm.
    deck_left: int
    # Presumably the seat index of the agent asking for rules -- TODO confirm.
    player_index: int



class I_AgentChromosome(abc.ABC):
    """
    Abstract interface for a chromosome: something that, given a summary of
    the current game, supplies an agent with an ordered list of rules and the
    trained parameters belonging to those rules.
    """

    @abc.abstractmethod
    def get_rules_and_parameters(self, info: ChromosomeInfoDict) -> Tuple[
        List[RulesEnum],
        Dict[
            RulesEnum, Union[int, Tuple[int,...]]
        ]]:
        """
        Given a ChromosomeInfoDict, return the appropriate ordered list of rules,
        along with a dictionary holding the relevant trained parameters for
        the rules that have trained parameters.
        :param info: The info to give to the chromosome, for purposes of choosing
        what ruleset to return
        :return: a 2-tuple of (ordered list of rules, dictionary mapping a rule
        to its trained parameter or tuple of trained parameters).
        """
        pass




"""
I'm calling this 'MyAgent' because that'll save me from having to rename this agent when I'm submitting it.
"""


class MyAgent(Agent):

    colors: Tuple[Color] = ('Y', 'B', 'W', 'R', 'G')
    """
    A tuple containing the possible colours.
    """
    ranks: Tuple[Rank] = (0, 1, 2, 3, 4)

    individual_hanabi_cards: Tuple[KnownCard] = tuple(
        {"color": c, "rank": r}
        for c in colors
        for r in [0, 1, 2, 3, 4]
    )
    full_hanabi_deck: Tuple[KnownCard] = tuple(
        {"color": c, "rank": r}
        for c in colors
        for r in [0, 0, 0, 1, 1, 2, 2, 3, 3, 4]
    )

    @staticmethod
    def card_counts() -> Dict[Color, Dict[Rank, int]]:
        """
        Build a fresh table of how many copies of each card a full deck holds.
        :return: a new nested dictionary, colour -> rank -> number of copies
        (three copies of rank 0, one copy of rank 4, two copies of anything else).
        """
        def copies_of(rank):
            # The lowest rank is the most plentiful, the highest rank is unique.
            if rank == 0:
                return 3
            if rank == 4:
                return 1
            return 2

        return {
            colour: {rank: copies_of(rank) for rank in MyAgent.ranks}
            for colour in MyAgent.colors
        }

    def __init__(self, rules: I_AgentChromosome, config, *args, **kwargs):
        """
        Sets up a new agent.
        :param rules: The I_AgentChromosome this agent keeps hold of as its source of rules.
        :param config: The configuration for the game that this agent is playing.
        """
        self.ruler: I_AgentChromosome = rules
        self.config = config
        # Maximum number of information tokens: taken from the config when it
        # has an 'information_tokens' entry, otherwise 8.
        self.max_information_tokens: int = config.get('information_tokens', 8)

        # Counts the observations seen before this agent's first own turn.
        # May or may not be used by chromosomes that want not-entirely-symmetric
        # agent behaviours.
        self._playernum: int = 0

        # True until this agent has had its first turn.
        self._not_played_yet: bool = True

        # Everything we knew about our cards last turn; starts out as four
        # cards with no known colour and the -1 'unknown' rank.
        self._last_turn_cards: List[HandCard] = [
            dict(color=None, rank=-1) for _ in range(4)
        ]

    def reset(self, config):
        """
        Puts this agent back into its freshly-constructed state for a new game.
        The chromosome given at construction time is kept as it is.
        :param config: The configuration for the game that is about to be played.
        """
        self.config = config
        # Maximum number of information tokens: taken from the config when it
        # has an 'information_tokens' entry, otherwise 8.
        self.max_information_tokens: int = config.get('information_tokens', 8)

        # Forget our seat count, and that we have ever taken a turn.
        self._playernum = 0
        self._not_played_yet = True

        # Four cards about which nothing is known yet.
        self._last_turn_cards = [dict(color=None, rank=-1) for _ in range(4)]

    @staticmethod
    def calculate_all_unseen_cards(
            discard_pile: List[KnownCard],
            player_hands: List[List[KnownCard]],
            fireworks: FireworksDict
    ) -> List[KnownCard]:
        """
        Works out which cards we have not seen anywhere yet.
        Every card we cannot see is either in our own hand or still in the deck;
        every other card is in the discard pile, in somebody else's hand,
        or has already been played onto a firework.
        :param discard_pile: the cards that have been discarded so far
        :param player_hands: every player's hand; the first entry is skipped,
        only the hands after it are counted as visible.
        :param fireworks: for each colour, how many ranks have been played on it
        :return: a new list holding one entry per unseen copy of each card.
        """
        assert len(MyAgent.full_hanabi_deck)==50 # full hanabi deck size.

        unseen: List[KnownCard] = list(MyAgent.full_hanabi_deck)

        def strike(seen_card) -> None:
            # Take one copy of the card out of the unseen pool; a card with no
            # copies left in the pool is silently ignored.
            try:
                unseen.remove(seen_card)
            except ValueError:
                pass

        # Cards that have been discarded...
        for thrown_away in discard_pile:
            strike(thrown_away)

        # ...cards we can see in the other players' hands...
        for visible_hand in player_hands[1:]:
            for held in visible_hand:
                strike(held)

        # ...and cards already played onto the fireworks.
        for (color, height) in fireworks.items():
            for rank in range(height):
                strike({"color": color, "rank": rank})

        # What is left is the deck UNION our own hand.
        return unseen

    @staticmethod
    def filter_card_list_by_hint(
            not_impossible_cards: List[KnownCard],
            hinted: Card
    ) -> List[Card]:
        """
        Given a list of cards which are not impossible to get,
        returns only the cards which have a rank/colour matching those in the hint.

        An unknown colour is given as None. An unknown rank may be given either
        as None or as -1 (the 'unknown' marker that HandCard and this agent's own
        bookkeeping use); both are treated as "no rank information", so that a
        card with rank -1 does not wrongly filter out every candidate.

        :param not_impossible_cards: list of cards that aren't impossible to get
        :param hinted: the card with the info we know about it
        :return: a new list of the cards that the given 'hinted' card could be.
        The input list is never modified.
        """
        known_color = hinted["color"]
        known_rank = hinted["rank"]
        if known_rank == -1:
            # -1 is a placeholder, not a real rank: treat it like None.
            known_rank = None

        return [
            c for c in not_impossible_cards
            if (known_color is None or c["color"] == known_color)
            and (known_rank is None or c["rank"] == known_rank)
        ]

    @staticmethod
    def is_card_playable(card: KnownCard, fireworks: FireworksDict) -> bool:
        """
        Tells whether a card could be played right now.
        :param card: the card being asked about
        :param fireworks: the fireworks dict; the entry for the card's colour
        is compared against the card's rank
        :return: True when the card's rank equals its colour's fireworks entry.
        """
        card_rank = card['rank']
        stack_value = fireworks[card['color']]
        return card_rank == stack_value

    @classmethod
    def filter_card_list_by_playability(
            cls,
            card_list: List[KnownCard],
            fireworks: FireworksDict
    ) -> List[KnownCard]:
        """
        Picks out the cards in the given list that could be played right now.
        :param card_list: list of cards to check for playability
        :param fireworks: the fireworks dictionary (current state of the game)
        :return: a new list with the playable cards of card_list, in their original order.
        """
        playable_now: List[KnownCard] = []
        for candidate in card_list:
            if cls.is_card_playable(candidate, fireworks):
                playable_now.append(candidate)
        return playable_now

    @classmethod
    def get_unplayables_from_discard_pile(cls, discard_pile: List[KnownCard]) -> List[KnownCard]:
        """
        Attempts to get all of the cards that are currently unplayable,
        given the state of the discard pile.

        Cards are plain dictionaries, which cannot be stored in (or looked up in)
        a set, so the result is a list of card dictionaries without duplicates;
        membership tests such as `card in result` work on it.

        :param discard_pile: The list of all of the cards that have been discarded
        :return: A list containing each card which is known to not be playable, once.
        This consists both of cards which are 'unplayable' because every copy of them
        has been discarded, but also those which are 'unplayable' because the card that
        needed to be played before playing this one can't be played due to all of its
        copies being discarded instead.
        """
        # How many copies of each card have NOT been discarded yet.
        undiscarded_counts: Dict[Color, Dict[Rank, int]] = cls.card_counts()

        # Keyed by (colour, rank) because the card dicts themselves are unhashable;
        # a dict (rather than a set of keys) keeps a stable, first-seen order.
        unplayables: Dict[Tuple[Color, Rank], KnownCard] = {}

        for d in discard_pile:
            dcol = d["color"]
            drank = d["rank"]
            if (dcol, drank) in unplayables:
                # Already known to be dead; no need to keep counting copies.
                continue
            undiscarded_counts[dcol][drank] -= 1
            if undiscarded_counts[dcol][drank] <= 0:
                # Every copy is gone, so this rank and every higher rank of the
                # same colour can never be played.
                for r in cls.ranks:
                    if r >= drank:
                        unplayables.setdefault((dcol, r), {"color": dcol, "rank": r})

        return list(unplayables.values())

    @staticmethod
    def filter_card_list_by_unplayable(
            card_list: List[KnownCard],
            fireworks: FireworksDict,
            discard_unplayable: Iterable[KnownCard]
    )-> List[KnownCard]:
        """
        Finds out which cards in card_list are never going to be playable on their
        colour's firework, either because their rank has already been played, or
        because the discard pile has made them impossible to reach.

        Cards are compared by their (colour, rank) pair: card dictionaries are
        unhashable, so testing them directly against a set raises a TypeError.

        :param card_list: the card list to filter
        :param fireworks: the current fireworks dictionary
        :param discard_unplayable: the cards that we know are unplayable according to
        the discard pile (any iterable of card dictionaries, including an empty set)
        :return: a new list with the useless cards of card_list, in their original order.
        """
        dead_keys = {(u["color"], u["rank"]) for u in discard_unplayable}
        # noinspection PyTypedDict
        return [
            c for c in card_list
            if (c["color"], c["rank"]) in dead_keys
            or c["rank"] < fireworks[c["color"]]
        ]

    def act(self, observation: ObservationDict) -> Optional[Action]:
        """
        Looks at an observation and (eventually) chooses an action.

        When the observation is not for the acting player (offset != 0) this
        returns None. Otherwise it gathers information about our own hand and
        the other players' hands and builds up the RuleFilter flags describing
        which kinds of rule are currently worth checking.

        NOTE: this method is unfinished (see the TODOs at the bottom). No rule is
        applied yet, so on our own turn it currently falls off the end and
        returns None as well.

        :param observation: the observation dictionary for this agent.
        :return: None if it is not our turn (and, for now, in every other case too).
        """

        if observation['current_player_offset'] != 0:
            # but only the player with offset 0 is allowed to make an action.
            # The other players are just observing.

            # Until our first own turn, every observation of somebody else's
            # turn bumps our seat counter.
            if self._not_played_yet: # also updating the cheeky not-that-symmetric thing
                self._playernum += 1
            return None
        if self._not_played_yet:
            self._not_played_yet = False

        # attempts to filter out which rules can be skipped.
        filter_for_actions: RuleFilter = RuleFilter.allow_nothing
        # Discarding is flagged only while we hold fewer than the maximum number of tokens...
        if observation['information_tokens'] < self.max_information_tokens:
            filter_for_actions |= RuleFilter.DISCARD
        # ...and hinting only while at least one token is available.
        if observation['information_tokens'] > 0:
            filter_for_actions |= RuleFilter.TELL

        fireworks: FireworksDict = observation['fireworks']
        # Index 0 is our own hand.
        card_hints: OwnHand = observation['card_knowledge'][0]

        hand_size=len(card_hints)

        discarded: List[KnownCard] = observation['discard_pile']

        discard_unplayables: Set[KnownCard] = MyAgent.get_unplayables_from_discard_pile(discarded)

        # build some useful lists of information about what we hold in our hand and what team-mates know about their hands.

        all_unseen_cards: List[KnownCard] = self.calculate_all_unseen_cards(
            discarded, observation['observed_hands'], fireworks
        )
        # For each card in our hand: which unseen cards it could be, which of those
        # are playable / useless, and the resulting fractions.
        # NOTE(review): the two divisions below raise ZeroDivisionError if the hints
        # for a card match none of the unseen cards -- confirm that cannot happen.
        possible_cards_by_hand: List[List[Card]] = [self.filter_card_list_by_hint(all_unseen_cards, h) for h in card_hints]
        playable_cards_by_hand: List[List[KnownCard]] =[self.filter_card_list_by_playability(posscards, fireworks) for posscards in possible_cards_by_hand]
        probability_cards_playable: List[float] =[len(playable_cards_by_hand[index])/len(possible_cards_by_hand[index]) for index in range(hand_size)]
        useless_cards_by_hand: List[List[KnownCard]] = [self.filter_card_list_by_unplayable(posscards, fireworks, discard_unplayables) for posscards in possible_cards_by_hand]
        probability_cards_useless: List[float] =[len(useless_cards_by_hand[index])/len(possible_cards_by_hand[index]) for index in range(hand_size)]

        # Local record type for what we know about one other player.
        # NOTE(review): the keys "unknown ranks" / "unknown colors" contain spaces,
        # unlike the module-level OtherPlayerInfo type -- confirm which is intended.
        other_player_info = TypedDict("other_player_info", {"playable": List[KnownCard],"useless": List[KnownCard],
                                                            "unknown ranks": List[KnownCard], "unknown colors": List[KnownCard]})

        
        # Keyed by the other player's index in the observation (1 .. num_players - 1).
        others_info: Dict[int, other_player_info] = {}

        for i in range(1, observation['num_players']):
            other_cards: List[KnownCard] = observation['observed_hands'][i]
            other_hand: List[HandCard] = observation['card_knowledge'][i]
            other_playable: List[KnownCard] =  self.filter_card_list_by_playability(other_cards, fireworks)
            other_useless: List[KnownCard] = self.filter_card_list_by_unplayable(other_cards, fireworks, discard_unplayables)
            # The `i` inside these comprehensions is local to the comprehension;
            # it does not disturb the player index `i` of the enclosing loop.
            other_unknown_ranks: List[KnownCard] = [other_cards[i] for i in range(len(other_cards)) if other_hand[i]["rank"] is None]
            other_unknown_colors: List[KnownCard] = [other_cards[i] for i in range(len(other_cards)) if other_hand[i]["color"] is None]
            others_info[i] = {
                "playable": other_playable,
                "useless": other_useless,
                "unknown ranks": other_unknown_ranks,
                "unknown colors": other_unknown_colors
            }
            # NOTE(review): only OTHER_HAS_UNKNOWN_USELESS_RANK is ever set here; the
            # other OTHER_HAS_* flags are not computed yet (see the TODO below).
            if len(other_useless) > 0:
                if not filter_for_actions & RuleFilter.OTHER_HAS_UNKNOWN_USELESS_RANK:
                    # Cards are compared by value, so a useless card counts as
                    # 'rank unknown' if an equal card appears in other_unknown_ranks.
                    if any(i for i in other_useless if i in other_unknown_ranks):
                        filter_for_actions |= RuleFilter.OTHER_HAS_UNKNOWN_USELESS_RANK
                    


        # Cards of ours that are missing at least one attribute.
        my_unknown_cards: List[Card] = [c for c in card_hints if c["rank"] is None or c["color"] is None]
        # NOTE(review): `or` means a card counts as 'known' as soon as EITHER its rank
        # or its colour is known, yet a non-empty list sets MY_FULL_INFO -- confirm
        # whether `and` (both attributes known) was intended.
        my_known_cards: List[KnownCard] = [c for c in card_hints if c["rank"] is not None or c["color"] is not None]
        my_known_ranks: List[Card] = [c for c in my_known_cards if c["rank"]  is not None]
        my_known_colors:List[Card] = [c for c in my_known_cards if c["color"] is not None]
        if len(my_known_cards) > 0:
            filter_for_actions |= RuleFilter.MY_FULL_INFO

        # Colours whose firework value is the current maximum / minimum.
        # noinspection PyTypeChecker
        most_played_colours: List[Color] = [kv[0] for kv in fireworks.items() if kv[1] == max(fireworks.values())]
        # noinspection PyTypeChecker
        least_played_colors: List[Color] = [kv[0] for kv in fireworks.items() if kv[1] == min(fireworks.values())]



        # TODO: filter based on opponent status
        # TODO: rule applications


