In [49]:
# What things can you do on your turn in Hanabi?
# 1. Play a card from your hand. -- Choose to play, choose a card.
# 2. Discard a card from your hand. -- Choose to discard, choose a card.
# 3. Give a hint to the other player about cards in their hand. -- Choose to give hint, choose a hint.

# What information do you have at any point in the Hanabi Game?
# 1. Hints given for each of your cards.
# 2. Number of remaining hints.
# 3. Number of cards left in the deck.
# 4. Number of lives left.
# 5. Hints given to other players already.
# 6. Cards in play.
# 7. Score.

# Maybe we can describe the state by [[score], [lives remaining], [hints available], [# cards in deck],
#                                     [Cards in play], [Hints in Hand], [Other People's Hands Hints]]

In [78]:
# We will have a game manager script that takes in a list of players and sends them the game state when it is their turn.
# We then instantiate a couple of players and have them play games trying to achieve the highest score.

In [61]:
import itertools
import random

class HanabiGame:
    
    def __init__(self, hand_size = 5, hints = 8, lives = 3, num_players = 2, num_suits = 5, num_values = 5):
                    
        # Hand Size
        if hand_size < 1:
            print("How are you going to play without any cards in your hand, silly?")
        self.hand_size = hand_size
        
        # Hints remaining
        self.hints = hints
        self.max_hints = self.hints
        
        # Lives remaining
        self.lives = lives

        # Number of players
        if num_players < 2:
            print("Hanabi must be played with at least two players.")
            
        self.num_players = num_players
        
        # Suits of Cards
        self.num_suits = num_suits
        self.suits = list(range(self.num_suits))

        # Values of Cards
        # There are 3 zeros of each suit, one of the maximal value, and two of all others.
        if num_values < 2:
            print("Please allow for at least two values.")
        
        self.num_values = num_values
        self.values = [0, 0, 0] + list(range(1, self.num_values-1))*2 + [self.num_values-1]

        # Deck
        # A card in the deck's ten's place is the suit of the card and 
        # the one's place is the value
        self.deck = [(suit,value) for suit, value in itertools.product(self.suits,self.values)]

        # Cards in the Hands of each Player
        self.player_hands = [[] for i in range(num_players)]
        self.initialDeal()

        # Hint information available to each player
        # self.player_hints[0] = [Hints received for suits, Hints received for values, turns_in_hand]
        # Hints received are either 0 (no info), 1 (positive info), or -1 (negative info)
        # So, [[-1, 0, 0, 0, 0], [-1, 1, -1, -1, -1], 3] would correspond to knowing a card is
        # NOT of suit 0 and IS of value 1 and has been in your hand for 3 turns.
        self.player_hints = [[[[0]*self.num_suits,[0]*self.num_values, 0] for j in range(self.hand_size)]
                             for i in range(self.num_players)]
        
        # Keep track of whose turn it is
        self.current_player_turn = 0
        
        # Keep track of the score.
        # Note*: Normally in Hanabi your score is zero until the last turn
        # is played, but to help the AI, the current score will only become
        # zero when all lives are lost.
        self.current_score = 0
        
        # Cards that have been played onto the board
        # -1 corresponds to no card of a particular suit having been played.
        # The current value of self.board[i] = Highest value of card played
        # of that suit.
        self.board = [-1]*self.num_suits
        
        # Discard Pile
        # self.discards[i][j] = The number of suit i value j cards that have been discarded so far.
        self.discards = [[0]*num_values for i in range(self.num_suits)]
        
        # Keep a list of possible next moves
        self.legal_move_list = []
        # Initialize legal_move_list
        self.legalMoves()
        
        # Keep track of the number of moves left when the final round starts
        self.final_round_moves = self.num_players
        self.game_is_ending = False
        
        # When the game ends, the game manager should deal with cleaning everything up.
        self.game_end = False


    def initialDeal(self):
        random.shuffle(self.deck)
        for i in range(self.hand_size):
            for hand in range(self.num_players):
                self.player_hands[hand].append(self.deck.pop())
    
    # Returns the board state from a given player's perspective.
    # In particular, it returns all public information and the cards in the opponents hands.
    # [[score]: 1, [lives remaining]: 1, [hints available]: 1, [# cards in deck]: 1,
    # [Cards in play]: num_suits, [Discards]: num_suits*num_values, [Hints in Hand], [Other People's Hands Hints]]
    def boardState(self, player_number):
        return [self.current_score, self.lives, self.hints, len(self.deck), self.board, self.discards, 
                self.player_hints[player_number]] + [hint for i,hint in enumerate(self.player_hints) if i!= player_number]
    
    def printBoardState(self, player_number=-1):
        
        # If no player is specified, return the view from the current player
        if player_number == -1:
            player_number = self.current_player_turn
#         cur_board = boardState(player_number)
        print(f"From the perspective of player {player_number}")
        print(f"The current score is {self.current_score}.")
        print(f"You have {self.lives} lives and {self.hints} hints left.")
        print(f"There are {len(self.deck)} cards left in the deck.")
        for i, max_card in enumerate(self.board):
            if max_card == -1:
                print(f"You have not played any cards of suit {i}")
            elif max_card == 0:
                print(f"You have played the 0 of suit {i}.")
            else:
                print(f"You have played {max_card} cards of suit {i}.")
        
        for card_index, hint in enumerate(self.player_hints[player_number]):
            if hint == [[0]*self.num_suits, [0]*self.num_values, 0]:
                print(f"You don't know anything about card {card_index}.")
            else:
                if hint[0] == [0]*self.num_suits:
                    print(f"You don't know anything about the suit of card {card_index}.")
                else:
                    if 1 in hint[0]:
                        print(f"You know that {card_index} is of suit {(hint[0]).index(1)}")
                    else:
                        for suit_index, existence in enumerate(hint[0]):
                            if existence == -1:
                                print(f"You know card {card_index} is not of suit {suit_index}.")
                
                if hint[1] == [0]*self.num_values:
                    print(f"You don't know anything about the value of card {card_index}.")
                else:
                    if 1 in hint[1]:
                        print(f"You know that {card_index} is of suit {(hint[0]).index(1)}")
                    else:
                        for value_index, existence in enumerate(hint[1]):
                            if existence == -1:
                                print(f"You know card {card_index} is not of value {value_index}.")

                            
    # Returns a tuple (The index of the current player's turn, [List of allowable moves])    
    def legalMoves(self):
        
        # Moves will be described by a tuple 
        # (Choose to Play/Discard/Hint, Index of Card to Discard/Play [-1 if giving hint instead], 
        #                              Hint Type[(player, suit, value)])
        # Hint Type is (-1,-1,-1) if not giving a hint.
        allowed_moves = []
        
        # Play moves (0,X,-1,-1)
        for i in range(len(self.player_hands[self.current_player_turn])):
            allowed_moves.append((0,i,-1,-1,-1))
        
        # Discard moves (1,X,-1,-1)
        for i in range(len(self.player_hands[self.current_player_turn])):
            allowed_moves.append((1,i,-1,-1,-1))
        
        
        # If hint available, hint moves (2, -1, Player, Suit, Value)
        if self.hints > 0:
            for player_index in range(self.num_players):
                if player_index != self.current_player_turn:
                    for suit_index in range(self.num_suits):
                        allowed_moves.append((2,-1,player_index,suit_index,-1))
                    for value_index in range(self.num_values):
                        allowed_moves.append((2,-1,player_index,-1,value_index))
        
        self.legal_move_list = allowed_moves
        return (self.current_player_turn, allowed_moves)
    
    def printLegalMoves(self):
        self.legalMoves()
        print(f"It is Player {self.current_player_turn}'s turn.")
        if len(self.legal_move_list) == 0:
            print("You done goofed. No legal moves.")
            return
        for index, move in enumerate(self.legal_move_list):
            if move[0] == 0:
                print(f"{index}: You can play your {move[1]} card.")
            elif move[0] == 1:
                print(f"{index}: You can discard your {move[1]} card.")
            else:
                if move[3] != -1:
                    print(f"{index}: You can tell player {move[2]} about their cards of suit {move[3]}.")
                elif move[4] != -1:
                    print(f"{index}: You can tell player {move[2]} about their cards of value {move[4]}.")
                else:
                    print('We definitely should not have gotten here.')
    
    # Current Player Draws a card
    def drawCard(self):
        if len(self.deck) == 0:
            self.game_is_ending = True
            self.final_round_moves -= 1
            if self.final_round_moves == 0:
                self.endGame()

        if len(self.player_hands[self.current_player_turn]) != 4:
            print(f"A player is trying to draw with {len(self.player_hands[self.current_player_turn])} cards in their hand.")
            return
        else:
            self.player_hands[self.current_player_turn].append(self.deck.pop())
            self.player_hints[self.current_player_turn].append([[0]*self.num_suits,[0]*self.num_values])
        
        self.legalMoves()
    
    # Implement this
    def endGame(self):
        self.game_end = True
        return
    
    def playCard(self, index):
        if index > len(self.player_hands[self.current_player_turn]):
            print("That card is unplayable. Uh oh.")
            return
        
        card = self.player_hands[self.current_player_turn][index]
        suit = card[0]
        value = card[1]
        
        
        # If the card is playable, play it, increment score, and draw a card.
        # Otherwise, we'll lose a life, check game end, discard it, and draw a card.
        if self.board[suit] + 1 == value:
            self.board[suit] = value
            self.current_score += 1
        else:
            self.lives -= 1
            if self.lives <= 0:
                endGame()
                return
        self.discardCard(index, gain_hint = False)
        self.drawCard()

    # Only draw a card if gain_hint = True
    # Update discard pile
    def discardCard(self, index, gain_hint = True):
        self.discards[self.player_hands[self.current_player_turn][index][0]][
                            self.player_hands[self.current_player_turn][index][1]]+= 1
        del self.player_hands[self.current_player_turn][index]
        del self.player_hints[self.current_player_turn][index]
        if gain_hint == True:
            self.drawCard()
            if self.hints < self.max_hints:
                self.hints += 1
        return

    # If a hint is given whilst game is ending, remember to decrement final_round_moves
    # self.player_hints = [[[[0]*self.num_suits,[0]*self.num_values] for j in range(self.hand_size)]
    #                        for i in range(self.num_players)]
    def giveHint(self, hint):
        player, suit_hint, value_hint = hint[0], hint[1], hint[2]
        
        if self.hints <= 0:
            print("You cannot give a hint, as you have no hint tokens left!")
        
        # card_hints is of the form [[suit hints], [value hints]]
        for card_index, card_hints in enumerate(self.player_hints[player]):
            card_suit = self.player_hands[player][card_index][0]
            card_value = self.player_hands[player][card_index][1]
            if (suit_hint != -1):
                if suit_hint == card_suit:
                    card_hints[0] = [1 if x == card_suit else -1 for x in range(self.num_suits) ]
                else:
                    card_hints[0][suit_hint] = -1
            elif (value_hint != 1):
                if value_hint == card_value:
                    card_hints[1] = [1 if x == card_value else -1 for x in range(self.num_values) ]
                else:
                    card_hints[1][suit_hint] = -1
            else:
                print("We were given neither a suit nor a value.")
                return
        self.hints -= 1
        return

    
    def performMove(self, index = -1, move = None):
        if index == -1 and move == None:
            print("What move would you like to do?")
            return
        if index > len(self.legal_move_list):
            print("Choose a valid move. [Index out of range.]")
            return
        if move != None and move not in legal_move_list:
            print("Choose a valid move. [Move not legal.]")
            return

        if index != -1:
            move = self.legal_move_list[index]
#         else:
#             index = legal_move_list.index(move)
        
        # Reminder: move = (play/discard/hint, card to play/discard index, player to give hint, suit hint, value hint)
        # Reminder: move[0]: 0 - Play, 1 - Discard, 2 - Hint
        if move[0] == 0:
            self.playCard(move[1])
        elif move[0] == 1:
            self.discardCard(move[1])
        elif move[0] == 2:
            self.giveHint((move[2], move[3], move[4]))
        
        # Next player's turn
        self.legalMoves()
        for card in self.player_hints[self.current_player_turn]:
            card[2] += 1
        self.current_player_turn = (self.current_player_turn + 1) % self.num_players
        

In [69]:
test_game = HanabiGame()

In [70]:
test_game.printBoardState()

From the perspective of player 0
The current score is 0.
You have 3 lives and 8 hints left.
There are 40 cards left in the deck.
You have not played any cards of suit 0
You have not played any cards of suit 1
You have not played any cards of suit 2
You have not played any cards of suit 3
You have not played any cards of suit 4
You don't know anything about card 0.
You don't know anything about card 1.
You don't know anything about card 2.
You don't know anything about card 3.
You don't know anything about card 4.


In [71]:
test_game.printLegalMoves()

It is Player 0's turn.
0: You can play your 0 card.
1: You can play your 1 card.
2: You can play your 2 card.
3: You can play your 3 card.
4: You can play your 4 card.
5: You can discard your 0 card.
6: You can discard your 1 card.
7: You can discard your 2 card.
8: You can discard your 3 card.
9: You can discard your 4 card.
10: You can tell player 1 about their cards of suit 0.
11: You can tell player 1 about their cards of suit 1.
12: You can tell player 1 about their cards of suit 2.
13: You can tell player 1 about their cards of suit 3.
14: You can tell player 1 about their cards of suit 4.
15: You can tell player 1 about their cards of value 0.
16: You can tell player 1 about their cards of value 1.
17: You can tell player 1 about their cards of value 2.
18: You can tell player 1 about their cards of value 3.
19: You can tell player 1 about their cards of value 4.


In [72]:
test_game.performMove(index = 10)

In [73]:
test_game.printLegalMoves()

It is Player 1's turn.
0: You can play your 0 card.
1: You can play your 1 card.
2: You can play your 2 card.
3: You can play your 3 card.
4: You can play your 4 card.
5: You can discard your 0 card.
6: You can discard your 1 card.
7: You can discard your 2 card.
8: You can discard your 3 card.
9: You can discard your 4 card.
10: You can tell player 0 about their cards of suit 0.
11: You can tell player 0 about their cards of suit 1.
12: You can tell player 0 about their cards of suit 2.
13: You can tell player 0 about their cards of suit 3.
14: You can tell player 0 about their cards of suit 4.
15: You can tell player 0 about their cards of value 0.
16: You can tell player 0 about their cards of value 1.
17: You can tell player 0 about their cards of value 2.
18: You can tell player 0 about their cards of value 3.
19: You can tell player 0 about their cards of value 4.


In [74]:
test_game.printBoardState()

From the perspective of player 1
The current score is 0.
You have 3 lives and 7 hints left.
There are 40 cards left in the deck.
You have not played any cards of suit 0
You have not played any cards of suit 1
You have not played any cards of suit 2
You have not played any cards of suit 3
You have not played any cards of suit 4
You know card 0 is not of suit 0.
You don't know anything about the value of card 0.
You know card 1 is not of suit 0.
You don't know anything about the value of card 1.
You know that 2 is of suit 0
You don't know anything about the value of card 2.
You know that 3 is of suit 0
You don't know anything about the value of card 3.
You know card 4 is not of suit 0.
You don't know anything about the value of card 4.


In [75]:
test_game.discards

[[0, 0, 0, 0, 0],
 [0, 0, 0, 0, 0],
 [0, 0, 0, 0, 0],
 [0, 0, 0, 0, 0],
 [0, 0, 0, 0, 0]]

In [None]:
# Implementation of Game Manager that will run games for the player AI and provide training

In [159]:
def flatten(list_to_flatten):
        found_something_to_flatten = True
        while(found_something_to_flatten == True):
            found_something_to_flatten = False
            temp_list = []
            for i, item in enumerate(list_to_flatten):
                if isinstance(item, list):
                    found_something_to_flatten = True
                    temp_list += list_to_flatten[i]
                else:
                    temp_list.append(list_to_flatten[i])
            list_to_flatten = temp_list
        return list_to_flatten


test_game = HanabiGame()

flat_state = flatten(test_game.boardState(0)+[0]*5)
input_size = len(flat_state) # This equals 149 in 2 player normal game


# Genetic Algorithm to find a good HanabiAI
# Initialize with 2*N players (Initial Population)

population = 2*5
current_generation_weights = []

for i in range(population):
    cur_weights = []
    cur_weights.append([random.random() for i in range(input_size)])
    current_generation_weights.append(cur_weights)


# Evalute the fitness of each player (which we'll consider to be the maximum score obtained before game ends.)

# Generate a new generation by randomly choosing two* parents biased upon fitness
# Create a child with new weights based upon the cross-over of parents
# Mutate with some small chance
# Repeat






# model_num and player number are not actually the same... BE CAREFUL!
neural_input_player_0 = np.asarray(flat_state)
neural_input_len = len(neural_input_player_0)
neural_input = np.atleast_2d(neural_input)

test_player_0 = HanabiPlayer(neural_input_len)
test_player_1 = HanabiPlayer(neural_input_len)

current_pool = [test_player_0, test_player_1]

model_num = 0


output_prob = current_pool[model_num].choose_best_move(test_game.legal_move_list, flat_state)

149


In [None]:
# Implementation of Player AI

In [149]:
import numpy as np
import random
import statistics
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Activation, Dense

from tensorflow.keras.optimizers import Adam
from tensorflow.keras.metrics import categorical_crossentropy

class HanabiPlayer:
    
#     def flatten(self, list_to_flatten):
#         found_something_to_flatten = True
#         while(found_something_to_flatten == True):
#             found_something_to_flatten = False
#             temp_list = []
#             for i, item in enumerate(list_to_flatten):
#                 if isinstance(item, list):
#                     found_something_to_flatten = True
#                     temp_list += list_to_flatten[i]
#                 else:
#                     temp_list.append(list_to_flatten[i])
#             list_to_flatten = temp_list
#         return list_to_flatten
    
    # Game state will be ordered as
    # [[score]: 1, [lives remaining]: 1, [hints available]: 1, [# cards in deck]: 1,
    # [Cards in play]: num_suits, [Discards]: num_suits*num_values, [Hints in Hand], [Other People's Hands Hints]]
    def __init__(self, game_state_size, move_size = 5, weights = None):
        if weights == None:
            self.weights = [0]*(game_state_size+move_size)*64
        
        # convert the game state into a single array of values
        # Maybe the player doesn't need to hold onto the game state all the time?
        # Gonna keep track of it with the game manager... this is just here to see the input size it'll need
#         self.game_state = np.asarray(self.flatten(game_state))
        self.model = Sequential([
            Dense(units=game_state_size, input_shape=(1,), activation='relu'),
            Dense(units=64, activation='relu'),
            Dense(units=16, activation='relu'),
            Dense(units=1, activation='sigmoid')
        ])
    
        # Since we will be updating the weights ourselves, this isn't necessary...
        self.model.compile(optimizer=Adam(learning_rate=0.0001), loss='sparse_categorical_crossentropy',metrics=['accuracy'])
        
        
        
#     def expected_card_probabilities(self, card):
#         return
    
    def choose_best_move(self, move_list, game_state):
        
        neural_input = np.asarray([])
        return
    
    def update_game(self, game_state):
        self.game_state = game_state
        return
    
    # Genetic Algorithm Stuff
    def dna_crossover(self, partners):
        partners.append(self.weights)
        average_weights = []
        for i in range(len(self.weights)):
            average_weights.append(statistics.mean([player_weight[i] for player_weight in partners]))
        return self.mutate(average_weights)
    
    def mutate(self, dna_weights):
        mutation_rate = 0.01
        new_weights = []
        
        for weight in dna_weights:
            if random.random() < mutation_rate:
                new_weights.append(random.random())
            else:
                new_weights.append(weight)
        
        return new_weights

test_game = HanabiGame()
input_size = len(flatten(test_game.boardState(0)))+len(test_game.legal_move_list[0])
print(input_size)
test_player = HanabiPlayer(game_state_size = input_size)
# len(board_state) = 4 + num_suits + num_suits*num_values + hand_size*(num_suits+num_values+1)*num_players + len(move)
# In 2 player, 5 cards, 5 suits, 5 values, you should get 144
weights = test_player.model.get_weights()
test_player.model.summary()
model_to_save = test_player.model.to_json()
with open("model.json", "w") as json_file:
    json_file.write(model_to_save)

149
Model: "sequential_58"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_232 (Dense)            (None, 149)               298       
_________________________________________________________________
dense_233 (Dense)            (None, 64)                9600      
_________________________________________________________________
dense_234 (Dense)            (None, 16)                1040      
_________________________________________________________________
dense_235 (Dense)            (None, 1)                 17        
Total params: 10,955
Trainable params: 10,955
Non-trainable params: 0
_________________________________________________________________


In [None]:
[i for i in range(10) if i!=2]

In [None]:
print(f"Is{' not' if True else ''}!")

In [None]:
A = [[1,2],[3,4]]
for a in A:
    a[0] = 0
A

In [22]:
A = [0,0,1]
B = ['a' if x == 0 else 'b' for x in A ]

In [23]:
B

['a', 'a', 'b']

In [158]:
random.random()

0.42255412843089846

In [82]:
flatten([[[1,2],[3,4]],[[5,6],[7,8]]])

[1, 2, 3, 4, 5, 6, 7, 8]

In [80]:
[1,2] + [1,2,3]

[1, 2, 1, 2, 3]

In [157]:
A = [1]
A.append([[1,2],[3,4],[5,6]])
print(A)

[1, [[1, 2], [3, 4], [5, 6]]]
