<a href="https://colab.research.google.com/github/IGieckI/BriscolAI/blob/main/BriscolAI.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
 !pip install tensorflow
!pip install gym==0.25.2
!pip install keras
!pip install keras-rl2

In [1]:
from enum import Enum
import numpy as np
import random
import gym
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

In [2]:
class Card:
    """
    A single card of the 40-card deck.

    Ranks are encoded as integers 0-9 and seeds (suits) as integers 0-3; the
    human-readable names are given by `get_rank` and `get_seed`. A card is
    identified by the integer `seed * 10 + rank`, which is also its hash.
    """

    # Points scored for capturing a card, indexed by rank
    POINT_VALUES = {
        0: 11,  # Ace
        1: 0,   # Two
        2: 10,  # Three
        3: 0,   # Four
        4: 0,   # Five
        5: 0,   # Six
        6: 0,   # Seven
        7: 2,   # Knave
        8: 3,   # Knight
        9: 4,   # King
    }

    # Trick-taking strength indexed by rank: the higher value wins.
    # The ordering agrees with POINT_VALUES and also ranks the zero-point cards.
    RANK_POWER = {
        0: 9,  # Ace
        1: 0,  # Two
        2: 8,  # Three
        3: 1,  # Four
        4: 2,  # Five
        5: 3,  # Six
        6: 4,  # Seven
        7: 5,  # Knave
        8: 6,  # Knight
        9: 7,  # King
    }

    RANK_NAMES = {
        0: "Ace",
        1: "Two",
        2: "Three",
        3: "Four",
        4: "Five",
        5: "Six",
        6: "Seven",
        7: "Knave",
        8: "Knight",
        9: "King"
    }

    SEED_NAMES = {
        0: "Cups",
        1: "Denari",
        2: "Swords",
        3: "Sticks"
    }

    def __init__(self, rank, seed):
        # int() also accepts NumPy integers, so the attributes are plain ints
        self.rank = int(rank)
        self.seed = int(seed)

    def __str__(self):
        return f"{self.get_rank()} of {self.get_seed()}"

    def __hash__(self):
        return self.seed * 10 + self.rank

    def __eq__(self, other):
        # Keep equality consistent with __hash__: same rank and seed means same card
        if not isinstance(other, Card):
            return NotImplemented
        return self.rank == other.rank and self.seed == other.seed

    def get_value(self):
        """
        Get the point value of the card based on its rank

        Returns:
            int: The points of the card, or None if the rank is not in 0-9
        """
        return self.POINT_VALUES.get(self.rank)

    @staticmethod
    def get_value_from_hash(hash):
        """
        Get the point value of a card from its hash (seed * 10 + rank)

        Args:
            hash : int, The hash of the card

        Returns:
            int: The points of the card
        """
        return Card.POINT_VALUES[hash % 10]

    def get_power(self):
        """
        Get the trick-taking strength of the card based on its rank

        Returns:
            int: The strength of the card, higher wins
        """
        return self.RANK_POWER[self.rank]

    def get_rank(self):
        """
        Get the name of the rank, or None if the rank is not in 0-9
        """
        return self.RANK_NAMES.get(self.rank)

    def get_seed(self):
        """
        Get the name of the seed, or None if the seed is not in 0-3
        """
        return self.SEED_NAMES.get(self.seed)

    def compare_cards(self, other_card):
        """
        Compare two cards to determine the winner ONLY based on their ranks NOT the seed

        The comparison uses the trick-taking strength, so cards worth zero
        points are still ordered correctly among themselves.

        Args:
            other_card : Card, The other card to compare

        Returns:
            Card: The winner card (other_card when the two ranks are equal)
        """
        if self.get_power() > other_card.get_power():
            return self
        else:
            return other_card

class CardState(Enum):
    """
    Status of a card as stored in the cells of the Briscola state matrix.
    """
    NOT_IN_GAME_YET = 0            # fill value of a freshly reset state matrix
    BRISCOLA = 1                   # the card drawn as briscola during reset
    IN_AI_HAND = 2                 # card currently held in the AI hand (p1_hand)
    PLAYED = 3                     # card played in the step being recorded
    PLAYED_IN_PREVIOUS_TURNS = 4   # card that was PLAYED before a new card is led

  and should_run_async(code)


In [12]:
class Briscola():
    """
    Two-player Briscola environment.

    Player 1 (p1) is the AI and player 2 (p2) is the opponent. `turn` is 0
    when p1 has to play and 1 when p2 has to play.

    The state is a 40x40 matrix of CardState values: the row is the card hash
    (seed * 10 + rank) and the column is the turn number, so each column is a
    snapshot of every card's status at that turn.
    """

    # Trick-taking strength of each rank: within the same seed the higher value wins
    _RANK_POWER = {
        0: 9, # Ace
        1: 0, # Two
        2: 8, # Three
        3: 1, # Four
        4: 2, # Five
        5: 3, # Six
        6: 4, # Seven
        7: 5, # Knave
        8: 6, # Knight
        9: 7  # King
    }

    def __init__(self):
        """
        Generate the deck, choose a briscola and give three cards each player
        """
        self.reset()

    def __str__(self):
        # The briscola is the last drawable card, so it counts until it is taken
        cards_left = len(self.deck) + (0 if self.briscola_drawn else 1)
        out = f'''Cards in the deck: {cards_left} \nBriscola: {self.briscola_card} \nPlayed Card: {self.played_card}\n\n'''

        out += 'Your hand:\n'
        for x, i in enumerate(self.p2_hand):
            out += f'{x}) {i}\n'

        return out

    def get_state(self):
        """Return the current state matrix"""
        return self.state

    def draw_card(self):
        """
        Draw the next card: from the deck while it has cards, then the
        briscola card once, then nothing.

        Returns:
            Card : The drawn card, or None when there is nothing left to draw
        """
        if len(self.deck) > 0:
            return self.deck.pop(0)

        if not self.briscola_drawn:
            self.briscola_drawn = True
            return self.briscola_card

        return None

    def create_deck(self):
        """
        Create a new deck with cards in random position
        """
        deck = [Card(rank, seed) for rank in range(10) for seed in range(4)]
        np.random.shuffle(deck)

        return deck

    def get_random_action(self):
        """
        Pick a random valid index into the AI hand (p1_hand)

        Returns:
            int : Index of a card in p1_hand

        Raises:
            ValueError : If p1_hand is empty
        """
        return int(np.random.randint(0, len(self.p1_hand)))

    def _follower_wins(self, lead_seed, lead_rank, reply_seed, reply_rank):
        """Tell whether the second card of a trick beats the first one"""
        if lead_seed == reply_seed:
            return self._RANK_POWER[reply_rank] > self._RANK_POWER[lead_rank]

        # With different seeds the reply only wins when it is a briscola
        return reply_seed == self.briscola_card.seed

    def fight(self, first_card, second_card):
        """
        Tells who win between the two cards

        Args:
            first_card : Card, first card played
            second_card : Card, second card played

        Returns:
            Card : The winner Card
        """
        if self._follower_wins(first_card.seed, first_card.rank,
                               second_card.seed, second_card.rank):
            return second_card

        return first_card

    def fight_hash(self, first_card_hash, second_card_hash):
        """
        Tells who win between the two cards

        Args:
            first_card_hash : First card played's hash
            second_card_hash : Second card played's hash

        Returns:
            int : The winner Card's hash
        """
        if self._follower_wins(first_card_hash // 10, first_card_hash % 10,
                               second_card_hash // 10, second_card_hash % 10):
            return second_card_hash

        return first_card_hash

    def reset(self, ai_turn: bool = None):
        """
        Reset the current environement

        Args:
            ai_turn : Starting turn. None picks it at random. Despite the name,
                      the value is used as the turn index, so a falsy value
                      lets p1 (AI) start and a truthy value lets p2 start.
        """

        # Create a deck and give cards to each player
        self.deck = self.create_deck()
        self.briscola_drawn = False

        self.p1_hand = [self.draw_card() for _ in range(3)]
        self.p2_hand = [self.draw_card() for _ in range(3)]

        # Choose the briscola of the game; it is handed out as the very last card (see draw_card)
        self.briscola_card = self.draw_card()

        # Initialize the scores
        self.p1_score, self.p2_score = 0, 0

        # Choose who start (0:p1(AI), 1:p2)
        self.turn = random.randint(0, 1) if ai_turn is None else int(bool(ai_turn))
        self.turn_number = 0

        # Create the state to be given to the network
        self.state = np.full((40, 40), CardState.NOT_IN_GAME_YET)
        self.state[hash(self.briscola_card), :] = CardState.BRISCOLA

        for card in self.p1_hand:
            self.state[hash(card), :] = CardState.IN_AI_HAND

        # Card waiting on the table for a reply (None when a new trick starts)
        self.played_card = None
        self.episode_ended = False

        return

    def step(self, action):
        """
        Play one card for the player whose turn it is.

        Args:
            action : Index of the card in the hand of the current player
                     (p1_hand when turn is 0, p2_hand otherwise)

        Returns:
            tuple : (new_state, reward, episode_ended). The reward is seen from
                    the AI: plus the trick points when p1 wins the trick, minus
                    the trick points when p2 wins it, 0 for the first card of a
                    trick and -100 (state unchanged) for an invalid index.
        """
        hand = self.p1_hand if self.turn == 0 else self.p2_hand

        # Determine if the player choose a card number he don't have in the hand
        if action < 0 or action >= len(hand):
            return self.state, -100, False

        action = int(action)
        column = self.turn_number

        # Generate a new state: start this turn's column from the previous one
        new_state = np.copy(self.state)
        if column > 0:
            new_state[:, column] = new_state[:, column - 1]

        card = hand.pop(action)
        reward = 0

        if self.played_card is None:
            # First card of a trick: age the cards of the previous trick
            # before marking the new one, so it is not aged as well
            previous_trick = new_state[:, column] == CardState.PLAYED
            new_state[previous_trick, column] = CardState.PLAYED_IN_PREVIOUS_TURNS
            new_state[hash(card), column] = CardState.PLAYED
            self.played_card = card
        else:
            # Reply to the card on the table: resolve the trick
            new_state[hash(card), column] = CardState.PLAYED
            lead_card = self.played_card
            round_points = lead_card.get_value() + card.get_value()

            follower_won = self.fight(lead_card, card) is card
            # The current player is the follower, so p1 wins either by
            # following and winning or by leading and not being beaten
            ai_won = follower_won == (self.turn == 0)

            if ai_won:
                self.p1_score += round_points
                reward = round_points
                draw_order = (self.p1_hand, self.p2_hand)
            else:
                self.p2_score += round_points
                reward = -round_points
                draw_order = (self.p2_hand, self.p1_hand)

            # The winner of the trick draws first; nothing is added once the cards run out
            for drawing_hand in draw_order:
                drawn = self.draw_card()
                if drawn is None:
                    continue
                drawing_hand.append(drawn)
                if drawing_hand is self.p1_hand:
                    new_state[hash(drawn), column] = CardState.IN_AI_HAND

            # Reset for next round
            self.played_card = None

        # Switch turns
        # NOTE(review): players strictly alternate, so the same player leads
        # every trick; in Briscola the trick winner normally leads the next one.
        self.turn_number += 1
        self.turn = (self.turn + 1) % 2

        # Keep the environment in sync with what is returned
        self.state = new_state
        self.episode_ended = (len(self.p1_hand) == 0 and len(self.p2_hand) == 0)

        # Return the time step
        return new_state, reward, self.episode_ended

    def check_winner(self):
        """
        Tell who is ahead on points

        Returns:
            str : "BriscolAI" if p1 has more points, "Human" if p2 has more
                  points, ' ' when the scores are equal
        """
        if self.p1_score > self.p2_score:
            return "BriscolAI"

        if self.p2_score > self.p1_score:
            return "Human"

        return ' '

    def is_playing(self):
        """Tell whether the game is still going on"""
        return not self.episode_ended

  and should_run_async(code)


In [4]:
total_episodes = 10000        # Number of games played by the training loop
learning_rate = 0.1           # Learning rate
max_steps = 100                # Upper bound on the steps taken in a single episode
gamma = 0.99                  # Discounting rate

# Exploration parameters
epsilon = 1.0                 # Current probability of picking a random action
max_epsilon = 1.0             # Exploration probability at start
min_epsilon = 0.01            # Minimum exploration probability
decay_rate = 0.001             # Amount subtracted from epsilon after every episode

In [5]:
# Create the network
# Input: the 40x40 state matrix of the game. Output: one score per hand slot (3 cards).

inputs = layers.Input(shape=(40, 40))
# Dense only acts on the last axis, so without flattening the output would be
# (batch, 40, 3) instead of one vector of 3 scores per state
x = layers.Flatten()(inputs)
x = layers.Dense(128, activation='relu')(x)
x = layers.Dense(128, activation='relu')(x)

outputs = layers.Dense(3, activation='softmax')(x)

# NOTE(review): softmax + categorical cross-entropy trains a classifier; if the
# outputs are meant to be Q-values, a linear output with an MSE-style loss is
# the usual choice. Left unchanged here, to be confirmed.
model = keras.Model(inputs=inputs, outputs=outputs)
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])


In [None]:
# List of rewards to write a graph representation of the improvements
rewards = []

# NOTE(review): this loop only plays games and records rewards; the network is
# never fitted and `memory` is collected but not used for training yet.
for episode in range(total_episodes):
    # Start a fresh game (the constructor resets the environment)
    env = Briscola()
    total_rewards = 0

    # States visited during this episode
    memory = []
    for _ in range(max_steps):
        # Add the current state to the memory to keep track of it
        observation = env.get_state()
        memory.append(np.copy(observation))

        # Hand of the player who has to move (turn 0 is the AI, otherwise the opponent)
        hand = env.p1_hand if env.turn == 0 else env.p2_hand
        if len(hand) == 0:
            break

        # The AI exploits the network when the random draw is greater than
        # epsilon; otherwise (and always for the opponent) a random card is played
        if env.turn == 0 and np.random.uniform(0, 1) > epsilon:
            # The state holds CardState members: turn them into numbers and add the batch axis
            numeric_state = np.array(
                [[cell.value for cell in row] for row in observation],
                dtype=np.float32,
            )
            scores = np.ravel(model.predict(numeric_state[np.newaxis, ...], verbose=0))
            # Only the slots that still hold a card are valid actions
            action = int(np.argmax(scores[:len(hand)]))
        else:
            action = int(np.random.randint(0, len(hand)))

        new_state, reward, episode_ended = env.step(action)

        # Update the rewards and the current state
        env.state = new_state
        total_rewards += reward

        # Check finish
        if episode_ended:
            break

    memory.append(np.copy(env.get_state()))

    # Reduce the epsilon factor, never going below the minimum
    epsilon = max(min_epsilon, epsilon - decay_rate)

    rewards.append(total_rewards)

print ("Score over time: " +  str(sum(rewards)/total_episodes))

SyntaxError: invalid syntax (<ipython-input-6-6896d5c82bc6>, line 28)

In [21]:
# Play the game

# Trick-taking strength of each rank, used only to report who took a trick
RANK_POWER = {0: 9, 1: 0, 2: 8, 3: 1, 4: 2, 5: 3, 6: 4, 7: 5, 8: 6, 9: 7}


def follower_wins(lead_card, reply_card, briscola_seed):
    """Tell whether the second card of a trick beats the first one."""
    if lead_card.seed == reply_card.seed:
        return RANK_POWER[reply_card.rank] > RANK_POWER[lead_card.rank]
    # With different seeds the reply only wins when it is a briscola
    return reply_card.seed == briscola_seed


game = Briscola()

# Choose who plays first. A truthy flag makes reset() start with turn 1,
# which is the human player (p2).
human_turn = bool(np.random.uniform(0, 1) < 0.5)

game.reset(human_turn)

# start game
episode_ended = False
while game.is_playing() and not episode_ended:
    hand = game.p2_hand if human_turn else game.p1_hand
    if len(hand) == 0:
        # Nothing left to play for the current player
        break

    print("---------------------------------------------")
    print(game)

    # Card already on the table, needed to report the trick after the step
    previous_played_card = game.played_card

    if human_turn:
        print("Player turn:")
        print(f"Select the card by entering the index (ex:\"0\" -> to select {game.p2_hand[0]}):")
        try:
            action = int(input())
        except ValueError:
            # Not a number: ask again
            continue
        # Validate before indexing the hand
        if action < 0 or action > len(game.p2_hand) - 1:
            continue
        current_played_card = game.p2_hand[action]
        print(f"Player played {current_played_card}\n")
    else:
        print("BriscolAI turn:")
        action = np.random.randint(0, len(game.p1_hand))
        current_played_card = game.p1_hand[action]
        print(f"BriscolAI played {current_played_card}\n")

    new_state, reward, episode_ended = game.step(action)
    print(f"Game turn: {game.turn_number}")

    if previous_played_card is not None:
        # The player who just moved replied to the card on the table
        follower = "Human" if human_turn else "BriscolAI"
        leader = "BriscolAI" if human_turn else "Human"
        if follower_wins(previous_played_card, current_played_card, game.briscola_card.seed):
            print(f"{follower} took {previous_played_card} with {current_played_card}")
        else:
            print(f"{leader} took {current_played_card} with {previous_played_card}")

    game.state = new_state
    human_turn = not human_turn

print(game)

# p1 is BriscolAI and p2 is the human player
if game.p1_score == game.p2_score:
    print("It's a draw!")
elif game.p1_score > game.p2_score:
    print("You lost!")
else:
    print("You won!")

---------------------------------------------
Cards in the deck: 34 
Briscola: Knave of Swords 
Played Card: None

Your hand:
0) Four of Cups
1) Seven of Swords
2) Ace of Sticks

Player turn:
Select the card by entering the index (ex:"0" -> to select Four of Cups):


KeyboardInterrupt: Interrupted by user

In [None]:
# Scratch cell: remove NumPy's print truncation so a whole 40x40 matrix
# (the shape of the game state) is displayed, then show an empty one.
import sys
np.set_printoptions(threshold=sys.maxsize)
np.zeros((40, 40))