<a href="https://colab.research.google.com/github/IGieckI/BriscolAI/blob/main/BriscolAI.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install tensorflow
!pip install gym==0.25.2
!pip install keras
!pip install keras-rl2

In [2]:
from enum import Enum
import numpy as np
import random
import gym
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

In [3]:
class Card:
    """A single card of the 40-card deck used by Briscola.

    Attributes:
        rank: Integer rank; the lookup tables below cover 0 (Ace) to 9 (King).
        seed: Integer suit; the lookup table below covers 0 (Cups) to 3 (Sticks).
    """

    # Points scored when a card of the given rank is captured.
    _POINT_VALUES = {0: 11, 1: 0, 2: 10, 3: 0, 4: 0, 5: 0, 6: 0, 7: 2, 8: 3, 9: 4}

    # Trick-taking strength per rank (higher beats lower). Point values alone
    # cannot rank the cards because every 0-point rank would tie.
    _STRENGTH = {0: 9, 1: 0, 2: 8, 3: 1, 4: 2, 5: 3, 6: 4, 7: 5, 8: 6, 9: 7}

    _RANK_NAMES = {
        0: "Ace",
        1: "Two",
        2: "Three",
        3: "Four",
        4: "Five",
        5: "Six",
        6: "Seven",
        7: "Knave",
        8: "Knight",
        9: "King",
    }

    _SEED_NAMES = {
        0: "Cups",
        1: "Denari",
        2: "Swords",
        3: "Sticks",
    }

    def __init__(self, rank, seed):
        self.rank = int(rank)
        self.seed = int(seed)

    def __str__(self):
        return f"{self.get_rank()} of {self.get_seed()}"

    def __repr__(self):
        return f"{type(self).__name__}(rank={self.rank}, seed={self.seed})"

    def __eq__(self, other):
        # Defined together with __hash__ so that equal cards hash equally.
        if not isinstance(other, Card):
            return NotImplemented
        return self.rank == other.rank and self.seed == other.seed

    def __hash__(self):
        # Tens digit encodes the seed, units digit encodes the rank.
        return self.seed * 10 + self.rank

    def get_value(self):
        """
        Get the point value of the card based on its rank

        Returns:
            int: The points of the card, or None when the rank is unknown
        """
        return self._POINT_VALUES.get(self.rank)

    @staticmethod
    def get_value_from_hash(hash):
        """
        Get the point value of a card from its hash

        Args:
            hash : int, The card hash (seed * 10 + rank)

        Returns:
            int: The points of the card
        """
        return Card._POINT_VALUES[hash % 10]

    def get_rank(self):
        """
        Get the name of the rank, or None when the rank is unknown
        """
        return self._RANK_NAMES.get(self.rank)

    def get_seed(self):
        """
        Get the name of the seed, or None when the seed is unknown
        """
        return self._SEED_NAMES.get(self.seed)

    def compare_cards(self, other_card):
        """
        Compare two cards to determine the winner ONLY based on their ranks NOT the seed

        Args:
            other_card : Card, The other card to compare

        Returns:
            Card: The winner card (other_card when both have the same strength)
        """
        if self._STRENGTH[self.rank] > self._STRENGTH[other_card.rank]:
            return self
        return other_card

class CardState(Enum):
    """Marker stored in each cell of the Briscola state matrix."""
    # Value the whole state matrix is filled with when a game is reset.
    NOT_IN_GAME_YET = 0
    # Written on the row of the card chosen as briscola during reset.
    BRISCOLA = 1
    # Written on the rows of the cards held in p1_hand.
    IN_AI_HAND = 2
    # Written for the card chosen by the action passed to Briscola.step.
    PLAYED = 3
    # Replaces PLAYED when step is called with no card on the field.
    PLAYED_IN_PREVIOUS_TURNS = 4

  and should_run_async(code)


In [4]:
class Briscola():
    """
    Two-player Briscola environment

    p1 is the AI (its cards are marked IN_AI_HAND in the state) and p2 is the
    opponent (its hand is the one printed as "Your hand").

    The state is a (40, 20) matrix of CardState: one row per card hash
    (seed * 10 + rank) and one column per card played by the AI.

    NOTE(review): a card led by the AI is removed from its hand but the trick
    is not resolved by this class, since there is no opponent-reply method;
    only tricks led by the opponent (through played_card) are scored.
    """

    # Trick-taking strength of every rank (higher beats lower)
    _POWER = {0: 9, 1: 0, 2: 8, 3: 1, 4: 2, 5: 3, 6: 4, 7: 5, 8: 6, 9: 7}

    def __init__(self):
        """
        Generate the deck, choose a briscola and give three cards each player
        """
        self.reset()

    def __hash__(self):
        """
        Hash of the current state matrix

        __hash__ must return an int, so the matrix is reduced to the tuple of
        the integer values of its cells
        """
        return hash(tuple(cell.value for cell in self.state.flat))

    def __str__(self):
        # The face-up briscola is still drawable until the deck is exhausted
        remaining = len(self.deck) + (0 if self.briscola_drawn else 1)
        out = f'''Cards in the deck: {remaining} \nBriscola: {self.briscola_card}\n\n'''

        out += 'Your hand:\n'
        out += ''.join(f'- {x}\n' for x in self.p2_hand)

        return out

    def draw_card(self):
        """
        Draw the next card, taking it out of the deck list

        The briscola card is handed out as the very last card, once the deck
        is empty. It stays stored in briscola_card because its seed is still
        needed to decide the following tricks.

        Returns:
            Card : The drawn card, or None when nothing is left to draw
        """
        if self.deck:
            return self.deck.pop(0)

        if self.briscola_card is not None and not self.briscola_drawn:
            self.briscola_drawn = True
            return self.briscola_card

        return None

    def create_deck(self):
        """
        Create a new deck with cards in random position
        """
        deck = [Card(rank, seed) for rank in range(10) for seed in range(4)]
        random.shuffle(deck)

        return deck

    def get_random_action(self):
        """
        Pick a random valid index of the AI hand

        Raises:
            ValueError : If the AI hand is empty
        """
        return random.randrange(len(self.p1_hand))

    def fight(self, first_card, second_card):
        """
        Tells who win between the two cards

        Args:
            first_card : Card, first card played
            second_card : Card, second card played

        Returns:
            Card : The winner Card
        """
        winner_hash = self.fight_hash(hash(first_card), hash(second_card))
        return first_card if winner_hash == hash(first_card) else second_card

    def fight_hash(self, first_card_hash, second_card_hash):
        """
        Tells who win between the two cards

        Args:
            first_card_hash : First card played's hash
            second_card_hash : Second card played's hash

        Returns:
            int : The winner Card's hash
        """
        first_seed = first_card_hash // 10
        second_seed = second_card_hash // 10

        # Same seed: the stronger rank wins
        if first_seed == second_seed:
            if self._POWER[first_card_hash % 10] > self._POWER[second_card_hash % 10]:
                return first_card_hash
            return second_card_hash

        if first_seed == self.briscola_card.seed:
            return first_card_hash

        if second_seed == self.briscola_card.seed:
            return second_card_hash

        # Different seeds and no briscola involved: the first card played wins
        return first_card_hash

    def reset(self, ai_turn: bool = None):
        """
        Reset the current environement

        Args:
            ai_turn : Value stored in self.turn, chosen at random when None

        Returns:
            The initial state matrix
        """

        # Defined before any draw so draw_card can always read them
        self.briscola_card = None
        self.briscola_drawn = False
        self.episode_ended = False

        # Create a deck and give cards to each player
        self.deck = self.create_deck()

        self.p1_hand = [self.draw_card() for _ in range(3)]
        self.p2_hand = [self.draw_card() for _ in range(3)]

        # Choose the briscola of the game (it is also the last card that will be drawn)
        self.briscola_card = self.draw_card()

        # Initialize the scores
        self.p1_score, self.p2_score = 0, 0

        # Choose who start (0:p1, 1:p2)
        self.turn = random.randint(0, 1) if ai_turn is None else ai_turn
        self.turn_number = 0

        # Create the state to be given to the network
        self.state = np.full((40, 20), CardState.NOT_IN_GAME_YET, dtype=object)
        self.state[hash(self.briscola_card), :] = CardState.BRISCOLA

        for x in self.p1_hand:
            self.state[hash(x), :] = CardState.IN_AI_HAND

        # Last turn played card (if there is a card on the field, helping variable to save complexity later)
        self.played_card = None

        return self.state

    def step(self, action):
        """
        Play the card at index `action` of the AI hand

        Args:
            action : int, index of the card inside p1_hand

        Returns:
            tuple : (state, reward, episode_ended). An index outside the hand
            returns the unchanged state with a reward of -100.
        """

        # Determine if the player choose a card number he don't have in the hand
        if not 0 <= action < len(self.p1_hand):
            return self.state, -100, False

        column = self.turn_number
        new_state = np.copy(self.state)
        card = self.p1_hand.pop(action)
        reward = 0

        # Start this turn's column from the snapshot of the previous turn
        if column > 0:
            new_state[:, column] = new_state[:, column - 1]

        if self.played_card is None:
            # A new trick starts: the cards still marked PLAYED belong to the past
            previously_played = new_state[:, column] == CardState.PLAYED
            new_state[previously_played, column] = CardState.PLAYED_IN_PREVIOUS_TURNS

        # Mark the chosen card after the carry-forward so the mark is not overwritten
        new_state[hash(card), column] = CardState.PLAYED

        if self.played_card is not None:
            # The card on the field was played first, so it is the leading card
            round_points = card.get_value() + self.played_card.get_value()
            ai_wins = self.fight_hash(hash(self.played_card), hash(card)) == hash(card)

            # Update scores, assign rewards and draw cards (the winner draws first)
            if ai_wins:
                self.p1_score += round_points
                reward = round_points
                ai_draw = self.draw_card()
                opponent_draw = self.draw_card()
            else:
                self.p2_score += round_points
                reward = -round_points
                opponent_draw = self.draw_card()
                ai_draw = self.draw_card()

            # Nothing is added to a hand once deck and briscola are exhausted
            if ai_draw is not None:
                self.p1_hand.append(ai_draw)
                new_state[hash(ai_draw), column] = CardState.IN_AI_HAND
            if opponent_draw is not None:
                self.p2_hand.append(opponent_draw)

            # Reset for next round
            self.played_card = None

        # Switch turns
        self.turn_number += 1

        # Check for game termination (after the played card left the hand)
        self.episode_ended = not self.p1_hand and not self.p2_hand
        self.state = new_state

        # Return the time step
        return new_state, reward, self.episode_ended

    def is_playing(self):
        """Tell whether the game is still running"""
        return not self.episode_ended

# Smoke test: build a freshly dealt game and print its text summary.
game = Briscola()
print(game)

Cards in the deck: 34 
Cards in the deck: 34 
Briscola: Seven of Swords

Your hand:
- Five of Sticks
- Three of Swords
- Six of Cups



In [7]:
# Scratch check: list.count returns the number of occurrences, so this is True.
[1,2,3,4,5,6].count(3) == 1

  and should_run_async(code)


True

In [None]:
total_episodes = 10000        # Number of games played by the training loop
learning_rate = 0.1           # Learning rate (not referenced elsewhere in this notebook)
max_steps = 100                # Upper bound on the steps taken in one episode
gamma = 0.99                  # Discounting rate (not referenced elsewhere in this notebook)

# Exploration parameters
epsilon = 1.0                 # Current probability of picking a random action
max_epsilon = 1.0             # Exploration probability at start (not referenced elsewhere in this notebook)
min_epsilon = 0.01            # Floor that epsilon is clamped to
decay_rate = 0.001             # Amount subtracted from epsilon after every episode (linear decay)

In [None]:
# Create the network

inputs = layers.Input(shape=(40, 20))
# Dense only transforms the last axis: without flattening the (40, 20) state
# the model would emit one 3-way output per card row, i.e. (batch, 40, 3),
# instead of a single score per hand slot.
x = layers.Flatten()(inputs)
x = layers.Dense(128, activation='relu')(x)
x = layers.Dense(128, activation='relu')(x)

# One output per card slot of the hand
outputs = layers.Dense(3, activation='softmax')(x)

# NOTE(review): softmax + categorical cross-entropy treats the outputs as
# class probabilities; if they are meant to be Q-values (rewards can be
# negative) a linear output with an MSE/Huber loss is the usual choice.
model = keras.Model(inputs=inputs, outputs=outputs)
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])


In [None]:
# List of rewards to write a graph representation of the improvements
rewards = []

for episode in range(total_episodes):
    # Reset the environment: every episode is played on a fresh game
    env = Briscola()
    state = env.state
    total_rewards = 0

    # States visited during the episode
    # NOTE(review): the model is never fitted on this memory yet
    memory = []
    for step in range(max_steps):
        # The AI has no card left to play, so the episode cannot go on
        if not env.p1_hand:
            break

        # Add the current state to the memory to keep track of it
        memory.append(np.copy(state))

        # If the random draw is greater than epsilon take the best predicted
        # action for this state, otherwise a random one
        if random.uniform(0, 1) > epsilon:
            # CardState members are not numeric: feed the network their values
            encoded = np.array(
                [[cell.value for cell in row] for row in state],
                dtype=np.float32,
            )
            scores = np.ravel(model.predict(encoded[np.newaxis, ...], verbose=0))
            # Only the slots that actually hold a card are candidates
            action = int(np.argmax(scores[:len(env.p1_hand)]))
        else:
            # int() keeps the action usable as a list index
            action = int(env.get_random_action())

        new_state, reward, episode_ended = env.step(action)

        # Update the rewards and the current state
        total_rewards += reward
        state = new_state

        # Check finish
        if episode_ended:
            break

    memory.append(state)

    # Reduce the epsilon factor, never going below its floor
    epsilon = max(min_epsilon, epsilon - decay_rate)

    rewards.append(total_rewards)

print("Score over time: " + str(sum(rewards) / total_episodes))

In [None]:
# Play the game

# Convert the card name typed by the player into the matching card of their hand
def convert_input(input: str):
    """
    Find the card of the player's hand whose name matches the typed text

    Surrounding whitespace and letter case are ignored.

    Args:
        input : str, The card name typed by the player (ex: "Three of Sticks")

    Returns:
        The matching element of game.p2_hand, or -1 when no card matches
    """
    wanted = input.strip().casefold()
    for x in game.p2_hand:
        if str(x).casefold() == wanted:
            return x
    return -1

game = Briscola()

# Choose who plays first
ai_turn = random.uniform(0, 1) < 0.5

game.reset(ai_turn)
player_turn = not ai_turn

# Start game
while game.is_playing():
    # Stop when the side that has to move has no card left to play
    if not (game.p2_hand if player_turn else game.p1_hand):
        break

    print(game)
    if player_turn:
        print("Player turn:")
        print("Seleziona la carta digitando il nome(ex:\"Three of Sticks\"):")
        card = convert_input(input())
        if card == -1:
            # Unknown card name: ask again without passing the turn
            continue
        # Put the chosen card on the field so the AI's next step answers it
        game.p2_hand.remove(card)
        game.played_card = card
    else:
        print("AI turn:")
        # CardState members are not numeric: feed the network their values
        encoded = np.array(
            [[cell.value for cell in row] for row in game.state],
            dtype=np.float32,
        )
        scores = np.ravel(model.predict(encoded[np.newaxis, ...], verbose=0))
        # Only the slots that actually hold a card are candidates
        action = int(np.argmax(scores[:len(game.p1_hand)]))
        print(f"AI plays: {game.p1_hand[action]}")
        # NOTE(review): when the AI opens a trick the environment does not
        # resolve it, because it has no method for the opponent's reply.
        game.step(action)

    player_turn = not player_turn

print(game)

# p1 is the AI, p2 is the human player
if game.p1_score == game.p2_score:
    print("It's a draw!")
elif game.p1_score > game.p2_score:
    print("You lost!")
else:
    print("You won!")