<a href="https://colab.research.google.com/github/IGieckI/BriscolAI/blob/tensorflow/BriscolAI_TF.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install tensorflow
!pip install gym==0.25.2
!pip install keras
!pip install keras-rl2

In [None]:
from enum import Enum
import numpy as np
import random
import gym
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

In [None]:
class Card:
    """A single card of the 40-card Briscola deck.

    A card is identified by its rank (1..10) and its seed/suit (1..4).
    ``hash(card)`` is ``seed * 10 + rank``, i.e. an integer in 11..50 that
    uniquely encodes the card.
    """

    # Points scored by whoever captures the card, indexed by rank.
    _POINTS = {1: 11, 2: 0, 3: 10, 4: 0, 5: 0, 6: 0, 7: 0, 8: 2, 9: 3, 10: 4}

    # Trick-taking strength inside a suit, indexed by rank
    # (Ace > Three > King > Knight > Knave > 7 > 6 > 5 > 4 > 2).
    _STRENGTH = {1: 9, 2: 0, 3: 8, 4: 1, 5: 2, 6: 3, 7: 4, 8: 5, 9: 6, 10: 7}

    _RANK_NAMES = {
        1: "Ace",
        2: "Two",
        3: "Three",
        4: "Four",
        5: "Five",
        6: "Six",
        7: "Seven",
        8: "Knave",
        9: "Knight",
        10: "King",
    }

    _SEED_NAMES = {
        1: "Cups",
        2: "Denari",
        3: "Swords",
        4: "Sticks",
    }

    def __init__(self, rank, seed):
        self.rank = rank
        self.seed = seed

    def __str__(self):
        return f"{self.get_rank()} of {self.get_seed()}"

    def __eq__(self, other):
        # Keep equality consistent with __hash__: same rank and seed.
        if not isinstance(other, Card):
            return NotImplemented
        return (self.rank, self.seed) == (other.rank, other.seed)

    def __hash__(self):
        # int() is required because __hash__ must return a built-in int;
        # NumPy integers (e.g. produced by np.arange) are rejected by hash().
        return int(self.seed) * 10 + int(self.rank)

    def get_value(self):
        """
        Get the point value of the card based on its rank

        Returns:
            int: The points of the card, or None for an unknown rank
        """
        return self._POINTS.get(self.rank)

    @staticmethod
    def get_value_from_hash(hash):
        """
        Get the point value of a card from its hash (seed * 10 + rank)

        Args:
            hash : int, The hash of a card

        Returns:
            int: The points of the card
        """
        # rank is in 1..10, so a plain `hash % 10` would map every King
        # (rank 10) to 0; shift by one before taking the remainder.
        rank = (hash - 1) % 10 + 1
        return Card._POINTS[rank]

    def get_rank(self):
        """Get the name of the rank, or None for an unknown rank"""
        return self._RANK_NAMES.get(self.rank)

    def get_seed(self):
        """Get the name of the seed, or None for an unknown seed"""
        return self._SEED_NAMES.get(self.seed)

    def compare_cards(self, other_card):
        """
        Compare two cards to determine the winner ONLY based on their ranks NOT the seed

        The comparison uses the trick-taking strength of the rank, so that
        cards worth zero points are still ordered (e.g. Seven beats Two).

        Args:
            other_card : Card, The other card to compare

        Returns:
            Card: The winner card (other_card when both have the same rank)
        """
        if self._STRENGTH[self.rank] > self._STRENGTH[other_card.rank]:
            return self
        return other_card

class CardState(Enum):
    """Marker stored for each card in the environment's state matrix."""

    # Fill value the state matrix is initialised with.
    NOT_IN_GAME_YET = 0
    # Assigned to the row of the briscola card when the game is reset.
    BRISCOLA = 1
    # Assigned to the cards currently held in player 1's hand.
    IN_AI_HAND = 2
    # Assigned to a card when it is played during a step.
    PLAYED = 3
    # Presumably for cards played in earlier turns -- TODO confirm intended use.
    PLAYED_IN_PREVIOUS_TURNS = 4

In [None]:
class Briscola(py_environment.PyEnvironment):
    """
    Briscola environment: the learning agent is player 1 (p1), player 2 (p2)
    is played by the environment itself with a uniformly random policy.

    One call to `step` plays one full trick, so a game lasts 20 steps.
    The observation is a (40, 20) matrix of CardState: one row per card
    (row = (seed - 1) * 10 + (rank - 1)), one column per trick.
    """

    # NOTE(review): `py_environment` is not imported anywhere in the visible
    # notebook (presumably tf_agents.environments.py_environment), and this
    # class defines `reset`/`step` returning plain tuples -- confirm the
    # intended base class before relying on it.

    # Trick-taking strength of each rank inside a suit
    # (Ace > Three > King > Knight > Knave > 7 > 6 > 5 > 4 > 2).
    _RANK_POWER = {1: 9, 2: 0, 3: 8, 4: 1, 5: 2, 6: 3, 7: 4, 8: 5, 9: 6, 10: 7}

    # Shape of the observation: (number of cards, number of tricks).
    _STATE_SHAPE = (40, 20)

    def __init__(self):
        """
        Generate the deck, choose a briscola and give three cards each player
        """
        self.reset()

    @staticmethod
    def _card_index(card):
        """Row of `card` in the state matrix, an int in 0..39."""
        return (int(card.seed) - 1) * 10 + (int(card.rank) - 1)

    def draw_card(self):
        """
        Each player draw from the deck taking out cards from the deck list

        The briscola card is the last card to be drawn. It stays available in
        `self.briscola` afterwards, because its seed is still the trump.

        Returns:
            Card : The drawn card, or None when there is nothing left to draw
        """
        if self.deck:
            return self.deck.pop(0)

        if self.briscola is not None and not self.briscola_drawn:
            self.briscola_drawn = True
            return self.briscola

        return None

    def create_deck(self):
        """
        Create a new deck with cards in random position
        """
        # Built-in ints (not NumPy ints) so that hash(card) returns a real int.
        deck = [Card(rank, seed) for rank in range(1, 11) for seed in range(1, 5)]
        random.shuffle(deck)

        return deck

    def fight(self, first_card, second_card):
        """
        Tells who win between the two cards

        Args:
            first_card : Card, first card played
            second_card : Card, second card played

        Returns:
            Card : The winner Card
        """
        if first_card.seed == second_card.seed:
            power = self._RANK_POWER
            if power[second_card.rank] > power[first_card.rank]:
                return second_card
            return first_card

        # Different seeds: the second card only wins when it is a briscola.
        if second_card.seed == self.briscola.seed:
            return second_card

        return first_card

    def fight_hash(self, first_card_hash, second_card_hash):
        """
        Tells who win between the two cards

        Args:
            first_card_hash : First card played's hash
            second_card_hash : Second card played's hash

        Returns:
            int : The winner Card's hash
        """
        # hash = seed * 10 + rank with rank in 1..10, so shift by one before
        # splitting: divmod gives (seed, rank - 1).
        first_seed, first_rank = divmod(first_card_hash - 1, 10)
        second_seed, second_rank = divmod(second_card_hash - 1, 10)

        if first_seed == second_seed:
            power = self._RANK_POWER
            if power[second_rank + 1] > power[first_rank + 1]:
                return second_card_hash
            return first_card_hash

        if second_seed == self.briscola.seed:
            return second_card_hash

        # Either the first card is a briscola, or neither is and the card
        # that was led wins (same rule as `fight`).
        return first_card_hash

    def _opponent_card(self):
        """Remove and return a random card from player 2's hand."""
        return self.p2_hand.pop(random.randrange(len(self.p2_hand)))

    def _opponent_lead(self, state):
        """Let player 2 open the current trick and mark its card in `state`."""
        if not self.p2_hand:
            return

        self.played_card = self._opponent_card()
        if self.turn_number < state.shape[1]:
            state[self._card_index(self.played_card), self.turn_number] = CardState.PLAYED

    def reset(self):
        """
        Reset the current environement

        Returns:
            np.ndarray : The initial state
        """
        # Defined before dealing because draw_card reads them
        self.briscola = None
        self.briscola_card = None
        self.briscola_drawn = False

        # Create a deck and give cards to each player
        self.deck = self.create_deck()

        self.p1_hand = [self.draw_card() for _ in range(3)]
        self.p2_hand = [self.draw_card() for _ in range(3)]

        # Choose the briscola of the game (it will be the last card drawn)
        self.briscola = self.draw_card()
        self.briscola_card = self.briscola  # same card, kept under both names

        # Initialize the scores
        self.p1_score, self.p2_score = 0, 0

        # Choose who start (0:p1, 1:p2)
        self.turn = random.randint(0, 1)
        self.turn_number = 0

        # Create the state to be given to the network
        self.state = np.full(self._STATE_SHAPE, CardState.NOT_IN_GAME_YET, dtype=object)

        self.state[self._card_index(self.briscola), :] = CardState.BRISCOLA

        for card in self.p1_hand:
            self.state[self._card_index(card), :] = CardState.IN_AI_HAND

        # Card currently on the field, played by p2 (None when p1 opens the trick)
        self.played_card = None
        if self.turn == 1:
            self._opponent_lead(self.state)

        return self.state

    def step(self, action):
        """
        Play the card at index `action` of player 1's hand and resolve the trick

        Args:
            action : int, index of the card in player 1's hand

        Returns:
            tuple : (new state, reward, episode ended). The reward is the
            points of the trick, negative when player 2 wins it, or -100
            (state unchanged) when `action` is not a card of the hand
        """
        # Determine if the player choose a card number he don't have in the hand
        if not 0 <= action < len(self.p1_hand):
            return self.state, -100, False

        # Generate a new state
        new_state = np.copy(self.state)
        column = self.turn_number

        # Take the chosen card out of the hand and set it as played
        card = self.p1_hand.pop(action)
        new_state[self._card_index(card), column] = CardState.PLAYED

        if self.played_card is None:
            # Player 1 opened the trick: player 2 replies
            opponent_card = self._opponent_card()
            new_state[self._card_index(opponent_card), column] = CardState.PLAYED
            p1_won = self.fight(card, opponent_card) is card
        else:
            # Player 2 opened the trick: player 1's card is the reply
            opponent_card = self.played_card
            p1_won = self.fight(opponent_card, card) is card

        round_points = card.get_value() + opponent_card.get_value()

        # Update scores, assign rewards; the winner of the trick draws first
        if p1_won:
            self.p1_score += round_points
            reward = round_points
            draw_order = (self.p1_hand, self.p2_hand)
        else:
            self.p2_score += round_points
            reward = -round_points
            draw_order = (self.p2_hand, self.p1_hand)

        for hand in draw_order:
            drawn = self.draw_card()
            if drawn is not None:
                hand.append(drawn)

        # Reset for next round: the winner of the trick opens the next one
        self.played_card = None
        self.turn = 0 if p1_won else 1
        self.turn_number += 1

        # Check for game termination
        episode_ended = not self.p1_hand and not self.p2_hand

        # Update the state
        if self.turn_number < new_state.shape[1]:
            for held in self.p1_hand:
                new_state[self._card_index(held), self.turn_number] = CardState.IN_AI_HAND

        if not episode_ended and self.turn == 1:
            self._opponent_lead(new_state)

        self.state = new_state

        # Return the time step
        return new_state, reward, episode_ended



In [None]:
# --- Training schedule ---
total_episodes = 100_000  # number of episodes to play
max_steps = 100           # upper bound on steps within one episode

# --- Q-learning update ---
learning_rate = 0.1       # step size of the update
gamma = 0.99              # discount applied to future rewards

# --- Epsilon-greedy exploration ---
max_epsilon = 1.0         # exploration probability at the start
min_epsilon = 0.01        # floor of the exploration probability
decay_rate = 0.001        # exponential decay rate of the exploration probability
epsilon = 1.0             # current exploration probability

In [None]:
# Create the network: it maps the (40, 20) state matrix to one score for
# each of the 3 cards that can be played from the hand.

inputs = layers.Input(shape=(40, 20))

# Dense only transforms the last axis, so without flattening the model would
# output one 3-way distribution per card row, i.e. (batch, 40, 3).
x = layers.Flatten()(inputs)
x = layers.Dense(128, activation='relu')(x)
x = layers.Dense(128, activation='relu')(x)

outputs = layers.Dense(3, activation='softmax')(x)

# NOTE(review): softmax + categorical cross-entropy treats this as a
# classifier; confirm this is the intended head/loss for the RL setup.
model = keras.Model(inputs=inputs, outputs=outputs)
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])


In [None]:
# Tabular Q-learning with an epsilon-greedy policy.

# Number of actions: index of the card to play from a hand of at most 3 cards
n_actions = 3

# Q-table: maps a hashable encoding of the state to the Q-values of each action.
# A dict is used because the state is a matrix and cannot index an array.
qtable = {}

# List of rewards
rewards = []


def _state_key(state):
    """Turn a state matrix into a hashable key (enum cells become their value)."""
    cells = np.asarray(state, dtype=object).flat
    return tuple(getattr(cell, "value", cell) for cell in cells)


def _q_values(key):
    """Q-values of the state `key`, created as zeros on first visit."""
    return qtable.setdefault(key, np.zeros(n_actions))


for episode in range(total_episodes):
    # Create a fresh environment (the constructor resets the game)
    env = Briscola()
    state = _state_key(env.state)
    total_rewards = 0

    for step in range(max_steps):
        # Only indexes of cards actually held are valid actions
        n_valid = min(len(env.p1_hand), n_actions)
        if n_valid == 0:
            break

        exp_exp_tradeoff = random.uniform(0, 1)

        # If the random number is greater than epsilon take the biggest Q value
        # for this state, otherwise a random valid action
        if exp_exp_tradeoff > epsilon:
            action = int(np.argmax(_q_values(state)[:n_valid]))
        else:
            action = random.randrange(n_valid)

        # Star-unpack so an extra trailing `info` element is tolerated
        new_state, reward, done, *_ = env.step(action)
        new_state = _state_key(new_state)

        # Update the qtable, the rewards and the current state
        q_state = _q_values(state)
        q_state[action] += learning_rate * (
            reward + gamma * np.max(_q_values(new_state)) - q_state[action]
        )
        total_rewards += reward
        state = new_state

        # Check finish
        if done:
            break

    # Reduce the epsilon factor
    epsilon = min_epsilon + (max_epsilon - min_epsilon)*np.exp(-decay_rate*episode)
    rewards.append(total_rewards)

print ("Score over time: " +  str(sum(rewards)/total_episodes))
# The table is keyed by 800-element tuples, so report its size instead of dumping it
print(f"Distinct states visited: {len(qtable)}")