<a href="https://colab.research.google.com/github/IGieckI/BriscolAI/blob/tensorflow/BriscolAI_TF.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Install third-party packages into the notebook runtime; gym is pinned to version 0.25.2.
!pip install tensorflow
!pip install gym==0.25.2
!pip install keras
!pip install keras-rl2

In [None]:
import numpy as np
import random
import gym

In [None]:
class Card:
    """A card of the 40-card deck used to play Briscola.

    A card is identified by a rank (1 = Ace ... 10 = King) and a seed (1 ... 4).
    Its integer hash is ``seed * 10 + rank``; other code passes cards around as
    that hash, and ``get_value_from_hash`` decodes it again.
    """

    # Points scored for capturing a card, keyed by rank.
    _POINT_VALUES = {
        1: 11,
        2: 0,
        3: 10,
        4: 0,
        5: 0,
        6: 0,
        7: 0,
        8: 2,
        9: 3,
        10: 4,
    }

    # Trick-taking strength keyed by rank: a higher number beats a lower one.
    # Unlike the point values, this gives a strict order among the zero-point
    # cards (e.g. a Seven beats a Two).
    _STRENGTH = {
        1: 9,
        2: 0,
        3: 8,
        4: 1,
        5: 2,
        6: 3,
        7: 4,
        8: 5,
        9: 6,
        10: 7,
    }

    # Display names keyed by rank / seed.
    _RANK_NAMES = {
        1: "Ace",
        2: "Two",
        3: "Three",
        4: "Four",
        5: "Five",
        6: "Six",
        7: "Seven",
        8: "Knave",
        9: "Knight",
        10: "King",
    }
    _SEED_NAMES = {
        1: "Cups",
        2: "Denari",
        3: "Swords",
        4: "Sticks",
    }

    def __init__(self, rank, seed):
        self.rank = rank
        self.seed = seed

    def __str__(self):
        return f"{self.get_rank()} of {self.get_seed()}"

    def __eq__(self, other):
        # Defined together with __hash__ so that equal cards also hash equally.
        if not isinstance(other, Card):
            return NotImplemented
        return (self.rank, self.seed) == (other.rank, other.seed)

    def __hash__(self):
        # int() matters when rank/seed are NumPy integers (e.g. produced by
        # np.arange): hash() only accepts a built-in int from __hash__.
        return int(self.seed) * 10 + int(self.rank)

    def get_value(self):
        """
        Get the point value of the card based on its rank

        Returns:
            int: The points the card is worth, or None for an unknown rank
        """
        return self._POINT_VALUES.get(self.rank)

    @staticmethod
    def get_value_from_hash(hash):
        """
        Get the point value of a card from its hash (``seed * 10 + rank``)

        Args:
            hash : int, The hash of the card (the parameter name shadows the
                   builtin and is kept only for backward compatibility)

        Returns:
            int: The points the card is worth
        """
        # Ranks run from 1 to 10, so a King hashes to a multiple of 10 and a
        # plain ``hash % 10`` would decode it as rank 0; shift by one first.
        rank = (hash - 1) % 10 + 1
        return Card._POINT_VALUES[rank]

    def get_rank(self):
        """Return the display name of the rank, or None for an unknown rank."""
        return self._RANK_NAMES.get(self.rank)

    def get_seed(self):
        """Return the display name of the seed, or None for an unknown seed."""
        return self._SEED_NAMES.get(self.seed)

    def compare_cards(self, other_card):
        """
        Compare two cards to determine the winner ONLY based on their ranks NOT the seed

        The comparison uses the trick-taking strength of the ranks
        (Ace > Three > King > Knight > Knave > Seven > ... > Two).

        Args:
            other_card : Card, The other card to compare

        Returns:
            Card: The winner card (``other_card`` when both have the same strength)
        """
        if self._STRENGTH[self.rank] > self._STRENGTH[other_card.rank]:
            return self
        return other_card

In [None]:
class Briscola(py_environment.PyEnvironment):
    """Two-player Briscola match driven through ``reset`` / ``step``.

    Cards are exchanged with the caller as integer hashes (``seed * 10 + rank``,
    the value of ``hash(card)``). ``turn`` is 0 when player 1 must play and 1
    when player 2 must play; whoever is to move plays the card passed to
    ``step``. Rewards are reported from player 1's point of view.

    NOTE(review): ``py_environment``, ``TimeStep`` and ``StepType`` are not
    imported in the visible notebook. They look like the TF-Agents names
    (``tf_agents.environments.py_environment`` and
    ``tf_agents.trajectories.time_step``) - confirm and import them before
    running this cell. If the base class is the TF-Agents one, it presumably
    also requires ``action_spec`` / ``observation_spec`` - verify.
    """

    # Trick-taking strength keyed by rank: a higher number beats a lower one.
    _POWER = {
        1: 9,
        2: 0,
        3: 8,
        4: 1,
        5: 2,
        6: 3,
        7: 4,
        8: 5,
        9: 6,
        10: 7,
    }

    # Discount attached to every time step (example value kept from the original code).
    _DISCOUNT = 0.0

    def __init__(self):
        """
        Generate the deck, choose a briscola and give three cards each player
        """
        super().__init__()
        self.reset()

    def draw_card(self):
        """
        Take the next card from the deck; once the deck is empty, hand out the
        face-up briscola card exactly once.

        Returns:
            Card : The drawn card, or None when nothing is left to draw
        """
        if self.deck:
            return self.deck.pop(0)

        if not self.briscola_drawn:
            self.briscola_drawn = True
            return self.briscola_card

        return None

    def create_deck(self):
        """Return a freshly shuffled list with one Card per (rank, seed) pair."""
        # Built-in ints (rather than NumPy integers) keep hash(card) a plain int.
        deck = [Card(rank, seed) for rank in range(1, 11) for seed in range(1, 5)]
        random.shuffle(deck)

        return deck

    def fight(self, first_card, second_card):
        """
        Tells who win between the two cards

        Args:
            first_card : Card, first card played
            second_card : Card, second card played

        Returns:
            Card : The winner Card
        """
        if first_card.seed == second_card.seed:
            if self._POWER[first_card.rank] > self._POWER[second_card.rank]:
                return first_card
            return second_card

        if second_card.seed == self.briscola_card.seed:
            return second_card

        # Either the first card is the only trump, or the second card did not
        # follow the led seed: the first card holds the trick.
        return first_card

    @staticmethod
    def _split_hash(card_hash):
        """Decode a card hash (``seed * 10 + rank``, rank in 1..10) into (seed, rank)."""
        # Shift by one so that a King (rank 10) does not spill into the next seed.
        return (card_hash - 1) // 10, (card_hash - 1) % 10 + 1

    def _card_points(self, card_hash):
        """Return the point value of the card identified by ``card_hash``."""
        seed, rank = self._split_hash(card_hash)
        return Card(rank, seed).get_value()

    def fight_hash(self, first_card_hash, second_card_hash):
        """
        Tells who win between the two cards

        Args:
            first_card_hash : First card played's hash
            second_card_hash : Second card played's hash

        Returns:
            int : The winner Card's hash
        """
        first_seed, first_rank = self._split_hash(first_card_hash)
        second_seed, second_rank = self._split_hash(second_card_hash)

        if first_seed == second_seed:
            if self._POWER[first_rank] > self._POWER[second_rank]:
                return first_card_hash
            return second_card_hash

        if second_seed == self.briscola_card.seed:
            return second_card_hash

        # Same rule as fight(): with different seeds and no trump on the second
        # card, the first card wins.
        return first_card_hash

    def _hash_state(self):
        """Return a hash of the *contents* of ``self.state``."""
        # hash(tuple(dict)) would only hash the dict keys, which never change.
        return hash((
            tuple(hash(card) for card in self.state["p1_hand"]),
            hash(self.state["briscola"]),
            self.state["turn"],
            self.state["played_card"],
            tuple(self.state["played_cards"]),
        ))

    def _reset(self):
        """Deal a new game and return its first time step."""
        # Create a deck and give cards to each player
        self.deck = self.create_deck()

        # Nothing may be handed out as briscola until it has been turned up below.
        self.briscola_card = None
        self.briscola_drawn = True

        self.p1_hand = [self.draw_card() for _ in range(3)]
        self.p2_hand = [self.draw_card() for _ in range(3)]

        # Choose the briscola of the game and set a variable to check if the briscola (last card of the game) was drawn
        self.briscola_card = self.draw_card()
        self.briscola_drawn = False

        # Initialize the scores
        self.p1_score, self.p2_score = 0, 0

        # Choose who start (0:p1, 1:p2)
        self.turn = random.randint(0, 1)

        self.state = {"p1_hand": self.p1_hand,
                      "briscola": self.briscola_card,
                      "turn": self.turn,
                      "played_card": None,
                      "played_cards": []}

        # Save the hash of the state for better performance with the network
        self.hashed_state = self._hash_state()

        # Actions are represented by the (hashes of the) cards in player 1's hand
        self.actions = [hash(card) for card in self.p1_hand]

        self.episode_ended = False

        return TimeStep(StepType.FIRST, 0.0, self._DISCOUNT, self.state)

    def reset(self):
        """Return initial_time_step."""
        self._current_time_step = self._reset()
        return self._current_time_step

    def _step(self, action):
        """Play ``action`` for the player to move and return the resulting time step."""
        hand = self.p1_hand if self.turn == 0 else self.p2_hand

        # The played card leaves the hand; without this the game never ends.
        for index, card in enumerate(hand):
            if hash(card) == action:
                hand.pop(index)
                break
        else:
            raise ValueError(f"Card {action} is not in the hand of the player to move")

        self.state["played_cards"].append(action)
        reward = 0
        led_card = self.state["played_card"]

        if led_card is None:
            # First card of the trick: remember it and pass the turn
            self.state["played_card"] = action
            self.turn = (self.turn + 1) % 2
        else:
            # Second card of the trick: the led card is the "first" card
            winner = self.fight_hash(led_card, action)
            round_points = self._card_points(led_card) + self._card_points(action)

            # ``action`` was played by the player to move, the led card by the other one
            p1_wins = (winner == action) == (self.turn == 0)

            # Update scores and assign rewards
            if p1_wins:
                self.p1_score += round_points
                reward = round_points
            else:
                self.p2_score += round_points
                reward = -round_points

            # Reset for next round
            self.state["played_card"] = None

            # Draw new cards if possible, the winner of the trick first
            if p1_wins:
                draw_order = (self.p1_hand, self.p2_hand)
            else:
                draw_order = (self.p2_hand, self.p1_hand)
            for drawing_hand in draw_order:
                drawn = self.draw_card()
                if drawn is not None:
                    drawing_hand.append(drawn)

            # The winner of the trick leads the next one
            self.turn = 0 if p1_wins else 1

        self.state["turn"] = self.turn

        # Check for game termination
        self.episode_ended = (len(self.p1_hand) == 0 and len(self.p2_hand) == 0)

        # Prepare the next state
        self.state["p1_hand"] = self.p1_hand
        self.hashed_state = self._hash_state()
        self.actions = [hash(card) for card in self.p1_hand]

        # Return the time step
        return TimeStep(StepType.MID if not self.episode_ended else StepType.LAST,
                        reward, self._DISCOUNT,
                        self.state)

    def step(self, action):
        """
        Apply action and return new time_step.

        Args:
            action : int, hash of the card played by the player to move

        Returns:
            TimeStep : The new time step; its reward is the trick's points
                       (positive if player 1 took them, negative otherwise)
                       when the trick was completed, else 0

        Raises:
            ValueError : If the card is not in the hand of the player to move
        """
        if self._current_time_step is None:
            return self.reset()

        self._current_time_step = self._step(action)
        return self._current_time_step



In [None]:
action_size = len(Briscola.getActions())
state_size = len(env.getStates())

qtable = np.zeros((state_size, action_size))

In [None]:
total_episodes = 100000        # Total episodes
learning_rate = 0.1           # Learning rate
max_steps = 100                # Max steps per episode
gamma = 0.99                  # Discounting rate

# Exploration parameters
epsilon = 1.0                 # Exploration rate
max_epsilon = 1.0             # Exploration probability at start
min_epsilon = 0.01            # Minimum exploration probability
decay_rate = 0.001             # Exponential decay rate for exploration prob

In [None]:
# List of rewards
rewards = []

for episode in range(total_episodes):
    # Reset the environment
    state = Briscola()
    step = 0
    done = False
    total_rewards = 0

    for step in range(max_steps):
        exp_exp_tradeoff = random.uniform(0, 1)

        ## If the random si greater than epsilon i'll take the biggest Q value for this state otherwise a random one
        if exp_exp_tradeoff > epsilon:
            action = np.argmax(qtable[state,:])
        else:
            action = Briscola.getValidActions.sample()

        new_state, reward, done, info = Briscola.step(action)

        # Update the qtable, the rewards and the current state
        qtable[state, action] = qtable[state, action] + learning_rate * (reward + gamma * np.max(qtable[new_state, :]) - qtable[state, action])
        total_rewards += reward
        state = new_state

        # Check finish
        if done == True:
            break

    # Reduce of the espsilon factor
    epsilon = min_epsilon + (max_epsilon - min_epsilon)*np.exp(-decay_rate*episode)
    rewards.append(total_rewards)

print ("Score over time: " +  str(sum(rewards)/total_episodes))
print(qtable)