<a href="https://colab.research.google.com/github/IGieckI/BriscolAI/blob/main/BriscolAI.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import random

In [None]:
class Card:
    """
    A single Briscola playing card, identified by its rank and its seed (suit).
    """

    # Points each rank contributes to the score of whoever captures it.
    POINT_VALUES = {
        "Ace": 11,
        "Two": 0,
        "Three": 10,
        "Four": 0,
        "Five": 0,
        "Six": 0,
        "Seven": 0,
        "Knave": 2,
        "Knight": 3,
        "King": 4,
    }

    # Trick-taking strength inside one seed, weakest first. Point values alone
    # cannot rank the cards because Two/Four/Five/Six/Seven are all worth 0.
    RANK_ORDER = (
        "Two", "Four", "Five", "Six", "Seven",
        "Knave", "Knight", "King", "Three", "Ace",
    )

    def __init__(self, rank, seed):
        self.rank = rank
        self.seed = seed

    def __str__(self):
        return f"{self.rank} of {self.seed}"

    def __repr__(self):
        return f"Card({self.rank!r}, {self.seed!r})"

    def get_value(self):
        """
        Get the point value of the card based on its rank

        Returns:
            int: The points of the card, or None when the rank is unknown
        """
        return self.POINT_VALUES.get(self.rank)

    def _strength(self):
        """
        Position of the rank in RANK_ORDER (higher beats lower), -1 if unknown
        """
        try:
            return self.RANK_ORDER.index(self.rank)
        except ValueError:
            return -1

    def compare_cards(self, other_card):
        """
        Compare two cards of the same seed to determine the winner based on their ranks

        Args:
            other_card : Card, The other card to compare

        Returns:
            Card: The winner card (other_card when both have the same strength)
        """
        if self._strength() > other_card._strength():
            return self
        return other_card

def create_deck():
    """
    Build the 40-card deck: one Card for every (rank, seed) combination.

    The list is ordered rank by rank, with the four seeds cycling inside each
    rank. It is not shuffled here.

    Returns:
        list of Card: The freshly built deck
    """
    all_seeds = ("Cups", "Denari", "Swords", "Sticks")
    all_ranks = ("Ace", "Two", "Three", "Four", "Five",
                 "Six", "Seven", "Knave", "Knight", "King")

    cards = []
    for rank in all_ranks:
        for seed in all_seeds:
            cards.append(Card(rank, seed))
    return cards

In [None]:
class Briscola():
    """
    Two-player Briscola environment for a reinforcement-learning agent.

    The agent ("ai") picks its card through step(); the opponent ("player")
    answers with a uniformly random card from its own hand. Every call to
    step() resolves one complete trick.

    Attributes:
        deck : list of Card, cards still to be drawn (the briscola is the last one)
        briscola : Card, the face-up card whose seed is the trump seed
        state : dict, current game state (hands, scores, turn, last trick)
        actions : tuple of str, symbolic names of the three hand slots
        history : list of dict, snapshots of every state reached in this game
    """

    HAND_SIZE = 3

    def __init__(self):
        """
        Generate the deck, choose a briscola and give three cards each player
        """
        self.actions = ("card1", "card2", "card3")
        self.reset()

    def reset(self):
        """
        Start a new game: shuffle a fresh deck, pick the briscola, deal both
        hands, clear the scores and restart the history.

        Returns:
            int : index of the initial state in the history list (always 0)
        """
        self.deck = create_deck()
        random.shuffle(self.deck)

        # The briscola is shown face up but stays in the deck as the very last
        # card to be drawn.
        self.briscola = self.deck[-1]

        ai_hand = [self.draw_card() for _ in range(self.HAND_SIZE)]
        player_hand = [self.draw_card() for _ in range(self.HAND_SIZE)]

        self.state = {"ai_hand": ai_hand,
                      "player_hand": player_hand,
                      "briscola": self.briscola,
                      "played_card": None,
                      "turn": "ai" if random.randint(0, 1) == 0 else "player",
                      "ai_score": 0,
                      "player_score": 0}

        self.history = [self._snapshot()]
        return 0

    def _snapshot(self):
        """
        Copy of the current state whose hand lists are independent of the live
        ones, so later tricks do not rewrite the entries stored in history.
        """
        snapshot = dict(self.state)
        snapshot["ai_hand"] = list(self.state["ai_hand"])
        snapshot["player_hand"] = list(self.state["player_hand"])
        return snapshot

    def _action_to_index(self, action, hand_size):
        """
        Translate an action into a valid position inside a hand of hand_size cards.

        Accepts either a hand-slot number (0, 1, 2) or one of the names listed
        in self.actions. A slot beyond the end of the hand is clamped to the
        last card, because hands shrink during the final tricks.
        """
        if isinstance(action, str):
            if action not in self.actions:
                raise ValueError(f"unknown action: {action!r}")
            index = self.actions.index(action)
        else:
            index = int(action)

        if index < 0:
            raise ValueError(f"action must not be negative: {action!r}")
        return min(index, hand_size - 1)

    def draw_card(self):
        """
        Each player draw from the deck taking out cards from the deck list

        Returns:
            Card : the card on top of the deck, or None when the deck is empty
        """
        if not self.deck:
            return None
        return self.deck.pop(0)

    def fight(self, first_card, second_card):
        """
        Tells who win between the two cards

        Args:
            first_card : Card, first card played (the one that leads the trick)
            second_card : Card, second card played

        Returns:
            Card : The winner Card
        """
        if first_card.seed == second_card.seed:
            return first_card.compare_cards(second_card)

        if first_card.seed == self.briscola.seed:
            return first_card

        if second_card.seed == self.briscola.seed:
            return second_card

        # Neither card is a briscola and the second one did not follow the led
        # seed, so the leading card takes the trick.
        return first_card

    def step(self, action):
        """
        Play one trick: the ai plays the chosen card, the opponent plays a
        random card, the winner takes the points and both players draw.

        Args:
            action : int or str, the hand slot to play (0, 1, 2) or its name
                     from self.actions ("card1", "card2", "card3")

        Returns:
            new_state : int, new state reached given the picked action (index in
                        history list), or the string "terminal" when the game is over
            reward : int, points gained by the ai in this trick; at the end of
                     the game 1 / -1 / 0 for an ai win / loss / draw

        Raises:
            RuntimeError : if the game is already over
            ValueError : if the action is negative or an unknown name
        """
        state = self.state
        ai_hand = state["ai_hand"]
        player_hand = state["player_hand"]

        if not ai_hand or not player_hand:
            raise RuntimeError("the game is over, call reset() to start a new one")

        prev_ai_score = state["ai_score"]

        ai_card = ai_hand.pop(self._action_to_index(action, len(ai_hand)))
        player_card = player_hand.pop(random.randrange(len(player_hand)))

        # fight() treats its first argument as the card that led the trick.
        if state["turn"] == "ai":
            winner = self.fight(ai_card, player_card)
        else:
            winner = self.fight(player_card, ai_card)

        # The winner of the trick collects the points of both cards.
        points = ai_card.get_value() + player_card.get_value()
        ai_won = winner is ai_card
        if ai_won:
            state["ai_score"] += points
        else:
            state["player_score"] += points

        # Update the played_card and turn in the state: the winner leads next.
        state["played_card"] = (ai_card, player_card)
        state["turn"] = "ai" if ai_won else "player"

        # The winner draws first; nothing is drawn once the deck is empty.
        draw_order = (ai_hand, player_hand) if ai_won else (player_hand, ai_hand)
        for hand in draw_order:
            drawn = self.draw_card()
            if drawn is not None:
                hand.append(drawn)

        # Check if the game is over
        if not ai_hand and not player_hand:
            if state["ai_score"] > state["player_score"]:
                reward = 1
            elif state["ai_score"] < state["player_score"]:
                reward = -1
            else:
                reward = 0

            state["final_ai_score"] = state["ai_score"]
            state["final_player_score"] = state["player_score"]

            self.history.append(self._snapshot())

            # Return "terminal" as the new state to indicate the end of the game
            return "terminal", reward

        # Add the new state to the history
        self.history.append(self._snapshot())

        # The reward is the number of points the ai gained in this trick
        reward = state["ai_score"] - prev_ai_score

        return len(self.history) - 1, reward



In [None]:
import numpy as np

class RLModel:
    """
    Tabular Q-learning agent with an epsilon-greedy policy.

    States are integer indices returned by the environment; the string
    "terminal" marks the end of an episode. Actions are handled internally as
    positions inside `actions`; the element stored at that position is what
    gets passed to `env.step`.

    Args:
        env : environment exposing `history` (a list) and `step(action)`
              returning `(new_state, reward)`; if it also exposes `reset()`,
              that is called at the start of every episode
        actions : sequence, the actions that can be passed to `env.step`
        learning_rate : float, step size of the Q-value update
        discount_factor : float, weight of the estimated future value
        epsilon : float, initial probability of picking a random action
        max_epsilon : float, upper bound used by the epsilon decay
        min_epsilon : float, lower bound used by the epsilon decay
        decay_rate : float, exponential decay rate of epsilon per episode
    """

    def __init__(self, env, actions, learning_rate=0.1, discount_factor=0.9, epsilon=1.0, max_epsilon=1.0, min_epsilon=0.01, decay_rate=0.01):
        self.env = env
        self.actions = actions
        self.learning_rate = learning_rate
        self.discount_factor = discount_factor
        self.epsilon = epsilon
        self.max_epsilon = max_epsilon
        self.min_epsilon = min_epsilon
        self.decay_rate = decay_rate
        # One row per known state; more rows are added on demand.
        self.Q = np.zeros([len(env.history), len(actions)])

    @staticmethod
    def _is_terminal(state):
        """Tell whether `state` is the "terminal" marker rather than an index."""
        return isinstance(state, str) and state == "terminal"

    def _ensure_state(self, state):
        """Append zero rows to the Q-table until row `state` exists."""
        missing = state + 1 - self.Q.shape[0]
        if missing > 0:
            self.Q = np.vstack([self.Q, np.zeros((missing, self.Q.shape[1]))])

    def choose_action(self, state):
        """
        Pick an action for `state` with the epsilon-greedy rule.

        Returns:
            int : position of the chosen action inside `self.actions`
        """
        self._ensure_state(state)
        if np.random.uniform(0, 1) > self.epsilon:
            return int(np.argmax(self.Q[state, :]))
        # Explore: draw a position, not an element, so the result can always
        # be used both as a Q-table column and as an index into self.actions.
        return int(np.random.randint(len(self.actions)))

    def update_q_table(self, state, action, new_state, reward):
        """
        Apply the Q-learning update to the entry (state, action).

        Args:
            state : int, state in which the action was taken
            action : int, position of the action inside `self.actions`
            new_state : int or "terminal", state reached after the action
            reward : number, reward received for the transition
        """
        self._ensure_state(state)
        if self._is_terminal(new_state):
            # Nothing follows a terminal state, so there is no future value.
            best_next = 0.0
        else:
            self._ensure_state(new_state)
            best_next = np.max(self.Q[new_state, :])

        target = reward + self.discount_factor * best_next
        self.Q[state, action] += self.learning_rate * (target - self.Q[state, action])

    def decay_epsilon(self, episode):
        """Move epsilon exponentially from max_epsilon towards min_epsilon."""
        self.epsilon = self.min_epsilon + (self.max_epsilon - self.min_epsilon) * np.exp(-self.decay_rate * episode)

    def train(self, num_episodes):
        """
        Run `num_episodes` episodes, updating the Q-table after every step and
        decaying epsilon after every episode. Prints the total reward of every
        1000th episode.
        """
        for episode in range(num_episodes):
            # Start from a fresh game when the environment supports it.
            reset = getattr(self.env, "reset", None)
            if callable(reset):
                reset()

            state = len(self.env.history) - 1
            done = False
            total_reward = 0

            while not done:
                action = self.choose_action(state)
                new_state, reward = self.env.step(self.actions[action])
                total_reward += reward

                self.update_q_table(state, action, new_state, reward)

                if self._is_terminal(new_state):
                    done = True
                else:
                    state = new_state

            self.decay_epsilon(episode)

            if episode % 1000 == 0:
                print("Episode:", episode, "Total Reward:", total_reward)

# Initialize the environment and RL model.
# The agent gets one integer action per hand slot listed in env.actions
# (0, 1, 2), so every card in the hand can be played.
env = Briscola()
rl_model = RLModel(env, actions=tuple(range(len(env.actions))))


AttributeError: ignored

In [None]:
# Train the agent for a fixed number of episodes; RLModel.train prints the
# total reward of every 1000th episode.
num_episodes = 10000
rl_model.train(num_episodes)
