<a href="https://colab.research.google.com/github/IGieckI/BriscolAI/blob/main/BriscolAI.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import random

In [None]:
class Card:
    """A single card of the 40-card deck used to play Briscola.

    Attributes:
        rank: Name of the rank, e.g. ``"Ace"`` or ``"Seven"``.
        seed: Name of the suit, e.g. ``"Cups"``.
    """

    # Ranks in the order used to build the deck.
    RANKS = ("Ace", "Two", "Three", "Four", "Five", "Six", "Seven",
             "Knave", "Knight", "King")
    SEEDS = ("Cups", "Denari", "Swords", "Sticks")

    # Points scored by the player who captures a card of the given rank.
    POINT_VALUES = {
        "Ace": 11,
        "Two": 0,
        "Three": 10,
        "Four": 0,
        "Five": 0,
        "Six": 0,
        "Seven": 0,
        "Knave": 2,
        "Knight": 3,
        "King": 4,
    }

    # Ranks from weakest to strongest within one seed.  Point values alone
    # cannot rank the five zero-point cards, so the trick-taking order is
    # spelled out explicitly; it agrees with POINT_VALUES wherever points differ.
    STRENGTH_ORDER = ("Two", "Four", "Five", "Six", "Seven",
                      "Knave", "Knight", "King", "Three", "Ace")

    def __init__(self, rank, seed):
        self.rank = rank
        self.seed = seed

    def __str__(self):
        return f"{self.rank} of {self.seed}"

    def __repr__(self):
        return f"{type(self).__name__}({self.rank!r}, {self.seed!r})"

    def get_value(self):
        """
        Get the point value of the card based on its rank

        Returns:
            int: The points of the card, or None when the rank is unknown
        """
        return self.POINT_VALUES.get(self.rank)

    def _strength(self):
        """Return the position of this card's rank in STRENGTH_ORDER (-1 if unknown)."""
        try:
            return self.STRENGTH_ORDER.index(self.rank)
        except ValueError:
            return -1

    def compare_cards(self, other_card):
        """
        Compare two cards of the same seed to determine the winner based on their ranks

        Args:
            other_card : Card, The other card to compare

        Returns:
            Card: The winner card (other_card when both have the same strength)
        """
        if self._strength() > other_card._strength():
            return self
        return other_card

    @staticmethod
    def create_deck():
        """
        Build the full, unshuffled deck

        Returns:
            list[Card]: One card per (rank, seed) pair, grouped by rank
        """
        return [Card(rank, seed) for rank in Card.RANKS for seed in Card.SEEDS]

In [None]:
class Briscola():
    """Two-player Briscola game used as the learning environment.

    Attributes:
        deck: Cards still to be drawn, drawn from the front of the list.
        briscola: The card drawn first; its seed is the trump seed.
        p1_hand, p2_hand: Cards currently held by each player.
        p1_score, p2_score: Points collected so far by each player.
        state: Dict describing the current situation (see ``__init__``).
        actions: Tuple with the three cards initially dealt to player 1.
        history: List of state snapshots, starting with the initial one.
    """

    def __init__(self):
        """
        Generate the deck, choose a briscola and give three cards each player
        """
        self.deck = Card.create_deck()
        # create_deck() always returns the same order; shuffle so that games differ.
        random.shuffle(self.deck)

        self.p1_hand = []
        self.p2_hand = []

        # Stored on the instance because fight() and drawCard() read it.
        self.briscola = self.drawCard()

        # Player 1 receives three cards first, then player 2.
        for hand in (self.p1_hand, self.p2_hand):
            for _ in range(3):
                hand.append(self.drawCard())

        self.p1_score, self.p2_score = 0, 0

        self.state = {"ai_hand": self.p1_hand,
                      "briscola": self.briscola,
                      "turn": "p1" if random.randint(0, 1) == 0 else "p2",
                      "played_card": None,
                      "played_cards": []}
        self.actions = tuple(self.p1_hand)

        # Copy the list values too, so later changes to the hand or to the
        # played cards do not rewrite the recorded initial state.
        init_state = {key: list(value) if isinstance(value, list) else value
                      for key, value in self.state.items()}
        self.history = [init_state]

    def drawCard(self):
        """
        Each player draw from the deck taking out cards from the deck list

        When a single card is left and the briscola is not in the deck, the
        briscola is appended so that it becomes the last card to be drawn.

        Returns:
            Card : The drawn card, or an empty list when the deck is empty
        """
        if len(self.deck) == 0:
            return []

        # getattr: the very first draw (the briscola itself) happens before
        # the attribute exists.
        briscola = getattr(self, "briscola", None)
        if len(self.deck) == 1 and briscola is not None and briscola not in self.deck:
            self.deck.append(briscola)

        return self.deck.pop(0)

    def fight(self, first_card, second_card):
        """
        Tells who win between the two cards

        Args:
            first_card : Card, first card played
            second_card : Card, second card played

        Returns:
            Card : The winner Card
        """
        # Same seed: the ranks decide.
        if first_card.seed == second_card.seed:
            return first_card.compare_cards(second_card)

        # Different seeds: a card of the briscola seed wins.
        if first_card.seed == self.briscola.seed:
            return first_card

        if second_card.seed == self.briscola.seed:
            return second_card

        # Neither is a briscola: the first card played wins.
        return first_card

    @staticmethod
    def getActions():
        """Return the size of the action space (40)."""
        return 40

    @staticmethod
    def getStates():
        """Return the size of the state space; currently a placeholder returning None."""
        return None

    def getValidActions(self):
        """Return the cards player 1 can play, i.e. the current ``p1_hand`` list."""
        return self.p1_hand

    @staticmethod
    def state(action):
        """Placeholder that returns None.

        Only reachable as ``Briscola.state(action)``: ``__init__`` assigns the
        instance attribute ``self.state``, which takes precedence on instances.
        """
        return None

  and should_run_async(code)


In [None]:
# Dimensions of the Q-table: one row per state, one column per action.
# getActions() already returns a count, so it must not be wrapped in len().
action_size = Briscola.getActions()
state_size = Briscola.getStates()

# getStates() currently returns None, which cannot size an array; stop here
# with an explicit message instead of an obscure failure inside NumPy.
if state_size is None:
    raise NotImplementedError(
        "Briscola.getStates() must return the number of states "
        "before the Q-table can be allocated"
    )

qtable = np.zeros((state_size, action_size))

In [None]:
# --- Q-learning hyper-parameters ---
total_episodes = 100_000  # number of episodes to train for
max_steps = 100           # upper bound on steps within one episode
learning_rate = 0.1       # step size of each Q-value update
gamma = 0.99              # discount applied to future rewards

# --- Epsilon-greedy exploration schedule ---
max_epsilon = 1.0         # exploration probability at the start
min_epsilon = 0.01        # floor of the exploration probability
decay_rate = 0.001        # exponential decay rate of the exploration probability
epsilon = max_epsilon     # current exploration rate, starts at its maximum

In [None]:
# Tabular Q-learning loop with an epsilon-greedy policy.
# Total reward collected in each episode, in episode order.
rewards = []

for episode in range(total_episodes):
    # Start a fresh game for this episode.
    # NOTE(review): `state` is a Briscola instance here, yet it is used below
    # as a row index into `qtable`; an integer state encoding seems to be
    # missing - confirm how states are meant to be numbered.
    state = Briscola()
    step = 0
    done = False
    total_rewards = 0

    for step in range(max_steps):
        # Uniform draw in [0, 1] that decides between exploiting and exploring.
        exp_exp_tradeoff = random.uniform(0, 1)

        # If the draw is greater than epsilon, take the action with the largest
        # Q-value for this state; otherwise take a random action.
        if exp_exp_tradeoff > epsilon:
            action = np.argmax(qtable[state,:])
        else:
            # NOTE(review): `getValidActions` is looked up on the class and is
            # not called; no `sample` attribute is defined for it in the
            # visible source - TODO confirm the intended way to pick a random
            # valid action.
            action = Briscola.getValidActions.sample()

        # NOTE(review): the visible Briscola class defines no `step`; this line
        # expects it to return (new_state, reward, done, info) - TODO confirm.
        new_state, reward, done, info = Briscola.step(action)

        # Q-learning update:
        # Q(s, a) += learning_rate * (reward + gamma * max Q(s', .) - Q(s, a))
        qtable[state, action] = qtable[state, action] + learning_rate * (reward + gamma * np.max(qtable[new_state, :]) - qtable[state, action])
        total_rewards += reward
        state = new_state

        # Stop the episode once the environment reports it is finished.
        if done == True:
            break

    # Decay epsilon exponentially with the episode number, from max_epsilon
    # towards min_epsilon.
    epsilon = min_epsilon + (max_epsilon - min_epsilon)*np.exp(-decay_rate*episode)
    rewards.append(total_rewards)

# Average total reward per episode, followed by the learned Q-table.
print ("Score over time: " +  str(sum(rewards)/total_episodes))
print(qtable)