In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from enum import Enum
from tensorflow import keras
from tensorflow.keras import layers
import random

class Card:
    """
    A single card of the 40-card deck.

    A card is identified by a rank index (0-9, Ace..King) and a seed (suit)
    index (0-3). Cards with the same rank and seed compare equal and share
    the hash ``seed * 10 + rank``, a unique integer in the range 0-39.
    """

    # Points scored when a card is captured, keyed by rank index.
    _POINT_VALUES = {0: 11, 1: 0, 2: 10, 3: 0, 4: 0, 5: 0, 6: 0, 7: 2, 8: 3, 9: 4}

    # Trick-taking strength keyed by rank index (higher wins within a seed).
    # Same ordering as the ``power`` table in ``Briscola.fight_hash``; point
    # values alone cannot be used because five ranks are all worth 0 points.
    _STRENGTH = {0: 9, 1: 0, 2: 8, 3: 1, 4: 2, 5: 3, 6: 4, 7: 5, 8: 6, 9: 7}

    _RANK_NAMES = {
        0: "Ace",
        1: "Two",
        2: "Three",
        3: "Four",
        4: "Five",
        5: "Six",
        6: "Seven",
        7: "Knave",
        8: "Knight",
        9: "King",
    }

    _SEED_NAMES = {
        0: "Cups",
        1: "Denari",
        2: "Swords",
        3: "Sticks",
    }

    def __init__(self, rank: int, seed: int):
        self.rank = int(rank)
        self.seed = int(seed)

    def __str__(self):
        return f"{self.get_rank()} of {self.get_seed()}"

    def __repr__(self):
        return f"Card(rank={self.rank}, seed={self.seed})"

    def __eq__(self, other):
        # Defined together with __hash__ so that equal cards hash equally.
        if not isinstance(other, Card):
            return NotImplemented
        return self.rank == other.rank and self.seed == other.seed

    def __hash__(self):
        return self.seed * 10 + self.rank

    def get_value(self) -> int:
        """
        Get the point value of the card based on its rank.

        Returns 0 for a rank outside 0-9.
        """
        return self._POINT_VALUES.get(self.rank, 0)

    @staticmethod
    def get_value_from_hash(card_hash: int) -> int:
        """
        Get the point value of a card from its hash (``seed * 10 + rank``).
        """
        return Card._POINT_VALUES[card_hash % 10]

    def get_rank(self) -> str:
        """
        Get the rank of the card as a string ("Unknown" if out of range).
        """
        return self._RANK_NAMES.get(self.rank, "Unknown")

    def get_seed(self) -> str:
        """
        Get the seed of the card as a string ("Unknown" if out of range).
        """
        return self._SEED_NAMES.get(self.seed, "Unknown")

    def compare_cards(self, other_card: 'Card') -> 'Card':
        """
        Compare two cards ONLY by rank strength, NOT by seed.

        Returns this card when its rank is strictly stronger, otherwise
        ``other_card`` (so a tie in strength goes to ``other_card``).
        """
        own_strength = self._STRENGTH.get(self.rank, -1)
        other_strength = self._STRENGTH.get(other_card.rank, -1)
        if own_strength > other_strength:
            return self
        return other_card

class CardState(Enum):
    """
    Per-card status codes stored in the game's state matrix.

    The numeric values are written directly into the NumPy state array, where
    each row corresponds to one card (indexed by the card's hash).
    """
    # Default fill value; also used to hide the opponent's hand from a player.
    NOT_IN_GAME_YET = 0
    # The face-up briscola (trump) card.
    BRISCOLA = 1
    # Card currently held by player 1.
    IN_P1_HAND = 2
    # Card currently held by player 2.
    IN_P2_HAND = 3
    # Card played during the current round.
    PLAYED = 4
    # Card played in an earlier round (PLAYED cards are demoted to this when
    # the next round starts).
    PLAYED_IN_PREVIOUS_TURNS = 5

class Agent:
    """
    BriscolAI default Agent using DQN technique.

    The agent keeps two identical networks: ``action_model`` is trained and
    used to pick actions, ``target_model`` provides the bootstrap targets and
    is periodically synchronised with ``action_model``.

    Args:
        gamma: Discount factor applied to the next-state value.
        memory_limit: Maximum number of experiences kept in replay memory.
        target_update_interval: Number of training steps between two
            synchronisations of the target network.
    """
    def __init__(self, gamma: float, memory_limit: int = 1000, target_update_interval: int = 10):
        # Hyper-parameters
        self.gamma = gamma

        def create_model():
            model = keras.Sequential(
                [
                    layers.Input(shape=(40, 40)),
                    layers.Flatten(),
                    layers.Dense(32, activation='relu'),
                    layers.Dense(32, activation='relu'),
                    layers.Dense(32, activation='relu'),
                    layers.Dense(32, activation='relu'),
                    # Q-values are unbounded regression targets, so the head
                    # must be linear: a softmax would squash them into [0, 1].
                    layers.Dense(units=3, activation='linear')
                ]
            )
            # Regress the Q-values with a squared-error loss.
            model.compile(optimizer='adam', loss='mse')
            return model

        # Create the action and target networks
        self.action_model = create_model()
        self.target_model = create_model()
        self.target_model.set_weights(self.action_model.get_weights())

        # Target-network synchronisation bookkeeping
        self.target_update_interval = max(1, int(target_update_interval))
        self.train_steps = 0

        # State variables
        self.previous_state = None
        self.state = None

        # Replay memory as a list of experiences
        self.memory = []
        self.memory_limit = memory_limit

    def set_state(self, state: np.ndarray):
        """
        Set the new state and automatically update the previous state.

        On the very first call the previous state is set to ``state`` too.
        """
        if self.state is None:
            self.previous_state = state
        else:
            self.previous_state = self.state
        self.state = state

    def get_state(self) -> np.ndarray:
        """Return a copy of the current state."""
        return np.copy(self.state)

    def get_previous_state(self) -> np.ndarray:
        """Return the state that was current before the last ``set_state``."""
        return self.previous_state

    def get_action(self, num_actions: "int | None" = None) -> int:
        """
        Return the greedy action for the current state.

        Args:
            num_actions: When given, only the first ``num_actions`` outputs are
                considered, so the result is always a valid index for a hand
                of that size. When omitted, all three outputs compete.
        """
        state_input = self.state.reshape(-1, 40, 40)
        q_values = self.action_model.predict(state_input, verbose=0)[0]
        if num_actions is not None:
            q_values = q_values[:max(1, num_actions)]
        return int(np.argmax(q_values))

    def save_in_memory(self, game_id: int, player: int, state: np.ndarray, action: int, reward: int, new_state: np.ndarray, done: bool):
        """Append one experience to replay memory, trimming it when full."""
        # If memory exceeds the limit, drop the oldest half
        if len(self.memory) >= self.memory_limit:
            self.memory = self.memory[int(self.memory_limit / 2):]
        experience = {
            "game_id": game_id,
            "player": player,
            "current_state": state,
            "action": action,
            "reward": reward,
            "next_state": new_state,
            "done": done
        }
        self.memory.append(experience)

    def update_target_model(self):
        """Copy the weights of the action network into the target network."""
        self.target_model.set_weights(self.action_model.get_weights())

    def train(self, batch_size: int = 30):
        """
        Run one training step on a random batch drawn from replay memory.

        Does nothing until at least ``batch_size`` experiences are stored.
        """
        if len(self.memory) < batch_size:
            return  # not enough samples yet

        # Randomly sample a batch of experiences
        batch_sample = random.sample(self.memory, batch_size)

        # Build training batches
        states = np.array([exp["current_state"] for exp in batch_sample]).reshape(batch_size, 40, 40)
        next_states = np.array([exp["next_state"] for exp in batch_sample]).reshape(batch_size, 40, 40)

        # One forward pass per network instead of one per sample.
        targets = self.action_model.predict(states, verbose=0)
        next_q_values = self.target_model.predict(next_states, verbose=0)

        for i, exp in enumerate(batch_sample):
            target_value = exp["reward"]
            if not exp["done"]:
                # Bootstrap from the target network's best next-state value.
                target_value = target_value + self.gamma * np.amax(next_q_values[i])
            # Only the output of the action actually taken is corrected.
            targets[i][exp["action"]] = target_value

        # Train on the batch
        self.action_model.train_on_batch(states, targets)

        # Periodically refresh the target network so its estimates do not
        # stay frozen at the initial weights.
        self.train_steps += 1
        if self.train_steps % self.target_update_interval == 0:
            self.update_target_model()

class Briscola:
    """
    Two-player Briscola environment.

    Player 1 is index 0 and player 2 is index 1. The board is tracked in
    ``self.state``, a 40x40 integer matrix: each row is one card (indexed by
    the card's hash) and each column is one play (indexed by the turn number).
    Cells hold ``CardState`` values.
    """

    # Trick-taking strength keyed by rank index (higher wins within a seed).
    _POWER = {
        0: 9,  # Ace
        1: 0,  # Two
        2: 8,  # Three
        3: 1,  # Four
        4: 2,  # Five
        5: 3,  # Six
        6: 4,  # Seven
        7: 5,  # Knave
        8: 6,  # Knight
        9: 7,  # King
    }

    def __init__(self):
        self.reset()

    def __str__(self):
        # The face-up briscola is the last card of the deck, so it only counts
        # while nobody has drawn it yet.
        remaining = len(self.deck) + (0 if self.briscola_drawn else 1)
        out = (
            f"Cards in the deck: {remaining} \n"
            f"Briscola: {self.briscola_card} \n"
            f"Played Card: {self.played_card}\n\n"
            "Your hand:\n"
        )
        out += "".join(f"{idx}) {card}\n" for idx, card in enumerate(self.p2_hand))
        return out

    def get_P1_state(self) -> np.ndarray:
        """Return a copy of the state with player 2's hand hidden."""
        p1_state = np.copy(self.state)
        p1_state[p1_state == CardState.IN_P2_HAND.value] = CardState.NOT_IN_GAME_YET.value
        return p1_state

    def get_P2_state(self) -> np.ndarray:
        """Return a copy of the state with player 1's hand hidden."""
        p2_state = np.copy(self.state)
        p2_state[p2_state == CardState.IN_P1_HAND.value] = CardState.NOT_IN_GAME_YET.value
        return p2_state

    @staticmethod
    def print_state(state):
        """Utility function to save a state as csv (written to ``data.csv``)."""
        df = pd.DataFrame(state)
        df.to_csv('data.csv', index=False)

    def draw_card(self) -> "Card":
        """
        Draw the next card, removing it from the deck list.

        Once the deck is empty the briscola card itself is handed out and
        ``briscola_drawn`` is set.
        """
        if len(self.deck) == 0:
            self.briscola_drawn = True
            return self.briscola_card
        return self.deck.pop(0)

    def create_deck(self) -> list:
        """
        Create a new deck with cards in random order.
        """
        deck = [Card(rank, seed) for rank in range(10) for seed in range(4)]
        np.random.shuffle(deck)
        return deck

    def _second_card_wins(self, first_rank: int, first_seed: int, second_rank: int, second_seed: int) -> bool:
        """Single source of truth for the trick rule shared by fight/fight_hash."""
        if first_seed == second_seed:
            # Same seed: the first card needs strictly more power to hold.
            return not self._POWER.get(first_rank, -1) > self._POWER.get(second_rank, -1)
        trump_seed = self.briscola_card.seed
        if first_seed == trump_seed:
            return False
        # Different seeds: the second card only wins by being a briscola.
        return second_seed == trump_seed

    def fight(self, first_card: "Card", second_card: "Card"):
        """
        Tells who wins between the two cards.

        Uses the same strength ordering as ``fight_hash`` and returns the
        winning card object.
        """
        if self._second_card_wins(first_card.rank, first_card.seed, second_card.rank, second_card.seed):
            return second_card
        return first_card

    def fight_hash(self, first_card_hash: int, second_card_hash: int):
        """
        Determines the winning card based on their hash.
        Returns the hash of the winning card.
        """
        # A hash is seed * 10 + rank.
        first_seed, first_rank = divmod(first_card_hash, 10)
        second_seed, second_rank = divmod(second_card_hash, 10)
        if self._second_card_wins(first_rank, first_seed, second_rank, second_seed):
            return second_card_hash
        return first_card_hash

    def reset(self, ai_turn: bool = None):
        """
        Reset the current environment.

        Args:
            ai_turn: Converted with ``int()`` into the index of the player who
                leads the first round; ``None`` lets player index 0 start.
        """
        self.deck = self.create_deck()
        self.briscola_drawn = False
        self.p1_hand = [self.draw_card() for _ in range(3)]
        self.p2_hand = [self.draw_card() for _ in range(3)]
        self.briscola_card = self.draw_card()

        # Initialize scores
        self.p1_score, self.p2_score = 0, 0
        # Choose who starts
        self.turn = 0 if ai_turn is None else int(ai_turn)
        self.turn_number = 0

        # Create the state: a 40x40 board
        self.state = np.full((40, 40), CardState.NOT_IN_GAME_YET.value)
        self.state[hash(self.briscola_card), :] = CardState.BRISCOLA.value

        for card in self.p1_hand:
            self.state[hash(card), 0] = CardState.IN_P1_HAND.value
        for card in self.p2_hand:
            self.state[hash(card), 0] = CardState.IN_P2_HAND.value

        self.played_card = None
        self.episode_ended = False

    def step(self, action: int):
        """
        Apply action and return new state, reward, and done flag.

        Args:
            action: Index of the card to play in the current player's hand.

        Returns:
            ``(state, reward, done)``. ``reward`` is -10000 for an invalid
            index (nothing else changes), 0 after the first card of a round,
            and the total points of the trick after the second card. The trick
            points are returned regardless of who won; after the call
            ``self.turn`` is the index of the player who moves next (the trick
            winner once a round is closed).
        """
        # Check for invalid action
        current_hand = self.p1_hand if self.turn == 0 else self.p2_hand
        if action < 0 or action >= len(current_hand):
            return self.state, -10000, False

        column = self.turn_number
        # Copy previous state column (simulate time progression)
        if column > 0:
            self.state[:, column] = self.state[:, column - 1]

        chosen_card = current_hand[action]
        reward = 0
        # After the first card of a round the opponent moves next.
        winner = (self.turn + 1) % 2
        closes_round = column % 2 != 0

        if not closes_round:
            # First card of the round: cards of the previous trick become old.
            self.state[self.state[:, column] == CardState.PLAYED.value, column] = CardState.PLAYED_IN_PREVIOUS_TURNS.value
            self.played_card = chosen_card
        else:
            # Second card: decide winner based on fight_hash
            winning_hash = self.fight_hash(hash(self.played_card), hash(chosen_card))
            winner = self.turn if winning_hash == hash(chosen_card) else (self.turn + 1) % 2
            reward = Card.get_value_from_hash(hash(chosen_card)) + Card.get_value_from_hash(hash(self.played_card))

        # Mark the played card in the state
        self.state[hash(chosen_card), column] = CardState.PLAYED.value

        if closes_round:
            # Decide whether to draw BEFORE drawing: draw_card() flips
            # briscola_drawn on the very last draw, and that draw must still be
            # recorded in the state.
            cards_available = not self.briscola_drawn
            if winner == 0:
                self.p1_score += reward
                draw_order = (self.p1_hand, self.p2_hand)
            else:
                self.p2_score += reward
                draw_order = (self.p2_hand, self.p1_hand)
            if cards_available:
                # The trick winner draws first.
                for hand in draw_order:
                    hand.append(self.draw_card())
                self.state[hash(self.p1_hand[-1]), column] = CardState.IN_P1_HAND.value
                self.state[hash(self.p2_hand[-1]), column] = CardState.IN_P2_HAND.value
            self.played_card = None

        # Remove the played card from the corresponding hand (drawn cards were
        # appended at the end, so the index is still valid).
        current_hand.pop(action)

        self.turn_number += 1
        if winner != self.turn:
            self.turn = (self.turn + 1) % 2

        self.episode_ended = (len(self.p1_hand) == 0 and len(self.p2_hand) == 0)
        return self.state, reward, self.episode_ended

    def is_playing(self):
        """Return True while at least one player still holds cards."""
        return not self.episode_ended

    def get_winner(self):
        """Return 0 if player 1 leads on points, 1 if player 2 does, 2 on a draw."""
        if self.p1_score > self.p2_score:
            return 0
        elif self.p2_score > self.p1_score:
            return 1
        return 2

# -------------------------
# Training parameters
total_episodes = 5000        # Total episodes
gamma = 0.99                 # Discount factor

# Exploration parameters
epsilon = 1.0                # Exploration rate
epsilon_min = 0.01           # Minimum exploration probability
epsilon_decay = 0.001        # Exponential decay rate for exploration prob
precalc_epsilon_decay = np.exp(-epsilon_decay)

# System parameters
memory_limit = 500           # Memory limit for the agent
saving_rate = 100            # Saving frequency (in episodes)

# Rewards history for plotting improvements
rewards = []

# Create the agent
briscolAI = Agent(gamma, memory_limit)

for episode in range(1, total_episodes+1):
    print(f"Episode: {episode}/{total_episodes} | Epsilon: {epsilon:.4f}")
    game = Briscola()

    while game.is_playing():
        # Remember who is moving: game.turn changes inside step().
        acting_player = game.turn
        # Select state depending on whose turn it is
        current_state = game.get_P1_state() if acting_player == 0 else game.get_P2_state()
        briscolAI.set_state(current_state)
        # Choose action: exploration vs. exploitation
        current_hand = game.p1_hand if acting_player == 0 else game.p2_hand
        if np.random.uniform(0, 1) < epsilon:
            action = np.random.randint(0, len(current_hand))
        else:
            # May be an invalid index when fewer than 3 cards are in hand;
            # step() then returns the -10000 penalty and leaves the game as is.
            action = briscolAI.get_action()
        new_state, reward, episode_ended = game.step(action)
        # step() returns the trick points no matter who captured them. After a
        # play the next mover is the trick winner, so a turn change means the
        # acting player did not take the points (0 stays 0 after a lead).
        if game.turn != acting_player:
            reward = -reward
        # The next state is observed from the acting player's point of view.
        next_state = game.get_P1_state() if acting_player == 0 else game.get_P2_state()
        # Store the state the action was actually taken in, not the agent's
        # previous state (which belongs to the prior move or even prior game).
        briscolAI.save_in_memory(episode, acting_player, current_state, action, reward, next_state, episode_ended)

    # Optionally adjust rewards for the last session in memory
    last_session_id = episode
    winning_player = game.get_winner()
    score_difference = abs(game.p1_score - game.p2_score)
    # Iterate backwards over memory and update rewards for experiences from the current game
    for exp in reversed(briscolAI.memory):
        if exp["game_id"] != last_session_id:
            break
        if winning_player != 2:  # not a draw
            if exp["player"] == winning_player:
                exp["reward"] += score_difference
            else:
                exp["reward"] -= score_difference

    print(f"Winner: {winning_player}, Score p1: {game.p1_score}, Score p2: {game.p2_score}")
    briscolAI.train()
    # Save the model every saving_rate episodes
    if episode % saving_rate == 0:
        briscolAI.action_model.save('model.h5')

    # Exponential decay, floored at epsilon_min
    epsilon = max(epsilon_min, epsilon * precalc_epsilon_decay)

    rewards.append(score_difference)

print("Average score difference over time: " + str(sum(rewards)/len(rewards)))
plt.plot(rewards)
plt.xlabel('Number of games')
plt.ylabel('Score difference')
plt.grid(True)
plt.show()

ModuleNotFoundError: No module named 'pandas'

In [None]:
# -------------------------
# Play the game interactively
# For example, in a Jupyter or Colab environment you might do:
game = Briscola()
# NOTE: despite its name, a True value makes player index 1 (the human, who
# plays p2_hand) lead the first round; False lets BriscolAI (index 0) lead.
ai_turn = np.random.uniform(0, 1) < 0.5
game.reset(ai_turn)

while game.is_playing():
    print("---------------------------------------------")
    print(game)
    previous_played_card = game.played_card
    # Always ask the game who moves: the trick winner leads the next round, so
    # the players do not strictly alternate.
    acting_player = game.turn

    if acting_player == 1:
        print("Player turn:")
        print(f"Select the card index (e.g., 0 to select {game.p2_hand[0]}):")
        try:
            action = int(input())
        except ValueError:
            print("Invalid input, try again.")
            continue
        if action < 0 or action >= len(game.p2_hand):
            print("Invalid action index, try again.")
            continue
        current_played_card = game.p2_hand[action]
        print(f"Player played {current_played_card}\n")
        new_state, reward, episode_ended = game.step(action)
    else:
        print("BriscolAI turn:")
        # NOTE(review): the AI side plays a uniformly random card here; the
        # trained agent is not consulted in this cell.
        action = np.random.randint(0, len(game.p1_hand))
        current_played_card = game.p1_hand[action]
        print(f"BriscolAI played {current_played_card}\n")
        new_state, reward, episode_ended = game.step(action)
    print(f"Game turn: {game.turn_number}")
    if game.turn_number % 2 == 0 and previous_played_card is not None:
        # A round was just closed; game.turn now holds the trick winner.
        taker = "BriscolAI" if game.turn == 0 else "Human"
        if game.turn == acting_player:
            # The second card won the trick.
            print(f"{taker} took {previous_played_card} with {current_played_card}")
        else:
            # The leading card held.
            print(f"{taker} took {current_played_card} with {previous_played_card}")
    # If running in an interactive environment, you might clear output here.
    # For example: from IPython.display import clear_output; clear_output(wait=True)

print(game)
winner = game.get_winner()

if winner == 2:
    print("It's a draw!")
elif winner == 0:
    print("BriscolAI wins!")
else:
    print("You win!")
