# Scopone Scientifico
This notebook will go though the creation of a "Python library" that allows to simulate game of Scopone Scientifico.

In [1]:
%pip install tqdm
%pip install numpy
%pip install pandas

Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.


In [2]:
import random
from typing import List, Callable
import itertools
from tqdm import tqdm

In [3]:
class Card:
    def __init__(self, rank: int, suit: str):
        self.rank = rank
        self.suit = suit

    def __str__(self):
        rank_raster = self.rank

        if rank_raster == 10:
            rank_raster = "King"
        elif rank_raster == 9:
            rank_raster = "Queen"
        elif rank_raster == 8:
            rank_raster = "Jack"

        if self.suit == "bello":
            return f"{self.rank} {self.suit}"
        else:
            return f"{self.rank} di {self.suit}"

In [4]:
class Deck:
    suits = ['picche', 'bello', 'fiori', 'cuori']
    ranks = list(range(1, 11))  # Ranks from 1 to 7, plus 8, 9, and 10 for face cards.

    def __init__(self):
        self.cards = [Card(rank, suit) for suit in self.suits for rank in self.ranks]
        self.shuffle()

    def shuffle(self):
        random.shuffle(self.cards, )

    def deal(self, num_cards: int) -> List[Card]:
        return [self.cards.pop() for _ in range(num_cards)]
    
    def __str__(self):
        result = '#' * 10 + f' Deck {self.__hash__()} ' + '#' * 10 + '\n'
        for card in self.cards:
            result += str(card) + '\n'
        result += '#' * 20 + '\n'
        result += f'{len(self.cards)} cards in the deck.\n'
        for suit in self.suits:
            result += f'{sum(1 for card in self.cards if card.suit == suit)} {suit}\n'
        result += '#' * 20
        return result

    def reset(self):
        self.cards = [Card(rank, suit) for suit in self.suits for rank in self.ranks]
        self.shuffle()



In [5]:
deck = Deck()

print(deck)

########## Deck 272371453 ##########
8 di picche
10 di cuori
9 di picche
3 bello
4 di fiori
9 di cuori
8 bello
4 di cuori
8 di fiori
2 di cuori
5 di cuori
10 di fiori
6 di cuori
7 di cuori
6 bello
2 di picche
1 di picche
3 di picche
1 di fiori
7 di fiori
9 di fiori
6 di fiori
10 di picche
3 di fiori
1 di cuori
2 di fiori
5 bello
6 di picche
9 bello
8 di cuori
2 bello
7 di picche
5 di fiori
4 di picche
3 di cuori
1 bello
7 bello
4 bello
10 bello
5 di picche
####################
40 cards in the deck.
10 picche
10 bello
10 fiori
10 cuori
####################


In [6]:
class Player:
    def __init__(self, side: int):
        if side not in [1, 2]:
            raise ValueError("Side must be 1 or 2.")
        self.side = side
        self.hand = []
        self.captures = []
        self.scopas = 0


    def play_card(self, card_index: int, v= 0) -> Card:
        card = self.hand.pop(card_index)
        if v >= 2: print(f'[PLAYER] Player {self.__hash__()} played {card}.')
        return card

    def capture(self, cards: List[Card], _with: Card):
        for card in cards:
            self.captures.append(card)
        if _with is not None: self.hand.remove(_with)

    def scopa(self):
        self.scopas += 1
    

    def __str__(self):
        return f'[PLAYER] Player {self.__hash__()} for side {self.side} has {len(self.hand)} cards in hand and {len(self.captures)} captures.'
    
    def show_hand(self):
        out = '#' * 10 + f' Player {self.__hash__()} ' + '#' * 10 + '\n'
        for card in self.hand:
            out += str(card) + '\n'
        out += '#' * 20
        return out
    
    def reset(self):
        self.hand = []
        self.captures = []
        self.scopas = 0

In [7]:
import json
from datetime import datetime

class MatchLogger:
    def __init__(self):
        self.data = {
            'match_id': None,
            'start_time': str(datetime.now()),
            'games': [],
            'winner': None,
            'final_scores': []
        }

    def log_turn(self, players: List[Player], ):
        
        for player in players:
            self.data['turns'].append({
                'player': player.__hash__(),
                'hand': [str(card) for card in player.hand],
                'captures': [str(card) for card in player.captures],
                'scopas': player.scopas
            })

    def __str__(self):
        return '\n'.join(self.log)

In [62]:
class ScoponeGame:
    def __init__(self):
        self.deck = Deck()
        self.players = [Player(i) for i in [1,2,1,2]]
        self.table = []
        self.last_capture = None
        self.step_points = [0, 0]
        self.match_points = [0, 0]
        self.game_tick = 0
        self.match_tick = 0
        
    def deal_initial_hands(self):
        self.deck.reset()
        for player in self.players:
            player.reset()
            player.hand = self.deck.deal(10)
        self.gt()
        self.mt()

        
    def gt(self):
        self.game_tick += 1
    
    def mt(self):
        self.match_tick += 1

    def __str__(self):
        return f"Players: {[player.__hash__() for player in self.players]}, Table: {self.table}"
    
    def player_details(self):
        return [str(player) for player in self.players]
    
    def card_in_table(self, card):

        if len(self.table) == 0:
            return False, []
        

        current_table = [self.table[i] for i in range(len(self.table))]
        all_combinations = []
        for i in range(1, len(current_table) + 1):
            all_combinations.extend(list(itertools.combinations(current_table, i)))

        for comb in all_combinations:
            for c in comb:
                if c is List:
                    if sum([cc.rank for cc in c]) == card.rank:
                        return True, [c]
                    
            if c.rank == card.rank:
                return True, [c]
        return False, []

    def describe_status(self) -> str:
        out = '#' * 10 + ' Game Status ' + '#' * 10 + '\n'
        out += 'Table:\n'
        for card in self.table:
            out += str(card) + '\n'
        out += '#' * 20 + '\n'
        for player in self.players:
            out += 'Player ' + str(player.__hash__()) + f' for side {player.side}\n'
            out += 'Hand:\n'
            out += player.show_hand() + '\n'
            out += 'Captured stack:\n'
            out += f'{[str(c) for c in player.captures]}\n'
        out += '#' * 20 + '\n'

        return out
        
    
    def play_card(self, card, player, v=0):
        if v >= 2: print(f'[GAME] Player {player.__hash__()} plays {card}')

        # ACE CASE
        if card.rank == 1:
                self.table.append(card)
                player.capture(self.table, _with=card)
                if v >= 2: 
                    print(f'[GAME] Player {player.__hash__()} captures {[str(c) for c in self.table]} with {card}')
                self.table=[]
                return


        isin, comb = self.card_in_table(card=card)

        if isin:
            self.last_capture = player
            # TODO comb is important as the agent will have to be able to chose the best capture
            

            for c in comb:
                self.table.remove(c)

            comb.append(card)
            player.capture(comb, _with=card)

            if self.table == []:
                player.scopa()
                if v >= 2: print(f'[GAME] Player {player.__hash__()} scopa!')
            
            if v >= 2: 
                print(f'[GAME] Player {player.__hash__()} captures {[str(c) for c in comb]} with {card}')
        else:
            player.play_card(player.hand.index(card), v=v)
            self.table.append(card)
        self.gt()
        self.mt()

    def evaluate_round(self, players: List[Player], v=0) -> List[int]:

        side1_points = 0
        side2_points = 0


        side1 = []
        side2 = []
        for player in players:
            for card in player.captures:
                if player.side == 1:
                    side1.append(card)
                elif player.side == 2: 
                    side2.append(card)
            if player.scopas > 0:
                if player.side == 1:
                    side1_points += player.scopas
                elif player.side == 2:
                    side2_points += player.scopas

        if len(side1) + len(side2) != 40:
            raise ValueError("Not all cards have been captured.")

        # Key evaulation

        if v >= 2: print(f'[EVAL] Side 1: {side1_points} Side 2: {side2_points}. Next up: Sette Bello')
        
        #SetteBello
        for card in side1:
            if card.rank == 7 and card.suit == 'bello':
                side1_points += 1
                break
        if side1_points == 0:
            side2_points += 1


        if v >= 2: print(f'[EVAL] Side 1: {side1_points} Side 2: {side2_points}. Next up: Cards')
        #Cards
        # Only possible tie is 20 cards each and in that case no points are awarded
        if len(side1) > len(side2):
            side1_points += 1
        elif len(side1) < len(side2):
            side2_points += 1


        if v >= 2: print(f'[EVAL] Side 1: {side1_points} Side 2: {side2_points}. Next up: Ori')
        #Ori
        counter = 0
        for card in side1:
            if card.suit == 'bello':
                counter += 1
        if counter > 5:
            side1_points += 1
        elif counter < 5:
            side2_points += 1

        if v >= 2: print(f'[EVAL] Side 1: {side1_points} Side 2: {side2_points}. Next up: Primiera')
        #primiera
        score1 = [0,0,0,0]
        score2= [0,0,0,0]
        for i,suit in enumerate(['bello', 'picche', 'fiori', 'cuori']):
            for card in side1:
                if card.suit == suit and card.rank >= score1[i]:
                    score1[i] = card.rank
            for card in side2:
                if card.suit == suit and card.rank >= score2[i]:
                    score2[i] = card.rank
        if sum(score1) > sum(score2):
            side1_points += 1
        elif sum(score1) < sum(score2):
            side2_points += 1

        if v >= 2: print(f'[EVAL] Side 1: {side1_points} Side 2: {side2_points}. Next up: Napola')

        #Napola

        side1_belli = []
        side2_belli = []

        for card in side1:
            if card.suit == 'bello':
                side1_belli.append(card)
        for card in side2: 
            if card.suit == 'bello':
                side2_belli.append(card)

        
        side1_belli_ranks = sorted([card.rank for card in side1_belli])
        side2_belli_ranks = sorted([card.rank for card in side2_belli])

        def calculate_sequence_points(ranks):
            points = 0
            if all(rank in ranks for rank in [1, 2, 3]):
                points = 3
                for rank in range(4, 11):
                    if rank in ranks:
                        points += 1 
                    else:
                        break
            return points

        side1_points += calculate_sequence_points(side1_belli_ranks)
        side2_points += calculate_sequence_points(side2_belli_ranks)

        if v >= 2: print(f'[EVAL] Final Score - Side 1: {side1_points} Side 2: {side2_points}')

        return [side1_points, side2_points]
    
    
    
    def play_game(self, v = 0):
        i=0
        while [len(player.hand) == 0 for player in self.players] != [True, True, True, True]:
            if v >= 2: print('#' * 20 + f' Turn {i+1} ' + '#' * 20)
            for player in self.players:
                self.play_card(player.hand[random.randint(0, len(player.hand) - 1)], player, v=v)
            if v >= 2: print('#'*48)

            if [len(player.hand) == 0 for player in self.players] == [True, True, True, True]:
                self.last_capture.capture(self.table, _with=None)
                if v >= 2: print(f'[GAME] Player {self.last_capture.__hash__()} captures the table.')
                if v >= 2: print(f'[GAME] {[str(c) for c in self.table]}')
                self.table = []
                if v >= 2: print('[GAME] \n\n\n>>>>>>>>>>>>>>>>Game over!\n\n\n')
                return
            i+=1

    def is_match_over(self, side1_score, side2_score, winning_threshold = 21, v = 0):

        if abs(side1_score-side2_score) == 1 and min(side1_score, side2_score) >= winning_threshold-1:
            if v >= 1: print(f'[MATCH] DEUCE! old threshold: {winning_threshold} new threshold: {max(side1_score, side2_score) + 1}')
            winning_threshold = max(side1_score, side2_score) + 1

        if side1_score >= winning_threshold and side2_score < side1_score - 1:
            if v >= 0: print(f'[MATCH] Side 1 wins with {side1_score} points!')
            return True
        elif side2_score >= winning_threshold and side1_score < side2_score - 1:
            if v >= 0: print(f'[MATCH] Side 2 wins with {side2_score} points!')
            return True

        return False

    def play_match(self, v = 0, winning_threshold = 21):
        if v == 1: print(f'[MATCH] Starting match with winning threshold {winning_threshold}')
        side1_score = []
        side2_score = []
        i = 0
        while not self.is_match_over(sum(side1_score), sum(side2_score), winning_threshold, v=v):
            self.deal_initial_hands()
            self.play_game(v=v)
            if v>= 2: print(self.describe_status())
            scores = self.evaluate_round(self.players, v=v)
            side1_score.append(scores[0])
            side2_score.append(scores[1])
            if v >= 1: print(f'[MATCH] ROUND {i+1} \t|\tSide 1:\t{sum(side1_score)} Side 2:\t{sum(side2_score)}')
            i+=1

            # Shift player to the right this imitates the rotation in the real game
            upper = self.players[1:]
            lower = self.players[:1]
            self.players = upper + lower

            if i > 50:
                raise ValueError("Too many rounds played.")
        if v >= 1: print(f'[MATCH] --------\n[MATCH] RESULTS \t|\tSide 1:\t{sum(side1_score)} Side 2:\t{sum(side2_score)} ')

        
        if sum(side1_score) > sum(side2_score):
            return 1
        else:
            return 2
        
    def reset(self):
        self.deck = Deck()
        self.players = [Player(i) for i in [1,2,1,2]]
        self.table = []
        self.last_capture = None
        self.step_points = [0, 0]
        self.game_tick = 0
        self.match_tick = 0


    
    def initialise_actions(self, player: Player, v = 0):

        actions_array = [0] * 50

        for card in player.hand:

            indx = card.rank + 30 * (card.suit == 'bello') + 20 * (card.suit == 'fiori') + 10 * (card.suit == 'picche') - 1

            if v>=2: print(f'Card {card} has index {indx}')
            
            isin, comb = self.card_in_table(card=card)
            if isin:
                actions_array[indx] = {'type': 'capture', 'card': str(card), 'with': [str(c) for c in comb], 'leaving': len(self.table)-len(comb)}



        return actions_array
        return {
            i: {'type': 'play', 'card': str(player.hand[i])} for i in range(len(player.hand))
        }
    

    def get_player_actions(self, player: Player, v=0):

        
        return self.initialise_actions(player,v=v)

        actions = self.initialise_actions(player)
        
        for card in player.hand:
            isin, comb = self.card_in_table(card=card)
            if isin:
                actions[player.hand.index(card)] = {'type': 'capture', 'with': str(card), 'card': [str(c) for c in comb], 'leaving': len(self.table)-len(comb)}

        return self.initialise_actions(player)
    

    def map_rank(self, c: Card):
        if c.rank == 1:
            return 12
        elif c.rank == 7:
            return 11
        else:
            return c.rank
        

    

    def get_player_state(self, player: Player, v = 0):
        hand = [(card.rank, card.suit) for card in player.hand]
        current_table = [(card.rank, card.suit) for card in self.table]

        captures = []
        for p in self.players:
            if p.side == player.side:
                captures += [(card.rank, card.suit) for card in p.captures]

        {
            'hand': hand,
            'captures': captures,
            'table': current_table,
        }
        
        if player.side == 1:
            deltapoints = self.step_points[1] - self.step_points[0]
        else:
            deltapoints = self.step_points[0] - self.step_points[1]
       
        cardsplayed = 10 - len(player.hand)

        cardsontable = len(self.table)

        hand_eval = []

        for card in player.hand:
            isin, comb = self.card_in_table(card=card)
            if isin:
                w1 = 3
            else:  
                w1 = 1
            rank_importance = (w1 * self.map_rank(card) / 36 ) * (1.0 - (0.01 * self.game_tick))

            if card.suit == 'bello':
                w2 = 1
            else:
                w2 = 1/3

            suit_importance = w2

            hand_eval.append(rank_importance * suit_importance)

        
        hand_eval.extend([0] * (10 - len(hand_eval)))
        
        table_eval = []

        for card in self.table:
            rank_importance = (self.map_rank(card) / 12 ) * (1.0 - (0.01 * self.game_tick))

            if card.suit == 'bello':
                w2 = 1
            else:
                w2 = 1/3

            suit_importance = w2

            table_eval.append(rank_importance * suit_importance)

        table_eval.extend([0] * (10 - len(table_eval)))
    
        

        if v >= 1: print(f'[STATE] Player {player.__hash__()} state: \n {[deltapoints, cardsplayed, cardsontable, self.game_tick, self.match_tick] + hand_eval + table_eval  + [7] + [len(r.hand) for r in self.players] } for length {len([deltapoints, cardsplayed, cardsontable, self.game_tick, self.match_tick] + hand_eval + table_eval)}')


        return [self.game_tick, self.match_tick,self.step_points[0], self.step_points[1] , deltapoints, cardsplayed, cardsontable] + hand_eval + table_eval
    
    def calculate_reward(self, player: Player, card: Card,):
        isin, comb = self.card_in_table(card=card)

        if comb is None:
            raise ValueError('Combination is None. Comb: ' + str(comb))
        

        reward = 0
        if isin:
            #scopa
            if len(self.table) - len(comb) == 0:
                reward += 10
            elif len(self.table) - len(comb) == 1:
                reward -= 5
            #settebello
            
            comb.append(card)

            for c in comb:
                if c.rank == 7 and c.suit == 'bello':
                    reward += 10
            #cards, ori and napola
            for c in comb:
                if c.suit == 'bello':
                    reward += 1 + c.rank*0.5
                else:
                    reward += 0.25 + c.rank*0.1


        return reward
                

    
    def game_step(self, player: Player, action, v= 0):
        if self.is_match_over(self.step_points[0], self.step_points[1], 21):
            raise ValueError('Match is over. No more steps allowed.')
        card = action['card']

        if card is None:
            raise ValueError('No card selected. Original action: ' + str(action))
        if v >= 1: print(f'[GAME] Player {player.__hash__()} plays {card}')
        comp = card.split(' ')
        rank = int(comp[0])
        if len(comp) == 2:
            suit = comp[1]
        else:
            suit = comp[2]

        t = True
        for c in player.hand:
            if c.rank == rank and c.suit == suit:
                card = c
                t = False
                break

        if card not in player.hand or t:
            raise ValueError(f'Card {card} not in player\'s hand.')



        reward = self.calculate_reward(player, card)

        self.play_card(card, player, v=v)

        if sum([len(p.hand) == 0 for p in self.players]) == 4:
            self.last_capture.capture(self.table, _with=None)
            self.table = []
            eval = self.evaluate_round(self.players, v=v)
            self.match_points[0] += eval[0]
            self.match_points[1] += eval[1]
            if v == 1: print(f'[GAME] Game is over! {self.step_points[0]}|{self.step_points[1]} and {self.match_points[0]}|{self.match_points[1]}')

            for p in self.players:
                p.reset()
            self.deal_initial_hands()
            self.game_tick = 0
            
        return reward, self.is_match_over(self.match_points[0], self.match_points[1], 21)

        

        

    
    


In [9]:
game = ScoponeGame()


import time 
start = time.time()
print(game.play_match(v=1))
end = time.time()

print(f'Time elapsed: {end-start} seconds')

[MATCH] Starting match with winning threshold 21
[MATCH] ROUND 1 	|	Side 1:	3 Side 2:	1
[MATCH] ROUND 2 	|	Side 1:	6 Side 2:	2
[MATCH] ROUND 3 	|	Side 1:	6 Side 2:	10
[MATCH] ROUND 4 	|	Side 1:	9 Side 2:	15
[MATCH] ROUND 5 	|	Side 1:	15 Side 2:	17
[MATCH] ROUND 6 	|	Side 1:	17 Side 2:	20
[MATCH] ROUND 7 	|	Side 1:	22 Side 2:	22
[MATCH] ROUND 8 	|	Side 1:	23 Side 2:	29
[MATCH] Side 2 wins with 29 points!
[MATCH] --------
[MATCH] RESULTS 	|	Side 1:	23 Side 2:	29 
2
Time elapsed: 0.002191781997680664 seconds


In [None]:
import numpy as np

class Agent:
    def __init__(self, action_space: int, state_size: int, player: Player, alpha: float = 0.1, gamma: float = 0.9, epsilon: float = 0.9, epsilon_decay: float = 0.99):
        self.action_space = action_space
        self.state_size = state_size
        self.alpha = alpha  # Learning rate
        self.gamma = gamma  # Discount factor
        self.epsilon = epsilon  # Exploration rate
        self.epsilon_decay = epsilon_decay
        self.q_table = {}  # Q-values, dynamically initialized
        self.delegation = player

    def get_state_key(self, state: dict) -> str:
        """Encodes the state into a hashable key for the Q-table."""
        return str(state)  # Replace with a more compact encoding if necessary
    
    def update_action_space(self, action_space: int):
        self.action_space = action_space

    def choose_action(self, state: dict, actionspace) -> int:
        """Chooses an action using epsilon-greedy policy."""
        out = -1
        i=-1
        prev = []
        while actionspace[out] == 0:
            state_key = self.get_state_key(state)
            if random.random() < self.epsilon:
                out = random.randint(0, self.action_space- 1)  # Explore
                #print(f'[EXPLORE] {out}')
            else:
                out = np.argmax(self.q_table.get(state_key, np.zeros(self.action_space))) # Exploit
                #print(f'[EXPLOIT] {out}')
            prev.append(out)
            i+=1
            if i >= 50: raise ValueError(f'Too many corrections. [{actionspace[out] == 0}] Current choice: {out} with {i} corrections over the actionspace {actionspace}\n{prev}')
       
        print(f'[DEBUG] Out: {out}, Actionspace: {actionspace}, self.action_space: {self.action_space} with {i} corrections.')
        print(f'[DEBUG] Exploit')

        return out

    def update(self, state: dict, action: int, reward: float, next_state: dict):
        """Updates the Q-value using the Q-learning update rule."""
        state_key = self.get_state_key(state)
        next_state_key = self.get_state_key(next_state)

        current_q = self.q_table.get(state_key, np.zeros(self.action_space))[action-1]
        max_next_q = max(self.q_table.get(next_state_key, np.zeros(self.action_space)))

        new_q = current_q + self.alpha * (reward + self.gamma * max_next_q - current_q)

        # Save the updated Q-value
        if state_key not in self.q_table:
            self.q_table[state_key] = np.zeros(self.action_space)
        self.q_table[state_key][action] = new_q

    def decay_epsilon(self):
        """Decays the exploration rate."""
        self.epsilon = max(0.01, self.epsilon * self.epsilon_decay)

    def encode_player_state(self, state):
        
        # only bello cards are important
        suit_mapping = {
            'picche': 1,
            'fiori': 1,
            'cuori': 1,
            'bello': 5
        }

        def encode_card(card) -> int:
            rank, suit = card
            return rank * suit_mapping[suit]
        
        encode_hand = [encode_card(card) for card in state['hand']]
        encode_table = [encode_card(card) for card in state['table']]
        encode_captures = [encode_card(card) for card in state['captures']]
        
        return [encode_table, encode_hand, encode_captures]


State space MK1:

Player related information:

Hand captures and taple

In [None]:
import pandas as pd
import os

def train_agent(game: ScoponeGame, agents: List[Agent], epochs: int, v=0):
    for epoch in tqdm(range(epochs)):
        game.reset()
        game.deal_initial_hands()
        done = False
        state = pd.DataFrame()

        for i, agent in enumerate(agents):
            agent.delegation = game.players[i]

        for i, player in enumerate(game.players):
            state[i] = game.get_player_state(player)

        while not done:
            for i, agent in enumerate(agents): 


                possible_actions = game.get_player_actions(game.players[i])
                if sum(possible_actions) == 0:
                    done = True
                    break
                    #raise Exception('[SYSTEM][FATAL] No possible actions.')
                if sum(possible_actions) != 0: action = agent.choose_action(state[i], actionspace= possible_actions)
                #print(f'Player {agent.__hash__()} chose action {action} from possible actions w cap #{len(possible_actions)-1} {game.get_player_actions(game.players[i])} ')\

                agent.update_action_space(len(possible_actions))
                reward, done = game.game_step(game.players[i], possible_actions[action], v = v)
                if done == True:
                    break
                #print(f'Player {agent.__hash__()} received reward {reward} and done is {done}')
                agent.update(state[i], action, reward, game.get_player_state(game.players[i]))
                state[i] = game.get_player_state(game.players[i])

            #print(game.describe_status())


        for agent in agents:
            agent.decay_epsilon()




In [97]:
game = ScoponeGame()

agents = [QLearningAgent(40, 25, player) for player in game.players]

train_agent(game, agents, 10000, v=2)

100%|██████████| 10000/10000 [00:05<00:00, 1995.31it/s]


In [98]:
game = ScoponeGame()

game.deal_initial_hands()

smartone = game.players[0]



for opp in tqdm(game.players):
    if opp == smartone:
        continue

    game.play_card(opp.hand[random.randint(0, len(opp.hand) - 1)], opp, v= 2)


print(game.describe_status())



100%|██████████| 4/4 [00:00<00:00, 21372.25it/s]

[GAME] Player 314244187 plays 9 di picche
[PLAYER] Player 314244187 played 9 di picche.
[GAME] Player 314244130 plays 6 di cuori
[PLAYER] Player 314244130 played 6 di cuori.
[GAME] Player 314244106 plays 2 bello
[PLAYER] Player 314244106 played 2 bello.
########## Game Status ##########
Table:
9 di picche
6 di cuori
2 bello
####################
Player 314267194 for side 1
Hand:
########## Player 314267194 ##########
9 bello
1 di picche
5 bello
5 di picche
6 di fiori
8 bello
5 di fiori
7 di picche
6 di picche
10 di picche
####################
Captured stack:
[]
Player 314244187 for side 2
Hand:
########## Player 314244187 ##########
7 di fiori
2 di picche
9 di cuori
10 bello
1 bello
10 di cuori
9 di fiori
1 di fiori
6 bello
####################
Captured stack:
[]
Player 314244130 for side 1
Hand:
########## Player 314244130 ##########
5 di cuori
3 di picche
8 di cuori
1 di cuori
4 di cuori
2 di cuori
4 bello
3 di fiori
8 di picche
####################
Captured stack:
[]
Player 314244106




In [100]:
agent = agents[0]

current_possible_actions = game.get_player_actions(smartone)

agent.update_action_space(len(current_possible_actions))
print(current_possible_actions[agent.choose_action(game.get_player_state(smartone), actionspace= current_possible_actions)])

[EXPLOIT] 0
[EXPLOIT] 0
[EXPLOIT] 0
[EXPLOIT] 0
[EXPLOIT] 0
[EXPLOIT] 0
[EXPLOIT] 0
[EXPLOIT] 0
[EXPLOIT] 0
[EXPLOIT] 0
[EXPLOIT] 0
[EXPLOIT] 0
[EXPLOIT] 0
[EXPLOIT] 0
[EXPLOIT] 0
[EXPLOIT] 0
[EXPLOIT] 0
[EXPLOIT] 0
[EXPLOIT] 0
[EXPLOIT] 0
[EXPLOIT] 0
[EXPLOIT] 0
[EXPLOIT] 0
[EXPLOIT] 0
[EXPLOIT] 0
[EXPLOIT] 0
[EXPLOIT] 0
[EXPLOIT] 0
[EXPLOIT] 0
[EXPLOIT] 0
[EXPLOIT] 0
[EXPLOIT] 0
[EXPLOIT] 0
[EXPLOIT] 0
[EXPLOIT] 0
[EXPLOIT] 0
[EXPLOIT] 0
[EXPLOIT] 0
[EXPLOIT] 0
[EXPLOIT] 0
[EXPLOIT] 0
[EXPLOIT] 0
[EXPLOIT] 0
[EXPLOIT] 0
[EXPLOIT] 0
[EXPLOIT] 0
[EXPLOIT] 0
[EXPLOIT] 0
[EXPLOIT] 0
[EXPLOIT] 0
[EXPLOIT] 0


ValueError: Too many corrections. [True] Current choice: 0 with 50 corrections over the actionspace [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, {'type': 'capture', 'card': '6 di picche', 'with': ['6 di cuori'], 'leaving': 2}, 0, 0, 0, 0, 0, 0, 0, 0, 0, {'type': 'capture', 'card': '6 di fiori', 'with': ['6 di cuori'], 'leaving': 2}, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, {'type': 'capture', 'card': '9 bello', 'with': ['9 di picche'], 'leaving': 2}, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
[np.int64(0), np.int64(0), np.int64(0), np.int64(0), np.int64(0), np.int64(0), np.int64(0), np.int64(0), np.int64(0), np.int64(0), np.int64(0), np.int64(0), np.int64(0), np.int64(0), np.int64(0), np.int64(0), np.int64(0), np.int64(0), np.int64(0), np.int64(0), np.int64(0), np.int64(0), np.int64(0), np.int64(0), np.int64(0), np.int64(0), np.int64(0), np.int64(0), np.int64(0), np.int64(0), np.int64(0), np.int64(0), np.int64(0), np.int64(0), np.int64(0), np.int64(0), np.int64(0), np.int64(0), np.int64(0), np.int64(0), np.int64(0), np.int64(0), np.int64(0), np.int64(0), np.int64(0), np.int64(0), np.int64(0), np.int64(0), np.int64(0), np.int64(0), np.int64(0)]