In [268]:
from random import randint, random, shuffle
import numpy as np
from time import time, sleep
import pickle
import os

# Card
rappresenta una certa del gioco briscola, gestisce info quali seme e punti associati alla carta

- attributi
    - number (il numero rappresentativo della carta)

- funzioni  
    - getSeed()
    - getValue()
    - getPoints()

In [269]:
class Card:

    def __init__(self, number) -> None:
        self.number = number
    
    def getSeed(self) -> int:
        return self.number // 10
    
    def getValue(self) -> int:
        return self.number % 10 + 1
    
    def getPoints(self) -> int:
        match self.getValue():
            case 1: return 11
            case 3: return 10
            case x if x < 8: return 0
            case x: return x - 6

    

# Deck
rappresenta il mazzo di carte del gioco briscola

- attributi
    - cards (lista delle carte nel mazzo)
    - lastCard (l'ultima carta del mazzo e di conseguenza la briscola)

- funzioni
    - reset() 
    - draw()
    - cardsLeft()
    - getLastCard()

In [270]:
class Deck:
    
    def __init__(self) -> None:
        self.cards = []
        for i in range(0,40):
            self.cards.append(Card(i))
        
        # si randomizza il mazzo
        shuffle(self.cards)
    
    def reset(self) -> None:
        self.__init__()
    
    def draw(self) -> Card:
        return self.cards.pop(0)

    def cardsLeft(self) -> int:
        return len(self.cards)
    
    def getLastCard(self) -> Card:
        return self.cards[-1]

# PlayerStatus
tiene traccia delle info relative allo stato di un giocatore 

- **PlayerStatus**
    - oppoOverThreshold [true, flase]
    - fstCard (seed, value) [0, 1, 2, 3] [0, 1, 2] 
    - sndCard (seed, value) [0, 1, 2, 3] [0, 1, 2] 
    - trdCard (seed, value) [0, 1, 2, 3] [0, 1, 2] 
- attributi
        - hand 
        - oppoOverThreshold 
        - points
- funzioni
    - reset()
    - getStatus()
    - addCard(card)
    - removeCard(handPosition)
    - toggleOppoOverThreshold()
    - victoryPassed()
    - getPoints() 
    - addPoints(points)

In [271]:
class PlayerStatus:
    POINT_SPLITTER = 9

    def __init__(self) -> None:
        self.hand = [] # 3 x Cards
        self.oppoOverThreshold = False #[true, flase]
        self.points = 0
    
    def reset(self) -> None:
        self.__init__()
    
    def getStatus(self) -> tuple:
        handStatus = []
        for card in self.hand:
            handStatus.append(card.getSeed())
            match card.getPoints():
                case 0 : handStatus.append(0) 
                case x if x < self.POINT_SPLITTER: handStatus.append(1)
                case _: handStatus.append(2)

        return (self.oppoOverThreshold,) + tuple(handStatus)
    
    def addCard(self, card) -> None:
        self.hand.append(card)
    
    def removeCard(self, handPosition) -> Card:
        return self.hand.pop(handPosition)

    def toggleOppoOverThreshold(self) -> None:
        self.oppoOverThreshold = True
    
    def victoryPassed(self) -> bool:
        return self.points > 60
    
    def getPoints(self) -> int:
        return self.points
    
    def addPoints(self, points) -> None:
        self.points += points

## Status
- **EnvStatus**
    - briscolaOverThreshold [true, flase]
    - briscolaSeed [0, 1, 2, 3]
    - briscoleOverThreshold [true, false]
    - loadBySeed (denara, spade, bastoni, coppe) [true, false]
    - fstPlay (seed, pointValue) [0, 1, 2, 3] [0, 1, 2] 


- **PlayerStatus**
    - oppoOverThreshold [true, flase]
    - fstCard (seed, value) [0, 1, 2, 3] [0, 1, 2] 
    - sndCard (seed, value) [0, 1, 2, 3] [0, 1, 2] 
    - trdCard (seed, value) [0, 1, 2, 3] [0, 1, 2] 





tuple dimension -> 16 <br>
state dimension -> 2 * 4 * 2 * 2 * 2 * 2 * 2 * 4 * 3 * 2 * 4 * 3 * 4 * 3 * 4 * 3 = 10616832


## Action
- **play** [0, 1, 2]

# Environment
- **costanti**
    - STATUS_DIM
    - ACTION_DIM
    - BONUS_FACTOR
    - WIN_REWARD 
    - VICTORY_THRESHOLD 
    - BRISCOLA_THRESHOLD 
    - BRISCOLE_THRESHOLD 
- **attributi**
    - briscolaOverThreshold [true, flase]
    - briscolaSeed [0, 1, 2, 3]
    - briscoleOverThreshold [true, false]
    - briscoleOut
    - loadBySeed (denara, spade, bastoni, coppe) [true, false]
    - deck 
    - broker
    - slave 
    - players
    - winByPlayer

- **funzioni**
    - envStatus()
    - getStatus(playerIndex) [restituisce lo stato riferito ad un player specifo]
    - reset(currPlayerIndex, nextPlayerIndex) [restituisce lo stato iniziale di entrambi i giocatori]
    - processPlaya(currPlay, nextPlay) [valuta le giocate restituendo il vincitore e il corrispondente reward]
    - updateStatus(currPlay, currPlayerIndex, nextPlay, nextPlayerIndex)
    - step(currPlayerAction, currPlayerIndex, nextPlayerAction, nextPlayerIndex) [restituisce il next state di p1 e p2, i reward di p1 e p2 e done a specificare se la partita è conclusa]
    

In [272]:
class Environment():

    WIN_REWARD = 200
    VICTORY_THRESHOLD = 45
    BRISCOLA_THRESHOLD = 10
    BRISCOLE_THRESHOLD = 7
    BONUS_FACTOR = 1
    STATUS_DIM = (2, 4, 2) + (2, 2, 2, 2) + (4, 3) + (2, 4, 3, 4, 3, 4, 3)
    ACTION_DIM = (3,)

    def __init__(self) -> None:
        # info sullo stato della partita
        self.briscolaOverThreshold = False  # [true, flase]
        self.briscolaSeed = None  # [0, 1, 2, 3]
        self.briscoleOut = 0
        self.briscoleOverThreshold = False # [true, false]
        self.loadBySeed = [False, False, False, False] # (denara, spade, bastoni, coppe) [true, false]
        self.winByPlayer = [0,0,0] # [vittorie p1, vittorie p2, pareggi]

        # collegamenti esterni per la gestione dell'apprendimento della IA secondaria
        self.deck = Deck()
        self.broker = None
        self.slave = None
        self.players = (PlayerStatus(), PlayerStatus())
    
    def envStatus(self) -> tuple:
        return (self.briscolaOverThreshold, self.briscolaSeed, self.briscoleOverThreshold) + tuple(self.loadBySeed)
    
    def getStatus(self, playerIndex) -> tuple:
        return self.envStatus() + self.players[playerIndex].getStatus()
    
    def reset(self, currPlayerIndex, nextPlayerIndex) -> tuple:
        self.players[0].reset()
        self.players[1].reset()
        self.deck.reset()

        # ripristino le info sullo stato della partita
        self.loadBySeed = [False, False, False, False]
        self.briscoleOverThreshold = False

        # aggiorno le info sulla nuova briscola
        self.briscolaSeed = self.deck.getLastCard().getSeed()
        self.briscolaOverThreshold = self.deck.getLastCard().getPoints() >= self.BRISCOLA_THRESHOLD

        # ridistribuisco le carte ai giocatori
        for _ in range(3):
            self.players[nextPlayerIndex].addCard(self.deck.draw())
            self.players[currPlayerIndex].addCard(self.deck.draw())
        
        # il primo a giocare è colui che ha ricevuto le carte
        return (self.getStatus(currPlayerIndex), self.getStatus(nextPlayerIndex)) 
    
    # restituisce la coppia (winnerIndex, reward)
    def processPlays(self, currPlay, nextPlay, currPlayerIndex, nextPlayerIndex) -> tuple:
        totPoints = (currPlay.getPoints() + nextPlay.getPoints()) * self.BONUS_FACTOR
        winnerIndex = nextPlayerIndex

        if currPlay.getSeed() == nextPlay.getSeed():
            if currPlay.getValue() > nextPlay.getValue(): winnerIndex = currPlayerIndex
        elif nextPlay.getSeed() != self.briscolaSeed: winnerIndex = currPlayerIndex
        
        return (winnerIndex, totPoints)
    
    def updateStatus(self, currPlay, currPlayerIndex, nextPlay, nextPlayerIndex) -> None:
        if self.players[nextPlayerIndex].getPoints() > self.VICTORY_THRESHOLD: self.players[currPlayerIndex].toggleOppoOverThreshold()
        if self.players[currPlayerIndex].getPoints() > self.VICTORY_THRESHOLD: self.players[nextPlayerIndex].toggleOppoOverThreshold()

        # aggiorno il contatore delle briscole
        for play in [currPlay, nextPlay]:
            if play.getSeed() == self.briscolaSeed: 
                self.briscoleOut += 1
            if play.getPoints() >= 10: self.loadBySeed[play.getSeed()] = True
        
        if self.briscoleOut > self.briscoleOverThreshold: self.briscoleOverThreshold = True

    def step(self, currPlayerAction, currPlayerIndex, nextPlayerAction, nextPlayerIndex) -> tuple:
        currPlay = self.players[currPlayerIndex].removeCard(currPlayerAction)
        nextPlay = self.players[nextPlayerIndex].removeCard(nextPlayerAction)

        # valuto le giocate e aggiorno i punti
        stepWinner, reward = self.processPlays(currPlay, nextPlay, currPlayerIndex, nextPlayerIndex)
        self.players[stepWinner].addPoints(reward)
        print(stepWinner, reward)

        # genero i reward
        if stepWinner == currPlayerIndex: rewards = [reward, -reward]
        else: rewards = [-reward, reward]

        # aggiorno le info sullo stato
        self.updateStatus(currPlay, currPlayerIndex, nextPlay, nextPlayerIndex)

        # ridistribuisco le carte
        self.players[stepWinner].addCard(self.deck.draw())
        self.players[(stepWinner + 1) % 2].addCard(self.deck.draw())

        # controllo la vittoria o il fine partita
        done = False

        for playerIndex in range(len(self.players)):
            if self.players[playerIndex].victoryPassed():
                done = True
                self.winByPlayer[playerIndex] += 1
                if playerIndex == currPlayerIndex: 
                    rewards[0] += self.WIN_REWARD
                    rewards[1] -= self.WIN_REWARD
                else: 
                    rewards[0] -= self.WIN_REWARD
                    rewards[1] += self.WIN_REWARD
                break
        
        # se nessuno ha vinto e le carte rimaste sono 0 allora è un pareggio 60 a 60
        if self.deck.cardsLeft() == 0 and not done:
            done = True
            self.winByPlayer[2] += 1

        return (self.getStatus(currPlayerIndex), self.getStatus(nextPlayerIndex), rewards[0], rewards[1], done)


# Broker

agisce da intermediario con la comunicazione tra Environment ed IA

- attributi
    - currentPlayer

- funzioni
    - nextPlayer() [restituisce l'indice del giocatore successivo]
    - turnSwap() [alterna il giocatore di turno]
    - reset()
    - step(action) [restituisce la tripla (next_state, rew, done)]

In [273]:
class Broker:

    def __init__(self) -> None:
        self.currentPlayer = 0

    def nextPlayer(self) -> int:
        return (self.currentPlayer + 1) % 2
    
    def turnSwap(self) -> None:
        self.currentPlayer = self.nextPlayer()

# MasterIA

ha il compito di gestire l'apprendimento

- funzioni: 
    - epsGreedy()
    - greedy()
    - learn()
    

# SlaveIA

ha il compito di supportare l'apprendimento

In [274]:
Q = np.zeros((2, 4, 2, 2, 2, 2, 2, 4, 3, 2, 4, 3, 4, 3, 4, 3)+(3,))
print(Q[0][0][0][0][0][0][0][0][0][0][0][0][0][0][0][0])

[0. 0. 0.]


In [276]:
env = Environment()
env.reset(0,1)
env.players[1].addPoints(61)
env.deck.cards = [Card(0), Card(1)]
print(env.players[0].getStatus(), env.players[1].getStatus())
print(env.step(0,1,0,0))
print(env.winByPlayer)

(False, 3, 1, 1, 0, 3, 1) (False, 0, 2, 2, 0, 2, 1)
1 13
((False, 1, False, True, False, False, False, False, 2, 0, 2, 1, 0, 2), (False, 1, False, True, False, False, False, True, 1, 0, 3, 1, 0, 0), 213, -213, True)
[0, 1, 0]
