# Flip 7

In [58]:
import random
import gymnasium as gym
import numpy as np
import typing

## Game Classes

### Card

In [59]:
class Card:
    def __init__(self, val: str):
        self.val = int(val)
        self.label = val

    def __repr__(self) -> str:
        return self.label

### Deck

In [60]:
_cardList = {
    "0": 1,
    "1": 1,
    "2": 2,
    "3": 3,
    "4": 4,
    "5": 5,
    "6": 6,
    "7": 7,
    "8": 8,
    "9": 9,
    "10": 10,
    "11": 11,
    "12": 12,
    # "+2": 1,
    # "+4": 1,
    # "+6": 1,
    # "+8": 1,
    # "+10": 1,
    # "x2": 1,
    # "Freeze": 3,
    # "Flip Three": 3,
    # "Second Chance": 3
}

In [61]:
class Deck:
    def __init__(self):
        self.cards = []

        for c in _cardList:
            for _ in range(_cardList[c]):
                self.cards.append(Card(c))

        self.trash = []

    def shuffle(self) -> None:
        random.shuffle(self.cards)

    def draw(self) -> Card:
        return self.cards.pop() #Last term in list is top card in deck

    def __repr__(self) -> str:
        deckString = "["

        for c in self.cards:
            deckString += c.label + " "

        deckString += "]"
        return deckString

### Player

In [62]:
class Player:
    def __init__(self, deck: Deck):
        self.points = 0
        self.active = True
        self.hand = []
        self.deck = deck

    def calcVal(self) -> int:
        val = 0
        for c in self.hand:
            val += c.val
        
        return val
    
    def turn(self, choice) -> tuple[bool, int]:
        if choice == 0:
            return self.hit()
        
        else:
            return self.stay()
    
    def hit(self) -> tuple[bool, int]:
        card = self.deck.draw()
        if card.label in [c.label for c in self.hand]: #Bust
            self.active = False
            return False, 0

        self.hand.append(card)
        return True, 0

    def stay(self) -> tuple[bool, int]:
        self.active = False
        reward = self.calcVal()
        self.points += reward
        return False, reward
    
    def discardHand(self) -> None:
        self.deck.trash += self.hand
        self.hand = []

In [63]:
d = Deck()
d.shuffle()

print(d)

[7 6 12 11 7 7 5 4 4 11 2 11 9 10 2 5 12 9 8 5 9 12 11 8 4 6 10 12 12 6 11 10 4 10 11 10 3 8 12 5 6 3 10 9 12 8 12 10 9 8 11 7 9 7 12 11 6 5 7 11 9 8 12 8 10 10 9 6 0 7 11 12 11 9 10 3 1 12 8 ]


In [64]:
p = Player(d)
p.hit()

print(p.hand)

[8]


### Opponents

#### Random Choice

In [None]:
class RCOpponent(Player):
    def __init__(self, deck: Deck, risk):
        super().__init__(deck)
        self.risk = risk

    def turn(self) -> tuple[bool, int]:
        if not self.active:
            return True, 0 # Returns True because active status does not change
        
        if self.hand:
            choice = random.uniform(0,1) 
            
        else:
            choice = 0 # No cards in hand, must hit

        if choice < self.risk: #Chooses between hit and stay with given riskiness
            return self.hit()

        else:
            return self.stay()

## Env 

In [None]:
class Flip7(gym.Env):
    def __init__(self):
        self.deck = Deck()
        self.agent = Player(self.deck)

        self.players = 2
        self.activePlayers = self.players

        self.opponents = []

        for _ in range(self.players - 1):
            riskiness = random.randint(4,9)
            self.opponents.append(RCOpponent(self.deck, 0.1*riskiness))

        self.turnOrder = 0
        self.i = [0]

        self.observation_space = gym.spaces.Dict(
            {
                "turnOrder":gym.spaces.Box(1,8, shape=(1,), dtype=int),
                #"hand": gym.spaces.Box(0,2, shape=(21,), dtype = int),
                #"points": gym.spaces.Box(0,485, shape=(1,), dtype=int),
                #"oppHands": gym.spaces.Box(0,2, shape=(21,self.players - 1), dtype = int),
                #"oppPoints": gym.spaces.Box(0,485, shape=(self.players - 1,), dtype=int),
                #"trash": gym.spaces.Box(0,12, shape=(21,), dtype = int)
            }
        )

        self.action_space = gym.spaces.Discrete(2) # 0 -> Hit, 1 -> Stay

    
    def _get_obs(self) -> dict[str, typing.Union[np.ndarray, int]]:
        hand = np.zeros(21)

        for c in self.agent.hand:
            hand[list(_cardList.keys()).index(c.label)] += 1 #Array counting number of each card

        oppHands = np.array([])
        for op in self.opponents:
            opHand = np.zeros(21)
            for c in op.hand:
                opHand[list(_cardList.keys()).index(c.label)] += 1 #Array counting number of each card

            np.append(oppHands, opHand)

        oppPoints = np.array([o.points for o in self.opponents]) #Array of opponent scores

        trash = np.zeros(21)

        for c in self.deck.trash:
            trash[list(_cardList.keys()).index(c.label)] += 1 #Array counting number of each card
        
        return {"turnOrder": np.array([self.turnOrder])}#,"hand": hand, "points": np.array([self.agent.points]), "oppHands": oppHands, "oppPoints": oppPoints, "trash": trash}
    
    
    def _get_info(self) -> dict[str, np.ndarray]:
        return {
            "Current": [c.label for c in self.agent.hand]
        }
    
    
    def opponentRound(self): # Performs opponent actions between steps
        print("a")
        for opp in self.opponents[self.turnOrder - 1:]: # Opponents after agent in order
            #print(f"Opp time {self.i[0]}: ", opp.hand)
            self.i[0] += 1
            activeStatus, _ = opp.turn()

            if not activeStatus:
                self.activePlayers -= 1

        for opp in self.opponents[:self.turnOrder - 1]: # Opponents before agent in order
            #print(f"Opp time {self.i[0]}: ", opp.hand)
            self.i[0] += 1
            activeStatus, _ = opp.turn()

            if not activeStatus:
                self.activePlayers -= 1
    

    def reset(self, seed: typing.Optional[int] = None, options: typing.Optional[dict] = None) -> tuple[dict[str, typing.Union[np.ndarray, int]], dict[str, np.ndarray]]: 
        self.deck = Deck()
        self.agent = Player(self.deck)

        self.i =[0]

        self.players = 2
        self.activePlayers = self.players

        self.opponents = []
        for _ in range(self.players - 1):
            riskiness = random.randint(4,9)
            self.opponents.append(RCOpponent(self.deck, 0.1*riskiness))

        self.turnOrder = random.randint(1, self.players) # 1 -> agent goes first, 2 -> agent goes second, ...

        # Players that are earlier in turnOrder than agent take turn before first step
        for order in range(1,self.turnOrder):
            turnPlayer = self.opponents[-order]

            activeStatus, _ = turnPlayer.turn()

            if not activeStatus:
                self.activePlayers -= 1

        observation = self._get_obs()
        info = self._get_info()

        return observation, info
    
    
    def step(self, action):
        if not self.agent.hand or action == 0:
            activeStatus, reward = self.agent.hit()
        
        else:
            activeStatus, reward = self.agent.stay()
            self.activePlayers -= 1
            
        if not activeStatus:
            self.activePlayers -= 1

        if self.agent.active:
            self.opponentRound()

        else:
            while self.activePlayers:
                self.opponentRound()

        observation = self._get_obs()
        info = self._get_info()
        print(self.activePlayers)
        terminated = (self.activePlayers == 0)
        truncated = False

        return observation, reward, terminated, truncated, info

### Register Env

In [153]:
gym.register(
    id = "barrys_zone/Flip7-v0",
    entry_point=Flip7,
)

In [81]:
#gym.pprint_registry()

## Train Agent

In [154]:
env = gym.make('barrys_zone/Flip7-v0')

In [90]:
env.action_space

Discrete(2)

In [91]:
env.observation_space

Dict('hand': Box(0, 2, (21,), int64), 'oppHands': Box(0, 2, (21, 1), int64), 'oppPoints': Box(0, 485, (1,), int64), 'points': Box(0, 485, (1,), int64), 'trash': Box(0, 12, (21,), int64), 'turnOrder': Box(1, 8, (1,), int64))

Play an episode of the environment using random actions

In [162]:
obs, _ = env.reset()

terminate = False
truncate = False

while not (terminate or truncate):
    action = env.action_space.sample()
    obs, reward, terminate, truncate, info = env.step(action)


a
Opp time: 0 [12]
0.9460888584897993
1
0
