# Flip 7

In [None]:
import random
import gymnasium as gym
import numpy as np
import typing

## Game Classes

### Card

In [2]:
class Card:
    """A single card, identified by the label printed on it.

    Attributes:
        label: the string handed to the constructor, kept unchanged.
        val: the integer obtained from ``int(label)``.
    """

    def __init__(self, val: str):
        """Keep *val* as the label and store its integer conversion.

        Any exception raised by ``int(val)`` propagates to the caller.
        """
        self.label = val
        self.val = int(val)

    def __repr__(self) -> str:
        """Return the card's label."""
        return self.label

### Deck

In [3]:
# Deck composition: maps each card label to the number of copies in the deck.
# "0" is a single card; every other number card n comes in n copies.
_cardList = {"0": 1, **{str(n): n for n in range(1, 13)}}

# Special cards, currently left out of the deck:
#     "+2": 1, "+4": 1, "+6": 1, "+8": 1, "+10": 1,
#     "x2": 1,
#     "Freeze": 3, "Flip Three": 3, "Second Chance": 3

In [24]:
class Deck:
    """Draw pile plus discard pile shared by all players.

    Attributes:
        cards: list of Card objects; the last element is the top of the deck.
        trash: NumPy object array holding the discarded cards.
    """

    def __init__(self):
        """Build one Card per copy listed in ``_cardList`` (unshuffled)."""
        # A plain list is used because draw() needs pop(), which NumPy arrays
        # do not provide, and because repeated np.append copies the whole
        # array on every insertion.
        self.cards = [
            Card(label)
            for label, copies in _cardList.items()
            for _ in range(copies)
        ]

        # Kept as an array so it can be combined with the players' hands
        # through np.concatenate.
        self.trash = np.array([], dtype=object)

    def shuffle(self) -> None:
        """Shuffle the draw pile in place."""
        random.shuffle(self.cards)

    def draw(self) -> Card:
        """Remove and return the top card of the deck.

        Raises:
            IndexError: if the draw pile is empty.
        """
        if not self.cards:
            raise IndexError("cannot draw from an empty deck")
        return self.cards.pop()  # Last term in list is top card in deck

    def __repr__(self) -> str:
        """Return the labels from bottom to top, e.g. ``[3 12 7 ]``."""
        # Every label is followed by a single space, including the last one.
        return "[" + "".join(f"{c.label} " for c in self.cards) + "]"

### Player

In [None]:
class Player:
    """A participant in a round: a hand of cards, a score and an active flag.

    Attributes:
        points: total score banked through stay().
        active: False once the player has stayed or busted.
        hand: NumPy object array of the cards currently held.
        deck: the deck this player draws from and discards to.
    """

    def __init__(self, deck: "Deck"):
        """Create an active player with no points and an empty hand."""
        self.points = 0
        self.active = True
        self.hand = np.array([], dtype=object)
        self.deck = deck

    def calcVal(self) -> int:
        """Return the sum of the values of the cards in hand (0 if empty)."""
        return sum(c.val for c in self.hand)

    def hit(self) -> tuple[bool, int]:
        """Draw one card from the deck.

        Returns:
            ``(True, 0)`` when the card was added to the hand, or
            ``(False, 0)`` when its label was already held (bust); a bust
            also marks the player inactive.
        """
        card = self.deck.draw()
        if any(card.label == held.label for held in self.hand):  # Bust
            # NOTE(review): the duplicate card is neither kept in the hand nor
            # moved to the deck's trash, so it leaves the game here - confirm
            # that this is intended.
            self.active = False
            return False, 0

        # np.append returns a new array, so the result has to be stored back.
        self.hand = np.append(self.hand, card)
        return True, 0

    def stay(self) -> int:
        """End the player's round and bank the value of the hand.

        Returns:
            The number of points added to ``points``.
        """
        self.active = False
        reward = self.calcVal()
        self.points += reward
        return reward

    def discardHand(self) -> None:
        """Move every card in hand to the deck's trash, leaving the hand empty."""
        self.deck.trash = np.concatenate((self.deck.trash, self.hand))
        # Empty the hand so the same cards cannot be discarded twice.
        self.hand = np.array([], dtype=object)


In [26]:
# Demo cell: build a deck, shuffle it in place and print the resulting order
# (Deck.__repr__ lists the labels with the top card last).
d = Deck()
d.shuffle()

print(d)

[12 6 11 5 11 7 11 12 4 2 12 8 6 3 7 8 10 1 7 4 6 2 11 11 6 0 11 10 9 7 12 8 4 9 9 5 11 7 8 9 6 8 9 9 12 12 12 11 7 6 12 3 11 10 9 8 8 5 4 12 12 10 7 9 10 10 8 5 11 12 3 12 11 10 10 10 9 10 5 ]


### Opponents

#### Random Choice

In [None]:
class RCOpponent(Player):
    """Opponent that picks between hit and stay at random.

    Attributes:
        risk: threshold compared against a roll in [0, 1]; the opponent hits
            when the roll falls below it.
    """

    def __init__(self, deck: Deck, risk):
        """Create the opponent on *deck* with the given *risk* threshold."""
        super().__init__(deck)
        self.risk = risk

    def turn(self):
        """Play one turn: hit when the roll is below ``risk``, otherwise stay."""
        # With an empty hand the roll is fixed at 0, so no random number is
        # drawn and the opponent hits whenever risk is positive.
        roll = random.uniform(0,1) if len(self.hand) != 0 else 0

        if roll < self.risk:
            self.hit()
            return

        self.stay()

## Env 

In [None]:
class Flip7(gym.Env):
    """Gymnasium environment for a single round of a number-cards-only Flip 7.

    The agent shares one deck with ``players - 1`` RCOpponent instances.
    Actions: 0 -> hit, 1 -> stay. Hitting yields reward 0; staying yields the
    value of the agent's hand. The episode terminates once every player has
    stayed or busted.

    Observation (dict of integer arrays):
        turnOrder: shape (1,), the value drawn for ``self.turnOrder``.
        hand: shape (21,), count of each card label in the agent's hand.
        points: shape (1,), the agent's banked points.
        oppHands: shape (21, players - 1), one count column per opponent.
        oppPoints: shape (players - 1,), each opponent's banked points.
        trash: shape (21,), count of each card label in the deck's trash.
    """

    # Length of every per-label count vector in the observation.
    # NOTE(review): _cardList currently holds 13 labels; its commented-out
    # entries would bring it to 22, one more than this length - confirm the
    # intended size before enabling them.
    _numCardTypes = 21

    def __init__(self):
        """Set up the first round and declare the observation/action spaces."""
        super().__init__()

        self.players = 2
        # Position of each label inside the count vectors (dict order).
        self._labelIndex = {label: i for i, label in enumerate(_cardList)}
        self._newRound()

        nOpponents = self.players - 1
        self.observation_space = gym.spaces.Dict(
            {
                "turnOrder": gym.spaces.Box(0, 7, shape=(1,), dtype=int),
                "hand": gym.spaces.Box(0, 2, shape=(self._numCardTypes,), dtype=int),
                "points": gym.spaces.Box(0, 485, shape=(1,), dtype=int),
                "oppHands": gym.spaces.Box(
                    0, 2, shape=(self._numCardTypes, nOpponents), dtype=int
                ),
                # One score per opponent, bounded like the agent's own points
                # (a binary space cannot represent a score).
                "oppPoints": gym.spaces.Box(0, 485, shape=(nOpponents,), dtype=int),
                "trash": gym.spaces.Box(0, 12, shape=(self._numCardTypes,), dtype=int),
            }
        )

        self.action_space = gym.spaces.Discrete(2)  # 0 -> Hit, 1 -> Stay

    def _newRound(self) -> None:
        """Create a shuffled deck and fresh players that all draw from it."""
        # The deck must exist before the players, which keep a reference to it.
        self.deck = Deck()
        self.deck.shuffle()

        self.agent = Player(self.deck)
        self.opponents = [
            RCOpponent(self.deck, 0.1 * random.randint(4, 9))
            for _ in range(self.players - 1)
        ]

        self.activePlayers = self.players
        # NOTE(review): turnOrder is only reported in the observation; it does
        # not influence the order in which step() lets the players act.
        self.turnOrder = random.randint(0, self.players - 1)

    def _countCards(self, cards) -> np.ndarray:
        """Return a vector counting how often each card label occurs in *cards*."""
        counts = np.zeros(self._numCardTypes, dtype=int)
        for c in cards:
            counts[self._labelIndex[c.label]] += 1
        return counts

    def _get_obs(self) -> dict[str, np.ndarray]:
        """Build the observation dict described in the class docstring."""
        oppHands = np.zeros((self._numCardTypes, len(self.opponents)), dtype=int)
        for col, op in enumerate(self.opponents):
            oppHands[:, col] = self._countCards(op.hand)

        return {
            "turnOrder": np.array([self.turnOrder], dtype=int),
            "hand": self._countCards(self.agent.hand),
            "points": np.array([self.agent.points], dtype=int),
            "oppHands": oppHands,
            "oppPoints": np.array([op.points for op in self.opponents], dtype=int),
            "trash": self._countCards(self.deck.trash),
        }

    def _get_info(self) -> dict[str, list[str]]:
        """Return the labels of the cards in the agent's hand under "Current"."""
        return {
            "Current": [c.label for c in self.agent.hand]
        }

    def reset(self, *, seed=None, options=None) -> tuple[dict[str, np.ndarray], dict[str, list[str]]]:
        """Start a new round and return ``(observation, info)``.

        Args:
            seed: optional seed; forwarded to ``gym.Env.reset`` and also used
                to seed the ``random`` module, which drives the shuffle, the
                opponents and the turn order.
            options: accepted for Gymnasium compatibility; unused.
        """
        super().reset(seed=seed)
        if seed is not None:
            random.seed(seed)

        self._newRound()

        return self._get_obs(), self._get_info()

    def _playOpponents(self) -> None:
        """Let the active opponents act after the agent's move.

        Each active opponent takes one turn. If the agent is out of the round,
        the opponents keep taking turns until all of them are inactive, so the
        episode can terminate.
        """
        while True:
            for op in self.opponents:
                if op.active:
                    op.turn()

            if self.agent.active or not any(op.active for op in self.opponents):
                return

    def step(self, action):
        """Apply the agent's *action*, then let the opponents play.

        Args:
            action: 1 to stay; any other value hits.

        Returns:
            ``(observation, reward, terminated, truncated, info)``; truncated
            is always False.
        """
        if not self.agent.active:
            # The agent already stayed or busted; do not bank its hand again.
            reward = 0
        elif action == 1:
            reward = self.agent.stay()
        else:
            _, reward = self.agent.hit()

        self._playOpponents()

        # Recount instead of decrementing so that busts are included as well.
        self.activePlayers = int(self.agent.active) + sum(
            1 for op in self.opponents if op.active
        )

        observation = self._get_obs()
        info = self._get_info()
        terminated = (self.activePlayers == 0)
        truncated = False

        return observation, reward, terminated, truncated, info