# Flip 7

In [None]:
import random
import gymnasium as gym
import numpy as np
import typing

## Game Classes

### Card

In [2]:
class Card:
    """A single card, described by the text printed on it.

    Attributes:
        label: the text passed to the constructor, unchanged.
        val: ``label`` parsed as an integer.
    """

    def __init__(self, val: str):
        # Keep the original text for display and the parsed number for
        # arithmetic; ``int`` raises ValueError for non-numeric text.
        self.label = val
        self.val = int(val)

    def __repr__(self) -> str:
        """Show the card as its label."""
        return self.label

### Deck

In [3]:
# Number of copies of each card label in a full deck, keyed by label.
# Number card N comes in N copies, except "0", which has a single copy.
# Key order ("0" .. "12") matters: code that indexes cards by position in
# this mapping relies on it.
_cardList = {str(rank): max(rank, 1) for rank in range(13)}

# Special cards that are not part of the deck yet (label: copies):
#   "+2": 1, "+4": 1, "+6": 1, "+8": 1, "+10": 1,
#   "x2": 1,
#   "Freeze": 3, "Flip Three": 3, "Second Chance": 3

In [4]:
class Deck:
    def __init__(self):
        self.cards = []

        for c in _cardList:
            for _ in range(_cardList[c]):
                self.cards.append(Card(c))

    def shuffle(self) -> None:
        random.shuffle(self.cards)

    def draw(self) -> Card:
        return self.cards.pop() #Last term in list is top card in deck


    def __repr__(self) -> str:
        deckString = ""

        for c in self.cards:
            deckString += c.label + " "
        return deckString

### Player

In [None]:
class Player:
    """One participant's state: banked points, current hand, activity flag."""

    def __init__(self):
        self.points = 0
        self.active = True
        self.hand = []

    def calcVal(self) -> int:
        """Return the sum of ``val`` over every card in the hand."""
        return sum(held.val for held in self.hand)

    def hit(self, card) -> tuple[bool, int]:
        """Take ``card`` into the hand unless its label is already held.

        Returns:
            ``(True, 0)`` when the card was added, or ``(False, 0)`` when
            the label was a duplicate; in that case the player becomes
            inactive and the card is not added.
        """
        already_held = card.label in (held.label for held in self.hand)
        if not already_held:
            self.hand.append(card)
            return True, 0

        # Bust
        self.active = False
        return False, 0

    def stay(self) -> int:
        """Stop playing and bank the hand's value.

        Returns:
            The value added to ``points``.
        """
        self.active = False
        banked = self.calcVal()
        self.points += banked
        return banked

In [6]:
# Quick manual check: build a deck, shuffle it in place, and print the
# resulting order. Deck.__repr__ lists the labels separated by spaces,
# with the top card (the next one drawn) last.
d = Deck()
d.shuffle()

print(d)

10 5 10 9 5 4 12 11 10 7 1 3 10 10 7 11 8 12 7 10 8 8 9 9 6 11 6 5 0 10 11 12 7 5 2 6 9 9 12 11 7 12 9 11 12 9 12 3 11 8 12 7 9 6 12 8 7 10 11 12 11 11 10 4 8 11 3 8 4 12 6 12 10 6 2 8 4 5 9 


## Env

In [None]:
class Flip7(gym.Env):
    """Gymnasium environment in which an agent plays one round of Flip 7.

    Each step the agent either hits (draws the top card of the deck) or
    stays (banks the value of its hand).  Drawing a card whose label is
    already in the hand is a bust: the agent becomes inactive and earns
    nothing.  The episode ends as soon as the agent is inactive.

    Actions (``Discrete(2)``):
        0 -> hit, 1 -> stay.

    Observation (``Dict``):
        "hand":      card counts of the agent's hand, shape (21,).
        "points":    the agent's banked points, shape (1,).
        "oppHands":  card counts per opponent, shape (21, players - 1).
        "oppPoints": banked points per opponent, shape (players - 1,).
        "Trash":     card counts of the discard pile, shape (21,).

    Count vectors are indexed by the position of a card's label in
    ``_cardList``.
    """

    # Width of every card-count vector in the observation.
    # NOTE(review): _cardList currently holds 13 labels; with its
    # commented-out entries enabled it would hold 22, one more than fits
    # here -- confirm the intended width before enabling them.
    _N_SLOTS = 21

    def __init__(self):
        super().__init__()

        self._start_round()

        n_opp = self.players - 1
        self.observation_space = gym.spaces.Dict(
            {
                "hand": gym.spaces.Box(0, 2, shape=(self._N_SLOTS,), dtype=int),
                "points": gym.spaces.Box(0, 485, shape=(1,), dtype=int),
                "oppHands": gym.spaces.Box(
                    0, 2, shape=(self._N_SLOTS, n_opp), dtype=int
                ),
                # One score per opponent, bounded like the agent's own
                # score (this was previously declared as a 21 x n binary
                # grid, which cannot hold scores).
                "oppPoints": gym.spaces.Box(0, 485, shape=(n_opp,), dtype=int),
                "Trash": gym.spaces.Box(0, 12, shape=(self._N_SLOTS,), dtype=int),
            }
        )

        self.action_space = gym.spaces.Discrete(2)  # 0 -> Hit, 1 -> Stay

    def _start_round(self) -> None:
        """Create fresh players, a freshly shuffled deck and an empty trash."""
        self.players = 2
        self.activePlayers = self.players

        self.agent = Player()
        self.opponents = [Player() for _ in range(self.players - 1)]

        self.deck = Deck()
        # Shuffle with the environment's own generator so that
        # reset(seed=...) yields a reproducible card order.
        self.np_random.shuffle(self.deck.cards)
        self.trash = np.array([])

    def _get_obs(self) -> dict[str, np.ndarray]:
        """Build the observation dict described in the class docstring."""
        # dict views have no .index(), so map label -> position explicitly.
        slot_of = {label: slot for slot, label in enumerate(_cardList)}

        def count_cards(cards) -> np.ndarray:
            counts = np.zeros(self._N_SLOTS, dtype=int)
            for card in cards:
                counts[slot_of[card.label]] += 1
            return counts

        # One column per opponent, matching the (21, players - 1) space.
        opp_hands = np.zeros((self._N_SLOTS, len(self.opponents)), dtype=int)
        for col, opponent in enumerate(self.opponents):
            opp_hands[:, col] = count_cards(opponent.hand)

        return {
            "hand": count_cards(self.agent.hand),
            "points": np.array([self.agent.points], dtype=int),
            "oppHands": opp_hands,
            "oppPoints": np.array([o.points for o in self.opponents], dtype=int),
            "Trash": count_cards(self.trash),
        }

    def _get_info(self) -> dict[str, list[str]]:
        """Return debugging info: the labels currently in the agent's hand."""
        return {
            "Current": [c.label for c in self.agent.hand]
        }

    def reset(
        self,
        *,
        seed: typing.Optional[int] = None,
        options: typing.Optional[dict] = None,
    ) -> tuple[dict[str, np.ndarray], dict[str, list[str]]]:
        """Start a new round and return ``(observation, info)``.

        Args:
            seed: optional seed for the environment's random generator;
                the same seed gives the same deck order.
            options: accepted for Gymnasium API compatibility; unused.
        """
        # Must run first so the shuffle below uses the requested seed.
        super().reset(seed=seed)

        self._start_round()

        observation = self._get_obs()
        info = self._get_info()

        return observation, info

    def step(self, action):
        """Apply ``action`` (0 -> hit, 1 -> stay) for the agent.

        Returns:
            ``(observation, reward, terminated, truncated, info)``.  The
            reward is the banked hand value on stay and 0 otherwise.
            ``terminated`` is True once the agent has stayed or busted;
            ``truncated`` is always False.
        """
        reward = 0

        # Ignore actions after the round is over, so that a stray extra
        # step cannot bank the same hand twice.
        if self.agent.active:
            if action == 1:
                reward = self.agent.stay()
            else:
                _, reward = self.agent.hit(self.deck.draw())

            # Both staying and busting take the agent out of the round.
            if not self.agent.active:
                self.activePlayers -= 1

        observation = self._get_obs()
        info = self._get_info()
        # NOTE(review): opponents take no turns in this class, so waiting
        # for activePlayers to reach 0 would never end the episode; the
        # round is treated as over once the agent is out.
        terminated = not self.agent.active
        truncated = False

        return observation, reward, terminated, truncated, info