In [4]:
import random
import json
from copy import deepcopy
from tqdm import tqdm

In [5]:
# Number of cards of each color in the deck (8 + 4 * 7 = 36 cards in total).
color_counts = {"Green": 8, "Red": 7, "Blue": 7, "Yellow": 7, "Purple": 7}
# Color names, in the same order as the keys above (dicts keep insertion order).
colors = list(color_counts)

def init_deck():
    """Build a freshly shuffled deck.

    The deck holds ``color_counts[color]`` copies of every entry of
    ``colors`` (grouped by color, in ``colors`` order) and is then shuffled
    in place with ``random.shuffle``.

    Returns:
        list: the shuffled list of color names.
    """
    cards = [name for name in colors for _ in range(color_counts[name])]
    random.shuffle(cards)
    return cards

def deal_hands(deck, hand_size=14):
    """Deal two hands from the end of ``deck``.

    Cards are removed from ``deck`` in place with ``pop()``, so the first
    hand receives the last ``hand_size`` cards (last card first) and the
    second hand receives the ``hand_size`` cards before those.

    Args:
        deck: mutable list of cards; it is shortened by ``2 * hand_size``.
        hand_size: number of cards per hand. Defaults to 14, the value that
            used to be hard-coded.

    Returns:
        tuple: ``(hand1, hand2)`` as two lists.

    Raises:
        IndexError: if ``deck`` holds fewer than ``2 * hand_size`` cards.
            The check runs before any card is removed, so a failed deal
            leaves ``deck`` untouched instead of half-consumed.
    """
    needed = 2 * hand_size
    if len(deck) < needed:
        raise IndexError(
            f"deck has {len(deck)} cards but {needed} are needed to deal two hands"
        )
    hand1 = [deck.pop() for _ in range(hand_size)]
    hand2 = [deck.pop() for _ in range(hand_size)]
    return hand1, hand2

def get_hang1_range(board):
    """Return the lowest and highest occupied column of row 1.

    ``board`` is iterated for keys of the form ``"<row>-<col>"``; only keys
    that start with ``"1-"`` are considered.

    Returns:
        tuple: ``(min_col, max_col)`` as ints, or ``(None, None)`` when row 1
        holds no card.
    """
    lowest = highest = None
    for key in board:
        if not key.startswith('1-'):
            continue
        col = int(key.split('-')[1])
        if lowest is None:
            # First row-1 card seen: it is both the minimum and the maximum.
            lowest = highest = col
        elif col < lowest:
            lowest = col
        elif col > highest:
            highest = col
    return lowest, highest

def legal_moves(board, hand):
    """List every ``(position, color)`` move available to ``hand`` on ``board``.

    Placement rules encoded here:
      * Row 1 empty: the only position is ``"1-7"``.
      * Row 1 holds at most 7 cards within columns 1..13 and grows by one
        cell to the left of its lowest column or to the right of its highest.
      * Rows 2..7: cell ``"r-c"`` is playable when it is empty, both
        ``"(r-1)-c"`` and ``"(r-1)-(c+1)"`` are occupied, and the played
        color equals the color of at least one of those two cards.

    Args:
        board: dict mapping ``"row-col"`` strings to color names.
        hand: iterable of color names held by the player (duplicates allowed).

    Returns:
        list: ``(position, color)`` tuples; empty when ``hand`` is empty or
        nothing fits. For a given position the colors appear in sorted order,
        so the result is deterministic for a given board and hand.
    """
    # Sorted instead of raw set order: the iteration order of a set of strings
    # changes between interpreter runs (hash randomization), which made the
    # move list -- and every random.choice() over it -- impossible to repeat.
    hand_colors = sorted(set(hand))

    # Columns occupied in row 1, gathered in a single scan of the board.
    row1_cols = [int(pos.split('-')[1]) for pos in board if pos.startswith('1-')]
    if not row1_cols:
        return [('1-7', color) for color in hand_colors]

    moves = []
    min1, max1 = min(row1_cols), max(row1_cols)
    if len(row1_cols) < 7:
        if min1 > 1:
            moves.extend((f"1-{min1-1}", color) for color in hand_colors)
        if max1 < 13:
            moves.extend((f"1-{max1+1}", color) for color in hand_colors)

    for row in range(2, 8):  # rows 2 through 7
        for col in range(1, 14 - row + 1):
            pos = f"{row}-{col}"
            below_left = f"{row-1}-{col}"
            below_right = f"{row-1}-{col+1}"
            if pos in board or below_left not in board or below_right not in board:
                continue
            supports = (board[below_left], board[below_right])
            moves.extend((pos, color) for color in hand_colors if color in supports)
    return moves

def is_game_end(hands, board):
    """Tell whether neither of the two players can move any more.

    A player can move when their hand is non-empty and ``legal_moves``
    returns at least one move for that hand on ``board``.

    Args:
        hands: sequence whose items 0 and 1 are the two players' hands.
        board: the board passed through to ``legal_moves``.

    Returns:
        bool: ``False`` as soon as player 0 or player 1 can move, ``True``
        otherwise.
    """
    return not any(
        hands[player] and legal_moves(board, hands[player]) for player in (0, 1)
    )

def simulate_one_game_collect_samples():
    """Play one random two-player game and record a sample per legal move.

    A fresh deck is shuffled and dealt, then the players alternate random
    moves. On every turn one sample is stored for each legal move, with
    ``label`` set to 1 for the move actually played and 0 for the others.
    A player who cannot move is skipped while the opponent still can; the
    game ends when neither player can move (the ``is_game_end`` condition).

    Returns:
        list: dicts with the keys ``"turn"``, ``"board"``, ``"hand"``,
        ``"pos"``, ``"color"``, ``"label"`` and ``"winner"``. ``"board"`` and
        ``"hand"`` are snapshots taken before the move; ``"winner"`` is the
        index of the player left with fewer cards (picked at random on a tie)
        and is identical for every sample of the game.
    """
    deck = init_deck()
    hand0, hand1 = deal_hands(deck)
    hands = [hand0, hand1]
    board = {}
    turn = 0
    samples = []

    move_count = 0
    while True:
        legal = legal_moves(board, hands[turn])
        if not legal or not hands[turn]:
            # The player to move is stuck (no legal move or no cards left).
            # That only ends the game when the opponent is stuck too;
            # otherwise the opponent keeps playing. Stopping here
            # unconditionally cut games short and let the final card counts
            # depend on whose turn it was rather than on how the game went.
            if is_game_end(hands, board):
                break
            turn = 1 - turn
            continue
        # The first move is always placed on 1-7.
        if move_count == 0:
            move = ('1-7', random.choice(hands[turn]))
        elif move_count == 1:
            # NOTE(review): with only "1-7" on the board, "1-9" is never a
            # legal position, so this always selects "1-8" and never "1-6".
            # Confirm that this opening bias is intended.
            legal2 = [m for m in legal if m[0] in ('1-8', '1-9')]
            move = random.choice(legal2) if legal2 else random.choice(legal)
        else:
            move = random.choice(legal)
        pos_played, color_played = move
        # Record a sample for every legal move of this turn. Shallow copies
        # are enough for the snapshots: both containers hold only strings.
        for (pos, color) in legal:
            label = int(pos == pos_played and color == color_played)
            sample = {
                "turn": turn,
                "board": dict(board),
                "hand": list(hands[turn]),
                "pos": pos,
                "color": color,
                "label": label
            }
            samples.append(sample)
        # Apply the move to the board and to the player's hand.
        board[pos_played] = color_played
        hands[turn].remove(color_played)
        turn = 1 - turn
        move_count += 1

    # Whoever holds fewer cards wins (random pick on a tie).
    score0 = len(hands[0])
    score1 = len(hands[1])
    winner = 0 if score0 < score1 else 1 if score1 < score0 else random.choice([0, 1])
    for s in samples:
        s['winner'] = winner
    return samples

In [7]:
N_GAMES = 20000
# Fixed seed so the `random` module starts from the same state on every run.
SEED = 42
# The file name is derived from N_GAMES so the two cannot drift apart.
OUTPUT_PATH = f"penguin_party_{N_GAMES}_dataset.jsonl"

random.seed(SEED)

# Write the samples of every simulated game as JSON Lines (one object per line).
with open(OUTPUT_PATH, "w", encoding="utf-8") as f:
    for i in tqdm(range(N_GAMES)):
        samples = simulate_one_game_collect_samples()
        for sample in samples:
            f.write(json.dumps(sample, ensure_ascii=False) + "\n")

100%|██████████| 20000/20000 [00:49<00:00, 406.22it/s]
